{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# ML with Tensorflow\n",
    "\n",
    "Tensorflow the name's enough!\n",
    "Let's dig in!\n",
    "\n",
    "We'll use a pretrained model to classify Iris flower dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import tensorflow as tf\n",
    "\n",
    "TRAIN_URL = \"http://download.tensorflow.org/data/iris_training.csv\"\n",
    "TEST_URL = \"http://download.tensorflow.org/data/iris_test.csv\"\n",
    "\n",
    "CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth',\n",
    "                    'PetalLength', 'PetalWidth', 'Species']\n",
    "SPECIES = ['Sentosa', 'Versicolor', 'Virginica']\n",
    "\n",
    "def maybe_download():\n",
    "    train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1], TRAIN_URL)\n",
    "    test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)\n",
    "\n",
    "    return train_path, test_path\n",
    "\n",
    "def load_data(y_name='Species'):\n",
    "    \"\"\"Returns the iris dataset as (train_x, train_y), (test_x, test_y).\"\"\"\n",
    "    train_path, test_path = maybe_download()\n",
    "\n",
    "    train = pd.read_csv(train_path, names=CSV_COLUMN_NAMES, header=0)\n",
    "    train_x, train_y = train, train.pop(y_name)\n",
    "\n",
    "    test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)\n",
    "    test_x, test_y = test, test.pop(y_name)\n",
    "\n",
    "    return (train_x, train_y), (test_x, test_y)\n",
    "\n",
    "\n",
    "def train_input_fn(features, labels, batch_size):\n",
    "    \"\"\"An input function for training\"\"\"\n",
    "    # Convert the inputs to a Dataset.\n",
    "    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))\n",
    "\n",
    "    # Shuffle, repeat, and batch the examples.\n",
    "    dataset = dataset.shuffle(1000).repeat().batch(batch_size)\n",
    "\n",
    "    # Return the read end of the pipeline.\n",
    "    return dataset.make_one_shot_iterator().get_next()\n",
    "\n",
    "\n",
    "def eval_input_fn(features, labels, batch_size):\n",
    "    \"\"\"An input function for evaluation or prediction\"\"\"\n",
    "    features=dict(features)\n",
    "    if labels is None:\n",
    "        # No labels, use only features.\n",
    "        inputs = features\n",
    "    else:\n",
    "        inputs = (features, labels)\n",
    "\n",
    "    # Convert the inputs to a Dataset.\n",
    "    dataset = tf.data.Dataset.from_tensor_slices(inputs)\n",
    "\n",
    "    # Batch the examples\n",
    "    assert batch_size is not None, \"batch_size must not be None\"\n",
    "    dataset = dataset.batch(batch_size)\n",
    "\n",
    "    # Return the read end of the pipeline.\n",
    "    return dataset.make_one_shot_iterator().get_next()\n",
    "\n",
    "\n",
    "# The remainder of this file contains a simple example of a csv parser,\n",
    "#     implemented using a the `Dataset` class.\n",
    "\n",
    "# `tf.parse_csv` sets the types of the outputs to match the examples given in\n",
    "#     the `record_defaults` argument.\n",
    "CSV_TYPES = [[0.0], [0.0], [0.0], [0.0], [0]]\n",
    "\n",
    "# def _parse_line(line):\n",
    "#     # Decode the line into its fields\n",
    "#     fields = tf.decode_csv(line, record_defaults=CSV_TYPES)\n",
    "\n",
    "#     # Pack the result into a dictionary\n",
    "#     features = dict(zip(CSV_COLUMN_NAMES, fields))\n",
    "\n",
    "#     # Separate the label from the features\n",
    "#     label = features.pop('Species')\n",
    "\n",
    "#     return features, label\n",
    "\n",
    "\n",
    "# def csv_input_fn(csv_path, batch_size):\n",
    "#     # Create a dataset containing the text lines.\n",
    "#     dataset = tf.data.TextLineDataset(csv_path).skip(1)\n",
    "\n",
    "#     # Parse each line.\n",
    "#     dataset = dataset.map(_parse_line)\n",
    "\n",
    "#     # Shuffle, repeat, and batch the examples.\n",
    "#     dataset = dataset.shuffle(1000).repeat().batch(batch_size)\n",
    "\n",
    "#     # Return the read end of the pipeline.\n",
    "#     return dataset.make_one_shot_iterator().get_next()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Using default config.\n",
      "WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmpevAffN\n",
      "INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fd6545063d0>, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': '/tmp/tmpevAffN', '_save_summary_steps': 100}\n",
      "INFO:tensorflow:Create CheckpointSaverHook.\n",
      "INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpevAffN/model.ckpt.\n",
      "INFO:tensorflow:loss = 139.59, step = 1\n",
      "INFO:tensorflow:global_step/sec: 236.62\n",
      "INFO:tensorflow:loss = 22.4486, step = 101 (0.426 sec)\n",
      "INFO:tensorflow:global_step/sec: 220.134\n",
      "INFO:tensorflow:loss = 16.4447, step = 201 (0.453 sec)\n",
      "INFO:tensorflow:global_step/sec: 405.709\n",
      "INFO:tensorflow:loss = 10.2933, step = 301 (0.245 sec)\n",
      "INFO:tensorflow:global_step/sec: 408.988\n",
      "INFO:tensorflow:loss = 8.26349, step = 401 (0.245 sec)\n",
      "INFO:tensorflow:global_step/sec: 451.818\n",
      "INFO:tensorflow:loss = 5.78063, step = 501 (0.221 sec)\n",
      "INFO:tensorflow:global_step/sec: 504.152\n",
      "INFO:tensorflow:loss = 6.63015, step = 601 (0.198 sec)\n",
      "INFO:tensorflow:global_step/sec: 529.939\n",
      "INFO:tensorflow:loss = 4.01163, step = 701 (0.189 sec)\n",
      "INFO:tensorflow:global_step/sec: 432.051\n",
      "INFO:tensorflow:loss = 3.45307, step = 801 (0.232 sec)\n",
      "INFO:tensorflow:global_step/sec: 419.46\n",
      "INFO:tensorflow:loss = 3.85858, step = 901 (0.238 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 1000 into /tmp/tmpevAffN/model.ckpt.\n",
      "INFO:tensorflow:Loss for final step: 8.55838.\n",
      "INFO:tensorflow:Starting evaluation at 2018-01-30-02:46:59\n",
      "INFO:tensorflow:Restoring parameters from /tmp/tmpevAffN/model.ckpt-1000\n",
      "INFO:tensorflow:Finished evaluation at 2018-01-30-02:46:59\n",
      "INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.966667, average_loss = 0.0587675, global_step = 1000, loss = 1.76303\n",
      "\n",
      "Test set accuracy: 0.967\n",
      "\n",
      "WARNING:tensorflow:Input graph does not contain a QueueRunner. That means predict yields forever. This is probably a mistake.\n",
      "INFO:tensorflow:Restoring parameters from /tmp/tmpevAffN/model.ckpt-1000\n",
      "\n",
      "Prediction is \"Sentosa\" (99.8%), expected \"Setosa\"\n",
      "\n",
      "Prediction is \"Versicolor\" (99.6%), expected \"Versicolor\"\n",
      "\n",
      "Prediction is \"Virginica\" (96.0%), expected \"Virginica\"\n"
     ]
    },
    {
     "ename": "SystemExit",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "An exception has occurred, use %tb to see the full traceback.\n",
      "\u001b[0;31mSystemExit\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "from __future__ import absolute_import\n",
    "from __future__ import division\n",
    "from __future__ import print_function\n",
    "\n",
    "import argparse\n",
    "import tensorflow as tf\n",
    "\n",
    "\n",
    "\n",
    "# parser = argparse.ArgumentParser()\n",
    "# parser.add_argument('--batch_size', default=100, type=int, help='batch size')\n",
    "# parser.add_argument('--train_steps', default=1000, type=int,\n",
    "#                     help='number of training steps')\n",
    "\n",
    "def main(argv):\n",
    "#     args = parser.parse_args(argv[1:])\n",
    "\n",
    "    # Fetch the data\n",
    "    (train_x, train_y), (test_x, test_y) = load_data()\n",
    "\n",
    "    # Feature columns describe how to use the input.\n",
    "    my_feature_columns = []\n",
    "    for key in train_x.keys():\n",
    "        my_feature_columns.append(tf.feature_column.numeric_column(key=key))\n",
    "\n",
    "    # Build 2 hidden layer DNN with 10, 10 units respectively.\n",
    "    classifier = tf.estimator.DNNClassifier(\n",
    "        feature_columns=my_feature_columns,\n",
    "        # Two hidden layers of 10 nodes each.\n",
    "        hidden_units=[10, 10],\n",
    "        # The model must choose between 3 classes.\n",
    "        n_classes=3)\n",
    "\n",
    "    # Train the Model.\n",
    "    classifier.train(\n",
    "        input_fn=lambda:train_input_fn(train_x, train_y,\n",
    "                                                 100),\n",
    "        steps=1000)\n",
    "\n",
    "    # Evaluate the model.\n",
    "    eval_result = classifier.evaluate(\n",
    "        input_fn=lambda:eval_input_fn(test_x, test_y,\n",
    "                                                100))\n",
    "\n",
    "    print('\\nTest set accuracy: {accuracy:0.3f}\\n'.format(**eval_result))\n",
    "\n",
    "    # Generate predictions from the model\n",
    "    expected = ['Setosa', 'Versicolor', 'Virginica']\n",
    "    predict_x = {\n",
    "        'SepalLength': [5.1, 5.9, 6.9],\n",
    "        'SepalWidth': [3.3, 3.0, 3.1],\n",
    "        'PetalLength': [1.7, 4.2, 5.4],\n",
    "        'PetalWidth': [0.5, 1.5, 2.1],\n",
    "    }\n",
    "\n",
    "    predictions = classifier.predict(\n",
    "        input_fn=lambda:eval_input_fn(predict_x,\n",
    "                                                labels=None,\n",
    "                                                batch_size=100))\n",
    "\n",
    "    for pred_dict, expec in zip(predictions, expected):\n",
    "        template = ('\\nPrediction is \"{}\" ({:.1f}%), expected \"{}\"')\n",
    "\n",
    "        class_id = pred_dict['class_ids'][0]\n",
    "        probability = pred_dict['probabilities'][class_id]\n",
    "\n",
    "        print(template.format(SPECIES[class_id],\n",
    "                              100 * probability, expec))\n",
    "\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    tf.logging.set_verbosity(tf.logging.INFO)\n",
    "    tf.app.run(main)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "deeplearning",
   "language": "python",
   "name": "deeplearning"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}