{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# ML with Tensorflow\n", "\n", "Tensorflow the name's enough!\n", "Let's dig in!\n", "\n", "We'll use a pretrained model to classify Iris flower dataset." ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import tensorflow as tf\n", "\n", "TRAIN_URL = \"http://download.tensorflow.org/data/iris_training.csv\"\n", "TEST_URL = \"http://download.tensorflow.org/data/iris_test.csv\"\n", "\n", "CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth',\n", " 'PetalLength', 'PetalWidth', 'Species']\n", "SPECIES = ['Sentosa', 'Versicolor', 'Virginica']\n", "\n", "def maybe_download():\n", " train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1], TRAIN_URL)\n", " test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)\n", "\n", " return train_path, test_path\n", "\n", "def load_data(y_name='Species'):\n", " \"\"\"Returns the iris dataset as (train_x, train_y), (test_x, test_y).\"\"\"\n", " train_path, test_path = maybe_download()\n", "\n", " train = pd.read_csv(train_path, names=CSV_COLUMN_NAMES, header=0)\n", " train_x, train_y = train, train.pop(y_name)\n", "\n", " test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)\n", " test_x, test_y = test, test.pop(y_name)\n", "\n", " return (train_x, train_y), (test_x, test_y)\n", "\n", "\n", "def train_input_fn(features, labels, batch_size):\n", " \"\"\"An input function for training\"\"\"\n", " # Convert the inputs to a Dataset.\n", " dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))\n", "\n", " # Shuffle, repeat, and batch the examples.\n", " dataset = dataset.shuffle(1000).repeat().batch(batch_size)\n", "\n", " # Return the read end of the pipeline.\n", " return dataset.make_one_shot_iterator().get_next()\n", "\n", "\n", "def eval_input_fn(features, labels, batch_size):\n", " \"\"\"An input function for evaluation or prediction\"\"\"\n", " features=dict(features)\n", " if labels is None:\n", " # No labels, use only features.\n", " inputs = features\n", " else:\n", " inputs = (features, labels)\n", "\n", " # Convert the inputs to a Dataset.\n", " dataset = tf.data.Dataset.from_tensor_slices(inputs)\n", "\n", " # Batch the examples\n", " assert batch_size is not None, \"batch_size must not be None\"\n", " dataset = dataset.batch(batch_size)\n", "\n", " # Return the read end of the pipeline.\n", " return dataset.make_one_shot_iterator().get_next()\n", "\n", "\n", "# The remainder of this file contains a simple example of a csv parser,\n", "# implemented using a the `Dataset` class.\n", "\n", "# `tf.parse_csv` sets the types of the outputs to match the examples given in\n", "# the `record_defaults` argument.\n", "CSV_TYPES = [[0.0], [0.0], [0.0], [0.0], [0]]\n", "\n", "# def _parse_line(line):\n", "# # Decode the line into its fields\n", "# fields = tf.decode_csv(line, record_defaults=CSV_TYPES)\n", "\n", "# # Pack the result into a dictionary\n", "# features = dict(zip(CSV_COLUMN_NAMES, fields))\n", "\n", "# # Separate the label from the features\n", "# label = features.pop('Species')\n", "\n", "# return features, label\n", "\n", "\n", "# def csv_input_fn(csv_path, batch_size):\n", "# # Create a dataset containing the text lines.\n", "# dataset = tf.data.TextLineDataset(csv_path).skip(1)\n", "\n", "# # Parse each line.\n", "# dataset = dataset.map(_parse_line)\n", "\n", "# # Shuffle, repeat, and batch the examples.\n", "# dataset = 
  ,
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:tensorflow:Using default config.\n",
      "WARNING:tensorflow:Using temporary folder as model directory: /tmp/tmpevAffN\n",
      "INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_is_chief': True, '_cluster_spec': , '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': '/tmp/tmpevAffN', '_save_summary_steps': 100}\n",
      "INFO:tensorflow:Create CheckpointSaverHook.\n",
      "INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpevAffN/model.ckpt.\n",
      "INFO:tensorflow:loss = 139.59, step = 1\n",
      "INFO:tensorflow:global_step/sec: 236.62\n",
      "INFO:tensorflow:loss = 22.4486, step = 101 (0.426 sec)\n",
      "INFO:tensorflow:global_step/sec: 220.134\n",
      "INFO:tensorflow:loss = 16.4447, step = 201 (0.453 sec)\n",
      "INFO:tensorflow:global_step/sec: 405.709\n",
      "INFO:tensorflow:loss = 10.2933, step = 301 (0.245 sec)\n",
      "INFO:tensorflow:global_step/sec: 408.988\n",
      "INFO:tensorflow:loss = 8.26349, step = 401 (0.245 sec)\n",
      "INFO:tensorflow:global_step/sec: 451.818\n",
      "INFO:tensorflow:loss = 5.78063, step = 501 (0.221 sec)\n",
      "INFO:tensorflow:global_step/sec: 504.152\n",
      "INFO:tensorflow:loss = 6.63015, step = 601 (0.198 sec)\n",
      "INFO:tensorflow:global_step/sec: 529.939\n",
      "INFO:tensorflow:loss = 4.01163, step = 701 (0.189 sec)\n",
      "INFO:tensorflow:global_step/sec: 432.051\n",
      "INFO:tensorflow:loss = 3.45307, step = 801 (0.232 sec)\n",
      "INFO:tensorflow:global_step/sec: 419.46\n",
      "INFO:tensorflow:loss = 3.85858, step = 901 (0.238 sec)\n",
      "INFO:tensorflow:Saving checkpoints for 1000 into /tmp/tmpevAffN/model.ckpt.\n",
      "INFO:tensorflow:Loss for final step: 8.55838.\n",
      "INFO:tensorflow:Starting evaluation at 2018-01-30-02:46:59\n",
      "INFO:tensorflow:Restoring parameters from /tmp/tmpevAffN/model.ckpt-1000\n",
      "INFO:tensorflow:Finished evaluation at 2018-01-30-02:46:59\n",
      "INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.966667, average_loss = 0.0587675, global_step = 1000, loss = 1.76303\n",
      "\n",
      "Test set accuracy: 0.967\n",
      "\n",
      "WARNING:tensorflow:Input graph does not contain a QueueRunner. That means predict yields forever. This is probably a mistake.\n",
      "INFO:tensorflow:Restoring parameters from /tmp/tmpevAffN/model.ckpt-1000\n",
      "\n",
      "Prediction is \"Setosa\" (99.8%), expected \"Setosa\"\n",
      "\n",
      "Prediction is \"Versicolor\" (99.6%), expected \"Versicolor\"\n",
      "\n",
      "Prediction is \"Virginica\" (96.0%), expected \"Virginica\"\n"
     ]
    },
    {
     "ename": "SystemExit",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "An exception has occurred, use %tb to see the full traceback.\n",
      "\u001b[0;31mSystemExit\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "from __future__ import absolute_import\n",
    "from __future__ import division\n",
    "from __future__ import print_function\n",
    "\n",
    "import tensorflow as tf\n",
    "\n",
    "# Command-line flags from the original script, kept for reference;\n",
    "# they are not used when running inside a notebook.\n",
    "# import argparse\n",
    "# parser = argparse.ArgumentParser()\n",
    "# parser.add_argument('--batch_size', default=100, type=int, help='batch size')\n",
    "# parser.add_argument('--train_steps', default=1000, type=int,\n",
    "#                     help='number of training steps')\n",
    "\n",
    "\n",
    "def main(argv):\n",
    "    # args = parser.parse_args(argv[1:])\n",
    "\n",
    "    # Fetch the data.\n",
    "    (train_x, train_y), (test_x, test_y) = load_data()\n",
    "\n",
    "    # Feature columns describe how to use the input.\n",
    "    my_feature_columns = []\n",
    "    for key in train_x.keys():\n",
    "        my_feature_columns.append(tf.feature_column.numeric_column(key=key))\n",
    "\n",
    "    # Build a DNN with two hidden layers of 10 units each.\n",
    "    classifier = tf.estimator.DNNClassifier(\n",
    "        feature_columns=my_feature_columns,\n",
    "        # Two hidden layers of 10 nodes each.\n",
    "        hidden_units=[10, 10],\n",
    "        # The model must choose between 3 classes.\n",
    "        n_classes=3)\n",
    "\n",
    "    # Train the model.\n",
    "    classifier.train(\n",
    "        input_fn=lambda: train_input_fn(train_x, train_y, 100),\n",
    "        steps=1000)\n",
    "\n",
    "    # Evaluate the model.\n",
    "    eval_result = classifier.evaluate(\n",
    "        input_fn=lambda: eval_input_fn(test_x, test_y, 100))\n",
    "\n",
    "    print('\\nTest set accuracy: {accuracy:0.3f}\\n'.format(**eval_result))\n",
    "\n",
    "    # Generate predictions from the model.\n",
    "    expected = ['Setosa', 'Versicolor', 'Virginica']\n",
    "    predict_x = {\n",
    "        'SepalLength': [5.1, 5.9, 6.9],\n",
    "        'SepalWidth': [3.3, 3.0, 3.1],\n",
    "        'PetalLength': [1.7, 4.2, 5.4],\n",
    "        'PetalWidth': [0.5, 1.5, 2.1],\n",
    "    }\n",
    "\n",
    "    predictions = classifier.predict(\n",
    "        input_fn=lambda: eval_input_fn(predict_x,\n",
    "                                       labels=None,\n",
    "                                       batch_size=100))\n",
    "\n",
    "    for pred_dict, expec in zip(predictions, expected):\n",
    "        template = '\\nPrediction is \"{}\" ({:.1f}%), expected \"{}\"'\n",
    "\n",
    "        class_id = pred_dict['class_ids'][0]\n",
    "        probability = pred_dict['probabilities'][class_id]\n",
    "\n",
    "        print(template.format(SPECIES[class_id],\n",
    "                              100 * probability, expec))\n",
    "\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    tf.logging.set_verbosity(tf.logging.INFO)\n",
    "    # tf.app.run calls sys.exit once main returns; Jupyter reports\n",
    "    # that as the SystemExit traceback above.\n",
    "    tf.app.run(main)\n"
   ]
  }
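  ,
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`tf.app.run` calls `sys.exit` once `main` returns, which Jupyter surfaces as the `SystemExit` traceback above. A notebook-friendlier alternative, sketched below under the assumption that the previous cell has been run so `main` is defined, is simply to call `main` directly."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the pipeline without tf.app.run, avoiding the SystemExit traceback.\n",
    "# Assumes main() from the previous cell is defined in this kernel.\n",
    "tf.logging.set_verbosity(tf.logging.INFO)\n",
    "main(argv=None)\n"
   ]
  }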
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "deeplearning",
   "language": "python",
   "name": "deeplearning"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}