{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [
"# CS 20 : TensorFlow for Deep Learning Research\n",
"## Lecture 05 : Variable sharing and managing experiments\n",
"### Applied example with tf.placeholder\n",
"Ref : [Toward Best Practices of TensorFlow Code Patterns](https://wookayin.github.io/TensorFlowKR-2017-talk-bestpractice/ko/#1) by Jongwook Choi, Beomjun Shin \n",
" \n",
"- Using **low-level api**\n",
"- Creating the **input pipeline** with `tf.placeholder`\n",
"- Creating the model as **Class**\n",
"- Training the model with **learning rate scheduling** by exponential decay learning rate\n",
"- Saving the model and Restoring the model" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Setup" ] },
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.12.0\n" ] } ], "source": [
"from __future__ import absolute_import, division, print_function\n",
"import os, sys\n",
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import tensorflow as tf\n",
"%matplotlib inline\n",
"\n",
"print(tf.__version__)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Load and Pre-process data" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [
"(x_train, y_train), (x_tst, y_tst) = tf.keras.datasets.mnist.load_data()\n",
"# divide the pixel values by 255 and flatten every image into a 784-d row vector\n",
"x_train = x_train / 255\n",
"x_train = x_train.reshape(-1, 784)\n",
"x_tst = x_tst / 255\n",
"x_tst = x_tst.reshape(-1, 784)" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(55000, 784) (55000,)\n", "(5000, 784) (5000,)\n" ] } ], "source": [
"# seed NumPy so that the train/validation split is identical after every kernel restart\n",
"np.random.seed(0)\n",
"\n",
"# sample 55000 distinct rows for training; an int population is sampled as if it were np.arange(n)\n",
"tr_indices = np.random.choice(x_train.shape[0], size = 55000, replace = False)\n",
"\n",
"x_tr = x_train[tr_indices]\n",
"y_tr = y_train[tr_indices]\n",
"\n",
"# the rows that were not drawn for training form the validation set\n",
"x_val = np.delete(arr = x_train, obj = tr_indices, axis = 0)\n",
"y_val = np.delete(arr = y_train, obj = tr_indices, axis = 0)\n",
"\n",
"print(x_tr.shape, y_tr.shape)\n",
"print(x_val.shape, y_val.shape)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Define DNN Classifier with two hidden layers" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [
"class DNNClassifier:\n",
"    '''Feed-forward softmax classifier made of tanh hidden layers (TensorFlow 1.x graph mode).\n",
"\n",
"    Args:\n",
"        X: 2-D float tensor of input features, e.g. a `tf.placeholder` of shape [None, 784].\n",
"        y: integer tensor holding the class index of every row of `X`.\n",
"        n_of_classes: number of classes, i.e. the width of the output (logit) layer.\n",
"        hidden_dims: sequence with the number of units of each hidden layer, in order.\n",
"        name: variable scope under which all variables and ops of the model are created.\n",
"\n",
"    Attributes:\n",
"        X, y: the tensors handed to the constructor; feed them when running the model.\n",
"        loss: sparse softmax cross-entropy tensor computed from `y` and the logits.\n",
"    '''\n",
"    # the default is an immutable tuple so that no mutable list is shared between instances\n",
"    def __init__(self, X, y, n_of_classes, hidden_dims = (100, 50), name = 'DNN'):\n",
"\n",
"        with tf.variable_scope(name):\n",
"            with tf.variable_scope('input_layer'):\n",
"                self.X = X\n",
"                self.y = y\n",
"\n",
"            h = self.X\n",
"\n",
"            # one variable scope per layer keeps the 'weights' / 'biases' variable names unique\n",
"            for layer, h_dim in enumerate(hidden_dims):\n",
"                with tf.variable_scope('hidden_layer_{}'.format(layer + 1)):\n",
"                    h = tf.nn.tanh(self.__fully_connected(X = h, output_dim = h_dim))\n",
"\n",
"            with tf.variable_scope('output_layer'):\n",
"                score = self.__fully_connected(X = h, output_dim = n_of_classes)\n",
"\n",
"            with tf.variable_scope('ce_loss'):\n",
"                self.loss = self.__loss(score = score, y = self.y)\n",
"\n",
"            with tf.variable_scope('prediction'):\n",
"                self.__prediction = tf.argmax(input = score, axis = 1)\n",
"\n",
"    def __fully_connected(self, X, output_dim):\n",
"        '''Return tf.matmul(X, w) + b with `weights` and `biases` created in the current variable scope.'''\n",
"        w = tf.get_variable(name = 'weights',\n",
"                            shape = [X.shape[1], output_dim],\n",
"                            initializer = tf.random_normal_initializer())\n",
"        b = tf.get_variable(name = 'biases',\n",
"                            shape = [output_dim],\n",
"                            initializer = tf.constant_initializer(0.0))\n",
"        return tf.matmul(X, w) + b\n",
"\n",
"    def __loss(self, score, y):\n",
"        '''Return the sparse softmax cross-entropy of the logits `score` against the integer labels `y`.'''\n",
"        loss = tf.losses.sparse_softmax_cross_entropy(labels = y, logits = score)\n",
"        return loss\n",
"\n",
"    def predict(self, sess, X):\n",
"        '''Run the model in session `sess` on the features `X` and return the argmax class index per row.'''\n",
"        feed_predict = {self.X : X}\n",
"        return sess.run(fetches = self.__prediction, feed_dict = feed_predict)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Create a model of DNN Classifier" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "## create placeholders for x_data 
and y_data\n",
"x_data = tf.placeholder(dtype = tf.float32, shape = [None, 784])\n",
"y_data = tf.placeholder(dtype = tf.int32, shape = [None])\n",
"\n",
"dnn = DNNClassifier(X = x_data, y = y_data, n_of_classes = 10)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Create training op and train model\n", "Applying exponential decay learning rate to train DNN model \n", "```python\n", "decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)\n", "\n", "```\n", "Ref : https://www.tensorflow.org/api_docs/python/tf/train/exponential_decay" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "859\n" ] } ], "source": [
"# hyper-parameter\n",
"epochs = 15\n",
"batch_size = 64\n",
"learning_rate = .005\n",
"# number of full mini-batches that fit into the training set\n",
"total_step = x_tr.shape[0] // batch_size\n",
"print(total_step)" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [
"## Applying exponential decay learning rate to train dnn model\n",
"# global_step is the counter handed to the optimizer below; decay_steps spans 5 * total_step updates\n",
"global_step = tf.Variable(initial_value = 0, trainable = False)\n",
"exp_decayed_lr = tf.train.exponential_decay(\n",
"    learning_rate = learning_rate,\n",
"    global_step = global_step,\n",
"    decay_steps = total_step * 5,\n",
"    decay_rate = .9,\n",
"    staircase = True)" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [
"# create training op\n",
"opt = tf.train.AdamOptimizer(learning_rate = exp_decayed_lr)\n",
"\n",
"# handing over every trainable variable explicitly is equal to 'var_list = None'\n",
"trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)\n",
"training_op = opt.minimize(loss = dnn.loss, var_list = trainable_vars, global_step = global_step)\n",
"\n",
"# create summary op for tensorboard\n",
"loss_summ = tf.summary.scalar(name = 'loss', tensor = dnn.loss)" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "train_writer = tf.summary.FileWriter(logdir = 
'../graphs/lecture05/applied_example_wp/train',\n",
"                                     graph = tf.get_default_graph())\n",
"val_writer = tf.summary.FileWriter(logdir = '../graphs/lecture05/applied_example_wp/val',\n",
"                                   graph = tf.get_default_graph())\n",
"saver = tf.train.Saver()" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "epoch :   5, tr_loss : 0.25, val_loss : 0.30\n", "epoch :  10, tr_loss : 0.16, val_loss : 0.24\n", "epoch :  15, tr_loss : 0.12, val_loss : 0.20\n" ] } ], "source": [
"sess_config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))\n",
"sess = tf.Session(config = sess_config)\n",
"sess.run(tf.global_variables_initializer())\n",
"\n",
"tr_loss_hist = []\n",
"val_loss_hist = []\n",
"\n",
"for epoch in range(epochs):\n",
"    # running means of the mini-batch losses of this epoch\n",
"    avg_tr_loss = 0\n",
"    avg_val_loss = 0\n",
"\n",
"    for step in range(total_step):\n",
"\n",
"        # an int population is sampled as if it were np.arange(n), which avoids converting\n",
"        # a range object into an array at every single step\n",
"        batch_indices = np.random.choice(x_tr.shape[0], size = batch_size, replace = False)\n",
"        val_indices = np.random.choice(x_val.shape[0], size = batch_size, replace = False)\n",
"\n",
"        batch_xs = x_tr[batch_indices]\n",
"        batch_ys = y_tr[batch_indices]\n",
"        val_xs = x_val[val_indices]\n",
"        val_ys = y_val[val_indices]\n",
"\n",
"        # one optimizer update on the training mini-batch\n",
"        _, tr_loss = sess.run(fetches = [training_op, dnn.loss],\n",
"                              feed_dict = {x_data : batch_xs, y_data : batch_ys})\n",
"\n",
"        # loss only (no update) on a validation mini-batch of the same size\n",
"        val_loss = sess.run(fetches = dnn.loss,\n",
"                            feed_dict = {x_data : val_xs, y_data : val_ys})\n",
"        avg_tr_loss += tr_loss / total_step\n",
"        avg_val_loss += val_loss / total_step\n",
"\n",
"    tr_loss_hist.append(avg_tr_loss)\n",
"    val_loss_hist.append(avg_val_loss)\n",
"\n",
"    # log the epoch averages (the values that are printed and plotted) to TensorBoard;\n",
"    # the summary of the last mini-batch alone would not match them\n",
"    tr_loss_summ = tf.Summary(value = [tf.Summary.Value(tag = 'loss', simple_value = float(avg_tr_loss))])\n",
"    val_loss_summ = tf.Summary(value = [tf.Summary.Value(tag = 'loss', simple_value = float(avg_val_loss))])\n",
"    train_writer.add_summary(summary = tr_loss_summ, global_step = (epoch + 1))\n",
"    val_writer.add_summary(summary = val_loss_summ, global_step = (epoch + 1))\n",
"\n",
"    # report and checkpoint every 5th epoch\n",
"    if (epoch + 1) % 5 == 0:\n",
"        print('epoch : {:3}, tr_loss : {:.2f}, val_loss : {:.2f}'.format(epoch + 1, avg_tr_loss, avg_val_loss))\n",
"        saver.save(sess = sess, save_path = '../graphs/lecture05/applied_example_wp/dnn', global_step = (epoch + 1))\n",
"\n",
"train_writer.close()\n",
"val_writer.close()" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<matplotlib.legend.Legend at 0x7fe49c7c84a8>" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
"plt.plot(tr_loss_hist, label = 'train')\n",
"plt.plot(val_loss_hist, label = 'validation')\n",
"plt.legend()" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "test acc: 94.62%\n" ] } ], "source": [
"yhat = dnn.predict(sess = sess, X = x_tst)\n",
"print('test acc: {:.2%}'.format(np.mean(yhat == y_tst)))\n",
"sess.close()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Restore model" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### Example 1\n", "Restore my model at epoch 15" ] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [
"tf.reset_default_graph()\n",
"\n",
"x_data = tf.placeholder(dtype = tf.float32, shape = [None, 784])\n",
"y_data = tf.placeholder(dtype = tf.int32, shape = [None])\n",
"\n",
"dnn_restore = DNNClassifier(X = x_data, y = y_data, n_of_classes = 10)" ] },
{ "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "model_checkpoint_path: \"../graphs/lecture05/applied_example_wp/dnn-15\"\n", "all_model_checkpoint_paths: \"../graphs/lecture05/applied_example_wp/dnn-5\"\n", "all_model_checkpoint_paths: \"../graphs/lecture05/applied_example_wp/dnn-10\"\n", "all_model_checkpoint_paths: \"../graphs/lecture05/applied_example_wp/dnn-15\"\n", "\n" ] } ], "source": [
"ckpt_list = tf.train.get_checkpoint_state(checkpoint_dir = '../graphs/lecture05/applied_example_wp/')\n",
"print(ckpt_list) " ] },
{ "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Restoring parameters from ../graphs/lecture05/applied_example_wp/dnn-15\n" ] } ], "source": [
"# restore my model at epoch 15\n",
"sess = tf.Session()\n",
"saver = tf.train.Saver()\n",
"saver.restore(sess = sess, save_path = '../graphs/lecture05/applied_example_wp/dnn-15')" ] },
{ "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "test acc: 94.62%\n" ] } ], "source": [
"yhat = dnn_restore.predict(sess = sess, X = x_tst)\n",
"print('test acc: {:.2%}'.format(np.mean(yhat == y_tst)))\n",
"sess.close()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### Example 2\n", "Restore my model at epoch 10" ] },
{ "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [
"tf.reset_default_graph()\n",
"\n",
"x_data = tf.placeholder(dtype = tf.float32, shape = [None, 784])\n",
"y_data = tf.placeholder(dtype = tf.int32, shape = [None])\n",
"\n",
"dnn_restore = DNNClassifier(X = x_data, y = y_data, n_of_classes = 10)" ] },
{ "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "model_checkpoint_path: \"../graphs/lecture05/applied_example_wp/dnn-15\"\n", "all_model_checkpoint_paths: \"../graphs/lecture05/applied_example_wp/dnn-5\"\n", "all_model_checkpoint_paths: \"../graphs/lecture05/applied_example_wp/dnn-10\"\n", "all_model_checkpoint_paths: \"../graphs/lecture05/applied_example_wp/dnn-15\"\n", "\n" ] } ], "source": [
"ckpt_list = tf.train.get_checkpoint_state(checkpoint_dir = '../graphs/lecture05/applied_example_wp/')\n",
"print(ckpt_list) " ] },
{ "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "INFO:tensorflow:Restoring parameters from ../graphs/lecture05/applied_example_wp/dnn-10\n" ] } ], "source": [
"# restore my model at epoch 10\n",
"sess = tf.Session()\n",
"saver = tf.train.Saver()\n",
"saver.restore(sess = sess, save_path = '../graphs/lecture05/applied_example_wp/dnn-10')" ] },
{ "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "test acc: 93.52%\n" ] } ], "source": [
"yhat = dnn_restore.predict(sess = sess, X = x_tst)\n",
"print('test acc: {:.2%}'.format(np.mean(yhat == y_tst)))\n",
"sess.close()" ] } ],
"metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }