# Notebook TODOs (from the markdown cell, typos fixed):
#   1. Add descriptions
#   2. Add dropout
#   3. Add normalization of data

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.contrib.layers import flatten


EPOCHS = 10
BATCH_SIZE = 50


def conv2d(input, weight_tuple, strides_list, depth, stddev=1.0):
    """Valid-padded 2D convolution with freshly created weight/bias variables.

    Args:
        input: 4-D tensor (batch, height, width, channels).
        weight_tuple: filter shape (filter_h, filter_w, in_depth, out_depth).
        strides_list: conv strides, e.g. [1, 1, 1, 1].
        depth: bias length; must equal weight_tuple[-1] (out_depth).
        stddev: std-dev for the truncated-normal weight init. Defaults to 1.0
            to preserve the original behavior, but note that 1.0 is far too
            large for stable training -- the recorded run starts at a
            validation loss of 16. Pass e.g. 0.1 for a sane init.

    Returns:
        Pre-activation conv output: conv2d(input, W) + b.
    """
    F_W = tf.Variable(tf.truncated_normal(weight_tuple, stddev=stddev))
    F_b = tf.Variable(tf.zeros(depth))
    return tf.nn.conv2d(input, F_W, strides_list, 'VALID') + F_b


def max_pool(input, ksize, strides):
    """Valid-padded max pooling over a 4-D input tensor."""
    return tf.nn.max_pool(input, ksize, strides, 'VALID')


# LeNet architecture:
# INPUT -> CONV -> ACT -> POOL -> CONV -> ACT -> POOL -> FLATTEN -> FC -> ACT -> FC
def LeNet(x):
    """Build the LeNet graph and return the logits of the final FC layer.

    Args:
        x: 2-D placeholder of flattened 28x28 grayscale images, shape (N, 784).

    Returns:
        Logits tensor of shape (N, 10).
    """
    # Reshape from 2D to 4D. This prepares the data for
    # convolutional and pooling layers.
    x = tf.reshape(x, (-1, 28, 28, 1))
    # Pad 0s to 32x32. Centers the digit further.
    # Add 2 rows/columns on each side for height and width dimensions.
    x = tf.pad(x, [[0, 0], [2, 2], [2, 2], [0, 0]], mode="CONSTANT")

    # 32x32x1 -> conv 5x5 stride 1 (VALID) -> 28x28x6
    x = conv2d(x, (5, 5, 1, 6), [1, 1, 1, 1], 6)
    x = tf.nn.relu(x)
    # 28x28x6 -> pool 2x2 stride 2 -> 14x14x6
    x = max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1])

    # 14x14x6 -> conv 2x2 stride 2 (VALID) -> 7x7x16
    # NOTE: classic LeNet uses a 5x5 stride-1 conv here; this 2x2 stride-2
    # variant is what the notebook actually trained with.
    x = conv2d(x, (2, 2, 6, 16), [1, 2, 2, 1], 16)
    x = tf.nn.relu(x)
    # 7x7x16 -> pool 2x2 stride 2 -> 3x3x16
    x = max_pool(x, [1, 2, 2, 1], [1, 2, 2, 1])

    # Flatten: 3*3*16 = 144 features.
    # (The original comment claimed 5*5*16; that is wrong for this
    # architecture -- the dynamic shape lookup below is what kept it working.)
    fc1 = flatten(x)
    fc1_shape = (fc1.get_shape().as_list()[-1], 120)

    # NOTE: truncated_normal with default stddev=1.0 -- same over-large init
    # issue as conv2d; kept for behavior compatibility with the recorded run.
    fc1_W = tf.Variable(tf.truncated_normal(shape=(fc1_shape)))
    fc1_b = tf.Variable(tf.zeros(120))
    fc1 = tf.matmul(fc1, fc1_W) + fc1_b
    fc1 = tf.nn.relu(fc1)

    fc2_W = tf.Variable(tf.truncated_normal(shape=(120, 10)))
    fc2_b = tf.Variable(tf.zeros(10))
    return tf.matmul(fc1, fc2_W) + fc2_b
# Training / evaluation script for the LeNet graph defined above.

# MNIST consists of 28x28x1, grayscale images (fed flattened as 784 floats).
x = tf.placeholder(tf.float32, (None, 784))
# Classify over 10 digits 0-9 (one-hot labels).
y = tf.placeholder(tf.float32, (None, 10))
fc2 = LeNet(x)

# BUG FIX: pass logits/labels as keyword arguments. The bare positional call
# softmax_cross_entropy_with_logits(fc2, y) is rejected by TF >= 1.0 and is
# ambiguous to readers; keywords work on both old and new 1.x APIs.
loss_op = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=fc2, labels=y))
opt = tf.train.AdamOptimizer()
train_op = opt.minimize(loss_op)
correct_prediction = tf.equal(tf.argmax(fc2, 1), tf.argmax(y, 1))
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))


def eval_data(dataset):
    """Return (mean loss, mean accuracy) over `dataset`.

    Uses the default session. If dataset.num_examples is not divisible by
    BATCH_SIZE the remainder examples are discarded:
        e.g. BATCH_SIZE=64, 55000 examples -> 859 steps * 64 = 54976 seen.
    """
    steps_per_epoch = dataset.num_examples // BATCH_SIZE
    num_examples = steps_per_epoch * BATCH_SIZE
    total_acc, total_loss = 0, 0
    sess = tf.get_default_session()
    for step in range(steps_per_epoch):
        batch_x, batch_y = dataset.next_batch(BATCH_SIZE)
        loss, acc = sess.run([loss_op, accuracy_op],
                             feed_dict={x: batch_x, y: batch_y})
        # Weight by actual batch size so the means are exact.
        total_acc += (acc * batch_x.shape[0])
        total_loss += (loss * batch_x.shape[0])
    return total_loss / num_examples, total_acc / num_examples


if __name__ == '__main__':
    # BUG FIX: the original cell unpickled traffic-sign data ("./data/train.p")
    # using an un-imported `pickle` (NameError) into X_train/X_val/y_train/...
    # that were never referenced again, then trained on `mnist`, which was
    # never defined. The recorded output ("Extracting MNIST_data/...") shows
    # the run actually used input_data.read_data_sets; restore that and drop
    # the dead pickle/sklearn code.
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

    with tf.Session() as sess:
        # NOTE: initialize_all_variables() is deprecated in favor of
        # global_variables_initializer(); kept because this TF-0.x-era
        # notebook demonstrably ran with it.
        sess.run(tf.initialize_all_variables())
        steps_per_epoch = mnist.train.num_examples // BATCH_SIZE
        num_examples = steps_per_epoch * BATCH_SIZE

        # Train model
        for i in range(EPOCHS):
            for step in range(steps_per_epoch):
                batch_x, batch_y = mnist.train.next_batch(BATCH_SIZE)
                # BUG FIX: Optimizer.minimize() returns an op that evaluates
                # to None, so `loss = sess.run(train_op, ...)` always bound
                # None. Fetch loss_op alongside to get the real batch loss.
                _, loss = sess.run([train_op, loss_op],
                                   feed_dict={x: batch_x, y: batch_y})

            val_loss, val_acc = eval_data(mnist.validation)
            print("EPOCH {} ...".format(i + 1))
            print("Validation loss = {:.3f}".format(val_loss))
            print("Validation accuracy = {:.3f}".format(val_acc))
            print()

        # Evaluate on the test data
        test_loss, test_acc = eval_data(mnist.test)
        print("Test loss = {:.3f}".format(test_loss))
        print("Test accuracy = {:.3f}".format(test_acc))