{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# A Network Tour of Data Science\n", "###       Xavier Bresson, Winter 2016/17\n", "## Assignment 2 : Convolutional Neural Networks" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Import libraries\n", "import numpy as np\n", "import tensorflow as tf\n", "import time\n", "import collections\n", "import os\n", "\n", "import matplotlib.pyplot as plt\n", "# This is a bit of magic to make matplotlib figures appear inline in the notebook\n", "# rather than in a new window.\n", "%matplotlib inline\n", "plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots\n", "plt.rcParams['image.interpolation'] = 'nearest'\n", "plt.rcParams['image.cmap'] = 'gray'" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Load small part of CIFAR dataset\n", "[X_train, y_train, X_test, y_test] = np.load(os.path.join('datasets', 'cifar.npy'))\n", "\n", "# Convert to float\n", "train_data_orig = X_train.astype('float32')\n", "y_train = y_train.astype('float32')\n", "test_data_orig = X_test.astype('float32')\n", "y_test = y_test.astype('float32')\n", "\n", "# See shapes of matrices\n", "print('Training data shape: ', train_data_orig.shape)\n", "print('Training label shape: ', y_train.shape)\n", "print('Test data shape: ', test_data_orig.shape)\n", "print('Test label shape: ', y_test.shape)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Visualize a few examples of training images from each class\n", "classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']\n", "num_classes = len(classes)\n", "samples_per_class = 7\n", "for y, cls in enumerate(classes):\n", " idxs = np.flatnonzero(y_train == y)\n", " idxs = np.random.choice(idxs, samples_per_class, replace=False)\n", " for 
i, idx in enumerate(idxs):\n", " plt_idx = i * num_classes + y + 1\n", " plt.subplot(samples_per_class, num_classes, plt_idx)\n", " xx = train_data_orig[idx,:,:,:]\n", " xx -= np.min(xx)\n", " xx /= np.max(xx)\n", " plt.imshow(xx)\n", " plt.axis('off')\n", " if i == 0:\n", " plt.title(cls)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Data pre-processing\n", "n = train_data_orig.shape[0]\n", "train_data = np.zeros([n,32**2])\n", "for i in range(n):\n", " xx = train_data_orig[i,:,:,:]\n", " xx = np.linalg.norm(xx,axis=2)\n", " xx -= np.mean(xx)\n", " xx /= np.linalg.norm(xx)\n", " train_data[i] = np.reshape(xx,[-1])\n", "\n", "n = test_data_orig.shape[0]\n", "test_data = np.zeros([n,32**2])\n", "for i in range(n):\n", " xx = test_data_orig[i,:,:,:]\n", " xx = np.linalg.norm(xx,axis=2)\n", " xx -= np.mean(xx)\n", " xx /= np.linalg.norm(xx)\n", " test_data[i] = np.reshape(xx,[-1])\n", "\n", "print(train_data.shape)\n", "print(test_data.shape)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Convert label values to one_hot vector\n", "from scipy.sparse import coo_matrix\n", "def convert_to_one_hot(a,max_val=None):\n", " N = a.size\n", " data = np.ones(N,dtype=int)\n", " sparse_out = coo_matrix((data,(np.arange(N),a.ravel())), shape=(N,max_val))\n", " return np.array(sparse_out.todense())\n", "\n", "train_labels = convert_to_one_hot(y_train,10)\n", "test_labels = convert_to_one_hot(y_test,10)\n", "\n", "print(train_labels.shape)\n", "print(test_labels.shape)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# Model 1\n", "**Question 1** Define with TensorFlow a linear classifier model:\n", "\n", "$$\n", "y=\\textrm{softmax}(xW+b)\n", "$$\n", "\n", "Compute the train accuracy and the test accuracy (you should get a test accuracy around 25% at iteration 10,000)

\n", "Hints:
\n", "(1) You may use functions *tf.matmul(), tf.nn.softmax()*
\n", "(2) You may use the Xavier initialization discussed during lectures for W, and b=0
\n", "(3) You may use optimization schemes *tf.train.GradientDescentOptimizer(), tf.train.AdamOptimizer()*
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Define computational graph (CG)\n", "batch_size = 100 # batch size\n", "d = train_data.shape[1] # data dimensionality\n", "nc = 10 # number of classes\n", "\n", "# CG inputs\n", "xin = tf.placeholder(tf.float32,[batch_size,d]); #print('xin=',xin,xin.get_shape())\n", "y_label = tf.placeholder(tf.float32,[batch_size,nc]); #print('y_label=',y_label,y_label.get_shape())\n", "\n", "# Fully Connected layer\n", "y = YOUR CODE HERE\n", "\n", "# Softmax\n", "y = YOUR CODE HERE\n", "\n", "# Loss\n", "cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_label * tf.log(y), 1))\n", "total_loss = cross_entropy\n", "\n", "# Optimization scheme\n", "train_step = tf.train.YOUR CODE HERE\n", "\n", "# Accuracy\n", "correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_label,1))\n", "accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Run Computational Graph\n", "n = train_data.shape[0]\n", "indices = collections.deque()\n", "init = tf.initialize_all_variables()\n", "sess = tf.Session()\n", "sess.run(init)\n", "for i in range(10001):\n", " \n", " # Batch extraction\n", " if len(indices) < batch_size:\n", " indices.extend(np.random.permutation(n)) \n", " idx = [indices.popleft() for i in range(batch_size)]\n", " batch_x, batch_y = train_data[idx,:], train_labels[idx]\n", " #print(batch_x.shape,batch_y.shape)\n", " \n", " # Run CG for variable training\n", " _,acc_train,total_loss_o = sess.run([train_step,accuracy,total_loss], feed_dict={xin: batch_x, y_label: batch_y})\n", " \n", " # Run CG for test set\n", " if not i%1000:\n", " print('\\nIteration i=',i,', train accuracy=',acc_train,', loss=',total_loss_o)\n", " acc_test = sess.run(accuracy, feed_dict={xin: test_data, y_label: test_labels})\n", " print('test accuracy=',acc_test)" ] 
}, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# Model 2\n", "**Question 2a.** Define with TensorFlow a 2-layer neural network classifier:\n", "\n", "$$\n", "y=\\textrm{softmax}(ReLU(xW_1+b_1)W_2+b_2)\n", "$$\n", "\n", "Compute the train accuracy and the test accuracy (you should be able to overfit the train set)
\n", "Hint: You may use the function *tf.nn.relu()*

\n", "\n", "**Question 2b.** Add an L2 regularization term to prevent overfitting. Compute the train accuracy and the test accuracy (you should get a test accuracy around 35%)
\n", "Hints:
\n", "(1) You may use the function *tf.nn.l2_loss()*
\n", "(2) Do not forget the constant parameter *reg_par*: total_loss = cross_entropy + reg_par * reg_loss
" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Define computational graph (CG)\n", "batch_size = 100 # batch size\n", "d = train_data.shape[1] # data dimensionality\n", "nc = 10 # number of classes\n", "\n", "# CG inputs\n", "xin = tf.placeholder(tf.float32,[batch_size,d]); #print('xin=',xin,xin.get_shape())\n", "y_label = tf.placeholder(tf.float32,[batch_size,nc]); #print('y_label=',y_label,y_label.get_shape())\n", "\n", "# 1st Fully Connected layer\n", "y = YOUR CODE HERE\n", "\n", "# ReLU activation\n", "y = YOUR CODE HERE\n", "\n", "# 2nd Fully Connected layer\n", "y = YOUR CODE HERE\n", "\n", "# Softmax\n", "y = YOUR CODE HERE\n", "\n", "# Loss\n", "cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_label * tf.log(y), 1))\n", "\n", "# L2 Regularization\n", "reg_loss = YOUR CODE HERE\n", "reg_par = YOUR CODE HERE\n", "total_loss = YOUR CODE HERE\n", "\n", "# Optimization scheme\n", "train_step = tf.train.YOUR CODE HERE\n", "\n", "# Accuracy\n", "correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_label,1))\n", "accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [], "source": [ "# Run Computational Graph\n", "n = train_data.shape[0]\n", "indices = collections.deque()\n", "init = tf.initialize_all_variables()\n", "sess = tf.Session()\n", "sess.run(init)\n", "for i in range(10001):\n", " \n", " # Batch extraction\n", " if len(indices) < batch_size:\n", " indices.extend(np.random.permutation(n)) \n", " idx = [indices.popleft() for i in range(batch_size)]\n", " batch_x, batch_y = train_data[idx,:], train_labels[idx]\n", " #print(batch_x.shape,batch_y.shape)\n", " \n", " # Run CG for variable training\n", " _,acc_train,total_loss_o = sess.run([train_step,accuracy,total_loss], feed_dict={xin: batch_x, y_label: batch_y})\n", " \n", " # Run CG for test set\n", 
" if not i%1000:\n", " print('\\nIteration i=',i,', train accuracy=',acc_train,', loss=',total_loss_o)\n", " acc_test = sess.run(accuracy, feed_dict={xin: test_data, y_label: test_labels})\n", " print('test accuracy=',acc_test)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# Model 3\n", "**Question 3.** Define a convolutional neural network classifier:\n", "\n", "$$\n", "y=\\textrm{softmax}(ReLU(x\\ast W_1+b_1)W_2+b_2)\n", "$$\n", "\n", "Hint: You may use function *tf.nn.conv2d(x_2d, Wcl, strides=[1, 1, 1, 1], padding='SAME')*
\n", "with *Wcl = tf.Variable(tf.truncated_normal([K,K,1,F], stddev=YOUR CODE HERE ))*\n", "for the convolution operator $\\ast$
\n", "and *x_2d = tf.reshape(xin, [-1,32,32,1])*
\n", "\n", "Compute the train accuracy and the test accuracy (you should be able to overfit the train set)

" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Define computational graph (CG)\n", "batch_size = 100 # batch size\n", "d = train_data.shape[1] # data dimensionality\n", "nc = 10 # number of classes\n", "\n", "# CG inputs\n", "xin = tf.placeholder(tf.float32,[batch_size,d]); #print('xin=',xin,xin.get_shape())\n", "y_label = tf.placeholder(tf.float32,[batch_size,nc]); #print('y_label=',y_label,y_label.get_shape())\n", "\n", "\n", "# Convolutional layer\n", "K = 5 # size of the patch\n", "F = 10 # number of filters\n", "x = YOUR CODE HERE\n", "\n", "# ReLU activation\n", "x = YOUR CODE HERE\n", "\n", "# Fully Connected layer\n", "nfc = 32*32*F\n", "y = YOUR CODE HERE\n", "\n", "# Softmax\n", "y = YOUR CODE HERE\n", "\n", "# Loss\n", "cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_label * tf.log(y), 1))\n", "total_loss = cross_entropy\n", "\n", "# Optimization scheme\n", "train_step = tf.train.YOUR CODE HERE\n", "\n", "# Accuracy\n", "correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_label,1))\n", "accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [], "source": [ "# Run Computational Graph\n", "n = train_data.shape[0]\n", "indices = collections.deque()\n", "init = tf.initialize_all_variables()\n", "sess = tf.Session()\n", "sess.run(init)\n", "for i in range(10001):\n", " \n", " # Batch extraction\n", " if len(indices) < batch_size:\n", " indices.extend(np.random.permutation(n)) \n", " idx = [indices.popleft() for i in range(batch_size)]\n", " batch_x, batch_y = train_data[idx,:], train_labels[idx]\n", " #print(batch_x.shape,batch_y.shape)\n", " \n", " # Run CG for variable training\n", " _,acc_train,total_loss_o = sess.run([train_step,accuracy,total_loss], feed_dict={xin: batch_x, y_label: batch_y})\n", " \n", " # Run CG for test set\n", " if not 
i%1000:\n", " print('\\nIteration i=',i,', train accuracy=',acc_train,', loss=',total_loss_o)\n", " acc_test = sess.run(accuracy, feed_dict={xin: test_data, y_label: test_labels})\n", " print('test accuracy=',acc_test)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "# Model 4\n", "**Question 4.** Regularize the previous convolutional neural network classifier:\n", "\n", "$$\n", "y=\\textrm{softmax}(ReLU(x\\ast W_1+b_1)W_2+b_2)\n", "$$\n", "\n", "with the dropout technique discussed during lectures.\n", "\n", "Hint: You may use function *tf.nn.dropout()* with probability around 0.25.
\n", "\n", "Compute the train accuracy and the test accuracy (you should get a test accuracy of 45%)
\n", "Note: It is not mandatory to achieve 45% (as quality may change depending on initialization), but it is essential to implement the classifier correctly.

" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Define computational graph (CG)\n", "batch_size = 100 # batch size\n", "d = train_data.shape[1] # data dimensionality\n", "nc = 10 # number of classes\n", "\n", "# CG inputs\n", "xin = tf.placeholder(tf.float32,[batch_size,d]); #print('xin=',xin,xin.get_shape())\n", "y_label = tf.placeholder(tf.float32,[batch_size,nc]); #print('y_label=',y_label,y_label.get_shape())\n", "d = tf.placeholder(tf.float32);\n", "\n", "# Convolutional layer\n", "K = 5 # size of the patch\n", "F = 10 # number of filters\n", "x = YOUR CODE HERE\n", "\n", "# ReLU activation\n", "x = YOUR CODE HERE\n", "\n", "# Dropout\n", "x = YOUR CODE HERE\n", "\n", "# Fully Connected layer\n", "y = YOUR CODE HERE\n", "\n", "# Softmax\n", "y = YOUR CODE HERE\n", "\n", "# Loss\n", "cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_label * tf.log(y), 1))\n", "total_loss = cross_entropy\n", "\n", "# Optimization scheme\n", "train_step = tf.train.YOUR CODE HERE\n", "\n", "# Accuracy\n", "correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_label,1))\n", "accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Run Computational Graph\n", "n = train_data.shape[0]\n", "indices = collections.deque()\n", "init = tf.initialize_all_variables()\n", "sess = tf.Session()\n", "sess.run(init)\n", "for i in range(10001):\n", " \n", " # Batch extraction\n", " if len(indices) < batch_size:\n", " indices.extend(np.random.permutation(n)) \n", " idx = [indices.popleft() for i in range(batch_size)]\n", " batch_x, batch_y = train_data[idx,:], train_labels[idx]\n", " #print(batch_x.shape,batch_y.shape)\n", " \n", " # Run CG for variable training\n", " _,acc_train,total_loss_o = sess.run([train_step,accuracy,total_loss], feed_dict={xin: batch_x, y_label: batch_y, d: 0.25})\n", " 
\n", " # Run CG for test set\n", " if not i%1000:\n", " print('\\nIteration i=',i,', train accuracy=',acc_train,', loss=',total_loss_o)\n", " acc_test = sess.run(accuracy, feed_dict={xin: test_data, y_label: test_labels, d: 1.0})\n", " print('test accuracy=',acc_test)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 0 }