{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# A Network Tour of Data Science\n",
"### Xavier Bresson, Winter 2016/17\n",
"## Assignment 2 : Convolutional Neural Networks"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Import libraries\n",
"import numpy as np\n",
"import tensorflow as tf\n",
"import time\n",
"import collections\n",
"import os\n",
"\n",
"import matplotlib.pyplot as plt\n",
"# This is a bit of magic to make matplotlib figures appear inline in the notebook\n",
"# rather than in a new window.\n",
"%matplotlib inline\n",
"plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots\n",
"plt.rcParams['image.interpolation'] = 'nearest'\n",
"plt.rcParams['image.cmap'] = 'gray'"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Load small part of CIFAR dataset\n",
"[X_train, y_train, X_test, y_test] = np.load(os.path.join('datasets', 'cifar.npy'))\n",
"\n",
"# Convert to float\n",
"train_data_orig = X_train.astype('float32')\n",
"y_train = y_train.astype('float32')\n",
"test_data_orig = X_test.astype('float32')\n",
"y_test = y_test.astype('float32')\n",
"\n",
"# See shapes of matrices\n",
"print('Training data shape: ', train_data_orig.shape)\n",
"print('Training label shape: ', y_train.shape)\n",
"print('Test data shape: ', test_data_orig.shape)\n",
"print('Test label shape: ', y_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Visualize a few examples of training images from each class\n",
"classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']\n",
"num_classes = len(classes)\n",
"samples_per_class = 7\n",
"for y, cls in enumerate(classes):\n",
" idxs = np.flatnonzero(y_train == y)\n",
" idxs = np.random.choice(idxs, samples_per_class, replace=False)\n",
" for i, idx in enumerate(idxs):\n",
" plt_idx = i * num_classes + y + 1\n",
" plt.subplot(samples_per_class, num_classes, plt_idx)\n",
" xx = train_data_orig[idx,:,:,:]\n",
" xx -= np.min(xx)\n",
" xx /= np.max(xx)\n",
" plt.imshow(xx)\n",
" plt.axis('off')\n",
" if i == 0:\n",
" plt.title(cls)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Data pre-processing\n",
"n = train_data_orig.shape[0]\n",
"train_data = np.zeros([n,32**2])\n",
"for i in range(n):\n",
" xx = train_data_orig[i,:,:,:]\n",
" xx = np.linalg.norm(xx,axis=2)\n",
" xx -= np.mean(xx)\n",
" xx /= np.linalg.norm(xx)\n",
" train_data[i] = np.reshape(xx,[-1])\n",
"\n",
"n = test_data_orig.shape[0]\n",
"test_data = np.zeros([n,32**2])\n",
"for i in range(n):\n",
" xx = test_data_orig[i,:,:,:]\n",
" xx = np.linalg.norm(xx,axis=2)\n",
" xx -= np.mean(xx)\n",
" xx /= np.linalg.norm(xx)\n",
" test_data[i] = np.reshape(xx,[-1])\n",
"\n",
"print(train_data.shape)\n",
"print(test_data.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Convert label values to one_hot vector\n",
"from scipy.sparse import coo_matrix\n",
"def convert_to_one_hot(a,max_val=None):\n",
" N = a.size\n",
" data = np.ones(N,dtype=int)\n",
" sparse_out = coo_matrix((data,(np.arange(N),a.ravel())), shape=(N,max_val))\n",
" return np.array(sparse_out.todense())\n",
"\n",
"train_labels = convert_to_one_hot(y_train,10)\n",
"test_labels = convert_to_one_hot(y_test,10)\n",
"\n",
"print(train_labels.shape)\n",
"print(test_labels.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Model 1\n",
"**Question 1.** Define with TensorFlow a linear classifier model:\n",
"\n",
"$$\n",
"y=\\textrm{softmax}(xW+b)\n",
"$$\n",
"\n",
"Compute the train accuracy and the test accuracy (you should get a test accuracy around 25% at iteration 10,000).<br>\n",
"Hints:<br>\n",
"(1) You may use the functions *tf.matmul(), tf.nn.softmax()*<br>\n",
"(2) You may use Xavier's initialization discussed during lectures for W, and b=0<br>\n",
"(3) You may use the optimization schemes *tf.train.GradientDescentOptimizer(), tf.train.AdamOptimizer()*"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Define computational graph (CG)\n",
"batch_size = 100 # batch size\n",
"d = train_data.shape[1] # data dimensionality\n",
"nc = 10 # number of classes\n",
"\n",
"# CG inputs\n",
"xin = tf.placeholder(tf.float32,[batch_size,d]); #print('xin=',xin,xin.get_shape())\n",
"y_label = tf.placeholder(tf.float32,[batch_size,nc]); #print('y_label=',y_label,y_label.get_shape())\n",
"\n",
"# Fully Connected layer\n",
"y = YOUR CODE HERE\n",
"\n",
"# Softmax\n",
"y = YOUR CODE HERE\n",
"\n",
"# Loss\n",
"cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_label * tf.log(y), 1))\n",
"total_loss = cross_entropy\n",
"\n",
"# Optimization scheme\n",
"train_step = tf.train.YOUR CODE HERE\n",
"\n",
"# Accuracy\n",
"correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_label,1))\n",
"accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Run Computational Graph\n",
"n = train_data.shape[0]\n",
"indices = collections.deque()\n",
"init = tf.initialize_all_variables()\n",
"sess = tf.Session()\n",
"sess.run(init)\n",
"for i in range(10001):\n",
" \n",
" # Batch extraction\n",
" if len(indices) < batch_size:\n",
" indices.extend(np.random.permutation(n)) \n",
" idx = [indices.popleft() for i in range(batch_size)]\n",
" batch_x, batch_y = train_data[idx,:], train_labels[idx]\n",
" #print(batch_x.shape,batch_y.shape)\n",
" \n",
" # Run CG for variable training\n",
" _,acc_train,total_loss_o = sess.run([train_step,accuracy,total_loss], feed_dict={xin: batch_x, y_label: batch_y})\n",
" \n",
" # Run CG for test set\n",
" if not i%1000:\n",
" print('\\nIteration i=',i,', train accuracy=',acc_train,', loss=',total_loss_o)\n",
" acc_test = sess.run(accuracy, feed_dict={xin: test_data, y_label: test_labels})\n",
" print('test accuracy=',acc_test)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Model 2\n",
"**Question 2a.** Define with TensorFlow a 2-layer neural network classifier:\n",
"\n",
"$$\n",
"y=\\textrm{softmax}(ReLU(xW_1+b_1)W_2+b_2)\n",
"$$\n",
"\n",
"Compute the train accuracy and the test accuracy (you should be able to overfit the train set).<br>\n",
"Hint: You may use the function *tf.nn.relu()*<br>\n",
"\n",
"**Question 2b.** Add an L2 regularization term to prevent overfitting. Compute the train accuracy and the test accuracy (you should get a test accuracy around 35%).<br>\n",
"Hints:<br>\n",
"(1) You may use the function *tf.nn.l2_loss()*<br>\n",
"(2) Do not forget the constant parameter `reg_par`: `total_loss = cross_entropy + reg_par * reg_loss`"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Define computational graph (CG)\n",
"batch_size = 100 # batch size\n",
"d = train_data.shape[1] # data dimensionality\n",
"nc = 10 # number of classes\n",
"\n",
"# CG inputs\n",
"xin = tf.placeholder(tf.float32,[batch_size,d]); #print('xin=',xin,xin.get_shape())\n",
"y_label = tf.placeholder(tf.float32,[batch_size,nc]); #print('y_label=',y_label,y_label.get_shape())\n",
"\n",
"# 1st Fully Connected layer\n",
"y = YOUR CODE HERE\n",
"\n",
"# ReLU activation\n",
"y = YOUR CODE HERE\n",
"\n",
"# 2nd Fully Connected layer\n",
"y = YOUR CODE HERE\n",
"\n",
"# Softmax\n",
"y = YOUR CODE HERE\n",
"\n",
"# Loss\n",
"cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_label * tf.log(y), 1))\n",
"\n",
"# L2 Regularization\n",
"reg_loss = YOUR CODE HERE\n",
"reg_par = YOUR CODE HERE\n",
"total_loss = YOUR CODE HERE\n",
"\n",
"# Optimization scheme\n",
"train_step = tf.train.YOUR CODE HERE\n",
"\n",
"# Accuracy\n",
"correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_label,1))\n",
"accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [],
"source": [
"# Run Computational Graph\n",
"n = train_data.shape[0]\n",
"indices = collections.deque()\n",
"init = tf.initialize_all_variables()\n",
"sess = tf.Session()\n",
"sess.run(init)\n",
"for i in range(10001):\n",
" \n",
" # Batch extraction\n",
" if len(indices) < batch_size:\n",
" indices.extend(np.random.permutation(n)) \n",
" idx = [indices.popleft() for i in range(batch_size)]\n",
" batch_x, batch_y = train_data[idx,:], train_labels[idx]\n",
" #print(batch_x.shape,batch_y.shape)\n",
" \n",
" # Run CG for variable training\n",
" _,acc_train,total_loss_o = sess.run([train_step,accuracy,total_loss], feed_dict={xin: batch_x, y_label: batch_y})\n",
" \n",
" # Run CG for test set\n",
" if not i%1000:\n",
" print('\\nIteration i=',i,', train accuracy=',acc_train,', loss=',total_loss_o)\n",
" acc_test = sess.run(accuracy, feed_dict={xin: test_data, y_label: test_labels})\n",
" print('test accuracy=',acc_test)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Model 3\n",
"**Question 3.** Define a convolutional neural network classifier:\n",
"\n",
"$$\n",
"y=\\textrm{softmax}(ReLU(x\\ast W_1+b_1)W_2+b_2)\n",
"$$\n",
"\n",
"Hint: You may use the function `tf.nn.conv2d(x_2d, Wcl, strides=[1, 1, 1, 1], padding='SAME')`<br>\n",
"with `Wcl = tf.Variable(tf.truncated_normal([K,K,1,F], stddev=YOUR CODE HERE ))`\n",
"for the convolution operator $\\ast$<br>\n",
"and `x_2d = tf.reshape(xin, [-1,32,32,1])`<br>\n",
"\n",
"Compute the train accuracy and the test accuracy (you should be able to overfit the train set)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Define computational graph (CG)\n",
"batch_size = 100 # batch size\n",
"d = train_data.shape[1] # data dimensionality\n",
"nc = 10 # number of classes\n",
"\n",
"# CG inputs\n",
"xin = tf.placeholder(tf.float32,[batch_size,d]); #print('xin=',xin,xin.get_shape())\n",
"y_label = tf.placeholder(tf.float32,[batch_size,nc]); #print('y_label=',y_label,y_label.get_shape())\n",
"\n",
"\n",
"# Convolutional layer\n",
"K = 5 # size of the patch\n",
"F = 10 # number of filters\n",
"x = YOUR CODE HERE\n",
"\n",
"# ReLU activation\n",
"x = YOUR CODE HERE\n",
"\n",
"# Fully Connected layer\n",
"nfc = 32*32*F\n",
"y = YOUR CODE HERE\n",
"\n",
"# Softmax\n",
"y = YOUR CODE HERE\n",
"\n",
"# Loss\n",
"cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_label * tf.log(y), 1))\n",
"total_loss = cross_entropy\n",
"\n",
"# Optimization scheme\n",
"train_step = tf.train.YOUR CODE HERE\n",
"\n",
"# Accuracy\n",
"correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_label,1))\n",
"accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"scrolled": false
},
"outputs": [],
"source": [
"# Run Computational Graph\n",
"n = train_data.shape[0]\n",
"indices = collections.deque()\n",
"init = tf.initialize_all_variables()\n",
"sess = tf.Session()\n",
"sess.run(init)\n",
"for i in range(10001):\n",
" \n",
" # Batch extraction\n",
" if len(indices) < batch_size:\n",
" indices.extend(np.random.permutation(n)) \n",
" idx = [indices.popleft() for i in range(batch_size)]\n",
" batch_x, batch_y = train_data[idx,:], train_labels[idx]\n",
" #print(batch_x.shape,batch_y.shape)\n",
" \n",
" # Run CG for variable training\n",
" _,acc_train,total_loss_o = sess.run([train_step,accuracy,total_loss], feed_dict={xin: batch_x, y_label: batch_y})\n",
" \n",
" # Run CG for test set\n",
" if not i%1000:\n",
" print('\\nIteration i=',i,', train accuracy=',acc_train,', loss=',total_loss_o)\n",
" acc_test = sess.run(accuracy, feed_dict={xin: test_data, y_label: test_labels})\n",
" print('test accuracy=',acc_test)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"# Model 4\n",
"**Question 4.** Regularize the previous convolutional neural network classifier:\n",
"\n",
"$$\n",
"y=\\textrm{softmax}(ReLU(x\\ast W_1+b_1)W_2+b_2)\n",
"$$\n",
"\n",
"with the dropout technique discussed during lectures.\n",
"\n",
"Hint: You may use the function *tf.nn.dropout()* with a keep probability around 0.25 during training (and 1.0 at test time).<br>\n",
"\n",
"Compute the train accuracy and the test accuracy (you should get a test accuracy of around 45%).<br>\n",
"Note: It is not mandatory to reach this accuracy (the result may change depending on the initialization), but it is essential to implement the classifier correctly."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Define computational graph (CG)\n",
"batch_size = 100 # batch size\n",
"d = train_data.shape[1] # data dimensionality\n",
"nc = 10 # number of classes\n",
"\n",
"# CG inputs\n",
"xin = tf.placeholder(tf.float32,[batch_size,d]); #print('xin=',xin,xin.get_shape())\n",
"y_label = tf.placeholder(tf.float32,[batch_size,nc]); #print('y_label=',y_label,y_label.get_shape())\n",
"d = tf.placeholder(tf.float32);\n",
"\n",
"# Convolutional layer\n",
"K = 5 # size of the patch\n",
"F = 10 # number of filters\n",
"x = YOUR CODE HERE\n",
"\n",
"# ReLU activation\n",
"x = YOUR CODE HERE\n",
"\n",
"# Dropout\n",
"x = YOUR CODE HERE\n",
"\n",
"# Fully Connected layer\n",
"y = YOUR CODE HERE\n",
"\n",
"# Softmax\n",
"y = YOUR CODE HERE\n",
"\n",
"# Loss\n",
"cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_label * tf.log(y), 1))\n",
"total_loss = cross_entropy\n",
"\n",
"# Optimization scheme\n",
"train_step = tf.train.YOUR CODE HERE\n",
"\n",
"# Accuracy\n",
"correct_prediction = tf.equal(tf.argmax(y,1), tf.argmax(y_label,1))\n",
"accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Run Computational Graph\n",
"n = train_data.shape[0]\n",
"indices = collections.deque()\n",
"init = tf.initialize_all_variables()\n",
"sess = tf.Session()\n",
"sess.run(init)\n",
"for i in range(10001):\n",
" \n",
" # Batch extraction\n",
" if len(indices) < batch_size:\n",
" indices.extend(np.random.permutation(n)) \n",
" idx = [indices.popleft() for i in range(batch_size)]\n",
" batch_x, batch_y = train_data[idx,:], train_labels[idx]\n",
" #print(batch_x.shape,batch_y.shape)\n",
" \n",
" # Run CG for variable training\n",
" _,acc_train,total_loss_o = sess.run([train_step,accuracy,total_loss], feed_dict={xin: batch_x, y_label: batch_y, d: 0.25})\n",
" \n",
" # Run CG for test set\n",
" if not i%1000:\n",
" print('\\nIteration i=',i,', train accuracy=',acc_train,', loss=',total_loss_o)\n",
" acc_test = sess.run(accuracy, feed_dict={xin: test_data, y_label: test_labels, d: 1.0})\n",
" print('test accuracy=',acc_test)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}