{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using gpu device 2: GeForce GTX TITAN X (CNMeM is enabled with initial size: 90.0% of memory, cuDNN 4007)\n" ] } ], "source": [ "from theano.sandbox import cuda\n", "cuda.use('gpu2')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using Theano backend.\n" ] } ], "source": [ "%matplotlib inline\n", "import utils; reload(utils)\n", "from utils import *\n", "from __future__ import division, print_function" ] }, { "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "## Setup" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "batch_size=64" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "((60000, 28, 28), (60000,), (10000, 28, 28), (10000,))" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from keras.datasets import mnist\n", "(X_train, y_train), (X_test, y_test) = mnist.load_data()\n", "(X_train.shape, y_train.shape, X_test.shape, y_test.shape)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "X_test = np.expand_dims(X_test,1)\n", "X_train = np.expand_dims(X_train,1)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "(60000, 1, 28, 28)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train.shape" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ 
"array([5, 0, 4, 1, 9], dtype=uint8)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_train[:5]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "y_train = onehot(y_train)\n", "y_test = onehot(y_test)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "array([[ 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],\n", " [ 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n", " [ 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],\n", " [ 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],\n", " [ 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]])" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_train[:5]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "mean_px = X_train.mean().astype(np.float32)\n", "std_px = X_train.std().astype(np.float32)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "def norm_input(x): return (x-mean_px)/std_px" ] }, { "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "## Linear model" ] }, { "cell_type": "code", "execution_count": 160, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "def get_lin_model():\n", " model = Sequential([\n", " Lambda(norm_input, input_shape=(1,28,28)),\n", " Flatten(),\n", " Dense(10, activation='softmax')\n", " ])\n", " model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])\n", " return model" ] }, { "cell_type": "code", "execution_count": 161, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "lm = get_lin_model()" ] }, { "cell_type": "code", "execution_count": 162, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], 
"source": [ "gen = image.ImageDataGenerator()\n", "batches = gen.flow(X_train, y_train, batch_size=64)\n", "test_batches = gen.flow(X_test, y_test, batch_size=64)" ] }, { "cell_type": "code", "execution_count": 164, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/1\n", "60000/60000 [==============================] - 5s - loss: 0.4175 - acc: 0.8771 - val_loss: 0.2958 - val_acc: 0.9177\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 164, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lm.fit_generator(batches, batches.N, nb_epoch=1, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] }, { "cell_type": "code", "execution_count": 167, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "lm.optimizer.lr=0.1" ] }, { "cell_type": "code", "execution_count": 169, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/1\n", "60000/60000 [==============================] - 5s - loss: 0.2770 - acc: 0.9225 - val_loss: 0.2734 - val_acc: 0.9252\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 169, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lm.fit_generator(batches, batches.N, nb_epoch=1, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] }, { "cell_type": "code", "execution_count": 172, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "lm.optimizer.lr=0.01" ] }, { "cell_type": "code", "execution_count": 173, "metadata": { "collapsed": false, "hidden": true, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/4\n", "60000/60000 [==============================] - 5s - loss: 0.2710 - acc: 0.9241 - val_loss: 0.2858 - val_acc: 0.9216\n", "Epoch 2/4\n", "60000/60000 [==============================] - 5s - loss: 0.2667 - acc: 
0.9249 - val_loss: 0.2764 - val_acc: 0.9242\n", "Epoch 3/4\n", "60000/60000 [==============================] - 4s - loss: 0.2707 - acc: 0.9249 - val_loss: 0.2759 - val_acc: 0.9219\n", "Epoch 4/4\n", "60000/60000 [==============================] - 4s - loss: 0.2603 - acc: 0.9267 - val_loss: 0.2810 - val_acc: 0.9240\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 173, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lm.fit_generator(batches, batches.N, nb_epoch=4, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] },
{ "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "## Single dense layer" ] },
{ "cell_type": "code", "execution_count": 175, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [
  "def get_fc_model():\n",
  "    \"\"\"Return a compiled classifier with one 512-unit hidden layer.\n",
  "\n",
  "    Inputs of shape (1,28,28) are standardised by `norm_input`, flattened,\n",
  "    passed through Dense(512) and classified by a Dense(10) softmax layer.\n",
  "    The model is compiled with Adam, categorical cross-entropy and an\n",
  "    accuracy metric.\n",
  "    \"\"\"\n",
  "    model = Sequential([\n",
  "        Lambda(norm_input, input_shape=(1,28,28)),\n",
  "        Flatten(),\n",
  "        # The hidden layer uses ReLU. Softmax belongs only on the output layer:\n",
  "        # applied to 512 hidden units it forces them to sum to 1, which squashes\n",
  "        # the activations and slows learning.\n",
  "        Dense(512, activation='relu'),\n",
  "        Dense(10, activation='softmax')\n",
  "        ])\n",
  "    model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])\n",
  "    return model"
] },
{ "cell_type": "code", "execution_count": 182, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "fc = get_fc_model()" ] },
{ "cell_type": "code", "execution_count": 183, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/1\n", "60000/60000 [==============================] - 5s - loss: 1.5393 - acc: 0.8851 - val_loss: 1.0240 - val_acc: 0.9176\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 183, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fc.fit_generator(batches, batches.N, nb_epoch=1, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] },
{ "cell_type": "code", "execution_count": 184, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "fc.optimizer.lr=0.1" ] },
{ "cell_type": 
"code", "execution_count": 185, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/4\n", "60000/60000 [==============================] - 5s - loss: 0.7502 - acc: 0.9241 - val_loss: 0.5514 - val_acc: 0.9290\n", "Epoch 2/4\n", "60000/60000 [==============================] - 5s - loss: 0.4507 - acc: 0.9338 - val_loss: 0.3896 - val_acc: 0.9321\n", "Epoch 3/4\n", "60000/60000 [==============================] - 5s - loss: 0.3507 - acc: 0.9357 - val_loss: 0.3417 - val_acc: 0.9306\n", "Epoch 4/4\n", "60000/60000 [==============================] - 5s - loss: 0.3069 - acc: 0.9374 - val_loss: 0.3091 - val_acc: 0.9325\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 185, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fc.fit_generator(batches, batches.N, nb_epoch=4, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] }, { "cell_type": "code", "execution_count": 187, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "fc.optimizer.lr=0.01" ] }, { "cell_type": "code", "execution_count": 189, "metadata": { "collapsed": false, "hidden": true, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/4\n", "60000/60000 [==============================] - 5s - loss: 0.2549 - acc: 0.9431 - val_loss: 0.2797 - val_acc: 0.9341\n", "Epoch 2/4\n", "60000/60000 [==============================] - 5s - loss: 0.2408 - acc: 0.9457 - val_loss: 0.2753 - val_acc: 0.9341\n", "Epoch 3/4\n", "60000/60000 [==============================] - 5s - loss: 0.2358 - acc: 0.9453 - val_loss: 0.2733 - val_acc: 0.9339\n", "Epoch 4/4\n", "60000/60000 [==============================] - 5s - loss: 0.2252 - acc: 0.9474 - val_loss: 0.2670 - val_acc: 0.9397\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 189, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fc.fit_generator(batches, batches.N, 
nb_epoch=4, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] },
{ "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "## Basic 'VGG-style' CNN" ] },
{ "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [
  "def get_model():\n",
  "    \"\"\"Return the compiled basic CNN for (1,28,28) inputs.\n",
  "\n",
  "    After `norm_input`, two groups of [Convolution2D(n,3,3), Convolution2D(n,3,3),\n",
  "    MaxPooling2D()] are stacked with n=32 and then n=64, followed by Flatten,\n",
  "    Dense(512, relu) and a Dense(10) softmax. The model is compiled with Adam,\n",
  "    categorical cross-entropy and an accuracy metric.\n",
  "    \"\"\"\n",
  "    layers = [Lambda(norm_input, input_shape=(1,28,28))]\n",
  "    for n_filters in (32, 64):\n",
  "        layers += [Convolution2D(n_filters,3,3, activation='relu'),\n",
  "                   Convolution2D(n_filters,3,3, activation='relu'),\n",
  "                   MaxPooling2D()]\n",
  "    layers += [Flatten(),\n",
  "               Dense(512, activation='relu'),\n",
  "               Dense(10, activation='softmax')]\n",
  "    cnn = Sequential(layers)\n",
  "    cnn.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])\n",
  "    return cnn"
] },
{ "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "model = get_model()" ] },
{ "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/1\n", "60000/60000 [==============================] - 6s - loss: 0.1097 - acc: 0.9664 - val_loss: 0.0396 - val_acc: 0.9863\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit_generator(batches, batches.N, nb_epoch=1, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] },
{ "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "model.optimizer.lr=0.1" ] },
{ "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/1\n", "60000/60000 [==============================] - 7s - loss: 0.0353 - acc: 0.9889 - val_loss: 
0.0291 - val_acc: 0.9902\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit_generator(batches, batches.N, nb_epoch=1, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "model.optimizer.lr=0.01" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/8\n", "60000/60000 [==============================] - 6s - loss: 0.0232 - acc: 0.9929 - val_loss: 0.0207 - val_acc: 0.9935\n", "Epoch 2/8\n", "60000/60000 [==============================] - 6s - loss: 0.0193 - acc: 0.9935 - val_loss: 0.0252 - val_acc: 0.9919\n", "Epoch 3/8\n", "60000/60000 [==============================] - 6s - loss: 0.0155 - acc: 0.9949 - val_loss: 0.0298 - val_acc: 0.9919\n", "Epoch 4/8\n", "60000/60000 [==============================] - 6s - loss: 0.0133 - acc: 0.9958 - val_loss: 0.0313 - val_acc: 0.9913\n", "Epoch 5/8\n", "60000/60000 [==============================] - 6s - loss: 0.0095 - acc: 0.9970 - val_loss: 0.0327 - val_acc: 0.9913\n", "Epoch 6/8\n", "60000/60000 [==============================] - 6s - loss: 0.0107 - acc: 0.9966 - val_loss: 0.0301 - val_acc: 0.9906\n", "Epoch 7/8\n", "60000/60000 [==============================] - 7s - loss: 0.0070 - acc: 0.9979 - val_loss: 0.0269 - val_acc: 0.9938\n", "Epoch 8/8\n", "60000/60000 [==============================] - 6s - loss: 0.0082 - acc: 0.9975 - val_loss: 0.0261 - val_acc: 0.9926\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit_generator(batches, batches.N, nb_epoch=8, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] }, { "cell_type": "markdown", "metadata": { 
"heading_collapsed": true }, "source": [ "## Data augmentation" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "model = get_model()" ] }, { "cell_type": "code", "execution_count": 76, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "gen = image.ImageDataGenerator(rotation_range=8, width_shift_range=0.08, shear_range=0.3,\n", " height_shift_range=0.08, zoom_range=0.08)\n", "batches = gen.flow(X_train, y_train, batch_size=64)\n", "test_batches = gen.flow(X_test, y_test, batch_size=64)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/1\n", "60000/60000 [==============================] - 7s - loss: 0.2064 - acc: 0.9360 - val_loss: 0.0643 - val_acc: 0.9778\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit_generator(batches, batches.N, nb_epoch=1, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "model.optimizer.lr=0.1" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/4\n", "60000/60000 [==============================] - 7s - loss: 0.0706 - acc: 0.9787 - val_loss: 0.0496 - val_acc: 0.9844\n", "Epoch 2/4\n", "60000/60000 [==============================] - 7s - loss: 0.0531 - acc: 0.9838 - val_loss: 0.0395 - val_acc: 0.9873\n", "Epoch 3/4\n", "60000/60000 [==============================] - 7s - loss: 0.0473 - acc: 0.9856 - val_loss: 0.0329 - val_acc: 0.9886\n", "Epoch 4/4\n", "60000/60000 [==============================] - 7s - loss: 0.0402 - acc: 0.9870 - val_loss: 
0.0381 - val_acc: 0.9878\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit_generator(batches, batches.N, nb_epoch=4, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "model.optimizer.lr=0.01" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/8\n", "60000/60000 [==============================] - 7s - loss: 0.0381 - acc: 0.9887 - val_loss: 0.0295 - val_acc: 0.9908\n", "Epoch 2/8\n", "60000/60000 [==============================] - 7s - loss: 0.0340 - acc: 0.9893 - val_loss: 0.0266 - val_acc: 0.9918\n", "Epoch 3/8\n", "60000/60000 [==============================] - 7s - loss: 0.0318 - acc: 0.9903 - val_loss: 0.0400 - val_acc: 0.9877\n", "Epoch 4/8\n", "60000/60000 [==============================] - 7s - loss: 0.0322 - acc: 0.9899 - val_loss: 0.0264 - val_acc: 0.9922\n", "Epoch 5/8\n", "60000/60000 [==============================] - 7s - loss: 0.0281 - acc: 0.9910 - val_loss: 0.0266 - val_acc: 0.9911\n", "Epoch 6/8\n", "60000/60000 [==============================] - 7s - loss: 0.0283 - acc: 0.9909 - val_loss: 0.0238 - val_acc: 0.9922\n", "Epoch 7/8\n", "60000/60000 [==============================] - 7s - loss: 0.0277 - acc: 0.9917 - val_loss: 0.0314 - val_acc: 0.9911\n", "Epoch 8/8\n", "60000/60000 [==============================] - 6s - loss: 0.0251 - acc: 0.9925 - val_loss: 0.0287 - val_acc: 0.9921\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit_generator(batches, batches.N, nb_epoch=8, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] }, { "cell_type": "code", "execution_count": 29, 
"metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "model.optimizer.lr=0.001" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/12\n", "60000/60000 [==============================] - 6s - loss: 0.0242 - acc: 0.9920 - val_loss: 0.0271 - val_acc: 0.9912\n", "Epoch 2/12\n", "60000/60000 [==============================] - 7s - loss: 0.0250 - acc: 0.9922 - val_loss: 0.0351 - val_acc: 0.9894\n", "Epoch 3/12\n", "60000/60000 [==============================] - 7s - loss: 0.0225 - acc: 0.9931 - val_loss: 0.0323 - val_acc: 0.9905\n", "Epoch 4/12\n", "60000/60000 [==============================] - 7s - loss: 0.0223 - acc: 0.9932 - val_loss: 0.0235 - val_acc: 0.9927\n", "Epoch 5/12\n", "60000/60000 [==============================] - 7s - loss: 0.0236 - acc: 0.9926 - val_loss: 0.0216 - val_acc: 0.9937\n", "Epoch 6/12\n", "60000/60000 [==============================] - 6s - loss: 0.0220 - acc: 0.9933 - val_loss: 0.0259 - val_acc: 0.9918\n", "Epoch 7/12\n", "60000/60000 [==============================] - 7s - loss: 0.0207 - acc: 0.9936 - val_loss: 0.0298 - val_acc: 0.9899\n", "Epoch 8/12\n", "60000/60000 [==============================] - 7s - loss: 0.0216 - acc: 0.9932 - val_loss: 0.0268 - val_acc: 0.9929\n", "Epoch 9/12\n", "60000/60000 [==============================] - 7s - loss: 0.0206 - acc: 0.9936 - val_loss: 0.0282 - val_acc: 0.9913\n", "Epoch 10/12\n", "60000/60000 [==============================] - 7s - loss: 0.0194 - acc: 0.9940 - val_loss: 0.0296 - val_acc: 0.9927\n", "Epoch 11/12\n", "60000/60000 [==============================] - 7s - loss: 0.0191 - acc: 0.9940 - val_loss: 0.0193 - val_acc: 0.9941\n", "Epoch 12/12\n", "60000/60000 [==============================] - 7s - loss: 0.0187 - acc: 0.9945 - val_loss: 0.0294 - val_acc: 0.9914\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 30, "metadata": 
{}, "output_type": "execute_result" } ], "source": [ "model.fit_generator(batches, batches.N, nb_epoch=14, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "model.optimizer.lr=0.0001" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/10\n", "60000/60000 [==============================] - 7s - loss: 0.0191 - acc: 0.9942 - val_loss: 0.0277 - val_acc: 0.9906\n", "Epoch 2/10\n", "60000/60000 [==============================] - 7s - loss: 0.0196 - acc: 0.9938 - val_loss: 0.0192 - val_acc: 0.9945\n", "Epoch 3/10\n", "60000/60000 [==============================] - 6s - loss: 0.0173 - acc: 0.9946 - val_loss: 0.0258 - val_acc: 0.9924\n", "Epoch 4/10\n", "60000/60000 [==============================] - 7s - loss: 0.0189 - acc: 0.9943 - val_loss: 0.0249 - val_acc: 0.9924\n", "Epoch 5/10\n", "60000/60000 [==============================] - 7s - loss: 0.0166 - acc: 0.9951 - val_loss: 0.0271 - val_acc: 0.9920\n", "Epoch 6/10\n", "60000/60000 [==============================] - 7s - loss: 0.0183 - acc: 0.9942 - val_loss: 0.0229 - val_acc: 0.9937\n", "Epoch 7/10\n", "60000/60000 [==============================] - 7s - loss: 0.0177 - acc: 0.9944 - val_loss: 0.0275 - val_acc: 0.9924\n", "Epoch 8/10\n", "60000/60000 [==============================] - 6s - loss: 0.0168 - acc: 0.9946 - val_loss: 0.0246 - val_acc: 0.9926\n", "Epoch 9/10\n", "60000/60000 [==============================] - 7s - loss: 0.0169 - acc: 0.9943 - val_loss: 0.0215 - val_acc: 0.9936\n", "Epoch 10/10\n", "60000/60000 [==============================] - 7s - loss: 0.0160 - acc: 0.9953 - val_loss: 0.0267 - val_acc: 0.9919\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "model.fit_generator(batches, batches.N, nb_epoch=10, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] }, { "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "## Batchnorm + data augmentation" ] }, { "cell_type": "code", "execution_count": 125, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "def get_model_bn():\n", " model = Sequential([\n", " Lambda(norm_input, input_shape=(1,28,28)),\n", " Convolution2D(32,3,3, activation='relu'),\n", " BatchNormalization(axis=1),\n", " Convolution2D(32,3,3, activation='relu'),\n", " MaxPooling2D(),\n", " BatchNormalization(axis=1),\n", " Convolution2D(64,3,3, activation='relu'),\n", " BatchNormalization(axis=1),\n", " Convolution2D(64,3,3, activation='relu'),\n", " MaxPooling2D(),\n", " Flatten(),\n", " BatchNormalization(),\n", " Dense(512, activation='relu'),\n", " BatchNormalization(),\n", " Dense(10, activation='softmax')\n", " ])\n", " model.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])\n", " return model" ] }, { "cell_type": "code", "execution_count": 126, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "model = get_model_bn()" ] }, { "cell_type": "code", "execution_count": 127, "metadata": { "collapsed": false, "hidden": true, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/1\n", "60000/60000 [==============================] - 12s - loss: 0.1273 - acc: 0.9605 - val_loss: 0.0559 - val_acc: 0.9833\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 127, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit_generator(batches, batches.N, nb_epoch=1, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] }, { "cell_type": "code", "execution_count": 128, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "model.optimizer.lr=0.1" ] }, { "cell_type": "code", 
"execution_count": 129, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/4\n", "60000/60000 [==============================] - 13s - loss: 0.0555 - acc: 0.9827 - val_loss: 0.0439 - val_acc: 0.9859\n", "Epoch 2/4\n", "60000/60000 [==============================] - 13s - loss: 0.0455 - acc: 0.9859 - val_loss: 0.0337 - val_acc: 0.9899\n", "Epoch 3/4\n", "60000/60000 [==============================] - 13s - loss: 0.0377 - acc: 0.9882 - val_loss: 0.0332 - val_acc: 0.9890\n", "Epoch 4/4\n", "60000/60000 [==============================] - 13s - loss: 0.0372 - acc: 0.9884 - val_loss: 0.0303 - val_acc: 0.9904\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 129, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit_generator(batches, batches.N, nb_epoch=4, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] }, { "cell_type": "code", "execution_count": 130, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "model.optimizer.lr=0.01" ] }, { "cell_type": "code", "execution_count": 131, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/12\n", "60000/60000 [==============================] - 13s - loss: 0.0327 - acc: 0.9900 - val_loss: 0.0312 - val_acc: 0.9911\n", "Epoch 2/12\n", "60000/60000 [==============================] - 12s - loss: 0.0290 - acc: 0.9911 - val_loss: 0.0349 - val_acc: 0.9893\n", "Epoch 3/12\n", "60000/60000 [==============================] - 13s - loss: 0.0293 - acc: 0.9912 - val_loss: 0.0452 - val_acc: 0.9853\n", "Epoch 4/12\n", "60000/60000 [==============================] - 13s - loss: 0.0266 - acc: 0.9915 - val_loss: 0.0260 - val_acc: 0.9924\n", "Epoch 5/12\n", "60000/60000 [==============================] - 12s - loss: 0.0236 - acc: 0.9924 - val_loss: 0.0234 - val_acc: 0.9927\n", "Epoch 6/12\n", "60000/60000 
[==============================] - 13s - loss: 0.0234 - acc: 0.9927 - val_loss: 0.0305 - val_acc: 0.9901\n", "Epoch 7/12\n", "60000/60000 [==============================] - 12s - loss: 0.0234 - acc: 0.9929 - val_loss: 0.0164 - val_acc: 0.9960\n", "Epoch 8/12\n", "60000/60000 [==============================] - 13s - loss: 0.0198 - acc: 0.9935 - val_loss: 0.0333 - val_acc: 0.9898\n", "Epoch 9/12\n", "60000/60000 [==============================] - 12s - loss: 0.0201 - acc: 0.9939 - val_loss: 0.0184 - val_acc: 0.9940\n", "Epoch 10/12\n", "60000/60000 [==============================] - 12s - loss: 0.0173 - acc: 0.9945 - val_loss: 0.0194 - val_acc: 0.9938\n", "Epoch 11/12\n", "60000/60000 [==============================] - 13s - loss: 0.0183 - acc: 0.9940 - val_loss: 0.0323 - val_acc: 0.9904\n", "Epoch 12/12\n", "60000/60000 [==============================] - 13s - loss: 0.0177 - acc: 0.9945 - val_loss: 0.0294 - val_acc: 0.9918\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 131, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit_generator(batches, batches.N, nb_epoch=12, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] }, { "cell_type": "code", "execution_count": 132, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "model.optimizer.lr=0.001" ] }, { "cell_type": "code", "execution_count": 133, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/12\n", "60000/60000 [==============================] - 13s - loss: 0.0166 - acc: 0.9947 - val_loss: 0.0205 - val_acc: 0.9933\n", "Epoch 2/12\n", "60000/60000 [==============================] - 13s - loss: 0.0168 - acc: 0.9950 - val_loss: 0.0194 - val_acc: 0.9942\n", "Epoch 3/12\n", "60000/60000 [==============================] - 12s - loss: 0.0151 - acc: 0.9953 - val_loss: 0.0197 - val_acc: 0.9942\n", "Epoch 4/12\n", "60000/60000 [==============================] - 
13s - loss: 0.0135 - acc: 0.9954 - val_loss: 0.0179 - val_acc: 0.9938\n", "Epoch 5/12\n", "60000/60000 [==============================] - 12s - loss: 0.0143 - acc: 0.9953 - val_loss: 0.0257 - val_acc: 0.9925\n", "Epoch 6/12\n", "60000/60000 [==============================] - 12s - loss: 0.0139 - acc: 0.9954 - val_loss: 0.0150 - val_acc: 0.9949\n", "Epoch 7/12\n", "60000/60000 [==============================] - 13s - loss: 0.0127 - acc: 0.9958 - val_loss: 0.0218 - val_acc: 0.9932\n", "Epoch 8/12\n", "60000/60000 [==============================] - 13s - loss: 0.0121 - acc: 0.9962 - val_loss: 0.0264 - val_acc: 0.9917\n", "Epoch 9/12\n", "60000/60000 [==============================] - 13s - loss: 0.0120 - acc: 0.9960 - val_loss: 0.0209 - val_acc: 0.9935\n", "Epoch 10/12\n", "60000/60000 [==============================] - 13s - loss: 0.0130 - acc: 0.9957 - val_loss: 0.0171 - val_acc: 0.9948\n", "Epoch 11/12\n", "60000/60000 [==============================] - 13s - loss: 0.0132 - acc: 0.9958 - val_loss: 0.0227 - val_acc: 0.9932\n", "Epoch 12/12\n", "60000/60000 [==============================] - 12s - loss: 0.0115 - acc: 0.9964 - val_loss: 0.0172 - val_acc: 0.9945\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 133, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit_generator(batches, batches.N, nb_epoch=12, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] },
{ "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "## Batchnorm + dropout + data augmentation" ] },
{ "cell_type": "code", "execution_count": 79, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [
  "def get_model_bn_do():\n",
  "    \"\"\"Return the compiled batch-normalised CNN with dropout before the output.\n",
  "\n",
  "    The convolutional part interleaves Convolution2D, MaxPooling2D and\n",
  "    BatchNormalization(axis=1). The classifier part is Flatten,\n",
  "    BatchNormalization, Dense(512, relu), BatchNormalization, Dropout(0.5) and\n",
  "    a Dense(10) softmax. The model is compiled with Adam, categorical\n",
  "    cross-entropy and an accuracy metric.\n",
  "    \"\"\"\n",
  "    conv_part = [\n",
  "        Lambda(norm_input, input_shape=(1,28,28)),\n",
  "        Convolution2D(32,3,3, activation='relu'),\n",
  "        BatchNormalization(axis=1),\n",
  "        Convolution2D(32,3,3, activation='relu'),\n",
  "        MaxPooling2D(),\n",
  "        BatchNormalization(axis=1),\n",
  "        Convolution2D(64,3,3, activation='relu'),\n",
  "        BatchNormalization(axis=1),\n",
  "        Convolution2D(64,3,3, activation='relu'),\n",
  "        MaxPooling2D(),\n",
  "    ]\n",
  "    classifier_part = [\n",
  "        Flatten(),\n",
  "        BatchNormalization(),\n",
  "        Dense(512, activation='relu'),\n",
  "        BatchNormalization(),\n",
  "        Dropout(0.5),\n",
  "        Dense(10, activation='softmax'),\n",
  "    ]\n",
  "    net = Sequential(conv_part + classifier_part)\n",
  "    net.compile(Adam(), loss='categorical_crossentropy', metrics=['accuracy'])\n",
  "    return net"
] },
{ "cell_type": "code", "execution_count": 80, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "model = get_model_bn_do()" ] },
{ "cell_type": "code", "execution_count": 81, "metadata": { "collapsed": false, "hidden": true, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/1\n", "60000/60000 [==============================] - 13s - loss: 0.1894 - acc: 0.9419 - val_loss: 0.0605 - val_acc: 0.9815\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 81, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit_generator(batches, batches.N, nb_epoch=1, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] },
{ "cell_type": "code", "execution_count": 82, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "model.optimizer.lr=0.1" ] },
{ "cell_type": "code", "execution_count": 83, "metadata": { "collapsed": false, "hidden": true, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/4\n", "60000/60000 [==============================] - 13s - loss: 0.0766 - acc: 0.9764 - val_loss: 0.0394 - val_acc: 0.9871\n", "Epoch 2/4\n", "60000/60000 [==============================] - 13s - loss: 0.0622 - acc: 0.9806 - val_loss: 0.0360 - val_acc: 0.9885\n", "Epoch 3/4\n", "60000/60000 [==============================] - 13s - loss: 0.0576 - acc: 0.9830 - val_loss: 0.0364 - val_acc: 0.9882\n", "Epoch 4/4\n", "60000/60000 [==============================] - 14s - loss: 0.0512 - 
acc: 0.9842 - val_loss: 0.0347 - val_acc: 0.9911\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit_generator(batches, batches.N, nb_epoch=4, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] }, { "cell_type": "code", "execution_count": 84, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "model.optimizer.lr=0.01" ] }, { "cell_type": "code", "execution_count": 85, "metadata": { "collapsed": false, "hidden": true, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/12\n", "60000/60000 [==============================] - 14s - loss: 0.0464 - acc: 0.9862 - val_loss: 0.0300 - val_acc: 0.9904\n", "Epoch 2/12\n", "60000/60000 [==============================] - 13s - loss: 0.0474 - acc: 0.9856 - val_loss: 0.0287 - val_acc: 0.9912\n", "Epoch 3/12\n", "60000/60000 [==============================] - 13s - loss: 0.0400 - acc: 0.9880 - val_loss: 0.0408 - val_acc: 0.9879\n", "Epoch 4/12\n", "60000/60000 [==============================] - 14s - loss: 0.0379 - acc: 0.9884 - val_loss: 0.0255 - val_acc: 0.9918\n", "Epoch 5/12\n", "60000/60000 [==============================] - 13s - loss: 0.0394 - acc: 0.9881 - val_loss: 0.0247 - val_acc: 0.9923\n", "Epoch 6/12\n", "60000/60000 [==============================] - 14s - loss: 0.0344 - acc: 0.9893 - val_loss: 0.0267 - val_acc: 0.9921\n", "Epoch 7/12\n", "60000/60000 [==============================] - 14s - loss: 0.0342 - acc: 0.9895 - val_loss: 0.0208 - val_acc: 0.9938\n", "Epoch 8/12\n", "60000/60000 [==============================] - 14s - loss: 0.0291 - acc: 0.9908 - val_loss: 0.0251 - val_acc: 0.9914\n", "Epoch 9/12\n", "60000/60000 [==============================] - 14s - loss: 0.0309 - acc: 0.9907 - val_loss: 0.0253 - val_acc: 0.9919\n", "Epoch 10/12\n", "60000/60000 [==============================] - 14s - loss: 0.0299 - acc: 0.9906 - val_loss: 
0.0205 - val_acc: 0.9934\n", "Epoch 11/12\n", "60000/60000 [==============================] - 14s - loss: 0.0276 - acc: 0.9912 - val_loss: 0.0200 - val_acc: 0.9940\n", "Epoch 12/12\n", "60000/60000 [==============================] - 13s - loss: 0.0268 - acc: 0.9918 - val_loss: 0.0201 - val_acc: 0.9929\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit_generator(batches, batches.N, nb_epoch=12, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] },
{ "cell_type": "code", "execution_count": 86, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [
"# NOTE(review): rebinding the attribute likely does not reach the training function already\n",
"# compiled by the earlier fit calls -- TODO confirm; fit_model() below uses\n",
"# keras.backend.set_value instead.\n",
"model.optimizer.lr=0.001" ] },
{ "cell_type": "code", "execution_count": 89, "metadata": { "collapsed": false, "hidden": true, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/1\n", "60000/60000 [==============================] - 13s - loss: 0.0186 - acc: 0.9942 - val_loss: 0.0193 - val_acc: 0.9945\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.fit_generator(batches, batches.N, nb_epoch=1, \n", " validation_data=test_batches, nb_val_samples=test_batches.N)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "## Ensembling" ] },
{ "cell_type": "code", "execution_count": 90, "metadata": { "collapsed": true }, "outputs": [], "source": [
"def fit_model(schedule=((None, 1), (0.1, 4), (0.01, 12), (0.001, 18))):\n",
"    \"\"\"Build a model with get_model_bn_do() and train it through a stepped learning-rate schedule.\n",
"\n",
"    schedule: sequence of (learning_rate, nb_epoch) stages, run in order. A learning_rate\n",
"        of None leaves the optimizer's current rate untouched for that stage.\n",
"    Every stage trains on `batches` and validates on `test_batches` with verbose=0.\n",
"    Returns the trained model.\n",
"    \"\"\"\n",
"    model = get_model_bn_do()\n",
"    for lr, nb_epoch in schedule:\n",
"        if lr is not None:\n",
"            # Write the new rate into the optimizer's existing backend variable. Rebinding\n",
"            # `model.optimizer.lr = lr` only replaces the Python attribute: the training\n",
"            # function compiled during the first fit keeps reading the original variable,\n",
"            # so the schedule would silently never take effect.\n",
"            keras.backend.set_value(model.optimizer.lr, lr)\n",
"        model.fit_generator(batches, batches.N, nb_epoch=nb_epoch, verbose=0,\n",
"                            validation_data=test_batches,\n",
"                            nb_val_samples=test_batches.N)\n",
"    return model" ] },
{ "cell_type": "code", "execution_count": 91, "metadata": { "collapsed": true }, "outputs": [], "source": [
"# Train six models for the ensemble, one per fit_model() call.\n",
"models = [fit_model() for i in range(6)]" ] },
{ "cell_type": "code", "execution_count": 92, "metadata": { "collapsed": true }, "outputs": [], "source": [ "path = \"data/mnist/\"\n", "model_path = path + 'models/'" ] },
{ "cell_type": "code", "execution_count": 93, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# Save each model's weights under model_path, numbered by position in `models`.\n",
"for i,m in enumerate(models):\n", " m.save_weights(model_path+'cnn-mnist23-'+str(i)+'.pkl')" ] },
{ "cell_type": "code", "execution_count": 94, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 9984/10000 [============================>.] - ETA: 0s" ] } ], "source": [
"# Evaluate every model on the test arrays; one row of results per model.\n",
"evals = np.array([m.evaluate(X_test, y_test, batch_size=256) for m in models])" ] },
{ "cell_type": "code", "execution_count": 95, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "array([ 0.016, 0.995])" ] }, "execution_count": 95, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Average the evaluation results across the models.\n",
"evals.mean(axis=0)" ] },
{ "cell_type": "code", "execution_count": 96, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# Stack each model's test-set predictions along a new leading axis.\n",
"all_preds = np.stack([m.predict(X_test, batch_size=256) for m in models])" ] },
{ "cell_type": "code", "execution_count": 97, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(6, 10000, 10)" ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_preds.shape" ] },
{ "cell_type": "code", "execution_count": 98, "metadata": { "collapsed": false }, "outputs": [], "source": [
"# Ensemble by averaging the predictions over the model axis.\n",
"avg_preds = all_preds.mean(axis=0)" ] },
{ "cell_type": "code", "execution_count": 99, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "array(0.9969000220298767, dtype=float32)" ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Accuracy of the averaged predictions against y_test.\n",
"keras.metrics.categorical_accuracy(y_test, avg_preds).eval()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ],
"metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" }, "nav_menu": {}, "toc": { "navigate_menu": true, "number_sections": true, "sideBar": true, "threshold": 6, "toc_cell": false, "toc_section_display": "block", "toc_window_display": false }, "widgets": { "state": {}, "version": "1.1.2" } }, "nbformat": 4, "nbformat_minor": 0 }