{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# MNIST-Overfit-Dropout" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# coding: utf-8\n", "import sys, os\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import math\n", "\n", "sys.path.append(os.pardir)\n", "from deeplink.mnist import *\n", "from deeplink.networks import *" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Multilayer Neural Network Model (Two Hidden Layers) and Learing/Validation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Multi Layer Model Class" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "class MultiLayerNetExtended(MultiLayerNet):\n", " def __init__(self, input_size, hidden_size_list, output_size, activation='ReLU', initializer='N2', \n", " optimizer='AdaGrad', learning_rate=0.01, \n", " use_batch_normalization=False, \n", " use_weight_decay=False, weight_decay_lambda=0.0\n", " use_dropout=False, dropout_rate_list):\n", " self.input_size = input_size\n", " self.output_size = output_size\n", " self.hidden_size_list = hidden_size_list\n", " self.hidden_layer_num = len(hidden_size_list)\n", " \n", " self.use_batch_normalization = use_batch_normalization\n", "\n", " self.use_weight_decay = use_weight_decay\n", " self.weight_decay_lambda = weight_decay_lambda\n", " \n", " # Weight Initialization\n", " self.params = {}\n", " self.weight_initialization(initializer)\n", " \n", " # Layering\n", " self.layers = OrderedDict()\n", " self.last_layer = None\n", " self.layering(activation)\n", "\n", " # Optimization Method\n", " self.optimizer = optimizers[optimizer](lr=learning_rate)\n", " \n", " def weight_initialization(self, initializer):\n", " params_size_list = [self.input_size] + self.hidden_size_list + [self.output_size]\n", " initializer_obj = initializers[initializer](self.params, \n", " params_size_list, \n", " self.use_batch_normalization)\n", " initializer_obj.initialize_params();\n", " \n", " def layering(self, activation):\n", " for idx in range(1, self.hidden_layer_num + 1):\n", " self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])\n", " if self.use_batch_normalization:\n", " self.layers['Batch_Normalization' + str(idx)] = BatchNormalization(self.params['gamma' + str(idx)], \n", " self.params['beta' + str(idx)])\n", " self.layers['Activation' + str(idx)] = activation_layers[activation]()\n", "\n", " idx = self.hidden_layer_num + 1\n", " self.layers['Affine' + str(idx)] = Affine(self.params['W' + str(idx)], self.params['b' + str(idx)])\n", "\n", " self.last_layer = SoftmaxWithCrossEntropyLoss() \n", "\n", " def predict(self, x, is_train=False):\n", " for key, layer in self.layers.items():\n", " if \"BatchNorm\" in key:\n", " x = layer.forward(x, is_train)\n", " else:\n", " x = layer.forward(x)\n", " return x\n", "\n", " def loss(self, x, t, is_train=False)\n", " y = self.predict(x, is_train)\n", "\n", " if self.use_weight_decay:\n", " weight_decay = 0.0\n", " for idx in range(1, self.hidden_layer_num + 2):\n", " W = self.params['W' + str(idx)]\n", " weight_decay += 0.5 * self.weight_decay_lambda * np.sum(W**2)\n", " return self.last_layer.forward(y, t) + weight_decay\n", " else:\n", " return self.last_layer.forward(y, t)\n", "\n", " def accuracy(self, x, t):\n", " y = self.predict(x, is_train=False)\n", " y = np.argmax(y, axis=1)\n", " if t.ndim != 1 : t = 
  "\n",
  "        accuracy = np.sum(y == t) / float(x.shape[0])\n",
  "        return accuracy\n",
  "\n",
  "    def backpropagation_gradient(self, x, t):\n",
  "        # forward\n",
  "        self.loss(x, t, is_train=True)\n",
  "\n",
  "        # backward\n",
  "        din = 1\n",
  "        din = self.last_layer.backward(din)\n",
  "\n",
  "        layers = list(self.layers.values())\n",
  "        layers.reverse()\n",
  "        for layer in layers:\n",
  "            din = layer.backward(din)\n",
  "\n",
  "        grads = {}\n",
  "        for idx in range(1, self.hidden_layer_num + 2):\n",
  "            if self.use_weight_decay:\n",
  "                grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW + self.weight_decay_lambda * self.params['W' + str(idx)]\n",
  "            else:\n",
  "                grads['W' + str(idx)] = self.layers['Affine' + str(idx)].dW\n",
  "            grads['b' + str(idx)] = self.layers['Affine' + str(idx)].db\n",
  "\n",
  "            if self.use_batch_normalization and idx <= self.hidden_layer_num:\n",
  "                grads['gamma' + str(idx)] = self.layers['Batch_Normalization' + str(idx)].dgamma\n",
  "                grads['beta' + str(idx)] = self.layers['Batch_Normalization' + str(idx)].dbeta\n",
  "\n",
  "        return grads\n",
  "\n",
  "    def learning(self, x_batch, t_batch):\n",
  "        grads = self.backpropagation_gradient(x_batch, t_batch)\n",
  "        self.optimizer.update(self.params, grads)"
 ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "### Training and Evaluation" ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [
  "data = mnist_data(\"/Users/yhhan/git/aiclass/0.Professor/data/MNIST_data/.\")\n",
  "(img_train, label_train), (img_validation, label_validation), (img_test, label_test) = data.load_mnist(flatten=True, normalize=True, one_hot_label=True)\n",
  "\n",
  "# Drastically shrink the training set to induce overfitting\n",
  "img_train = img_train[:200]\n",
  "label_train = label_train[:200]\n",
  "\n",
  "# Use a deep network with many parameters to induce overfitting\n",
  "input_size = 784\n",
  "hidden_layer1_size = 128\n",
  "hidden_layer2_size = 128\n",
  "hidden_layer3_size = 128\n",
  "hidden_layer4_size = 128\n",
  "hidden_layer5_size = 128\n",
  "hidden_layer6_size = 128\n",
  "output_size = 10\n",
  "\n",
  "num_epochs = 200\n",
  "train_size = img_train.shape[0]\n",
  "batch_size = 100\n",
  "learning_rate = 0.1\n",
  "\n",
  "markers = {\"N2, AdaGrad, No_Batch_Norm, lambda=0.0\": \"x\", \"N2, AdaGrad, No_Batch_Norm, lambda=0.1\": \"o\"}\n",
  "\n",
  "networks = {}\n",
  "train_errors = {}\n",
  "validation_errors = {}\n",
  "test_accuracy_values = {}\n",
  "max_test_accuracy_epoch = {}\n",
  "max_test_accuracy_value = {}\n",
  "\n",
  "for key in markers.keys():\n",
  "    if key == \"N2, AdaGrad, No_Batch_Norm, lambda=0.0\":\n",
  "        networks[key] = MultiLayerNetExtended(input_size,\n",
  "                                              [hidden_layer1_size, hidden_layer2_size, hidden_layer3_size,\n",
  "                                               hidden_layer4_size, hidden_layer5_size, hidden_layer6_size],\n",
  "                                              output_size,\n",
  "                                              activation='ReLU',\n",
  "                                              initializer='N2',\n",
  "                                              optimizer='AdaGrad', learning_rate=learning_rate,\n",
  "                                              use_batch_normalization=False,\n",
  "                                              use_weight_decay=False, weight_decay_lambda=0.0)\n",
  "    elif key == \"N2, AdaGrad, No_Batch_Norm, lambda=0.1\":\n",
  "        networks[key] = MultiLayerNetExtended(input_size,\n",
  "                                              [hidden_layer1_size, hidden_layer2_size, hidden_layer3_size,\n",
  "                                               hidden_layer4_size, hidden_layer5_size, hidden_layer6_size],\n",
  "                                              output_size,\n",
  "                                              activation='ReLU',\n",
  "                                              initializer='N2',\n",
  "                                              optimizer='AdaGrad', learning_rate=learning_rate,\n",
  "                                              use_batch_normalization=False,\n",
  "                                              use_weight_decay=True, weight_decay_lambda=0.1)\n",
  "\n",
  "    train_errors[key] = []\n",
  "    validation_errors[key] = []\n",
  "    test_accuracy_values[key] = []\n",
  "    max_test_accuracy_epoch[key] = 0\n",
  "    max_test_accuracy_value[key] = 0.0"
 ] },
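 { "cell_type": "markdown", "metadata": {}, "source": [
  "### Dropout Layer (Sketch)\n",
  "\n",
  "`MultiLayerNetExtended` accepts `use_dropout` and `dropout_rate_list`, but the comparison above never enables them. The next cell is a minimal, hypothetical sketch of a `Dropout` layer with the same `forward(x, is_train)`/`backward(dout)` interface as `BatchNormalization`; the class and the commented-out constructor call are illustrative assumptions, not something guaranteed to be provided by `deeplink.networks`."
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "# Minimal Dropout layer sketch (assumption: deeplink.networks does not already provide one).\n",
  "# During training each unit is dropped with probability dropout_rate; at inference time the\n",
  "# activations are scaled by (1 - dropout_rate) so their expected magnitude stays the same.\n",
  "class Dropout:\n",
  "    def __init__(self, dropout_rate=0.5):\n",
  "        self.dropout_rate = dropout_rate\n",
  "        self.mask = None\n",
  "\n",
  "    def forward(self, x, is_train=True):\n",
  "        if is_train:\n",
  "            self.mask = np.random.rand(*x.shape) > self.dropout_rate\n",
  "            return x * self.mask\n",
  "        else:\n",
  "            return x * (1.0 - self.dropout_rate)\n",
  "\n",
  "    def backward(self, dout):\n",
  "        # Gradients flow only through the units kept in the forward pass\n",
  "        return dout * self.mask\n",
  "\n",
  "# Hypothetical usage (not run here): one dropout layer after each hidden layer.\n",
  "# dropout_network = MultiLayerNetExtended(input_size,\n",
  "#                                         [hidden_layer1_size, hidden_layer2_size, hidden_layer3_size,\n",
  "#                                          hidden_layer4_size, hidden_layer5_size, hidden_layer6_size],\n",
  "#                                         output_size,\n",
  "#                                         activation='ReLU', initializer='N2',\n",
  "#                                         optimizer='AdaGrad', learning_rate=learning_rate,\n",
  "#                                         use_dropout=True, dropout_rate_list=[0.5] * 6)"
 ] },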
] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, " ] } ], "source": [ "epoch_list = []\n", "\n", "num_batch = math.ceil(train_size / batch_size)\n", "\n", "for i in range(num_epochs):\n", " epoch_list.append(i)\n", " for key in markers.keys():\n", " for k in range(num_batch):\n", " x_batch = img_train[k * batch_size : k * batch_size + batch_size]\n", " t_batch = label_train[k * batch_size : k * batch_size + batch_size]\n", " networks[key].learning(x_batch, t_batch)\n", "\n", " train_loss = networks[key].loss(x_batch, t_batch, is_train=True)\n", " train_errors[key].append(train_loss)\n", "\n", " validation_loss = networks[key].loss(img_validation, label_validation, is_train=False)\n", " validation_errors[key].append(validation_loss) \n", "\n", " test_accuracy = networks[key].accuracy(img_test, label_test)\n", " test_accuracy_values[key].append(test_accuracy)\n", " if test_accuracy > max_test_accuracy_value[key]:\n", " max_test_accuracy_epoch[key] = i \n", " max_test_accuracy_value[key] = test_accuracy\n", "# print(\"{0:26s}-Epoch:{1:3d}, Train Err.:{2:7.5f}, Validation Err.:{3:7.5f}, Test Accuracy:{4:7.5f}, Max Test Accuracy:{5:7.5f}\".format(\n", "# key,\n", "# i,\n", "# train_loss,\n", "# validation_loss,\n", "# test_accuracy,\n", "# max_test_accuracy_value[key]\n", "# ))\n", " print(i, end=\", \") " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "f, axarr = plt.subplots(2, 2, figsize=(20, 12))\n", "for key in markers.keys():\n", " axarr[0, 0].plot(epoch_list[1:], train_errors[key][1:], marker=markers[key], markevery=2, label=key)\n", "axarr[0, 0].set_ylabel('Train - Total Error')\n", "axarr[0, 0].set_xlabel('Epochs')\n", "axarr[0, 0].grid(True)\n", "axarr[0, 0].set_title('Train Error')\n", "axarr[0, 0].legend(loc='upper right')\n", "\n", "for key in markers.keys():\n", " axarr[0, 1].plot(epoch_list[1:], validation_errors[key][1:], marker=markers[key], markevery=2, label=key)\n", "axarr[0, 1].set_ylabel('Validation - Total Error')\n", "axarr[0, 1].set_xlabel('Epochs')\n", "axarr[0, 1].grid(True)\n", "axarr[0, 1].set_title('Validation Error')\n", "axarr[0, 1].legend(loc='upper right')\n", "\n", "for key in markers.keys():\n", " axarr[1, 0].plot(epoch_list[1:], train_errors[key][1:], marker=markers[key], markevery=2, label=key)\n", "axarr[1, 0].set_ylabel('Train - Total Error')\n", "axarr[1, 0].set_xlabel('Epochs')\n", "axarr[1, 0].grid(True)\n", "axarr[1, 0].set_ylim(2.25, 2.4)\n", "axarr[1, 0].set_title('Train Error (2.25 ~ 2.4)')\n", "axarr[1, 0].legend(loc='upper right')\n", "\n", "for key in markers.keys():\n", " axarr[1, 1].plot(epoch_list[1:], validation_errors[key][1:], marker=markers[key], markevery=2, label=key)\n", "axarr[1, 1].set_ylabel('Validation - Total Error')\n", "axarr[1, 1].set_xlabel('Epochs')\n", "axarr[1, 1].grid(True)\n", "axarr[1, 1].set_ylim(2.25, 2.4)\n", "axarr[1, 1].set_title('Validation Error (2.25 ~ 2.4)')\n", "axarr[1, 1].legend(loc='upper right')\n", "\n", 
"f.subplots_adjust(hspace=0.3)\n", "\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "f, axarr = plt.subplots(2, 1, figsize=(15,10))\n", "for key in markers.keys():\n", " axarr[0].plot(epoch_list[1:], test_accuracy_values[key][1:], marker=markers[key], markevery=1, label=key)\n", "axarr[0].set_ylabel('Test Accuracy')\n", "axarr[0].set_xlabel('Epochs')\n", "axarr[0].grid(True)\n", "axarr[0].set_title('Test Accuracy')\n", "axarr[0].legend(loc='lower right')\n", "\n", "for key in markers.keys():\n", " axarr[1].plot(epoch_list[1:], test_accuracy_values[key][1:], marker=markers[key], markevery=1, label=key)\n", "axarr[1].set_ylabel('Test Accuracy')\n", "axarr[1].set_xlabel('Epochs')\n", "axarr[1].grid(True)\n", "axarr[1].set_ylim(0.94, 0.99)\n", "axarr[1].set_title('Test Accuracy (0.7 ~ 1.0)')\n", "axarr[1].legend(loc='lower right')\n", "\n", "f.subplots_adjust(hspace=0.3)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for key in markers.keys():\n", " print(\"{0:26s} - Epoch:{1:3d}, Max Test Accuracy: {2:7.5f}\".format(key, max_test_accuracy_epoch[key], max_test_accuracy_value[key]))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } }, "nbformat": 4, "nbformat_minor": 0 }