{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Programming Exercise 4 - Neural Networks Learning" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# %load ../../../standard_import.txt\n", "import pandas as pd\n", "import numpy as np\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "\n", "# load MATLAB files\n", "from scipy.io import loadmat\n", "\n", "pd.set_option('display.notebook_repr_html', False)\n", "pd.set_option('display.max_columns', None)\n", "pd.set_option('display.max_rows', 150)\n", "pd.set_option('display.max_seq_items', None)\n", " \n", "#%config InlineBackend.figure_formats = {'pdf',}\n", "%matplotlib inline\n", "\n", "import seaborn as sns\n", "sns.set_context('notebook')\n", "sns.set_style('darkgrid')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Load MATLAB datafiles" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "dict_keys(['X', '__header__', 'y', '__globals__', '__version__'])" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = loadmat('data/ex4data1.mat')\n", "data.keys()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "X: (5000, 401) (with intercept)\n", "y: (5000, 1)\n" ] } ], "source": [ "y = data['y']\n", "# Add intercept\n", "X = np.c_[np.ones((data['X'].shape[0],1)), data['X']]\n", "\n", "print('X:',X.shape, '(with intercept)')\n", "print('y:',y.shape)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "dict_keys(['Theta2', '__header__', 'Theta1', '__globals__', '__version__'])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "weights = loadmat('data/ex3weights.mat')\n", "weights.keys()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "theta1 : (25, 401)\n", "theta2 : (10, 26)\n", "params : (10285,)\n" ] } ], "source": [ "theta1, theta2 = weights['Theta1'], weights['Theta2']\n", "print('theta1 :', theta1.shape)\n", "print('theta2 :', theta2.shape)\n", "params = np.r_[theta1.ravel(), theta2.ravel()]\n", "print('params :', params.shape)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Neural Network\n", "Input layer size = 400 (20x20 pixels)
\n", "Hidden layer size = 25
\n", "Number of labels = 10" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Neural Networks - Feed Forward and Cost Function" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def sigmoid(z):\n", " return(1 / (1 + np.exp(-z)))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Sigmoid gradient\n", "#### $$ g'(z) = g(z)(1 - g(z))$$\n", "where $$ g(z) = \\frac{1}{1+e^{-z}}$$" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def sigmoidGradient(z):\n", " return(sigmoid(z)*(1-sigmoid(z)))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Cost Function \n", "#### $$ J(\\theta) = \\frac{1}{m}\\sum_{i=1}^{m}\\sum_{k=1}^{K}\\big[-y^{(i)}_{k}\\, log\\,(( h_\\theta\\,(x^{(i)}))_k)-(1-y^{(i)}_k)\\,log\\,(1-h_\\theta(x^{(i)}))_k)\\big]$$\n", "\n", "#### Regularized Cost Function\n", "#### $$ J(\\theta) = \\frac{1}{m}\\sum_{i=1}^{m}\\sum_{k=1}^{K}\\bigg[-y^{(i)}_{k}\\, log\\,(( h_\\theta\\,(x^{(i)}))_k)-(1-y^{(i)}_k)\\,log\\,(1-h_\\theta(x^{(i)}))_k)\\bigg] + \\frac{\\lambda}{2m}\\bigg[\\sum_{j=1}^{25}\\sum_{k=1}^{400}(\\Theta_{j,k}^{(1)})^2+\\sum_{j=1}^{10}\\sum_{k=1}^{25}(\\Theta_{j,k}^{(2)})^2\\bigg]$$" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, features, classes, reg):\n", " \n", " # When comparing to Octave code note that Python uses zero-indexed arrays.\n", " # But because Numpy indexing does not include the right side, the code is the same anyway.\n", " theta1 = nn_params[0:(hidden_layer_size*(input_layer_size+1))].reshape(hidden_layer_size,(input_layer_size+1))\n", " theta2 = nn_params[(hidden_layer_size*(input_layer_size+1)):].reshape(num_labels,(hidden_layer_size+1))\n", "\n", " m = features.shape[0]\n", " y_matrix = pd.get_dummies(classes.ravel()).as_matrix() \n", " \n", " # Cost\n", " a1 = features # 5000x401\n", " \n", " z2 = theta1.dot(a1.T) # 25x401 * 401x5000 = 25x5000 \n", " a2 = np.c_[np.ones((features.shape[0],1)),sigmoid(z2.T)] # 5000x26 \n", " \n", " z3 = theta2.dot(a2.T) # 10x26 * 26x5000 = 10x5000 \n", " a3 = sigmoid(z3) # 10x5000\n", " \n", " J = -1*(1/m)*np.sum((np.log(a3.T)*(y_matrix)+np.log(1-a3).T*(1-y_matrix))) + \\\n", " (reg/(2*m))*(np.sum(np.square(theta1[:,1:])) + np.sum(np.square(theta2[:,1:])))\n", "\n", " # Gradients\n", " d3 = a3.T - y_matrix # 5000x10\n", " d2 = theta2[:,1:].T.dot(d3.T)*sigmoidGradient(z2) # 25x10 *10x5000 * 25x5000 = 25x5000\n", " \n", " delta1 = d2.dot(a1) # 25x5000 * 5000x401 = 25x401\n", " delta2 = d3.T.dot(a2) # 10x5000 *5000x26 = 10x26\n", " \n", " theta1_ = np.c_[np.ones((theta1.shape[0],1)),theta1[:,1:]]\n", " theta2_ = np.c_[np.ones((theta2.shape[0],1)),theta2[:,1:]]\n", " \n", " theta1_grad = delta1/m + (theta1_*reg)/m\n", " theta2_grad = delta2/m + (theta2_*reg)/m\n", " \n", " return(J, theta1_grad, theta2_grad)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.28762916516131892" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Regularization parameter = 0\n", "nnCostFunction(params, 400, 25, 10, X, y, 0)[0]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.38376985909092365" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Regularization parameter = 1\n", "nnCostFunction(params, 400, 25, 10, X, y, 1)[0]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[0.19661193324148185,\n", " 0.23500371220159449,\n", " 0.25,\n", " 0.23500371220159449,\n", " 0.19661193324148185]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[sigmoidGradient(z) for z in [-1, -0.5, 0, 0.5, 1]]" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.4.3" } }, "nbformat": 4, "nbformat_minor": 0 }