{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Programming Exercise 4 - Neural Networks Learning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# %load ../../../standard_import.txt\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# load MATLAB files\n",
    "from scipy.io import loadmat\n",
    "\n",
    "pd.set_option('display.notebook_repr_html', False)\n",
    "pd.set_option('display.max_columns', None)\n",
    "pd.set_option('display.max_rows', 150)\n",
    "pd.set_option('display.max_seq_items', None)\n",
    " \n",
    "#%config InlineBackend.figure_formats = {'pdf',}\n",
    "%matplotlib inline\n",
    "\n",
    "import seaborn as sns\n",
    "sns.set_context('notebook')\n",
    "sns.set_style('darkgrid')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Load MATLAB datafiles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['X', '__header__', 'y', '__globals__', '__version__'])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = loadmat('data/ex4data1.mat')\n",
    "data.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "X: (5000, 401) (with intercept)\n",
      "y: (5000, 1)\n"
     ]
    }
   ],
   "source": [
    "y = data['y']\n",
    "# Add intercept\n",
    "X = np.c_[np.ones((data['X'].shape[0],1)), data['X']]\n",
    "\n",
    "print('X:',X.shape, '(with intercept)')\n",
    "print('y:',y.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['Theta2', '__header__', 'Theta1', '__globals__', '__version__'])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "weights = loadmat('data/ex3weights.mat')\n",
    "weights.keys()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "theta1 : (25, 401)\n",
      "theta2 : (10, 26)\n",
      "params : (10285,)\n"
     ]
    }
   ],
   "source": [
    "theta1, theta2 = weights['Theta1'], weights['Theta2']\n",
    "print('theta1 :', theta1.shape)\n",
    "print('theta2 :', theta2.shape)\n",
    "params = np.r_[theta1.ravel(), theta2.ravel()]\n",
    "print('params :', params.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Neural Network\n",
    "Input layer size = 400 (20x20 pixels) <br>\n",
    "Hidden layer size = 25 <br>\n",
    "Number of labels = 10"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Neural Networks - Feed Forward and Cost Function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def sigmoid(z):\n",
    "    return(1 / (1 + np.exp(-z)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Sigmoid gradient\n",
    "#### $$ g'(z) = g(z)(1 - g(z))$$\n",
    "where $$ g(z) = \\frac{1}{1+e^{-z}}$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def sigmoidGradient(z):\n",
    "    return(sigmoid(z)*(1-sigmoid(z)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Cost Function \n",
    "#### $$ J(\\theta) = \\frac{1}{m}\\sum_{i=1}^{m}\\sum_{k=1}^{K}\\big[-y^{(i)}_{k}\\, log\\,(( h_\\theta\\,(x^{(i)}))_k)-(1-y^{(i)}_k)\\,log\\,(1-h_\\theta(x^{(i)}))_k)\\big]$$\n",
    "\n",
    "#### Regularized Cost Function\n",
    "#### $$ J(\\theta) = \\frac{1}{m}\\sum_{i=1}^{m}\\sum_{k=1}^{K}\\bigg[-y^{(i)}_{k}\\, log\\,(( h_\\theta\\,(x^{(i)}))_k)-(1-y^{(i)}_k)\\,log\\,(1-h_\\theta(x^{(i)}))_k)\\bigg] + \\frac{\\lambda}{2m}\\bigg[\\sum_{j=1}^{25}\\sum_{k=1}^{400}(\\Theta_{j,k}^{(1)})^2+\\sum_{j=1}^{10}\\sum_{k=1}^{25}(\\Theta_{j,k}^{(2)})^2\\bigg]$$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, features, classes, reg):\n",
    "    \n",
    "    # When comparing to Octave code note that Python uses zero-indexed arrays.\n",
    "    # But because Numpy indexing does not include the right side, the code is the same anyway.\n",
    "    theta1 = nn_params[0:(hidden_layer_size*(input_layer_size+1))].reshape(hidden_layer_size,(input_layer_size+1))\n",
    "    theta2 = nn_params[(hidden_layer_size*(input_layer_size+1)):].reshape(num_labels,(hidden_layer_size+1))\n",
    "\n",
    "    m = features.shape[0]\n",
    "    y_matrix = pd.get_dummies(classes.ravel()).as_matrix() \n",
    "    \n",
    "    # Cost\n",
    "    a1 = features # 5000x401\n",
    "        \n",
    "    z2 = theta1.dot(a1.T) # 25x401 * 401x5000 = 25x5000 \n",
    "    a2 = np.c_[np.ones((features.shape[0],1)),sigmoid(z2.T)] # 5000x26 \n",
    "    \n",
    "    z3 = theta2.dot(a2.T) # 10x26 * 26x5000 = 10x5000 \n",
    "    a3 = sigmoid(z3) # 10x5000\n",
    "    \n",
    "    J = -1*(1/m)*np.sum((np.log(a3.T)*(y_matrix)+np.log(1-a3).T*(1-y_matrix))) + \\\n",
    "        (reg/(2*m))*(np.sum(np.square(theta1[:,1:])) + np.sum(np.square(theta2[:,1:])))\n",
    "\n",
    "    # Gradients\n",
    "    d3 = a3.T - y_matrix # 5000x10\n",
    "    d2 = theta2[:,1:].T.dot(d3.T)*sigmoidGradient(z2) # 25x10 *10x5000 * 25x5000 = 25x5000\n",
    "    \n",
    "    delta1 = d2.dot(a1) # 25x5000 * 5000x401 = 25x401\n",
    "    delta2 = d3.T.dot(a2) # 10x5000 *5000x26 = 10x26\n",
    "    \n",
    "    theta1_ = np.c_[np.ones((theta1.shape[0],1)),theta1[:,1:]]\n",
    "    theta2_ = np.c_[np.ones((theta2.shape[0],1)),theta2[:,1:]]\n",
    "    \n",
    "    theta1_grad = delta1/m + (theta1_*reg)/m\n",
    "    theta2_grad = delta2/m + (theta2_*reg)/m\n",
    "    \n",
    "    return(J, theta1_grad, theta2_grad)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.28762916516131892"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Regularization parameter = 0\n",
    "nnCostFunction(params, 400, 25, 10, X, y, 0)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.38376985909092365"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Regularization parameter = 1\n",
    "nnCostFunction(params, 400, 25, 10, X, y, 1)[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0.19661193324148185,\n",
       " 0.23500371220159449,\n",
       " 0.25,\n",
       " 0.23500371220159449,\n",
       " 0.19661193324148185]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "[sigmoidGradient(z) for z in [-1, -0.5, 0, 0.5, 1]]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}