{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Programming Exercise 4 - Neural Networks Learning"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# %load ../../../standard_import.txt\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# load MATLAB files\n",
"from scipy.io import loadmat\n",
"\n",
"pd.set_option('display.notebook_repr_html', False)\n",
"pd.set_option('display.max_columns', None)\n",
"pd.set_option('display.max_rows', 150)\n",
"pd.set_option('display.max_seq_items', None)\n",
" \n",
"#%config InlineBackend.figure_formats = {'pdf',}\n",
"%matplotlib inline\n",
"\n",
"import seaborn as sns\n",
"sns.set_context('notebook')\n",
"sns.set_style('darkgrid')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Load MATLAB datafiles"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['X', '__header__', 'y', '__globals__', '__version__'])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the exercise dataset from the MATLAB .mat file into a dict-like object\n",
"data = loadmat('data/ex4data1.mat')\n",
"# List the stored entries: the 'X' and 'y' arrays plus MATLAB metadata keys\n",
"data.keys()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"X: (5000, 401) (with intercept)\n",
"y: (5000, 1)\n"
]
}
],
"source": [
"# Labels exactly as stored in the MATLAB file\n",
"y = data['y']\n",
"\n",
"# Prepend a column of ones so the first feature acts as the intercept term\n",
"raw_features = data['X']\n",
"intercept = np.ones((raw_features.shape[0], 1))\n",
"X = np.c_[intercept, raw_features]\n",
"\n",
"print('X:', X.shape, '(with intercept)')\n",
"print('y:', y.shape)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"dict_keys(['Theta2', '__header__', 'Theta1', '__globals__', '__version__'])"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the stored network weights (entries 'Theta1' and 'Theta2') from a MATLAB .mat file\n",
"# NOTE(review): the file is named ex3weights although this is exercise 4 - confirm it is the intended file\n",
"weights = loadmat('data/ex3weights.mat')\n",
"weights.keys()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"theta1 : (25, 401)\n",
"theta2 : (10, 26)\n",
"params : (10285,)\n"
]
}
],
"source": [
"theta1 = weights['Theta1']\n",
"theta2 = weights['Theta2']\n",
"print('theta1 :', theta1.shape)\n",
"print('theta2 :', theta2.shape)\n",
"\n",
"# Unroll both weight matrices (row by row) into one flat parameter vector\n",
"params = np.concatenate([theta1.ravel(), theta2.ravel()])\n",
"print('params :', params.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Neural Network\n",
"Input layer size = 400 (20x20 pixels)<br>\n",
"Hidden layer size = 25<br>\n",
"Number of labels = 10"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Neural Networks - Feed Forward and Cost Function"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def sigmoid(z):\n",
"    \"\"\"Logistic function 1 / (1 + e^(-z)), evaluated element-wise through np.exp.\"\"\"\n",
"    denominator = 1 + np.exp(-z)\n",
"    return 1 / denominator"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Sigmoid gradient\n",
"#### $$ g'(z) = g(z)(1 - g(z))$$\n",
"where $$ g(z) = \\frac{1}{1+e^{-z}}$$"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def sigmoidGradient(z):\n",
"    \"\"\"Derivative of the sigmoid: g'(z) = g(z) * (1 - g(z)).\"\"\"\n",
"    g = sigmoid(z)\n",
"    return g * (1 - g)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Cost Function \n",
"#### $$ J(\\theta) = \\frac{1}{m}\\sum_{i=1}^{m}\\sum_{k=1}^{K}\\big[-y^{(i)}_{k}\\, \\log\\,(( h_\\theta\\,(x^{(i)}))_k)-(1-y^{(i)}_k)\\,\\log\\,(1-(h_\\theta(x^{(i)}))_k)\\big]$$\n",
"\n",
"#### Regularized Cost Function\n",
"#### $$ J(\\theta) = \\frac{1}{m}\\sum_{i=1}^{m}\\sum_{k=1}^{K}\\bigg[-y^{(i)}_{k}\\, \\log\\,(( h_\\theta\\,(x^{(i)}))_k)-(1-y^{(i)}_k)\\,\\log\\,(1-(h_\\theta(x^{(i)}))_k)\\bigg] + \\frac{\\lambda}{2m}\\bigg[\\sum_{j=1}^{25}\\sum_{k=1}^{400}(\\Theta_{j,k}^{(1)})^2+\\sum_{j=1}^{10}\\sum_{k=1}^{25}(\\Theta_{j,k}^{(2)})^2\\bigg]$$"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels, features, classes, reg):\n",
"    \"\"\"Regularized cost and gradients of a neural network with one hidden layer.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    nn_params : 1-D array holding theta1 followed by theta2, each unrolled row by row.\n",
"    input_layer_size : number of input units, not counting the bias unit.\n",
"    hidden_layer_size : number of hidden units, not counting the bias unit.\n",
"    num_labels : number of output units.\n",
"    features : array of shape (m, input_layer_size + 1); the caller supplies the\n",
"        intercept column of ones as the first column.\n",
"    classes : array with one class label per example. It is one-hot encoded with\n",
"        pd.get_dummies, so the number of distinct labels present must equal num_labels.\n",
"    reg : regularization strength (lambda).\n",
"\n",
"    Returns\n",
"    -------\n",
"    (J, theta1_grad, theta2_grad) : the scalar cost and the gradients, which have the\n",
"        same shapes as theta1 and theta2.\n",
"    \"\"\"\n",
"    # Position in nn_params where theta1 ends and theta2 begins.\n",
"    theta1_size = hidden_layer_size * (input_layer_size + 1)\n",
"    theta1 = nn_params[:theta1_size].reshape(hidden_layer_size, input_layer_size + 1)\n",
"    theta2 = nn_params[theta1_size:].reshape(num_labels, hidden_layer_size + 1)\n",
"\n",
"    m = features.shape[0]\n",
"    # One-hot encode the labels. `.values` replaces DataFrame.as_matrix(), which was\n",
"    # removed in pandas 1.0; the float cast keeps the arithmetic below independent of\n",
"    # the dummy dtype (uint8 or bool, depending on the pandas version).\n",
"    y_matrix = pd.get_dummies(classes.ravel()).values.astype(float)\n",
"\n",
"    # Feed forward\n",
"    a1 = features                                  # m x (input+1)\n",
"    z2 = theta1.dot(a1.T)                          # hidden x m\n",
"    a2 = np.c_[np.ones((m, 1)), sigmoid(z2.T)]     # m x (hidden+1)\n",
"    z3 = theta2.dot(a2.T)                          # labels x m\n",
"    a3 = sigmoid(z3)                               # labels x m\n",
"\n",
"    # Cross-entropy cost plus an L2 penalty that skips the bias column of each matrix.\n",
"    cross_entropy = -1*(1/m)*np.sum(np.log(a3.T)*y_matrix + np.log(1 - a3).T*(1 - y_matrix))\n",
"    penalty = (reg/(2*m))*(np.sum(np.square(theta1[:,1:])) + np.sum(np.square(theta2[:,1:])))\n",
"    J = cross_entropy + penalty\n",
"\n",
"    # Backpropagation\n",
"    d3 = a3.T - y_matrix                                   # m x labels\n",
"    d2 = theta2[:,1:].T.dot(d3.T)*sigmoidGradient(z2)      # hidden x m\n",
"\n",
"    delta1 = d2.dot(a1)                                    # hidden x (input+1)\n",
"    delta2 = d3.T.dot(a2)                                  # labels x (hidden+1)\n",
"\n",
"    # Bias weights are not regularized, so their column is zeroed in the penalty\n",
"    # gradient. (Padding with ones here would add reg/m to every bias gradient and\n",
"    # make the gradient inconsistent with the cost J above.)\n",
"    theta1_ = np.c_[np.zeros((theta1.shape[0], 1)), theta1[:,1:]]\n",
"    theta2_ = np.c_[np.zeros((theta2.shape[0], 1)), theta2[:,1:]]\n",
"\n",
"    theta1_grad = delta1/m + (theta1_*reg)/m\n",
"    theta2_grad = delta2/m + (theta2_*reg)/m\n",
"\n",
"    return(J, theta1_grad, theta2_grad)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.28762916516131892"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Regularization parameter = 0: unregularized cost at the weights loaded from file.\n",
"# Index [0] picks J out of the returned (J, theta1_grad, theta2_grad) tuple.\n",
"nnCostFunction(params, 400, 25, 10, X, y, 0)[0]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0.38376985909092365"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Regularization parameter = 1: cost including the L2 penalty on the non-bias weights.\n",
"# Index [0] picks J out of the returned (J, theta1_grad, theta2_grad) tuple.\n",
"nnCostFunction(params, 400, 25, 10, X, y, 1)[0]"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[0.19661193324148185,\n",
" 0.23500371220159449,\n",
" 0.25,\n",
" 0.23500371220159449,\n",
" 0.19661193324148185]"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Spot-check the sigmoid gradient at a few points; it is largest (0.25) at z = 0 and symmetric around it\n",
"[sigmoidGradient(z) for z in [-1, -0.5, 0, 0.5, 1]]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.4.3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}