{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Backpropagation Neural Network\n", "\n", "[Question](http://stats.stackexchange.com/questions/184483/backpropagation-neural-network-training)\n", "\n" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "untrained\n", "weights\n", "[array([[ 1., 1.],\n", " [ 1., 1.],\n", " [ 1., 1.],\n", " [ 1., 1.],\n", " [ 1., 1.]]), array([[ 1., 1., 1.],\n", " [ 1., 1., 1.]]), array([[ 1., 1., 1., 1.],\n", " [ 1., 1., 1., 1.],\n", " [ 1., 1., 1., 1.]]), array([[ 1., 1., 1.],\n", " [ 1., 1., 1.],\n", " [ 1., 1., 1.],\n", " [ 1., 1., 1.]]), array([[ 1., 1., 1.],\n", " [ 1., 1., 1.],\n", " [ 1., 1., 1.],\n", " [ 1., 1., 1.]])]\n" ] }, { "ename": "ValueError", "evalue": "shapes (3,) and (5,2) not aligned: 3 (dim 0) != 5 (dim 0)", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 120\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"untrained\\nweights\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 121\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbpn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mweights\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 122\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"return\\n\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mbpn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput_data_set\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 123\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 124\u001b[0m \u001b[0mbpn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput_data_set\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0moutput_data_set\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1000\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m\u001b[0m in \u001b[0;36mpredict\u001b[1;34m(self, inputs)\u001b[0m\n\u001b[0;32m 103\u001b[0m \u001b[0minputs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconcatenate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mones\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mT\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 104\u001b[0m \u001b[0mactivations\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__feed_forward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 105\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactivations\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 106\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 107\u001b[0m \u001b[1;31m# ---------------------------------------------------------------------------\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;32m\u001b[0m in \u001b[0;36m__feed_forward\u001b[1;34m(self, inputs)\u001b[0m\n\u001b[0;32m 70\u001b[0m \u001b[1;32myield\u001b[0m \u001b[0mactivation\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 71\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mweight\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mweights\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 72\u001b[1;33m \u001b[0mactivation\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mheaviside\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactivation\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mweight\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 73\u001b[0m \u001b[1;32myield\u001b[0m \u001b[0mactivation\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 74\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mValueError\u001b[0m: shapes (3,) and (5,2) not aligned: 3 (dim 0) != 5 (dim 0)" ] } ], "source": [ "#!/usr/bin/env python\n", "\n", "\"\"\"\n", " This module consits of a Back Propogation Network.\n", "\n", " Author : Goutam - TAGA Labs\n", " Date : 2015-11-27 18:54:33\n", "\"\"\"\n", "\n", "import numpy as np # For matrix and other linear algebra calcuations \n", "from scipy.special import expit\n", "import sys\n", "\n", "def expit_prime(x):\n", " return expit(x)*(1-expit(x))\n", "def heaviside(x):\n", " return np.array([0 if xx < 0 else 1 for xx in x])\n", "\n", "class BackpropagationNetwork(object):\n", " \"\"\" \n", " A back Propogation Neural Network.\n", " \"\"\"\n", "\n", " def __init__(self,shape):\n", " \"\"\" \n", " Constructs the network of given ``shape``\n", "\n", " It initializes the weights of the network\n", " \"\"\"\n", "\n", " self.shape = shape \n", " self.weights = list()\n", "\n", "\n", " for i in range(1,len(shape)-1):\n", " # Weights of input & hidden layer\n", " weight = np.ones((shape[i-1] + 1,shape[i] + 1))\n", " #weight = np.random.random((shape[i-1] + 1,shape[i] + 1)) # Initializing weights with bias unit\n", " self.weights.append(weight)\n", " else:\n", " # Weights of output layer\n", " weight = np.ones(((shape[i-1] + 1,shape[i] + 1)))\n", " #weight = np.random.random((shape[i] + 1,shape[i+1])) # No bias unit for output layer\n", " self.weights.append(weight)\n", "\n", " def train(self,input_data_set,output_data_set,epochs=10000):\n", " \"\"\"\n", " Trains the network with given training data set\n", "\n", "\n", " Arguements:\n", " input_data_set -- input training data set\n", " output_data_set -- output training data set\n", " \"\"\"\n", " ones = np.atleast_2d(np.ones(input_data_set.shape[0]))\n", " input_data_set = np.concatenate((ones.T, input_data_set), axis=1)\n", " for i in range(epochs):\n", "\n", " for inputs, outputs in zip(input_data_set, output_data_set):\n", " activations = list(self.__feed_forward(inputs))\n", " deltas = self.__backpropagate(activations,outputs)\n", " self.__update_weights(deltas,activations)\n", "\n", " self.activations = activations\n", " self.deltas = deltas\n", "\n", " def __feed_forward(self,inputs):\n", " # Uses feed forward mechanism to calculate activations\n", " activation = inputs\n", " yield activation\n", " for weight in self.weights:\n", " activation = heaviside(activation.dot(weight))\n", " yield activation\n", "\n", " def __backpropagate(self,activations,outputs):\n", " # Uses back propagation algorithm to calculate deltas\n", "\n", " error = outputs - activations[-1] # Error of the last layer\n", " delta = error * expit_prime(activations[-1]) # Error delta \n", "\n", " deltas = [delta] # List to store generated deltas\n", "\n", " for (weight,activation) in zip(reversed(self.weights[1:]),reversed(activations[1:-1])):\n", " delta = delta.dot(weight.T) * expit_prime(activation)\n", " deltas.append(delta)\n", "\n", " deltas.reverse()\n", " return deltas\n", "\n", " def __update_weights(self,deltas,activations):\n", " # Uses gradient descent to update the weights of the network\n", " for (weight,delta,activation) in zip(self.weights,deltas,activations[:-1]):\n", " activation = np.atleast_2d(activation)\n", " delta = np.atleast_2d(delta)\n", " weight += activation.T.dot(delta)\n", "\n", " def predict(self,inputs):\n", " \"\"\"\n", " Predicts output for given ``inputs``\n", "\n", " Returns list of outputs \n", " \"\"\"\n", " inputs = np.concatenate((np.ones(1).T, np.array(inputs)))\n", " activations = self.__feed_forward(inputs)\n", " return list(activations)\n", "\n", "# ---------------------------------------------------------------------------\n", "\n", "np.random.seed(0)\n", "\n", "bpn = BackpropagationNetwork((4,1,2,3,2,1))\n", "\n", "input_data_set = np.array((\n", " (0,0),\n", " (0,1),\n", " (1,0),\n", " (1,1)\n", " ))\n", "output_data_set = np.array((0,1,1,0))\n", "print(\"untrained\\nweights\")\n", "print(bpn.weights)\n", "print(\"return\\n\",bpn.predict(input_data_set[1]))\n", "\n", "bpn.train(input_data_set,output_data_set, epochs=1000)\n", "\n", "print(\"trained\\nweights\\n\",bpn.weights)\n", "\n", "print(bpn.predict(input_data_set[1]))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.0" } }, "nbformat": 4, "nbformat_minor": 0 }