{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Backpropagation Neural Network\n",
    "\n",
    "[Question](http://stats.stackexchange.com/questions/184483/backpropagation-neural-network-training)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "untrained\n",
      "weights\n",
      "[array([[ 1.,  1.],\n",
      "       [ 1.,  1.],\n",
      "       [ 1.,  1.],\n",
      "       [ 1.,  1.],\n",
      "       [ 1.,  1.]]), array([[ 1.,  1.,  1.],\n",
      "       [ 1.,  1.,  1.]]), array([[ 1.,  1.,  1.,  1.],\n",
      "       [ 1.,  1.,  1.,  1.],\n",
      "       [ 1.,  1.,  1.,  1.]]), array([[ 1.,  1.,  1.],\n",
      "       [ 1.,  1.,  1.],\n",
      "       [ 1.,  1.,  1.],\n",
      "       [ 1.,  1.,  1.]]), array([[ 1.,  1.,  1.],\n",
      "       [ 1.,  1.,  1.],\n",
      "       [ 1.,  1.,  1.],\n",
      "       [ 1.,  1.,  1.]])]\n"
     ]
    },
    {
     "ename": "ValueError",
     "evalue": "shapes (3,) and (5,2) not aligned: 3 (dim 0) != 5 (dim 0)",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-33-7ab0a6ac84f9>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m    120\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"untrained\\nweights\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    121\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mbpn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mweights\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 122\u001b[1;33m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"return\\n\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mbpn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput_data_set\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    123\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    124\u001b[0m \u001b[0mbpn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minput_data_set\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0moutput_data_set\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1000\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-33-7ab0a6ac84f9>\u001b[0m in \u001b[0;36mpredict\u001b[1;34m(self, inputs)\u001b[0m\n\u001b[0;32m    103\u001b[0m         \u001b[0minputs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconcatenate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mones\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mT\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    104\u001b[0m         \u001b[0mactivations\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m__feed_forward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 105\u001b[1;33m         \u001b[1;32mreturn\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactivations\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    106\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    107\u001b[0m \u001b[1;31m# ---------------------------------------------------------------------------\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-33-7ab0a6ac84f9>\u001b[0m in \u001b[0;36m__feed_forward\u001b[1;34m(self, inputs)\u001b[0m\n\u001b[0;32m     70\u001b[0m         \u001b[1;32myield\u001b[0m \u001b[0mactivation\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     71\u001b[0m         \u001b[1;32mfor\u001b[0m \u001b[0mweight\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mweights\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 72\u001b[1;33m             \u001b[0mactivation\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mheaviside\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mactivation\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdot\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mweight\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     73\u001b[0m             \u001b[1;32myield\u001b[0m \u001b[0mactivation\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     74\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mValueError\u001b[0m: shapes (3,) and (5,2) not aligned: 3 (dim 0) != 5 (dim 0)"
     ]
    }
   ],
   "source": [
    "#!/usr/bin/env python\n",
    "\n",
    "\"\"\"\n",
    "    This module consits of a Back Propogation Network.\n",
    "\n",
    "    Author : Goutam - TAGA Labs\n",
    "    Date : 2015-11-27 18:54:33\n",
    "\"\"\"\n",
    "\n",
    "import numpy as np          # For matrix and other linear algebra calcuations \n",
    "from scipy.special import expit\n",
    "import sys\n",
    "\n",
    "def expit_prime(x):\n",
    "    return expit(x)*(1-expit(x))\n",
    "def heaviside(x):\n",
    "    return np.array([0 if xx < 0 else 1 for xx in x])\n",
    "\n",
    "class BackpropagationNetwork(object):\n",
    "    \"\"\"    \n",
    "    A back Propogation Neural Network.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self,shape):\n",
    "        \"\"\" \n",
    "        Constructs  the network of given ``shape``\n",
    "\n",
    "        It initializes the weights of the network\n",
    "        \"\"\"\n",
    "\n",
    "        self.shape = shape              \n",
    "        self.weights = list()\n",
    "\n",
    "\n",
    "        for i in range(1,len(shape)-1):\n",
    "            # Weights of input & hidden layer\n",
    "            weight = np.ones((shape[i-1] + 1,shape[i] + 1))\n",
    "            #weight = np.random.random((shape[i-1] + 1,shape[i] + 1))  # Initializing weights with bias unit\n",
    "            self.weights.append(weight)\n",
    "        else:\n",
    "            # Weights of output layer\n",
    "            weight = np.ones(((shape[i-1] + 1,shape[i] + 1)))\n",
    "            #weight = np.random.random((shape[i] + 1,shape[i+1]))      # No bias unit for output layer\n",
    "            self.weights.append(weight)\n",
    "\n",
    "    def train(self,input_data_set,output_data_set,epochs=10000):\n",
    "        \"\"\"\n",
    "        Trains the network with given training data set\n",
    "\n",
    "\n",
    "        Arguements:\n",
    "        input_data_set -- input training data set\n",
    "        output_data_set -- output training  data set\n",
    "        \"\"\"\n",
    "        ones = np.atleast_2d(np.ones(input_data_set.shape[0]))\n",
    "        input_data_set = np.concatenate((ones.T, input_data_set), axis=1)\n",
    "        for i in range(epochs):\n",
    "\n",
    "            for inputs, outputs in zip(input_data_set, output_data_set):\n",
    "                activations = list(self.__feed_forward(inputs))\n",
    "                deltas = self.__backpropagate(activations,outputs)\n",
    "                self.__update_weights(deltas,activations)\n",
    "\n",
    "        self.activations = activations\n",
    "        self.deltas = deltas\n",
    "\n",
    "    def __feed_forward(self,inputs):\n",
    "        # Uses feed forward mechanism to calculate activations\n",
    "        activation = inputs\n",
    "        yield activation\n",
    "        for weight in self.weights:\n",
    "            activation = heaviside(activation.dot(weight))\n",
    "            yield activation\n",
    "\n",
    "    def __backpropagate(self,activations,outputs):\n",
    "        # Uses back propagation algorithm to calculate deltas\n",
    "\n",
    "        error = outputs - activations[-1]                           # Error of the last layer\n",
    "        delta = error * expit_prime(activations[-1])   # Error delta \n",
    "\n",
    "        deltas = [delta]                                            # List to store generated deltas\n",
    "\n",
    "        for (weight,activation) in zip(reversed(self.weights[1:]),reversed(activations[1:-1])):\n",
    "            delta = delta.dot(weight.T) * expit_prime(activation)\n",
    "            deltas.append(delta)\n",
    "\n",
    "        deltas.reverse()\n",
    "        return deltas\n",
    "\n",
    "    def __update_weights(self,deltas,activations):\n",
    "        # Uses gradient descent to update the weights of the network\n",
    "        for (weight,delta,activation) in zip(self.weights,deltas,activations[:-1]):\n",
    "            activation = np.atleast_2d(activation)\n",
    "            delta = np.atleast_2d(delta)\n",
    "            weight += activation.T.dot(delta)\n",
    "\n",
    "    def predict(self,inputs):\n",
    "        \"\"\"\n",
    "        Predicts output for given ``inputs``\n",
    "\n",
    "        Returns list  of outputs \n",
    "        \"\"\"\n",
    "        inputs = np.concatenate((np.ones(1).T, np.array(inputs)))\n",
    "        activations = self.__feed_forward(inputs)\n",
    "        return list(activations)\n",
    "\n",
    "# ---------------------------------------------------------------------------\n",
    "\n",
    "np.random.seed(0)\n",
    "\n",
    "bpn = BackpropagationNetwork((4,1,2,3,2,1))\n",
    "\n",
    "input_data_set = np.array((\n",
    "            (0,0),\n",
    "            (0,1),\n",
    "            (1,0),\n",
    "            (1,1)\n",
    "            ))\n",
    "output_data_set = np.array((0,1,1,0))\n",
    "print(\"untrained\\nweights\")\n",
    "print(bpn.weights)\n",
    "print(\"return\\n\",bpn.predict(input_data_set[1]))\n",
    "\n",
    "bpn.train(input_data_set,output_data_set, epochs=1000)\n",
    "\n",
    "print(\"trained\\nweights\\n\",bpn.weights)\n",
    "\n",
    "print(bpn.predict(input_data_set[1]))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}