{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Backpropagation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class MulLayer:\n",
    "    \"\"\"Multiplication node of a computational graph: out = x * y.\"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        # Operands of the most recent forward pass; backward() needs them.\n",
    "        self.x = self.y = None\n",
    "\n",
    "    def forward(self, x, y):\n",
    "        \"\"\"Remember both operands and return their product.\"\"\"\n",
    "        self.x, self.y = x, y\n",
    "        return x * y\n",
    "\n",
    "    def backward(self, din):\n",
    "        \"\"\"Return (dx, dy): the upstream gradient times the *other* operand.\n",
    "\n",
    "        Uses the operands stored by the latest forward() call.\n",
    "        \"\"\"\n",
    "        grad_x = din * self.y\n",
    "        grad_y = din * self.x\n",
    "        return grad_x, grad_y\n",
    "\n",
    "\n",
    "class AddLayer:\n",
    "    \"\"\"Addition node of a computational graph: out = x + y.\"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        \"\"\"Nothing to set up: the backward pass needs no stored inputs.\"\"\"\n",
    "\n",
    "    def forward(self, x, y):\n",
    "        \"\"\"Return the sum of the two operands.\"\"\"\n",
    "        return x + y\n",
    "\n",
    "    def backward(self, din):\n",
    "        \"\"\"Return (dx, dy); each is the upstream gradient multiplied by 1.\"\"\"\n",
    "        grad_x, grad_y = din * 1, din * 1\n",
    "        return grad_x, grad_y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "price: 220\n",
      "\n",
      "dapple_price: 1.1\n",
      "dtax: 200\n",
      "\n",
      "dapple: 2.2\n",
      "dapple_num: 110\n"
     ]
    }
   ],
   "source": [
    "# Scenario: buy `apple_num` apples at `apple` each, then apply `tax`.\n",
    "apple, apple_num, tax = 100, 2, 1.1\n",
    "\n",
    "# One multiplication node per product in the graph.\n",
    "mul_apple_layer = MulLayer()\n",
    "mul_tax_layer = MulLayer()\n",
    "\n",
    "# forward: (apple * apple_num) * tax\n",
    "apple_price = mul_apple_layer.forward(apple, apple_num)\n",
    "price = mul_tax_layer.forward(apple_price, tax)\n",
    "print(\"price:\", int(price))\n",
    "print()\n",
    "\n",
    "# backward: start from d(price)/d(price) = 1 and walk the graph in reverse.\n",
    "dprice = 1\n",
    "dapple_price, dtax = mul_tax_layer.backward(dprice)\n",
    "print(\"dapple_price:\", dapple_price)\n",
    "print(\"dtax:\", dtax)\n",
    "print()\n",
    "\n",
    "dapple, dapple_num = mul_apple_layer.backward(dapple_price)\n",
    "print(\"dapple:\", dapple)\n",
    "print(\"dapple_num:\", int(dapple_num))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Affine 계층"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 110,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2 (2, 3)\n",
      "[[1 2 3]\n",
      " [4 5 6]]\n",
      "\n",
      "21\n",
      "[5 7 9]\n",
      "[ 6 15]\n"
     ]
    }
   ],
   "source": [
    "# NumPy is first needed in this cell; importing it here keeps the notebook\n",
    "# runnable top-to-bottom on a fresh kernel.\n",
    "import numpy as np\n",
    "\n",
    "a = np.array([[1, 2, 3], [4, 5, 6]])\n",
    "print(a.ndim, a.shape)\n",
    "print(a)\n",
    "print()\n",
    "print(np.sum(a))          # sum over every element\n",
    "print(np.sum(a, axis=0))  # collapse rows -> one sum per column\n",
    "print(np.sum(a, axis=1))  # collapse columns -> one sum per row"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 119,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class Affine:\n",
    "    \"\"\"Fully connected layer computing np.dot(x, W) + b.\n",
    "\n",
    "    W and b are kept by reference; backward() stores their gradients in\n",
    "    dW and db and returns the gradient with respect to the input.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, W, b):\n",
    "        self.W, self.b = W, b\n",
    "        self.x = None                # input cached by forward()\n",
    "        self.dW = self.db = None     # gradients filled in by backward()\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Cache the input and return np.dot(x, W) + b.\"\"\"\n",
    "        self.x = x\n",
    "        return np.dot(x, self.W) + self.b\n",
    "\n",
    "    def backward(self, din):\n",
    "        \"\"\"Store dW and db for the cached input and return dx.\"\"\"\n",
    "        grad_input = np.dot(din, self.W.T)\n",
    "        self.dW = np.dot(self.x.T, din)\n",
    "        # Summing over axis 0 adds up the upstream gradient across rows.\n",
    "        self.db = np.sum(din, axis=0)\n",
    "        return grad_input"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x.shape: (1, 2), W.shape: (2, 3), b.shape: (3,)\n",
      "\n",
      "out: [[16 20 24]], out.shape: (1, 3)\n",
      "\n",
      "dx.shape: (1, 2), dW.shape: (2, 3), db.shape: (3,)\n",
      "dx: [[ 6 15]]\n",
      "affine.dW: \n",
      "[[1 1 1]\n",
      " [2 2 2]]\n",
      "affine.db: [1 1 1]\n"
     ]
    }
   ],
   "source": [
    "# Single-sample batch: x has shape (1, 2).\n",
    "x = np.array([[1, 2]])\n",
    "W = np.array([[1, 2, 3], [4, 5, 6]])\n",
    "b = np.array([7, 8, 9])\n",
    "print(\"x.shape: {0}, W.shape: {1}, b.shape: {2}\".format(x.shape, W.shape, b.shape), end=\"\\n\\n\")\n",
    "\n",
    "# Forward pass.\n",
    "affine = Affine(W, b)\n",
    "out = affine.forward(x)\n",
    "print(\"out: {0}, out.shape: {1}\".format(out, out.shape), end=\"\\n\\n\")\n",
    "\n",
    "# Backward pass with an upstream gradient of ones: [[1, 1, 1]].\n",
    "din = np.ones_like(out)\n",
    "dx = affine.backward(din)\n",
    "print(\n",
    "    \"dx.shape: {0}, dW.shape: {1}, db.shape: {2}\".format(dx.shape, affine.dW.shape, affine.db.shape),\n",
    "    \"dx: {0}\".format(dx),\n",
    "    \"affine.dW: \\n{0}\".format(affine.dW),\n",
    "    \"affine.db: {0}\".format(affine.db),\n",
    "    sep=\"\\n\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 141,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "x.shape: (2, 2), W.shape: (2, 3), b.shape: (3,)\n",
      "\n",
      "out: \n",
      "[[16 20 24]\n",
      " [25 32 39]]\n",
      "out.shape: (2, 3)\n",
      "\n",
      "dx.shape: (2, 2), dW.shape: (2, 3), db.shape: (3,)\n",
      "dx: [[ 6 15]\n",
      " [ 6 15]]\n",
      "affine.dW: \n",
      "[[3 3 3]\n",
      " [6 6 6]]\n",
      "affine.db: [2 2 2]\n"
     ]
    }
   ],
   "source": [
    "# Two-sample batch: x has shape (2, 2).\n",
    "x = np.array([[1, 2], [2, 4]])\n",
    "W = np.array([[1, 2, 3], [4, 5, 6]])\n",
    "b = np.array([7, 8, 9])\n",
    "print(\"x.shape: {0}, W.shape: {1}, b.shape: {2}\".format(x.shape, W.shape, b.shape), end=\"\\n\\n\")\n",
    "\n",
    "# Forward pass.\n",
    "affine = Affine(W, b)\n",
    "out = affine.forward(x)\n",
    "print(\"out: \\n{0}\\nout.shape: {1}\".format(out, out.shape), end=\"\\n\\n\")\n",
    "\n",
    "# Backward pass with an upstream gradient of ones, one row per sample.\n",
    "din = np.ones_like(out)\n",
    "dx = affine.backward(din)\n",
    "print(\n",
    "    \"dx.shape: {0}, dW.shape: {1}, db.shape: {2}\".format(dx.shape, affine.dW.shape, affine.db.shape),\n",
    "    \"dx: {0}\".format(dx),\n",
    "    \"affine.dW: \\n{0}\".format(affine.dW),\n",
    "    \"affine.db: {0}\".format(affine.db),\n",
    "    sep=\"\\n\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "def softmax(x):\n",
    "    \"\"\"Return softmax probabilities of x.\n",
    "\n",
    "    A 2-D input is normalised row by row; any other input is normalised\n",
    "    over all of its elements. The maximum is subtracted before np.exp so\n",
    "    that large inputs do not overflow.\n",
    "    \"\"\"\n",
    "    if x.ndim == 2:\n",
    "        shifted = x - np.max(x, axis=1, keepdims=True)\n",
    "        exps = np.exp(shifted)\n",
    "        return exps / np.sum(exps, axis=1, keepdims=True)\n",
    "\n",
    "    exps = np.exp(x - np.max(x))\n",
    "    return exps / np.sum(exps)\n",
    "\n",
    "def cross_entropy_error(y, t, eps=1e-7):\n",
    "    \"\"\"Mean cross-entropy of predicted probabilities y against targets t.\n",
    "\n",
    "    Args:\n",
    "        y: predicted probabilities, 1-D (one sample) or 2-D (one row per sample).\n",
    "        t: targets, either one-hot (same number of elements as y) or class indices.\n",
    "        eps: lower bound applied to the picked probabilities before np.log, so a\n",
    "            probability of exactly 0 gives a large finite loss instead of inf.\n",
    "\n",
    "    Returns:\n",
    "        The negative log-probability of the target class, averaged over rows.\n",
    "    \"\"\"\n",
    "    if y.ndim == 1:\n",
    "        # Treat a single sample as a batch of one.\n",
    "        y = y.reshape(1, y.size)\n",
    "        t = t.reshape(1, t.size)\n",
    "\n",
    "    if t.size == y.size:\n",
    "        # One-hot targets -> class indices.\n",
    "        t = t.argmax(axis=1)\n",
    "\n",
    "    batch_size = y.shape[0]\n",
    "    picked = y[np.arange(batch_size), t]\n",
    "    # Flooring (rather than adding eps) leaves every probability >= eps untouched.\n",
    "    return -np.sum(np.log(np.maximum(picked, eps))) / batch_size\n",
    "\n",
    "class Relu:\n",
    "    \"\"\"ReLU activation: passes positive inputs through and zeroes the rest.\"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        # Boolean array marking where the last forward() input was <= 0.\n",
    "        self.mask = None\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Return a copy of x with every element <= 0 replaced by 0.\"\"\"\n",
    "        self.mask = (x <= 0)\n",
    "        out = x.copy()\n",
    "        out[self.mask] = 0\n",
    "        return out\n",
    "\n",
    "    def backward(self, din):\n",
    "        \"\"\"Return din with the gradient zeroed wherever forward() clipped.\n",
    "\n",
    "        Works on a copy so the caller's upstream-gradient array is left intact.\n",
    "        \"\"\"\n",
    "        dx = din.copy()\n",
    "        dx[self.mask] = 0\n",
    "        return dx\n",
    "\n",
    "class Sigmoid:\n",
    "    \"\"\"Sigmoid activation: out = 1 / (1 + exp(-x)).\"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        # Output of the last forward(); the backward pass is expressed in terms of it.\n",
    "        self.out = None\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Apply the logistic function element-wise, cache and return the result.\"\"\"\n",
    "        # Computed inline: the notebook defines no standalone sigmoid() helper.\n",
    "        out = 1.0 / (1.0 + np.exp(-x))\n",
    "        self.out = out\n",
    "        return out\n",
    "\n",
    "    def backward(self, din):\n",
    "        \"\"\"Return din * out * (1 - out), using the cached forward() output.\"\"\"\n",
    "        dx = din * self.out * (1.0 - self.out)\n",
    "        return dx\n",
    "\n",
    "class SoftmaxWithLoss:\n",
    "    \"\"\"Softmax activation followed by mean cross-entropy loss.\"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        self.loss = None  # loss returned by the last forward()\n",
    "        self.y = None     # softmax output of the last forward()\n",
    "        self.t = None     # targets passed to the last forward()\n",
    "\n",
    "    def forward(self, x, t):\n",
    "        \"\"\"Store t and softmax(x), then return their cross-entropy loss.\"\"\"\n",
    "        self.t = t\n",
    "        self.y = softmax(x)\n",
    "        self.loss = cross_entropy_error(self.y, self.t)\n",
    "        return self.loss\n",
    "\n",
    "    def backward(self, din=1):\n",
    "        \"\"\"Return the gradient of the loss with respect to the forward() input.\n",
    "\n",
    "        Handles the same target layouts forward() accepts: one-hot targets\n",
    "        (same number of elements as y) and class-index targets. A 1-D y is\n",
    "        treated as a batch of one, matching cross_entropy_error.\n",
    "\n",
    "        Args:\n",
    "            din: upstream gradient of the loss; the result is scaled by it.\n",
    "\n",
    "        Returns:\n",
    "            An array with the same shape as y.\n",
    "        \"\"\"\n",
    "        y, t = self.y, self.t\n",
    "        if y.ndim == 1:\n",
    "            # Single sample: promote to a batch of one so batch_size is 1,\n",
    "            # not the number of classes.\n",
    "            y = y.reshape(1, y.size)\n",
    "            t = t.reshape(1, t.size)\n",
    "\n",
    "        batch_size = y.shape[0]\n",
    "        if t.size == y.size:\n",
    "            # One-hot targets.\n",
    "            dx = (y - t) / float(batch_size)\n",
    "        else:\n",
    "            # Class-index targets: subtract 1 at each row's target column.\n",
    "            dx = y.copy()\n",
    "            dx[np.arange(batch_size), t.reshape(batch_size)] -= 1\n",
    "            dx = dx / float(batch_size)\n",
    "\n",
    "        return dx.reshape(self.y.shape) * din"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}