{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 誤差逆伝播法\n",
    "## 単純なレイヤの実装\n",
    "### 乗算レイヤの実装"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "class MulLayer:\n",
    "    def __init__(self):\n",
    "        self.x = None\n",
    "        self.y = None\n",
    "\n",
    "    def forward(self, x, y):\n",
    "        self.x = x\n",
    "        self.y = y                \n",
    "        out = x * y\n",
    "        return out\n",
    "\n",
    "    def backward(self, dout):\n",
    "        dx = dout * self.y\n",
    "        dy = dout * self.x\n",
    "        return dx, dy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "price: 220\n",
      "dApple: 2.2\n",
      "dApple_num: 110\n",
      "dTax: 200\n"
     ]
    }
   ],
   "source": [
    "# coding: utf-8\n",
    "import numpy as np\n",
    "from mymodule.layer_naive import *\n",
    "\n",
    "apple = 100\n",
    "apple_num = 2\n",
    "tax = 1.1\n",
    "\n",
    "mul_apple_layer = MulLayer()\n",
    "mul_tax_layer = MulLayer()\n",
    "\n",
    "# forward\n",
    "apple_price = mul_apple_layer.forward(apple, apple_num)\n",
    "price = mul_tax_layer.forward(apple_price, tax)\n",
    "\n",
    "# backward\n",
    "dprice = 1\n",
    "dapple_price, dtax = mul_tax_layer.backward(dprice)\n",
    "dapple, dapple_num = mul_apple_layer.backward(dapple_price)\n",
    "\n",
    "print(\"price:\", int(price))\n",
    "print(\"dApple:\", dapple)\n",
    "print(\"dApple_num:\", int(dapple_num))\n",
    "print(\"dTax:\", dtax)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 加算レイヤの実装"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "class AddLayer:\n",
    "    def __init__(self):\n",
    "        pass\n",
    "\n",
    "    def forward(self, x, y):\n",
    "        out = x + y\n",
    "        return out\n",
    "\n",
    "    def backward(self, dout):\n",
    "        dx = dout * 1\n",
    "        dy = dout * 1\n",
    "        return dx, dy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "price: 715\n",
      "dApple: 2.2\n",
      "dApple_num: 110\n",
      "dOrange: 3.3000000000000003\n",
      "dOrange_num: 165\n",
      "dTax: 650\n"
     ]
    }
   ],
   "source": [
    "# coding: utf-8\n",
    "import numpy as np\n",
    "from mymodule.layer_naive import *\n",
    "\n",
    "apple = 100\n",
    "apple_num = 2\n",
    "orange = 150\n",
    "orange_num = 3\n",
    "tax = 1.1\n",
    "\n",
    "# layer\n",
    "mul_apple_layer = MulLayer()\n",
    "mul_orange_layer = MulLayer()\n",
    "add_apple_orange_layer = AddLayer()\n",
    "mul_tax_layer = MulLayer()\n",
    "\n",
    "# forward\n",
    "apple_price = mul_apple_layer.forward(apple, apple_num)  # (1)\n",
    "orange_price = mul_orange_layer.forward(orange, orange_num)  # (2)\n",
    "all_price = add_apple_orange_layer.forward(apple_price, orange_price)  # (3)\n",
    "price = mul_tax_layer.forward(all_price, tax)  # (4)\n",
    "\n",
    "# backward\n",
    "dprice = 1\n",
    "dall_price, dtax = mul_tax_layer.backward(dprice)  # (4)\n",
    "dapple_price, dorange_price = add_apple_orange_layer.backward(dall_price)  # (3)\n",
    "dorange, dorange_num = mul_orange_layer.backward(dorange_price)  # (2)\n",
    "dapple, dapple_num = mul_apple_layer.backward(dapple_price)  # (1)\n",
    "\n",
    "print(\"price:\", int(price))\n",
    "print(\"dApple:\", dapple)\n",
    "print(\"dApple_num:\", int(dapple_num))\n",
    "print(\"dOrange:\", dorange)\n",
    "print(\"dOrange_num:\", int(dorange_num))\n",
    "print(\"dTax:\", dtax)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 活性化関数レイヤの実装\n",
    "### ReLUレイヤ"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[False  True]\n",
      " [ True False]]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "x = np.array([[1.0, -0.5], [-2.0, 3.0]])\n",
    "mask = (x <= 0)\n",
    "print(mask)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "class Relu:\n",
    "    def __init__(self):\n",
    "        self.mask = None\n",
    "\n",
    "    def forward(self, x):\n",
    "        self.mask = (x <= 0)\n",
    "        out = x.copy()\n",
    "        out[self.mask] = 0\n",
    "        return out\n",
    "\n",
    "    def backward(self, dout):\n",
    "        dout[self.mask] = 0\n",
    "        dx = dout\n",
    "        return dx"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Sigmoidレイヤ"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "class Sigmoid:\n",
    "    def __init__(self):\n",
    "        self.out = None\n",
    "\n",
    "    def forward(self, x):\n",
    "        out = sigmoid(x)\n",
    "        self.out = out\n",
    "        return out\n",
    "\n",
    "    def backward(self, dout):\n",
    "        dx = dout * (1.0 - self.out) * self.out\n",
    "        return dx"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Affine / Softmax レイヤの実装\n",
    "### Affineレイヤ"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "class Affine:\n",
    "    def __init__(self, W, b):\n",
    "        self.W =W\n",
    "        self.b = b\n",
    "        self.x = None\n",
    "        self.original_x_shape = None\n",
    "        # 重み・バイアスパラメータの微分\n",
    "        self.dW = None\n",
    "        self.db = None\n",
    "\n",
    "    def forward(self, x):\n",
    "        # テンソル対応\n",
    "        self.original_x_shape = x.shape\n",
    "        x = x.reshape(x.shape[0], -1)\n",
    "        self.x = x\n",
    "        out = np.dot(self.x, self.W) + self.b\n",
    "        return out\n",
    "\n",
    "    def backward(self, dout):\n",
    "        dx = np.dot(dout, self.W.T)\n",
    "        self.dW = np.dot(self.x.T, dout)\n",
    "        self.db = np.sum(dout, axis=0)\n",
    "        dx = dx.reshape(*self.original_x_shape)  # 入力データの形状に戻す（テンソル対応）\n",
    "        return dx"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Softmax-With-Lossレイヤ"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class SoftmaxWithLoss:\n",
    "    def __init__(self):\n",
    "        self.loss = None\n",
    "        self.y = None # softmaxの出力\n",
    "        self.t = None # 教師データ\n",
    "\n",
    "    def forward(self, x, t):\n",
    "        self.t = t\n",
    "        self.y = softmax(x)\n",
    "        self.loss = cross_entropy_error(self.y, self.t)\n",
    "        return self.loss\n",
    "\n",
    "    def backward(self, dout=1):\n",
    "        batch_size = self.t.shape[0]\n",
    "        if self.t.size == self.y.size: # 教師データがone-hot-vectorの場合\n",
    "            dx = (self.y - self.t) / batch_size\n",
    "        else:\n",
    "            dx = self.y.copy()\n",
    "            dx[np.arange(batch_size), self.t] -= 1\n",
    "            dx = dx / batch_size\n",
    "        return dx"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 誤差逆伝播法の実装"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# coding: utf-8\n",
    "import sys, os\n",
    "sys.path.append(os.pardir)  # 親ディレクトリのファイルをインポートするための設定\n",
    "import numpy as np\n",
    "from mymodule.layers import *\n",
    "from mymodule.gradient import numerical_gradient\n",
    "from collections import OrderedDict\n",
    "\n",
    "\n",
    "class TwoLayerNet:\n",
    "\n",
    "    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):\n",
    "        # 重みの初期化\n",
    "        self.params = {}\n",
    "        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)\n",
    "        self.params['b1'] = np.zeros(hidden_size)\n",
    "        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size) \n",
    "        self.params['b2'] = np.zeros(output_size)\n",
    "\n",
    "        # レイヤの生成\n",
    "        self.layers = OrderedDict()\n",
    "        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])\n",
    "        self.layers['Relu1'] = Relu()\n",
    "        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])\n",
    "\n",
    "        self.lastLayer = SoftmaxWithLoss()\n",
    "        \n",
    "    def predict(self, x):\n",
    "        for layer in self.layers.values():\n",
    "            x = layer.forward(x)\n",
    "        \n",
    "        return x\n",
    "        \n",
    "    # x:入力データ, t:教師データ\n",
    "    def loss(self, x, t):\n",
    "        y = self.predict(x)\n",
    "        return self.lastLayer.forward(y, t)\n",
    "    \n",
    "    def accuracy(self, x, t):\n",
    "        y = self.predict(x)\n",
    "        y = np.argmax(y, axis=1)\n",
    "        if t.ndim != 1 : t = np.argmax(t, axis=1)\n",
    "        \n",
    "        accuracy = np.sum(y == t) / float(x.shape[0])\n",
    "        return accuracy\n",
    "        \n",
    "    # x:入力データ, t:教師データ\n",
    "    def numerical_gradient(self, x, t):\n",
    "        loss_W = lambda W: self.loss(x, t)\n",
    "        \n",
    "        grads = {}\n",
    "        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])\n",
    "        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])\n",
    "        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])\n",
    "        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])\n",
    "        \n",
    "        return grads\n",
    "        \n",
    "    def gradient(self, x, t):\n",
    "        # forward\n",
    "        self.loss(x, t)\n",
    "\n",
    "        # backward\n",
    "        dout = 1\n",
    "        dout = self.lastLayer.backward(dout)\n",
    "        \n",
    "        layers = list(self.layers.values())\n",
    "        layers.reverse()\n",
    "        for layer in layers:\n",
    "            dout = layer.backward(dout)\n",
    "\n",
    "        # 設定\n",
    "        grads = {}\n",
    "        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db\n",
    "        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db\n",
    "\n",
    "        return grads"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 誤差逆伝播法の勾配確認"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "W1:2.65339253248e-13\n",
      "b1:7.42792047488e-13\n",
      "W2:8.93400580132e-13\n",
      "b2:1.20348178645e-10\n"
     ]
    }
   ],
   "source": [
    "# coding: utf-8\n",
    "import sys, os\n",
    "sys.path.append(os.pardir)  # 親ディレクトリのファイルをインポートするための設定\n",
    "import numpy as np\n",
    "from mymodule.mnist import load_mnist\n",
    "from mymodule.two_layer_net import TwoLayerNet\n",
    "\n",
    "# データの読み込み\n",
    "(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)\n",
    "\n",
    "network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)\n",
    "\n",
    "x_batch = x_train[:3]\n",
    "t_batch = t_train[:3]\n",
    "\n",
    "grad_numerical = network.numerical_gradient(x_batch, t_batch)\n",
    "grad_backprop = network.gradient(x_batch, t_batch)\n",
    "\n",
    "for key in grad_numerical.keys():\n",
    "    diff = np.average( np.abs(grad_backprop[key] - grad_numerical[key]) )\n",
    "    print(key + \":\" + str(diff))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 誤差逆伝播法を使った学習"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.06215 0.0625\n",
      "0.902966666667 0.9061\n",
      "0.921516666667 0.9251\n",
      "0.934316666667 0.9365\n",
      "0.94705 0.9473\n",
      "0.952183333333 0.9506\n",
      "0.956283333333 0.9543\n",
      "0.961233333333 0.9567\n",
      "0.964783333333 0.9606\n",
      "0.966466666667 0.9612\n",
      "0.970933333333 0.964\n",
      "0.9713 0.9669\n",
      "0.97195 0.965\n",
      "0.974666666667 0.9663\n",
      "0.976066666667 0.9687\n",
      "0.9772 0.9691\n",
      "0.97815 0.9685\n"
     ]
    }
   ],
   "source": [
    "# coding: utf-8\n",
    "import sys, os\n",
    "sys.path.append(os.pardir)\n",
    "\n",
    "import numpy as np\n",
    "from dataset.mnist import load_mnist\n",
    "from mymodule.two_layer_net import TwoLayerNet\n",
    "\n",
    "# データの読み込み\n",
    "(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)\n",
    "\n",
    "network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)\n",
    "\n",
    "iters_num = 10000\n",
    "train_size = x_train.shape[0]\n",
    "batch_size = 100\n",
    "learning_rate = 0.1\n",
    "\n",
    "train_loss_list = []\n",
    "train_acc_list = []\n",
    "test_acc_list = []\n",
    "\n",
    "iter_per_epoch = max(train_size / batch_size, 1)\n",
    "\n",
    "for i in range(iters_num):\n",
    "    batch_mask = np.random.choice(train_size, batch_size)\n",
    "    x_batch = x_train[batch_mask]\n",
    "    t_batch = t_train[batch_mask]\n",
    "    \n",
    "    # 勾配\n",
    "    #grad = network.numerical_gradient(x_batch, t_batch)\n",
    "    grad = network.gradient(x_batch, t_batch)\n",
    "    \n",
    "    # 更新\n",
    "    for key in ('W1', 'b1', 'W2', 'b2'):\n",
    "        network.params[key] -= learning_rate * grad[key]\n",
    "    \n",
    "    loss = network.loss(x_batch, t_batch)\n",
    "    train_loss_list.append(loss)\n",
    "    \n",
    "    if i % iter_per_epoch == 0:\n",
    "        train_acc = network.accuracy(x_train, t_train)\n",
    "        test_acc = network.accuracy(x_test, t_test)\n",
    "        train_acc_list.append(train_acc)\n",
    "        test_acc_list.append(test_acc)\n",
    "        print(train_acc, test_acc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}