{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Backpropagation" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": true }, "outputs": [], "source": [ "class MulLayer:\n", " def __init__(self):\n", " self.x = None\n", " self.y = None\n", "\n", " def forward(self, x, y):\n", " self.x = x\n", " self.y = y \n", " out = x * y\n", "\n", " return out\n", "\n", " def backward(self, din):\n", " dx = din * self.y\n", " dy = din * self.x\n", "\n", " return dx, dy\n", "\n", "\n", "class AddLayer:\n", " def __init__(self):\n", " pass\n", "\n", " def forward(self, x, y):\n", " out = x + y\n", "\n", " return out\n", "\n", " def backward(self, din):\n", " dx = din * 1\n", " dy = din * 1\n", "\n", " return dx, dy" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "price: 220\n", "\n", "dapple_price: 1.1\n", "dtax: 200\n", "\n", "dapple: 2.2\n", "dapple_num: 110\n" ] } ], "source": [ "apple = 100\n", "apple_num = 2\n", "tax = 1.1\n", "\n", "mul_apple_layer = MulLayer()\n", "mul_tax_layer = MulLayer()\n", "\n", "# forward\n", "apple_price = mul_apple_layer.forward(apple, apple_num)\n", "price = mul_tax_layer.forward(apple_price, tax)\n", "print(\"price:\", int(price), end=\"\\n\\n\")\n", "\n", "# backward\n", "dprice = 1\n", "dapple_price, dtax = mul_tax_layer.backward(dprice)\n", "print(\"dapple_price:\", dapple_price)\n", "print(\"dtax:\", dtax, end=\"\\n\\n\")\n", "\n", "dapple, dapple_num = mul_apple_layer.backward(dapple_price)\n", "print(\"dapple:\", dapple)\n", "print(\"dapple_num:\", int(dapple_num))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Affine 계층" ] }, { "cell_type": "code", "execution_count": 110, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2 (2, 3)\n", "[[1 2 3]\n", " [4 5 6]]\n", "\n", "21\n", "[5 7 9]\n", "[ 6 15]\n" ] } ], "source": [ "a = np.array([[1, 2, 3], [4, 5, 6]])\n", "print(a.ndim, a.shape)\n", "print(a)\n", "print()\n", "print(np.sum(a))\n", "print(np.sum(a, axis=0)) # Axis along which a sum is performed.\n", "print(np.sum(a, axis=1))" ] }, { "cell_type": "code", "execution_count": 119, "metadata": { "collapsed": true }, "outputs": [], "source": [ "class Affine:\n", " def __init__(self, W, b):\n", " self.W = W\n", " self.b = b \n", " self.x = None\n", " self.dW = None\n", " self.db = None\n", "\n", " def forward(self, x):\n", " self.x = x\n", " out = np.dot(self.x, self.W) + self.b\n", " return out\n", "\n", " def backward(self, din):\n", " dx = np.dot(din, self.W.T)\n", " self.dW = np.dot(self.x.T, din)\n", " self.db = np.sum(din, axis=0)\n", " return dx" ] }, { "cell_type": "code", "execution_count": 139, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "x.shape: (1, 2), W.shape: (2, 3), b.shape: (3,)\n", "\n", "out: [[16 20 24]], out.shape: (1, 3)\n", "\n", "dx.shape: (1, 2), dW.shape: (2, 3), db.shape: (3,)\n", "dx: [[ 6 15]]\n", "affine.dW: \n", "[[1 1 1]\n", " [2 2 2]]\n", "affine.db: [1 1 1]\n" ] } ], "source": [ "x = np.array([[1, 2]])\n", "W = np.array([[1, 2, 3], [4, 5, 6]])\n", "b = np.array([7, 8, 9])\n", "print(\"x.shape: {0}, W.shape: {1}, b.shape: {2}\".format(x.shape, W.shape, b.shape))\n", "print()\n", "\n", "affine = Affine(W, b)\n", "out = affine.forward(x)\n", "print(\"out: {0}, out.shape: {1}\".format(out, out.shape))\n", "print()\n", "\n", "din = np.ones_like(out) # [[1, 1, 1]]\n", "dx = affine.backward(din)\n", "print(\"dx.shape: {0}, dW.shape: {1}, db.shape: {2}\".format(dx.shape, affine.dW.shape, affine.db.shape))\n", "print(\"dx: {0}\".format(dx))\n", "print(\"affine.dW: \\n{0}\".format(affine.dW))\n", "print(\"affine.db: {0}\".format(affine.db))" ] }, { "cell_type": "code", "execution_count": 141, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "x.shape: (2, 2), W.shape: (2, 3), b.shape: (3,)\n", "\n", "out: \n", "[[16 20 24]\n", " [25 32 39]]\n", "out.shape: (2, 3)\n", "\n", "dx.shape: (2, 2), dW.shape: (2, 3), db.shape: (3,)\n", "dx: [[ 6 15]\n", " [ 6 15]]\n", "affine.dW: \n", "[[3 3 3]\n", " [6 6 6]]\n", "affine.db: [2 2 2]\n" ] } ], "source": [ "x = np.array([[1, 2], [2, 4]])\n", "W = np.array([[1, 2, 3], [4, 5, 6]])\n", "b = np.array([7, 8, 9])\n", "print(\"x.shape: {0}, W.shape: {1}, b.shape: {2}\".format(x.shape, W.shape, b.shape))\n", "print()\n", "\n", "affine = Affine(W, b)\n", "out = affine.forward(x)\n", "print(\"out: \\n{0}\\nout.shape: {1}\".format(out, out.shape))\n", "print()\n", "\n", "din = np.ones_like(out)\n", "dx = affine.backward(din)\n", "print(\"dx.shape: {0}, dW.shape: {1}, db.shape: {2}\".format(dx.shape, affine.dW.shape, affine.db.shape))\n", "print(\"dx: {0}\".format(dx))\n", "print(\"affine.dW: \\n{0}\".format(affine.dW))\n", "print(\"affine.db: {0}\".format(affine.db))" ] }, { "cell_type": "code", "execution_count": 100, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np\n", "\n", "def softmax(x):\n", " if x.ndim == 2:\n", " x = x.T\n", " x = x - np.max(x, axis=0)\n", " y = np.exp(x) / np.sum(np.exp(x), axis=0)\n", " return y.T \n", "\n", " x = x - np.max(x)\n", " return np.exp(x) / np.sum(np.exp(x))\n", "\n", "def cross_entropy_error(y, t):\n", " #print(y.shape, t.shape)\n", " if y.ndim == 1:\n", " y = y.reshape(1, y.size)\n", " t = t.reshape(1, t.size)\n", "\n", " if t.size == y.size:\n", " t = t.argmax(axis=1)\n", "\n", " batch_size = y.shape[0]\n", " return -np.sum(np.log(y[np.arange(batch_size), t])) / batch_size\n", "\n", "class Relu:\n", " def __init__(self):\n", " self.mask = None\n", "\n", " def forward(self, x):\n", " self.mask = (x <= 0)\n", " out = x.copy()\n", " out[self.mask] = 0\n", " return out\n", "\n", " def backward(self, din):\n", " din[self.mask] = 0\n", " dx = din\n", " return dx\n", "\n", "class Sigmoid:\n", " def __init__(self):\n", " self.out = None\n", "\n", " def forward(self, x):\n", " out = sigmoid(x)\n", " self.out = out\n", " return out\n", "\n", " def backward(self, din):\n", " dx = din * self.out * (1.0 - self.out)\n", " return dx\n", "\n", "class SoftmaxWithLoss:\n", " def __init__(self):\n", " self.loss = None\n", " self.y = None\n", " self.t = None\n", " \n", " def forward(self, x, t):\n", " self.t = t\n", " self.y = softmax(x)\n", " self.loss = cross_entropy_error(self.y, self.t)\n", " return self.loss\n", "\n", " def backward(self, din=1):\n", " batch_size = self.t.shape[0]\n", " dx = (self.y - self.t) / float(batch_size) \n", " return dx" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } }, "nbformat": 4, "nbformat_minor": 2 }