{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Linear Algebra: Matrix Transpose & Differential Calculus"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "np.set_printoptions(suppress=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 1, 4, 5, 6, 8, 9])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = np.array([1,1,4,5,6,8,9])\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1],\n",
       "       [1],\n",
       "       [4],\n",
       "       [5],\n",
       "       [6],\n",
       "       [8],\n",
       "       [9]])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 『신경망 첫걸음, 2016』 에서는 이 방식으로 변환한다.\n",
    "np.array(x, ndmin=2).T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([1, 1, 4, 5, 6, 8, 9])"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "7"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x.shape[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[1],\n",
       "       [1],\n",
       "       [4],\n",
       "       [5],\n",
       "       [6],\n",
       "       [8],\n",
       "       [9]])"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 그러나 reshape로 좀 더 우아하게 변환할 수 있다.\n",
    "x = x.reshape(-1, 1)\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.,  5.,  3.,  7.,  4.,  5., 66.],\n",
       "       [ 2.,  4., 23.,  1.,  2.,  1.,  1.],\n",
       "       [ 4.,  4.,  1.,  1.,  1.,  1.,  2.]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 엉뚱한 답을 찾으라고 할 순 없으므로 미리 정답 행렬로 답을 만들어 둔다.\n",
    "# w = np.array([[2,5,1,7,2,5,1], [1,4,5,1,2,4,1], [2,4,2,2,2,2,2]])\n",
    "\n",
    "# 우리가 학습하고자 하는 가중치 행렬\n",
    "def init_w():\n",
    "    w = np.array([[1,5,3,7,4,5,66], [2,4,23,1,2,1,1], [4,4,1,1,1,1,2]],  dtype=np.float)\n",
    "    return w\n",
    "\n",
    "w = init_w()\n",
    "w"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[711.],\n",
       "       [132.],\n",
       "       [ 49.]])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (3, 7)과 (7, 1) 처럼 첫 번째 matrix의 열과 두 번째 matrix의 행이 일치해야 점곱(dot product)이 가능하다.\n",
    "# 따라서 np.dot(x, w)는 성립하지 않으며 w, x 로 계산해야 한다.\n",
    "y = np.dot(w, x)\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[107],\n",
       "       [ 83],\n",
       "       [ 70]])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 정답\n",
    "t = np.array([[107],[83],[70]])\n",
    "t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[604.],\n",
       "       [ 49.],\n",
       "       [-21.]])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y - t # d(1/2 * E)/d(y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# 미분\n",
    "\n",
    "$\\frac{\\partial(E)}{\\partial(w_{kj})} = \\frac{\\partial}{\\partial(w_{kj})} \\sum_n (t_n - y_n) ^ 2$\n",
    "\n",
    "결과 $y_n$은 연결되는 노드로부터만 영향을 받는다.  \n",
    "다시 말해, 노드 $k$의 결과 값인 $y_k$는 그와 연결되는 가중치 $w_{kj}$에 의해서만 영향을 받는다.\n",
    "\n",
    "따라서 $w_{kj}$의 연결 노드인 $y_k$ 외에 모든 $y_n$을 제거할 수 있다. 이제 sum을 제거할 수 있다.  \n",
    "$\\frac{\\partial(E)}{\\partial(w_{kj})} = \\frac{\\partial}{\\partial(w_{kj})} (t_k - y_k) ^ 2$\n",
    "\n",
    "# 미분의 연쇄 법칙\n",
    "$\\frac{\\partial(E)}{\\partial(w_{kj})} = \\frac{\\partial(E)}{\\partial(y_k)} \\frac{\\partial(y_k)}{\\partial(w_{kj})} $\n",
    "\n",
    "두 수식을 따로 떼내면 아래와 같다.  \n",
    "$\\frac{\\partial(E)}{\\partial(y_k)} = -2 (t_k - y_k)$  \n",
    "$\\frac{\\partial(y_k)}{\\partial(w_{kj})} = \\frac{\\partial}{\\partial{w_{kj}}} \\sum_n (w_{kn} x_n)$  \n",
    "\n",
    "마찬가지로 $y_k$는 이와 연결되는 $w_{kj}$에 의해서만 영향을 받는다.  \n",
    "따라서 $w_{kj}$의 연결 노드인 $w_{kj} x_j$ 에만 영향을 주므로 sum을 제거할 수 있다.  \n",
    "$\\frac{\\partial(w_{kj}x_j)}{\\partial(w_{kj})} = x_j$\n",
    "\n",
    "미분 계산을 쉽게 하기 위해 1/2 * SE를 취한다.  \n",
    "$\\frac{\\partial(\\frac{1}{2} (t_k - y_k) ^ 2)}{\\partial(y_k)} = y_k - t_k$\n",
    "\n",
    "모두 정리하면 아래와 같은 깔끔한 수식이 된다.  \n",
    "$(y_k - t_k) x_j$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "l = 0.001 # learning rate, 0.01로 했을 경우 overshooting이 발생했다.\n",
    "\n",
    "for k in range(3):\n",
    "    for j in range(7):\n",
    "        delta_w = (y[k] - t[k]) * x[j] # 미분\n",
    "        delta_w = -1 * l * delta_w     # 미분의 역방향 * learning rate\n",
    "        # print (k, j, t[k], y[k], x[j], delta_w)\n",
    "        w[k][j] += delta_w"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 0.396,  4.396,  0.584,  3.98 ,  0.376,  0.168, 60.564],\n",
       "       [ 1.951,  3.951, 22.804,  0.755,  1.706,  0.608,  0.559],\n",
       "       [ 4.021,  4.021,  1.084,  1.105,  1.126,  1.168,  2.189]])"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 역전파 1회 완료 후 계산된 가중치 행렬\n",
    "w"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.,  5.,  3.,  7.,  4.,  5., 66.],\n",
       "       [ 2.,  4., 23.,  1.,  2.,  1.,  1.],\n",
       "       [ 4.,  4.,  1.,  1.,  1.,  1.,  2.]])"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 다시 가중치 행렬 초기화\n",
    "w = init_w()\n",
    "w"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "이번에는 전체 계산을 한 번에 진행한다.  \n",
    "$[[\\frac{\\partial(E_1)}{\\partial(y_1)}],[\\frac{\\partial(E_2)}{\\partial(y_2)}],[\\frac{\\partial(E_3)}{\\partial(y_3)}]]\\cdot[x_1, x_2, x_3, \\dots, x_7]$\n",
    "\n",
    "미분 계산 편의상 1/2을 취한다.  \n",
    "$\\frac{\\partial(\\frac{1}{2}E_1)}{\\partial{y_1}} = \\frac{\\partial}{\\partial{y_1}}\\frac{1}{2} (t_1 - y_1) ^ 2 = y_1 - t_1$\n",
    "\n",
    "$x_n$ 과 dot 계산 결과는 가중치 행렬 $[w_{11}, w_{12}, w_{13}, \\dots, w_{17}], [w_{21}, w_{22}, \\dots, w_{27}], [w_{31}, \\dots]$ 와 동일한 형태이다.  \n",
    "아래와 같이 미분과 전치 행렬로 한 번에 동일한 계산 결과를 줄 수 있다.\n",
    "\n",
    "`w += - l * np.dot((y - t), x.transpose())`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.,  5.,  3.,  7.,  4.,  5., 66.],\n",
       "       [ 2.,  4., 23.,  1.,  2.,  1.,  1.],\n",
       "       [ 4.,  4.,  1.,  1.,  1.,  1.,  2.]])"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "w"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[604.],\n",
       "        [ 49.],\n",
       "        [-21.]]), array([[1, 1, 4, 5, 6, 8, 9]]))"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y-t, x.transpose()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 604.,  604., 2416., 3020., 3624., 4832., 5436.],\n",
       "       [  49.,   49.,  196.,  245.,  294.,  392.,  441.],\n",
       "       [ -21.,  -21.,  -84., -105., -126., -168., -189.]])"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Gradients\n",
    "np.dot((y - t), x.transpose())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[604.][604.][2416.][3020.][3624.][4832.][5436.]\n",
      "[49.][49.][196.][245.][294.][392.][441.]\n",
      "[-21.][-21.][-84.][-105.][-126.][-168.][-189.]\n"
     ]
    }
   ],
   "source": [
    "# Gradient Checking\n",
    "h = 0.1\n",
    "for k in range(3):\n",
    "    for j in range(7):\n",
    "        # 수치 미분 진행\n",
    "        w = init_w()\n",
    "        w[k][j] += h\n",
    "        y = np.dot(w, x)\n",
    "        e1 = ((t[k] - y[k]) ** 2)/2\n",
    "\n",
    "        w = init_w()\n",
    "        w[k][j] -= h\n",
    "        y = np.dot(w, x)\n",
    "        e2 = ((t[k] - y[k]) ** 2)/2\n",
    "\n",
    "        # 수치 미분 결과가 해석적 미분과 동일함을 확인할 수 있다.\n",
    "        # (계산을 편리하게 하기 위한 해석적 미분의 1/2을 동일하게 적용했다.)\n",
    "        print((e1 - e2) / (2 * h),end='')\n",
    "    print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[107.00187985],\n",
       "       [ 83.0001525 ],\n",
       "       [ 69.99993184]])"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 추가 학습\n",
    "for _ in range(50):\n",
    "    w += - l * np.dot((y - t), x.transpose())\n",
    "    y = np.dot(w, x)\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ -1.69642018,   2.30357982,  -7.78568072,  -6.4821009 ,\n",
       "        -12.17852108, -16.57136143,  41.73221839],\n",
       "       [  1.78125068,   3.78125068,  22.12500272,  -0.0937466 ,\n",
       "          0.68750408,  -0.74999455,  -0.96874387],\n",
       "       [  4.09776755,   4.09776755,   1.39107021,   1.48883776,\n",
       "          1.58660532,   1.78214042,   2.77990798]])"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "w"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# 미분 계산 검증"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/latex": [
       "$$\\left(t - y\\right)^{2}$$"
      ],
      "text/plain": [
       "       2\n",
       "(t - y) "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import sympy\n",
    "sympy.init_printing(use_latex='mathjax')\n",
    "t, y = sympy.symbols('t y')\n",
    "e = (t - y)**2\n",
    "e"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/latex": [
       "$$\\frac{\\partial}{\\partial y} \\left(t - y\\right)^{2}$$"
      ],
      "text/plain": [
       "∂ ⎛       2⎞\n",
       "──⎝(t - y) ⎠\n",
       "∂y          "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sympy.Derivative(e, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/latex": [
       "$$- 2 t + 2 y$$"
      ],
      "text/plain": [
       "-2⋅t + 2⋅y"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sympy.Derivative(e, y).doit()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/latex": [
       "$$\\frac{\\partial}{\\partial y}\\left(0.5 \\left(t - y\\right)^{2}\\right)$$"
      ],
      "text/plain": [
       "∂ ⎛           2⎞\n",
       "──⎝0.5⋅(t - y) ⎠\n",
       "∂y              "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "e12 = 1/2 * (t - y) ** 2\n",
    "sympy.Derivative(e12, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/latex": [
       "$$- 1.0 t + 1.0 y$$"
      ],
      "text/plain": [
       "-1.0⋅t + 1.0⋅y"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sympy.Derivative(e12, y).doit()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}