{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "# Convolutions\n",
    "\n",
    "Import packages for:\n",
    "1. building a CNN from scratch;\n",
    "2. using built-in architectures."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-03T22:12:43.185492Z",
     "start_time": "2019-07-03T22:12:41.569269Z"
    }
   },
   "outputs": [],
   "source": [
    "from mxnet import np, npx\n",
    "from mxnet.gluon import nn\n",
    "npx.set_np()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "The 2D cross-correlation operator:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def corr2d(X, K):\n",
    "    h, w = K.shape\n",
    "    Y = np.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))\n",
    "    for i in range(Y.shape[0]):\n",
    "        for j in range(Y.shape[1]):\n",
    "            Y[i, j] = (X[i: i + h, j: j + w] * K).sum()\n",
    "    return Y"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For example, a two-dimensional cross-correlation operation. The shaded portions are the first output element and the input and kernel array elements used in its computation:  \n",
    "\n",
    "\\begin{align*}\n",
    "0\\times0+1\\times1+3\\times2+4\\times3=19,\\\\\n",
    "1\\times0+2\\times1+4\\times2+5\\times3=25,\\\\\n",
    "3\\times0+4\\times1+6\\times2+7\\times3=37,\\\\\n",
    "4\\times0+5\\times1+7\\times2+8\\times3=43,\\\\\n",
    "\n",
    "\\end{align*}\n",
    "\n",
    "\n",
    "![A simple cross-correlation example.](https://d2l.ai/_images/correlation.svg)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-03T22:12:43.312247Z",
     "start_time": "2019-07-03T22:12:43.188173Z"
    },
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[19., 25.],\n",
       "       [37., 43.]])"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])\n",
    "K = np.array([[0, 1], [2, 3]])\n",
    "corr2d(X, K)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "![A simple cross-correlation example.](https://d2l.ai/_images/correlation.svg)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "The convolutional layers\n",
    "\n",
    "$\\mathbf Y = \\mathbf X \\star \\mathbf W + b$"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-03T22:12:43.320777Z",
     "start_time": "2019-07-03T22:12:43.314285Z"
    },
    "attributes": {
     "classes": [],
     "id": "",
     "n": "70"
    }
   },
   "outputs": [],
   "source": [
    "class Conv2D(nn.Block):\n",
    "    def __init__(self, kernel_size, **kwargs):\n",
    "        super(Conv2D, self).__init__(**kwargs)\n",
    "        self.weight = self.params.get('weight', shape=kernel_size)\n",
    "        self.bias = self.params.get('bias', shape=(1,))\n",
    "\n",
    "    def forward(self, x):\n",
    "        return corr2d(x, self.weight.data()) + self.bias.data()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Check the output from the convolution layers. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-03T22:12:43.343747Z",
     "start_time": "2019-07-03T22:12:43.325742Z"
    }
   },
   "outputs": [],
   "source": [
    "def comp_conv2d(conv2d, X):\n",
    "    conv2d.initialize()\n",
    "    # Add batch and channel dimension.\n",
    "    X = X.reshape((1, 1) + X.shape)\n",
    "    Y = conv2d(X)\n",
    "    # Exclude the first two dimensions\n",
    "    return Y.reshape(Y.shape[2:])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "fragment"
    }
   },
   "source": [
    "Padding & Stride"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-03T22:12:43.364745Z",
     "start_time": "2019-07-03T22:12:43.345529Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4, 4)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = np.random.uniform(size=(8, 8))\n",
    "conv2d = nn.Conv2D(channels=1, kernel_size=3, padding=1, strides=2)\n",
    "comp_conv2d(conv2d, X).shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\\begin{align}\n",
    "\\text{ Output shape} & = \\lfloor(n_h-k_h+p_h+s_h)/s_h\\rfloor \\times \\lfloor(n_w-k_w+p_w+s_w)/s_w\\rfloor \\\\\n",
    " & = \\lfloor(8 - 3 + 1 + 2) / 2\\rfloor \\times \\lfloor(8 - 3 + 1 + 2) / 2\\rfloor \\\\\n",
    " & = (4, 4)\n",
    "\\end{align}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "A slightly more complicated example."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-03T22:12:43.382376Z",
     "start_time": "2019-07-03T22:12:43.368194Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2, 2)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = np.random.uniform(size=(8, 8))\n",
    "conv2d = nn.Conv2D(1, kernel_size=(3, 5), padding=(0, 1), strides=(3, 4))\n",
    "comp_conv2d(conv2d, X).shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\\begin{align}\n",
    "\\text{ Output shape} & = \\lfloor(n_h-k_h+p_h+s_h)/s_h\\rfloor \\times \\lfloor(n_w-k_w+p_w+s_w)/s_w\\rfloor \\\\\n",
    " & = \\lfloor(8 - 3 + 0 + 3)/3\\rfloor \\times \\lfloor(8 - 5 + 1 + 4)/4\\rfloor \\\\\n",
    " & = (2, 2)\n",
    "\\end{align}"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "# Pooling"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A 2D pooling operator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[4., 5.],\n",
       "       [7., 8.]])"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def pool2d(X, pool_size, mode='max'):\n",
    "    p_h, p_w = pool_size\n",
    "    Y = np.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))\n",
    "    for i in range(Y.shape[0]):\n",
    "        for j in range(Y.shape[1]):\n",
    "            if mode == 'max':\n",
    "                Y[i, j] = np.max(X[i: i + p_h, j: j + p_w])\n",
    "            elif mode == 'avg':\n",
    "                Y[i, j] = X[i: i + p_h, j: j + p_w].mean()\n",
    "    return Y\n",
    "\n",
    "X = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])\n",
    "pool2d(X, (2, 2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Pooling with Padding and Stride "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[[[ 0.  1.  2.  3.]\n",
      "   [ 4.  5.  6.  7.]\n",
      "   [ 8.  9. 10. 11.]\n",
      "   [12. 13. 14. 15.]]]]\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[[[ 5.,  7.],\n",
       "         [13., 15.]]]])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = np.arange(16).reshape((1, 1, 4, 4))\n",
    "print(X)\n",
    "pool2d = nn.MaxPool2D(pool_size=3, padding=1, strides=2)\n",
    "pool2d(X)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Multiple channels pooling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[[[ 0.  1.  2.  3.]\n",
      "   [ 4.  5.  6.  7.]\n",
      "   [ 8.  9. 10. 11.]\n",
      "   [12. 13. 14. 15.]]\n",
      "\n",
      "  [[ 1.  2.  3.  4.]\n",
      "   [ 5.  6.  7.  8.]\n",
      "   [ 9. 10. 11. 12.]\n",
      "   [13. 14. 15. 16.]]]]\n",
      "shape : (1, 2, 4, 4)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[[[ 5.,  7.],\n",
       "         [13., 15.]],\n",
       "\n",
       "        [[ 6.,  8.],\n",
       "         [14., 16.]]]])"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X = np.concatenate((X, X + 1), axis=1)\n",
    "print(X)\n",
    "print(\"Input shape :\", X.shape)\n",
    "\n",
    "pool2d = nn.MaxPool2D(pool_size=3, padding=1, strides=2)\n",
    "pool2d(X)"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Slideshow",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  },
  "latex_envs": {
   "LaTeX_envs_menu_present": true,
   "autoclose": false,
   "autocomplete": true,
   "bibliofile": "biblio.bib",
   "cite_by": "apalike",
   "current_citInitial": 1,
   "eqLabelWithNumbers": true,
   "eqNumInitial": 1,
   "hotkeys": {
    "equation": "Ctrl-E",
    "itemize": "Ctrl-I"
   },
   "labels_anchors": false,
   "latex_user_defs": false,
   "report_style_numbering": false,
   "user_envs_cfg": false
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}