{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "# Convolutions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-03T22:12:43.185492Z",
     "start_time": "2019-07-03T22:12:41.569269Z"
    }
   },
   "outputs": [],
   "source": [
    "from mxnet import autograd, np, npx\n",
    "from mxnet.gluon import nn\n",
    "npx.set_np()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "The cross-correlation operator."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-03T22:12:43.312247Z",
     "start_time": "2019-07-03T22:12:43.188173Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[19., 25.],\n",
       "       [37., 43.]])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def corr2d(X, K):\n",
    "    h, w = K.shape\n",
    "    Y = np.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))\n",
    "    for i in range(Y.shape[0]):\n",
    "        for j in range(Y.shape[1]):\n",
    "            Y[i, j] = (X[i: i + h, j: j + w] * K).sum()\n",
    "    return Y\n",
    "\n",
    "X = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])\n",
    "K = np.array([[0, 1], [2, 3]])\n",
    "corr2d(X, K)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Convolutional layers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-03T22:12:43.320777Z",
     "start_time": "2019-07-03T22:12:43.314285Z"
    },
    "attributes": {
     "classes": [],
     "id": "",
     "n": "70"
    }
   },
   "outputs": [],
   "source": [
    "class Conv2D(nn.Block):\n",
    "    def __init__(self, kernel_size, **kwargs):\n",
    "        super(Conv2D, self).__init__(**kwargs)\n",
    "        self.weight = self.params.get('weight', shape=kernel_size)\n",
    "        self.bias = self.params.get('bias', shape=(1,))\n",
    "\n",
    "    def forward(self, x):\n",
    "        return corr2d(x, self.weight.data()) + self.bias.data()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Padding"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-03T22:12:43.343747Z",
     "start_time": "2019-07-03T22:12:43.325742Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(8, 8)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# A convenient function to test Gluon convoplution layers. \n",
    "def comp_conv2d(conv2d, X):\n",
    "    conv2d.initialize()\n",
    "    # Add batch and channel dimension.\n",
    "    X = X.reshape((1, 1) + X.shape)\n",
    "    Y = conv2d(X)\n",
    "    # Exclude the first two dimensions\n",
    "    return Y.reshape(Y.shape[2:])\n",
    "\n",
    "conv2d = nn.Conv2D(1, kernel_size=3, padding=1)\n",
    "X = np.random.uniform(size=(8, 8))\n",
    "comp_conv2d(conv2d, X).shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Stride"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-03T22:12:43.364745Z",
     "start_time": "2019-07-03T22:12:43.345529Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4, 4)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "conv2d = nn.Conv2D(1, kernel_size=3, padding=1, strides=2)\n",
    "comp_conv2d(conv2d, X).shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "A slightly more complicated example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-03T22:12:43.382376Z",
     "start_time": "2019-07-03T22:12:43.368194Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(2, 2)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "conv2d = nn.Conv2D(1, kernel_size=(3, 5), padding=(0, 1), strides=(3, 4))\n",
    "comp_conv2d(conv2d, X).shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Multiple input channels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-03T22:12:43.535573Z",
     "start_time": "2019-07-03T22:12:43.387749Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 56.,  72.],\n",
       "       [104., 120.]])"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def corr2d_multi_in(X, K):\n",
    "    return sum(corr2d(x, k) for x, k in zip(X, K))\n",
    "\n",
    "X = np.array([[[0, 1, 2], [3, 4, 5], [6, 7, 8]],\n",
    "              [[1, 2, 3], [4, 5, 6], [7, 8, 9]]])\n",
    "K = np.array([[[0, 1], [2, 3]], [[1, 2], [3, 4]]])\n",
    "\n",
    "corr2d_multi_in(X, K)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Multiple output channels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-03T22:12:43.576040Z",
     "start_time": "2019-07-03T22:12:43.538210Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((3, 2, 2, 2), (3, 2, 2))"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def corr2d_multi_in_out(X, K):\n",
    "    return np.stack([corr2d_multi_in(X, k) for k in K])\n",
    "\n",
    "K = np.stack((K, K + 1, K + 2))\n",
    "K.shape, corr2d_multi_in_out(X, K).shape"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Slideshow",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}