{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Convolutions" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2019-07-03T22:12:43.185492Z", "start_time": "2019-07-03T22:12:41.569269Z" } }, "outputs": [], "source": [ "from mxnet import autograd, np, npx\n", "from mxnet.gluon import nn\n", "npx.set_np()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "The cross-correlation operator." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2019-07-03T22:12:43.312247Z", "start_time": "2019-07-03T22:12:43.188173Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[19., 25.],\n", " [37., 43.]])" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def corr2d(X, K):\n", " h, w = K.shape\n", " Y = np.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))\n", " for i in range(Y.shape[0]):\n", " for j in range(Y.shape[1]):\n", " Y[i, j] = (X[i: i + h, j: j + w] * K).sum()\n", " return Y\n", "\n", "X = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])\n", "K = np.array([[0, 1], [2, 3]])\n", "corr2d(X, K)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "Convolutional layers" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2019-07-03T22:12:43.320777Z", "start_time": "2019-07-03T22:12:43.314285Z" }, "attributes": { "classes": [], "id": "", "n": "70" } }, "outputs": [], "source": [ "class Conv2D(nn.Block):\n", " def __init__(self, kernel_size, **kwargs):\n", " super(Conv2D, self).__init__(**kwargs)\n", " self.weight = self.params.get('weight', shape=kernel_size)\n", " self.bias = self.params.get('bias', shape=(1,))\n", "\n", " def forward(self, x):\n", " return corr2d(x, self.weight.data()) + self.bias.data()" ] }, { "cell_type": "markdown", "metadata": { 
"slideshow": { "slide_type": "slide" } }, "source": [ "Padding" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2019-07-03T22:12:43.343747Z", "start_time": "2019-07-03T22:12:43.325742Z" } }, "outputs": [ { "data": { "text/plain": [ "(8, 8)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# A convenient function to test Gluon convolution layers. \n", "def comp_conv2d(conv2d, X):\n", " conv2d.initialize()\n", " # Add batch and channel dimension.\n", " X = X.reshape((1, 1) + X.shape)\n", " Y = conv2d(X)\n", " # Exclude the first two dimensions\n", " return Y.reshape(Y.shape[2:])\n", "\n", "conv2d = nn.Conv2D(1, kernel_size=3, padding=1)\n", "X = np.random.uniform(size=(8, 8))\n", "comp_conv2d(conv2d, X).shape" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "Stride" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2019-07-03T22:12:43.364745Z", "start_time": "2019-07-03T22:12:43.345529Z" } }, "outputs": [ { "data": { "text/plain": [ "(4, 4)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "conv2d = nn.Conv2D(1, kernel_size=3, padding=1, strides=2)\n", "comp_conv2d(conv2d, X).shape" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "A slightly more complicated example" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2019-07-03T22:12:43.382376Z", "start_time": "2019-07-03T22:12:43.368194Z" } }, "outputs": [ { "data": { "text/plain": [ "(2, 2)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "conv2d = nn.Conv2D(1, kernel_size=(3, 5), padding=(0, 1), strides=(3, 4))\n", "comp_conv2d(conv2d, X).shape" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ 
"Multiple input channels" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2019-07-03T22:12:43.535573Z", "start_time": "2019-07-03T22:12:43.387749Z" } }, "outputs": [ { "data": { "text/plain": [ "array([[ 56., 72.],\n", " [104., 120.]])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def corr2d_multi_in(X, K):\n", " return sum(corr2d(x, k) for x, k in zip(X, K))\n", "\n", "X = np.array([[[0, 1, 2], [3, 4, 5], [6, 7, 8]],\n", " [[1, 2, 3], [4, 5, 6], [7, 8, 9]]])\n", "K = np.array([[[0, 1], [2, 3]], [[1, 2], [3, 4]]])\n", "\n", "corr2d_multi_in(X, K)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "Multiple output channels" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2019-07-03T22:12:43.576040Z", "start_time": "2019-07-03T22:12:43.538210Z" }, "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "((3, 2, 2, 2), (3, 2, 2))" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def corr2d_multi_in_out(X, K):\n", " return np.stack([corr2d_multi_in(X, k) for k in K])\n", "\n", "K = np.stack((K, K + 1, K + 2))\n", "K.shape, corr2d_multi_in_out(X, K).shape" ] } ], "metadata": { "celltoolbar": "Slideshow", "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }