{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "# Inception "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-07T06:08:45.580708Z",
     "start_time": "2019-07-07T06:08:42.002576Z"
    }
   },
   "outputs": [],
   "source": [
    "import d2l\n",
    "from mxnet import gluon, np, npx\n",
    "from mxnet.gluon import nn\n",
    "npx.set_np()\n",
    "\n",
    "train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=128, resize=96)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "The inception block"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-07T06:08:45.592275Z",
     "start_time": "2019-07-07T06:08:45.583648Z"
    },
    "attributes": {
     "classes": [],
     "id": "",
     "n": "1"
    },
    "slideshow": {
     "slide_type": "-"
    }
   },
   "outputs": [],
   "source": [
    "class Inception(nn.Block):\n",
    "    \"\"\"Inception block: four parallel paths joined on the channel axis.\n",
    "\n",
    "    Args:\n",
    "        c1: Output channels of path 1 (a single 1 x 1 convolution).\n",
    "        c2: Pair of output channels for path 2 (1 x 1 conv, then 3 x 3 conv).\n",
    "        c3: Pair of output channels for path 3 (1 x 1 conv, then 5 x 5 conv).\n",
    "        c4: Output channels of path 4 (3 x 3 max pooling, then 1 x 1 conv).\n",
    "        **kwargs: Forwarded unchanged to nn.Block.\n",
    "\n",
    "    The block emits c1 + c2[1] + c3[1] + c4 channels. With stride 1 on\n",
    "    every layer (the Conv2D default), the paddings below keep height and\n",
    "    width identical across paths, which the concatenation in forward()\n",
    "    relies on.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, c1, c2, c3, c4, **kwargs):\n",
    "        super().__init__(**kwargs)\n",
    "        # Path 1: a single 1 x 1 convolution.\n",
    "        self.p1_1 = nn.Conv2D(c1, kernel_size=1, activation='relu')\n",
    "        # Path 2: 1 x 1 convolution, then a 3 x 3 convolution.\n",
    "        self.p2_1 = nn.Conv2D(c2[0], kernel_size=1, activation='relu')\n",
    "        self.p2_2 = nn.Conv2D(c2[1], kernel_size=3, padding=1, activation='relu')\n",
    "        # Path 3: 1 x 1 convolution, then a 5 x 5 convolution.\n",
    "        self.p3_1 = nn.Conv2D(c3[0], kernel_size=1, activation='relu')\n",
    "        self.p3_2 = nn.Conv2D(c3[1], kernel_size=5, padding=2, activation='relu')\n",
    "        # Path 4: 3 x 3 max pooling (stride 1), then a 1 x 1 convolution.\n",
    "        self.p4_1 = nn.MaxPool2D(pool_size=3, strides=1, padding=1)\n",
    "        self.p4_2 = nn.Conv2D(c4, kernel_size=1, activation='relu')\n",
    "\n",
    "    def forward(self, x):\n",
    "        # All four paths read the same input.\n",
    "        p1 = self.p1_1(x)\n",
    "        p2 = self.p2_2(self.p2_1(x))\n",
    "        p3 = self.p3_2(self.p3_1(x))\n",
    "        p4 = self.p4_2(self.p4_1(x))\n",
    "        # Concatenate the outputs on the channel dimension.\n",
    "        return np.concatenate((p1, p2, p3, p4), axis=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Inception Model - Stage 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-07T06:08:45.601078Z",
     "start_time": "2019-07-07T06:08:45.593944Z"
    },
    "attributes": {
     "classes": [],
     "id": "",
     "n": "2"
    }
   },
   "outputs": [],
   "source": [
    "# Stage 1: 7 x 7 convolution (stride 2), then 3 x 3 max pooling (stride 2).\n",
    "b1 = nn.Sequential()\n",
    "b1.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3, activation='relu'),\n",
    "       nn.MaxPool2D(pool_size=3, strides=2, padding=1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Inception Model - Stage 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-07T06:08:45.606738Z",
     "start_time": "2019-07-07T06:08:45.602846Z"
    },
    "attributes": {
     "classes": [],
     "id": "",
     "n": "3"
    }
   },
   "outputs": [],
   "source": [
    "# Both convolutions carry a ReLU, like every other convolution in the\n",
    "# network. Without it the 1 x 1 and 3 x 3 layers would compose into a\n",
    "# single affine map and the extra layer would add no modelling capacity.\n",
    "b2 = nn.Sequential()\n",
    "b2.add(nn.Conv2D(64, kernel_size=1, activation='relu'),\n",
    "       nn.Conv2D(192, kernel_size=3, padding=1, activation='relu'),\n",
    "       nn.MaxPool2D(pool_size=3, strides=2, padding=1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Inception Model - Stage 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-07T06:08:45.615934Z",
     "start_time": "2019-07-07T06:08:45.608077Z"
    },
    "attributes": {
     "classes": [],
     "id": "",
     "n": "4"
    }
   },
   "outputs": [],
   "source": [
    "# Output channels per block are c1 + c2[1] + c3[1] + c4:\n",
    "#   block 1: 64 + 128 + 32 + 32 = 256\n",
    "#   block 2: 128 + 192 + 96 + 64 = 480\n",
    "b3 = nn.Sequential()\n",
    "b3.add(Inception(64, (96, 128), (16, 32), 32),\n",
    "       Inception(128, (128, 192), (32, 96), 64),\n",
    "       nn.MaxPool2D(pool_size=3, strides=2, padding=1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Inception Model - Stage 4.
We use a total of 512 channels (128 + 256 + 64 + 64) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-07T06:08:45.631945Z",
     "start_time": "2019-07-07T06:08:45.617220Z"
    },
    "attributes": {
     "classes": [],
     "id": "",
     "n": "5"
    }
   },
   "outputs": [],
   "source": [
    "# Stage 4: five Inception blocks, then a stride-2 max pooling layer.\n",
    "# Each block emits c1 + c2[1] + c3[1] + c4 channels (sum shown per line).\n",
    "b4 = nn.Sequential()\n",
    "b4.add(Inception(192, (96, 208), (16, 48), 64))     # 192 + 208 + 48 + 64 = 512\n",
    "b4.add(Inception(160, (112, 224), (24, 64), 64))    # 160 + 224 + 64 + 64 = 512\n",
    "b4.add(Inception(128, (128, 256), (24, 64), 64))    # 128 + 256 + 64 + 64 = 512\n",
    "b4.add(Inception(112, (144, 288), (32, 64), 64))    # 112 + 288 + 64 + 64 = 528\n",
    "b4.add(Inception(256, (160, 320), (32, 128), 128))  # 256 + 320 + 128 + 128 = 832\n",
    "b4.add(nn.MaxPool2D(pool_size=3, strides=2, padding=1))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Inception Model - Stage 5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-07T06:08:45.641445Z",
     "start_time": "2019-07-07T06:08:45.633251Z"
    },
    "attributes": {
     "classes": [],
     "id": "",
     "n": "6"
    }
   },
   "outputs": [],
   "source": [
    "# Stage 5: two Inception blocks, then global average pooling.\n",
    "b5 = nn.Sequential()\n",
    "b5.add(Inception(256, (160, 320), (32, 128), 128))  # 256 + 320 + 128 + 128 = 832\n",
    "b5.add(Inception(384, (192, 384), (48, 128), 128))  # 384 + 384 + 128 + 128 = 1024\n",
    "b5.add(nn.GlobalAvgPool2D())\n",
    "\n",
    "# Full network: the five stages in order, then a Dense layer with 10 outputs.\n",
    "net = nn.Sequential()\n",
    "net.add(b1, b2, b3, b4, b5)\n",
    "net.add(nn.Dense(10))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "source": [
    "Priming the network (at full size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-07-07T06:08:45.760919Z",
     "start_time": "2019-07-07T06:08:45.643249Z"
    },
    "attributes": {
     "classes": [],
     "id": "",
     "n": "7"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "sequential0 output shape:\t (1, 64, 24, 24)\n",
      "sequential1 output shape:\t (1, 192, 12, 12)\n",
      "sequential2 output shape:\t (1, 480, 6, 6)\n",
      "sequential3 output shape:\t (1, 832, 3, 3)\n",
      "sequential4 output shape:\t (1, 1024, 1, 1)\n",
      "dense0 output shape:\t (1, 10)\n"
     ]
    }
   ],
   "source": [
    "X =
np.random.uniform(size=(1, 1, 96, 96))\n", "net.initialize()\n", "for layer in net:\n", " X = layer(X)\n", " print(layer.name, 'output shape:\\t', X.shape)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "Training." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "ExecuteTime": { "start_time": "2019-07-07T06:08:42.014Z" }, "attributes": { "classes": [], "id": "", "n": "8" } }, "outputs": [ { "data": { "image/svg+xml": [ "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n" ], "text/plain": [ "
" ] },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Train on the Fashion-MNIST iterators built in the first code cell:\n",
    "# 5 epochs at learning rate 0.1.\n",
    "d2l.train_ch5(\n",
    "    net, train_iter, test_iter,\n",
    "    lr=0.1, num_epochs=5)"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Slideshow",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}