{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"# Deep Convolutional Neural Networks (AlexNet)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-03T22:16:27.779005Z",
"start_time": "2019-07-03T22:16:24.966374Z"
}
},
"outputs": [],
"source": [
"import d2l\n",
"from mxnet import gluon, np, npx\n",
"from mxnet.gluon import nn\n",
"npx.set_np()\n",
"\n",
"train_iter, test_iter = d2l.load_data_fashion_mnist(\n",
" batch_size=128, resize=224)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"The model"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-03T22:16:27.796186Z",
"start_time": "2019-07-03T22:16:27.782555Z"
},
"attributes": {
"classes": [],
"id": "",
"n": "1"
}
},
"outputs": [],
"source": [
"net = nn.Sequential()\n",
"net.add(# A larger 11 x 11 window to capture large objects.\n",
" nn.Conv2D(96, kernel_size=11, strides=4, activation='relu'),\n",
" nn.MaxPool2D(pool_size=3, strides=2),\n",
" # Make the convolution window smaller, and increase the\n",
" # number of output channels\n",
" nn.Conv2D(256, kernel_size=5, padding=2, activation='relu'),\n",
" nn.MaxPool2D(pool_size=3, strides=2),\n",
" # Use three successive convolutional layers and a smaller convolution\n",
" # window. \n",
" nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),\n",
" nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),\n",
" nn.Conv2D(256, kernel_size=3, padding=1, activation='relu'),\n",
" nn.MaxPool2D(pool_size=3, strides=2),\n",
" # Several times larger fully connected layers with dropout\n",
" nn.Dense(4096, activation=\"relu\"), nn.Dropout(0.5),\n",
" nn.Dense(4096, activation=\"relu\"), nn.Dropout(0.5),\n",
" nn.Dense(10))"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"Check the outputs layer by layer"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-03T22:16:27.847831Z",
"start_time": "2019-07-03T22:16:27.798680Z"
},
"attributes": {
"classes": [],
"id": "",
"n": "2"
},
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"conv0 output shape:\t (1, 96, 54, 54)\n",
"pool0 output shape:\t (1, 96, 26, 26)\n",
"conv1 output shape:\t (1, 256, 26, 26)\n",
"pool1 output shape:\t (1, 256, 12, 12)\n",
"conv2 output shape:\t (1, 384, 12, 12)\n",
"conv3 output shape:\t (1, 384, 12, 12)\n",
"conv4 output shape:\t (1, 256, 12, 12)\n",
"pool2 output shape:\t (1, 256, 5, 5)\n",
"dense0 output shape:\t (1, 4096)\n",
"dropout0 output shape:\t (1, 4096)\n",
"dense1 output shape:\t (1, 4096)\n",
"dropout1 output shape:\t (1, 4096)\n",
"dense2 output shape:\t (1, 10)\n"
]
}
],
"source": [
"X = np.random.uniform(size=(1, 1, 224, 224))\n",
"net.initialize()\n",
"for layer in net:\n",
" X = layer(X)\n",
" print(layer.name, 'output shape:\\t', X.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"Training"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-03T22:20:58.845230Z",
"start_time": "2019-07-03T22:16:27.850205Z"
},
"attributes": {
"classes": [],
"id": "",
"n": "5"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"loss 0.329, train acc 0.880, test acc 0.891\n",
"14578.6 exampes/sec on gpu(0)\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
"