{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"# 深度卷积神经网络 (AlexNet)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-03T22:16:27.779005Z",
"start_time": "2019-07-03T22:16:24.966374Z"
}
},
"outputs": [],
"source": [
"import d2l\n",
"from mxnet import gluon, np, npx\n",
"from mxnet.gluon import nn\n",
"npx.set_np()\n",
"\n",
"train_iter, test_iter = d2l.load_data_fashion_mnist(\n",
" batch_size=128, resize=224)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"定义模型。"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-03T22:16:27.796186Z",
"start_time": "2019-07-03T22:16:27.782555Z"
},
"attributes": {
"classes": [],
"id": "",
"n": "1"
}
},
"outputs": [],
"source": [
"net = nn.Sequential()\n",
"net.add(# 使用较大的11 x 11窗口来捕获大物体。\n",
" nn.Conv2D(96, kernel_size=11, strides=4, activation='relu'),\n",
" nn.MaxPool2D(pool_size=3, strides=2),\n",
" # 减小卷积窗口,使用填充为2来使得输入与输出的高和宽一致,且增大输出通道数\n",
" nn.Conv2D(256, kernel_size=5, padding=2, activation='relu'),\n",
" nn.MaxPool2D(pool_size=3, strides=2),\n",
" # 连续3个卷积层,且使用更小的卷积窗口。除了最后的卷积层外,进一步增大了输出通道数。\n",
" # 前两个卷积层后不使用池化层来减小输入的高和宽\n",
" nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),\n",
" nn.Conv2D(384, kernel_size=3, padding=1, activation='relu'),\n",
" nn.Conv2D(256, kernel_size=3, padding=1, activation='relu'),\n",
" nn.MaxPool2D(pool_size=3, strides=2),\n",
" # 这里全连接层的输出个数比LeNet中的大数倍。使用丢弃层来缓解过拟合\n",
" nn.Dense(4096, activation=\"relu\"), nn.Dropout(0.5),\n",
" nn.Dense(4096, activation=\"relu\"), nn.Dropout(0.5),\n",
" nn.Dense(10))"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"查看每一层的输出形状。"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-03T22:16:27.847831Z",
"start_time": "2019-07-03T22:16:27.798680Z"
},
"attributes": {
"classes": [],
"id": "",
"n": "2"
},
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"conv0 output shape:\t (1, 96, 54, 54)\n",
"pool0 output shape:\t (1, 96, 26, 26)\n",
"conv1 output shape:\t (1, 256, 26, 26)\n",
"pool1 output shape:\t (1, 256, 12, 12)\n",
"conv2 output shape:\t (1, 384, 12, 12)\n",
"conv3 output shape:\t (1, 384, 12, 12)\n",
"conv4 output shape:\t (1, 256, 12, 12)\n",
"pool2 output shape:\t (1, 256, 5, 5)\n",
"dense0 output shape:\t (1, 4096)\n",
"dropout0 output shape:\t (1, 4096)\n",
"dense1 output shape:\t (1, 4096)\n",
"dropout1 output shape:\t (1, 4096)\n",
"dense2 output shape:\t (1, 10)\n"
]
}
],
"source": [
"X = np.random.uniform(size=(1, 1, 224, 224))\n",
"net.initialize()\n",
"for layer in net:\n",
" X = layer(X)\n",
" print(layer.name, 'output shape:\\t', X.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"训练。"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-03T22:20:58.845230Z",
"start_time": "2019-07-03T22:16:27.850205Z"
},
"attributes": {
"classes": [],
"id": "",
"n": "5"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"loss 0.335, train acc 0.878, test acc 0.889\n",
"4070.5 exampes/sec on gpu(0)\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
"