{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "c9dad41e-e7f7-4c2b-a04a-0ed87e1f4eb0",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import sys\n",
"import torch.nn as nn\n",
"import torch\n",
"import warnings\n",
"sys.path.append('/home/jovyan/work/d2l_solutions/notebooks/exercises/d2l_utils/')\n",
"import d2l\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "markdown",
"id": "e15dba16-1429-4cc9-8a55-bc1cbefb997b",
"metadata": {},
"source": [
"# 1. Let’s modernize LeNet. Implement and test the following changes:"
]
},
{
"cell_type": "markdown",
"id": "39f2b521-53c1-4caf-8ce7-71eb68d4e02f",
"metadata": {},
"source": [
"## 1.1 Replace average pooling with max-pooling."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3a8ffbb4-4588-436d-9214-434f022b4f1e",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class MaxPoolingLeNet(d2l.Classifier):\n",
" def __init__(self, lr=0.1, num_classes=10):\n",
" super().__init__()\n",
" self.save_hyperparameters()\n",
" self.net = nn.Sequential(nn.LazyConv2d(6, kernel_size=5, padding=2),\n",
" nn.Sigmoid(),\n",
" nn.MaxPool2d(kernel_size=2, stride=2),\n",
" nn.LazyConv2d(16, kernel_size=5),\n",
" nn.Sigmoid(),\n",
" nn.MaxPool2d(kernel_size=2, stride=2),\n",
" nn.Flatten(),\n",
" nn.LazyLinear(120),\n",
" nn.Sigmoid(),\n",
" nn.LazyLinear(84),\n",
" nn.Sigmoid(),\n",
" nn.LazyLinear(num_classes))\n",
" \n",
"def init_cnn(module):\n",
" if type(module) == nn.Linear or type(module) == nn.Conv2d:\n",
" nn.init.xavier_uniform_(module.weight)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "6b7cf77f-d232-4da6-9a0f-39c076fd3bed",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"acc: 0.32\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"data = d2l.FashionMNIST(batch_size=256)\n",
"model = MaxPoolingLeNet(lr=0.1)\n",
"# Dry-run one batch so the Lazy layers materialize, then Xavier-init weights.\n",
"model.apply_init([next(iter(data.get_dataloader(True)))[0]], init_cnn)\n",
"trainer = d2l.Trainer(max_epochs=10)\n",
"trainer.fit(model, data)\n",
"# FIX: training inputs go through ToTensor(), which scales uint8 pixels to\n",
"# [0, 1]; the raw val tensor is uint8 in [0, 255]. Rescale by 255 so the\n",
"# evaluation distribution matches training (the unscaled input explains the\n",
"# anomalously low 0.32 accuracy previously reported here).\n",
"y_hat = model(data.val.data.type(torch.float32).unsqueeze(dim=1) / 255)\n",
"print(f'acc: {model.accuracy(y_hat,data.val.targets).item():.2f}')"
]
},
{
"cell_type": "markdown",
"id": "9e9bdaed-0f76-4da2-b78b-52984d477c8b",
"metadata": {},
"source": [
"## 1.2 Replace the softmax layer with ReLU."
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "4d58a821-7c34-4cb1-9f54-ecf020496d31",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class ReLULeNet(d2l.Classifier):\n",
"    \"\"\"LeNet with ReLU activations and max-pooling replacing sigmoid/avg-pool.\"\"\"\n",
"    def __init__(self, lr=0.1, num_classes=10):\n",
"        super().__init__()\n",
"        self.save_hyperparameters()\n",
"        # Two conv blocks, then the classic 120-84-num_classes MLP head.\n",
"        layers = [nn.LazyConv2d(6, kernel_size=5, padding=2), nn.ReLU(),\n",
"                  nn.MaxPool2d(kernel_size=2, stride=2),\n",
"                  nn.LazyConv2d(16, kernel_size=5), nn.ReLU(),\n",
"                  nn.MaxPool2d(kernel_size=2, stride=2),\n",
"                  nn.Flatten()]\n",
"        for width in (120, 84):\n",
"            layers += [nn.LazyLinear(width), nn.ReLU()]\n",
"        layers.append(nn.LazyLinear(num_classes))\n",
"        self.net = nn.Sequential(*layers)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "72c8f740-176b-47eb-9513-6755bf1fd783",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"(69.25442786514759, 14.578803978860378)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"model = ReLULeNet(lr=0.1)\n",
"model.apply_init([next(iter(data.get_dataloader(True)))[0]], init_cnn)\n",
"trainer = d2l.Trainer(max_epochs=10)\n",
"trainer.fit(model, data)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "19322473-0144-4cf1-9d5e-4aa8a7ef7e3e",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"acc: 0.80\n"
]
}
],
"source": [
"# FIX: raw val pixels are uint8 in [0, 255]; training inputs went through\n",
"# ToTensor() and lie in [0, 1] — rescale so evaluation matches training.\n",
"y_hat = model(data.val.data.type(torch.float32).unsqueeze(dim=1) / 255)\n",
"print(f'acc: {model.accuracy(y_hat,data.val.targets).item():.2f}')"
]
},
{
"cell_type": "markdown",
"id": "847f12ee-a13a-42e4-ac9b-2c65579bc9a2",
"metadata": {},
"source": [
"# 2. Try to change the size of the LeNet style network to improve its accuracy in addition to max-pooling and ReLU."
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "0f0dda4f-68d1-4eaf-9d63-ab1784ab30b8",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class ParamLeNet(d2l.Classifier):\n",
" def __init__(self, convs, linears, lr=0.1, num_classes=10):\n",
" super().__init__()\n",
" self.save_hyperparameters()\n",
" layers = []\n",
" for conv in convs:\n",
" layers.append(nn.LazyConv2d(conv[0], kernel_size=conv[1],\n",
" padding=conv[2]))\n",
" layers.append(nn.ReLU())\n",
" layers.append(nn.MaxPool2d(kernel_size=2, stride=2))\n",
" layers.append(nn.Flatten())\n",
" for linear in linears:\n",
" layers.append(nn.LazyLinear(linear))\n",
" layers.append(nn.ReLU())\n",
" layers.append(nn.LazyLinear(num_classes))\n",
" self.net = nn.Sequential(*layers)"
]
},
{
"cell_type": "markdown",
"id": "7aa9af57-da58-4e43-9329-ab9a10a3383e",
"metadata": {},
"source": [
"## 2.1 Adjust the convolution window size."
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "ab087100-694d-44a4-9662-28afe8476735",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Sweep conv window sizes: 11x11, 5x5, 3x3 (padding keeps the first conv of\n",
"# each pair roughly size-preserving).\n",
"convs_list = [[[6,11,5],[16,11,0]],[[6,5,2],[16,5,0]],[[6,3,1],[16,3,0]]]\n",
"acc_list = []\n",
"for convs in convs_list:\n",
"    hparams = {'convs':convs, 'linears':[120,84]}\n",
"    model = ParamLeNet(**hparams)\n",
"    model.apply_init([next(iter(data.get_dataloader(True)))[0]], init_cnn)\n",
"    trainer = d2l.Trainer(max_epochs=10)\n",
"    trainer.fit(model, data)\n",
"    # FIX: rescale raw uint8 pixels to [0, 1] to match ToTensor() training inputs.\n",
"    y_hat = model(data.val.data.type(torch.float32).unsqueeze(dim=1) / 255)\n",
"    acc_list.append(model.accuracy(y_hat,data.val.targets).item())"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "20264100-62de-4382-b4e2-c582a923e1f1",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"d2l.plot(list(range(len(acc_list))),acc_list,'conv window','acc')"
]
},
{
"cell_type": "markdown",
"id": "40730c0c-b543-4062-88ee-48b92578768b",
"metadata": {},
"source": [
"## 2.2 Adjust the number of output channels."
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "044df167-5de6-4589-a1a8-41a66bcc4f51",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Sweep output channel counts: (16,32), (6,16) baseline, (2,8).\n",
"convs_list = [[[16,5,2],[32,5,0]],[[6,5,2],[16,5,0]],[[2,5,2],[8,5,0]]]\n",
"acc_list = []\n",
"for convs in convs_list:\n",
"    hparams = {'convs':convs, 'linears':[120,84]}\n",
"    model = ParamLeNet(**hparams)\n",
"    model.apply_init([next(iter(data.get_dataloader(True)))[0]], init_cnn)\n",
"    trainer = d2l.Trainer(max_epochs=10)\n",
"    trainer.fit(model, data)\n",
"    # FIX: rescale raw uint8 pixels to [0, 1] to match ToTensor() training inputs.\n",
"    y_hat = model(data.val.data.type(torch.float32).unsqueeze(dim=1) / 255)\n",
"    acc_list.append(model.accuracy(y_hat,data.val.targets).item())"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "6e35f830-173e-4364-8e4e-fa5c5ec7faee",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"d2l.plot(list(range(len(acc_list))),acc_list,'channels','acc')"
]
},
{
"cell_type": "markdown",
"id": "c75292ab-a053-4b3c-9107-ee81204e2672",
"metadata": {},
"source": [
"## 2.3 Adjust the number of convolution layers."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f08d6057-40ea-4f8f-ad2f-025128f16c5c",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Sweep conv depth: three blocks, two blocks (baseline), one block.\n",
"convs_list = [[[6,5,2],[16,5,2],[32,5,0]],[[6,5,2],[16,5,0]],[[64,5,0]]]\n",
"acc_list = []\n",
"for convs in convs_list:\n",
"    hparams = {'convs':convs, 'linears':[120,84]}\n",
"    model = ParamLeNet(**hparams)\n",
"    model.apply_init([next(iter(data.get_dataloader(True)))[0]], init_cnn)\n",
"    trainer = d2l.Trainer(max_epochs=10)\n",
"    trainer.fit(model, data)\n",
"    # FIX: rescale raw uint8 pixels to [0, 1] to match ToTensor() training inputs.\n",
"    y_hat = model(data.val.data.type(torch.float32).unsqueeze(dim=1) / 255)\n",
"    acc_list.append(model.accuracy(y_hat,data.val.targets).item())"
]
},
{
"cell_type": "markdown",
"id": "72f35a95-2600-4eb8-9b96-4c7b532ca2da",
"metadata": {},
"source": [
"## 2.4 Adjust the number of fully connected layers."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "049cf21c-ed01-4388-ab97-e1c9dc38fb37",
"metadata": {},
"outputs": [],
"source": [
"# Sweep fully connected head: 5 layers, 2 wide, 2 classic (baseline), 2 narrow.\n",
"linears_list = [[256,128,64,32,16],[256,128],[120,84],[64,32]]\n",
"acc_list = []\n",
"for linears in linears_list:\n",
"    hparams = {'convs':[[6,5,2],[16,5,0]], 'linears':linears}\n",
"    model = ParamLeNet(**hparams)\n",
"    model.apply_init([next(iter(data.get_dataloader(True)))[0]], init_cnn)\n",
"    trainer = d2l.Trainer(max_epochs=10)\n",
"    trainer.fit(model, data)\n",
"    # FIX: rescale raw uint8 pixels to [0, 1] to match ToTensor() training inputs.\n",
"    y_hat = model(data.val.data.type(torch.float32).unsqueeze(dim=1) / 255)\n",
"    acc_list.append(model.accuracy(y_hat,data.val.targets).item())"
]
},
{
"cell_type": "markdown",
"id": "7583e801-6811-43ce-85f5-ba296bf86b48",
"metadata": {},
"source": [
"## 2.5 Adjust the learning rates and other training details (e.g., initialization and number of epochs)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12be3085-5177-41f3-b4cf-7db6f7846736",
"metadata": {},
"outputs": [],
"source": [
"# Sweep learning rates on the baseline architecture.\n",
"lr_list = [0.001,0.003,0.01,0.03,0.1,0.3]\n",
"acc_list = []\n",
"for lr in lr_list:\n",
"    hparams = {'convs':[[6,5,2],[16,5,0]], 'linears':[120,84],'lr':lr}\n",
"    model = ParamLeNet(**hparams)\n",
"    model.apply_init([next(iter(data.get_dataloader(True)))[0]], init_cnn)\n",
"    trainer = d2l.Trainer(max_epochs=10)\n",
"    trainer.fit(model, data)\n",
"    # FIX: rescale raw uint8 pixels to [0, 1] to match ToTensor() training inputs.\n",
"    y_hat = model(data.val.data.type(torch.float32).unsqueeze(dim=1) / 255)\n",
"    acc_list.append(model.accuracy(y_hat,data.val.targets).item())"
]
},
{
"cell_type": "markdown",
"id": "984e0140-0731-4b66-914f-d22107cc8e43",
"metadata": {},
"source": [
"# 3. Try out the improved network on the original MNIST dataset."
]
},
{
"cell_type": "markdown",
"id": "78893ff7-32dc-432f-8a38-9f7c405b09e0",
"metadata": {},
"source": [
"# 4. Display the activations of the first and second layer of LeNet for different inputs (e.g., sweaters and coats)."
]
},
{
"cell_type": "markdown",
"id": "ed94585d-fc96-4842-a99a-658a2699bd47",
"metadata": {},
"source": [
"# 5. What happens to the activations when you feed significantly different images into the network (e.g., cats, cars, or even random noise)?"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:d2l]",
"language": "python",
"name": "conda-env-d2l-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}