{
"cells": [
{
"cell_type": "code",
"execution_count": 39,
"id": "a78a2429-dc77-4dd5-af16-010af469b348",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[NbConvertApp] WARNING | Config option `kernel_spec_manager_class` not recognized by `NbConvertApp`.\n",
"[NbConvertApp] Converting notebook 6_6_4_Exercises.ipynb to markdown\n",
"[NbConvertApp] Writing 7171 bytes to 6_6_4_Exercises.md\n"
]
}
],
"source": [
"# Export this notebook to Markdown (the stdout above is from the last run).\n",
"!jupyter nbconvert --to markdown 6_6_4_Exercises.ipynb"
]
},
{
"cell_type": "markdown",
"id": "cfdb4382-bf00-4a2d-a09a-f61783997893",
"metadata": {},
"source": [
"# 1. What kinds of problems will occur if you change MySequential to store modules in a Python list?"
]
},
{
"cell_type": "markdown",
"id": "9d49dbf5-b970-4b32-8269-15e64aa016f5",
"metadata": {},
"source": [
"If you change `MySequential` to store modules in a plain Python list, training fails with `ValueError: optimizer got an empty parameter list`. PyTorch only registers submodules (and hence tracks their parameters via `parameters()`) when they are assigned as `nn.Module` attributes or held in containers such as `nn.Sequential` or `nn.ModuleList`; modules sitting inside an ordinary list are invisible to the parent, so the optimizer receives no parameters. A plain list additionally excludes those modules from `state_dict()` saving/loading and from device moves like `.to(device)`, so you would have to manage all of that by hand."
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "6db4fd67-a4af-4067-a4de-cb1caa49e6bc",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import torch.nn as nn\n",
"import torch\n",
"import sys\n",
"sys.path.append('/home/jovyan/work/d2l_solutions/notebooks/exercises/d2l_utils/')\n",
"import d2l\n",
"import warnings\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"class MySequential(d2l.Module):\n",
" def __init__(self, *args):\n",
" super().__init__()\n",
" self.modules = []\n",
" for idx, module in enumerate(args):\n",
" self.modules.append(module)\n",
" \n",
" def forward(self, X):\n",
" for module in self.modules:\n",
" X = module(X)\n",
" return X\n",
" \n",
"class MySequentialMLP(d2l.Classifier):\n",
" def __init__(self, num_outputs, num_hiddens, lr):\n",
" super().__init__()\n",
" self.save_hyperparameters()\n",
" layers = [nn.Flatten()]\n",
" for num in num_hiddens:\n",
" layers.append(nn.LazyLinear(num))\n",
" layers.append(nn.ReLU())\n",
" layers.append(nn.LazyLinear(num_outputs))\n",
" self.net = MySequential(*layers)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "314ef490-2457-4c15-a471-42eb34c9586c",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"(86.71588633954525, 16.023116797208786)"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"hparams = {'num_hiddens':[256],'num_outputs':10,'lr':0.1}\n",
"model = d2l.MulMLP(**hparams)\n",
"data = d2l.FashionMNIST(batch_size=256)\n",
"trainer = d2l.Trainer(max_epochs=10)\n",
"trainer.fit(model, data)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "0edd9f73-e51b-430d-9bcc-96cf49715a06",
"metadata": {
"tags": []
},
"outputs": [
{
"ename": "ValueError",
"evalue": "optimizer got an empty parameter list",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[24], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m model \u001b[38;5;241m=\u001b[39m MySequentialMLP(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mhparams)\n\u001b[1;32m 2\u001b[0m trainer \u001b[38;5;241m=\u001b[39m d2l\u001b[38;5;241m.\u001b[39mTrainer(max_epochs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/work/d2l_solutions/notebooks/exercises/d2l_utils/d2l.py:201\u001b[0m, in \u001b[0;36mTrainer.fit\u001b[0;34m(self, model, data)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprepare_data(data)\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprepare_model(model)\n\u001b[0;32m--> 201\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptim \u001b[38;5;241m=\u001b[39m \u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconfigure_optimizers\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mepoch \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 203\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtrain_batch_idx \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n",
"File \u001b[0;32m~/work/d2l_solutions/notebooks/exercises/d2l_utils/d2l.py:346\u001b[0m, in \u001b[0;36mClassifier.configure_optimizers\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 345\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconfigure_optimizers\u001b[39m(\u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 346\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptim\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSGD\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparameters\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlr\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlr\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/.local/lib/python3.11/site-packages/torch/optim/sgd.py:27\u001b[0m, in \u001b[0;36mSGD.__init__\u001b[0;34m(self, params, lr, momentum, dampening, weight_decay, nesterov, maximize, foreach, differentiable)\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m nesterov \u001b[38;5;129;01mand\u001b[39;00m (momentum \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m dampening \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m):\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNesterov momentum requires a momentum and zero dampening\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m---> 27\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdefaults\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/.local/lib/python3.11/site-packages/torch/optim/optimizer.py:187\u001b[0m, in \u001b[0;36mOptimizer.__init__\u001b[0;34m(self, params, defaults)\u001b[0m\n\u001b[1;32m 185\u001b[0m param_groups \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(params)\n\u001b[1;32m 186\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(param_groups) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m--> 187\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moptimizer got an empty parameter list\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 188\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(param_groups[\u001b[38;5;241m0\u001b[39m], \u001b[38;5;28mdict\u001b[39m):\n\u001b[1;32m 189\u001b[0m param_groups \u001b[38;5;241m=\u001b[39m [{\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mparams\u001b[39m\u001b[38;5;124m'\u001b[39m: param_groups}]\n",
"\u001b[0;31mValueError\u001b[0m: optimizer got an empty parameter list"
]
}
],
"source": [
"model = MySequentialMLP(**hparams)\n",
"trainer = d2l.Trainer(max_epochs=10)\n",
"trainer.fit(model, data)"
]
},
{
"cell_type": "markdown",
"id": "54779b57-cdfe-4f28-b748-26fd33d11365",
"metadata": {},
"source": [
"# 2. Implement a module that takes two modules as an argument, say net1 and net2 and returns the concatenated output of both networks in the forward propagation. This is also called a parallel module."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9bf7cb5e-c259-465d-a6ff-d664134ee09f",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"class ConcatLayer(d2l.Classifier, d2l.HyperParameters):\n",
" def __init__(self, net1, net2, lr):\n",
" super().__init__()\n",
" self.save_hyperparameters()\n",
" \n",
" def forward(self, X):\n",
" X1 = self.net1(X)\n",
" X2 = self.net2(X)\n",
" return torch.cat((X1,X2),dim=-1)\n",
"\n",
"hparams1 = {'num_hiddens':[256],'num_outputs':5,'lr':0.1}\n",
"hparams2 = {'num_hiddens':[256],'num_outputs':5,'lr':0.1}\n",
"model = ConcatLayer(d2l.MulMLP(**hparams1),d2l.MulMLP(**hparams2),lr=0.1)\n",
"trainer = d2l.Trainer(max_epochs=10)\n",
"trainer.fit(model, data)"
]
},
{
"cell_type": "markdown",
"id": "5252b77e-b6c2-4d4a-bbfe-2fdc4afb9453",
"metadata": {},
"source": [
"# 3. Assume that you want to concatenate multiple instances of the same network. Implement a factory function that generates multiple instances of the same module and build a larger network from it."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ef12f2cf-36d1-45f9-87a9-d7c1c530584b",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class ConcatMulMLP(d2l.MulMLP):\n",
" def __init__(self, num_outputs, num_hiddens, lr, concats):\n",
" super().__init__(num_outputs, num_hiddens, lr)\n",
" self.save_hyperparameters()\n",
" \n",
" def forward(self, X):\n",
" return torch.cat([self.net[:i+1](X) for i in self.concats],dim=-1)\n",
" \n",
"hparams = {'num_hiddens':[16,8,2],'num_outputs':5,'lr':0.1,'concats':[1,2]}\n",
"model = ConcatMulMLP(**hparams)\n",
"trainer = d2l.Trainer(max_epochs=3)\n",
"trainer.fit(model, data)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:d2l]",
"language": "python",
"name": "conda-env-d2l-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}