{
"cells": [
{
"cell_type": "markdown",
"id": "88852514-d89d-4fed-9a47-e4083ad7b575",
"metadata": {},
"source": [
"# 1. Use the NestMLP model defined in Section 6.1 and access the parameters of the various layers."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "71dc7c41-d805-4f90-8d5b-40414fd7b150",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"['0.weight', '0.bias', '2.weight', '2.bias']"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import torch.nn as nn\n",
"class NestMLP(nn.Module):\n",
" def __init__(self):\n",
" super().__init__()\n",
" self.net = nn.Sequential(nn.LazyLinear(64), nn.ReLU(),\n",
" nn.LazyLinear(32), nn.ReLU())\n",
" self.linear = nn.LazyLinear(16)\n",
"\n",
" def forward(self, X):\n",
" return self.linear(self.net(X))\n",
"\n",
"model = NestMLP()\n",
"[name for name, param in model.net.named_parameters()]"
]
},
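{
"cell_type": "markdown",
"id": "7f2a9c31-5b6e-4d8a-9c3f-0e1d2b4a6c85",
"metadata": {},
"source": [
"A small sketch to go one step further (the input width of 20 below is an arbitrary assumption, not from the book): lazy layers only have named, *uninitialized* parameters until the first forward pass, after which every shape becomes concrete and can be inspected."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c8d5e12-9a4b-4f7c-8d2e-6b1a0c9f7e54",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"\n",
"X = torch.rand(2, 20)  # dummy batch; 20 input features is an arbitrary choice\n",
"model(X)  # first forward pass materializes all LazyLinear parameters\n",
"# Shapes of every parameter, including the final self.linear layer\n",
"{name: param.shape for name, param in model.named_parameters()}"
]
},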
{
"cell_type": "markdown",
"id": "ed12e022-9afc-4f75-a818-d296d9d4a867",
"metadata": {},
"source": [
"# 2. Construct an MLP containing a shared parameter layer and train it. During the training process, observe the model parameters and gradients of each layer."
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "0eeb9ae2-e3c9-4c0e-8881-18db312b2fda",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import sys\n",
"import torch.nn as nn\n",
"import torch\n",
"import warnings\n",
"sys.path.append('/home/jovyan/work/d2l_solutions/notebooks/exercises/d2l_utils/')\n",
"import d2l\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"class PlotParameterMLP(d2l.Classifier):\n",
" def __init__(self, num_outputs, num_hiddens, lr, dropouts):\n",
" super().__init__()\n",
" self.save_hyperparameters()\n",
" layers = [nn.Flatten(),nn.LazyLinear(num_hiddens[0]),nn.ReLU()]\n",
" shared = nn.LazyLinear(num_hiddens[1])\n",
" self.activations = []\n",
" for i in range(1,len(num_hiddens)):\n",
" layers.append(shared)\n",
" layers.append(nn.ReLU())\n",
" layers.append(nn.Dropout(dropouts[i]))\n",
" self.activations.append(i*3)\n",
" layers.append(nn.LazyLinear(num_outputs))\n",
" self.net = nn.Sequential(*layers)\n",
" \n",
" def training_step(self, batch, plot_flag=True):\n",
" y_hat = self(*batch[:-1])\n",
" # auc = torch.tensor(roc_auc_score(batch[-1].detach().numpy() , y_hat[:,1].detach().numpy()))\n",
" if plot_flag:\n",
" for i in self.activations:\n",
" # print(self.net[i].weight.data,self.net[i].weight.grad)\n",
" self.plot(f'layer_{i}_weight',self.net[i].weight.data.mean(),train=True)\n",
" # self.plot(f'layer_{i}_weight',self.net[i].weight.grad.mean(),train=True)\n",
" return self.loss(y_hat, batch[-1])\n",
" \n",
" def validation_step(self, batch, plot_flag=True):\n",
" y_hat = self(*batch[:-1])\n",
" # auc = torch.tensor(roc_auc_score(batch[-1].detach().numpy() , y_hat[:,1].detach().numpy()))\n",
" if plot_flag:\n",
" for i in self.activations:\n",
" # self.plot(f'layer_{i}_weight',self.net[i].weight.data.mean(),train=True)\n",
" self.plot(f'layer_{i}_weight',self.net[i].weight.grad.mean(),train=True)\n",
" return self.loss(y_hat, batch[-1])\n",
" \n",
" def stat_activation_variance(self, i, X):\n",
" activation = self.net[:i](X)\n",
" return ((activation-activation.mean(axis=0,keepdim=True))**2).mean()"
]
},
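{
"cell_type": "markdown",
"id": "5e7b2d90-3c1a-4e6f-b8d4-9a0c1f2e3b67",
"metadata": {},
"source": [
"A quick sanity check before training (a sketch; the hyperparameters and the Fashion-MNIST-shaped dummy batch below are assumptions): the shared layer sits at several indices of `self.net`, but those entries are one and the same module, so after a backward pass both positions report the identical weight and gradient tensors, the gradient having accumulated a contribution from each use of the layer."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9b4f6a28-1d3e-4c5b-a7f9-2e8d0c6b5a43",
"metadata": {},
"outputs": [],
"source": [
"model = PlotParameterMLP(num_outputs=10, num_hiddens=[256, 128, 128],\n",
"                         lr=0.1, dropouts=[0.0, 0.5, 0.5])\n",
"X = torch.rand(4, 1, 28, 28)  # assumed Fashion-MNIST-like input shape\n",
"y = torch.randint(0, 10, (4,))\n",
"model.loss(model(X), y).backward()\n",
"i, j = model.activations  # indices 3 and 6 of self.net\n",
"# One module in two positions: parameters and gradients are the same tensors.\n",
"print(model.net[i] is model.net[j],\n",
"      model.net[i].weight.grad is model.net[j].weight.grad)"
]
},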
{
"cell_type": "code",
"execution_count": 2,
"id": "30dbf993-8ea0-4261-9b7a-2c24e065c0f3",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"(118.06766620278358, 24.128756165504456)"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
"