{
"cells": [
{
"cell_type": "code",
"execution_count": 9,
"id": "af830d32-dbc2-40a6-818a-848e2cb02e8f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[NbConvertApp] WARNING | Config option `kernel_spec_manager_class` not recognized by `NbConvertApp`.\n",
"[NbConvertApp] Converting notebook 10_7_10_Exercises.ipynb to markdown\n",
"[NbConvertApp] Support files will be in 10_7_10_Exercises_files/\n",
"[NbConvertApp] Making directory 10_7_10_Exercises_files\n",
"[NbConvertApp] Writing 15184 bytes to 10_7_10_Exercises.md\n"
]
}
],
"source": [
"!jupyter nbconvert --to markdown 10_7_10_Exercises.ipynb"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "895b7391-a96a-4324-a177-8efcc0d83ff5",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import sys\n",
"import torch.nn as nn\n",
"import torch\n",
"import warnings\n",
"from sklearn.model_selection import ParameterGrid\n",
"sys.path.append('/home/jovyan/work/d2l_solutions/notebooks/exercises/d2l_utils/')\n",
"import d2l\n",
"from torchsummary import summary\n",
"warnings.filterwarnings(\"ignore\")"
]
},
{
"cell_type": "markdown",
"id": "a34f0bc1-db8b-4bc6-9d12-10cdf99b1e29",
"metadata": {},
"source": [
"# 1. Adjust the hyperparameters and analyze their influence on running time, perplexity, and the output sequence."
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "a7e6d952-b52c-4868-86ca-2addaa088ae2",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"class LSTM(d2l.RNN):\n",
" def __init__(self, num_inputs, num_hiddens, num_layers=1,\n",
" dropout=0):\n",
" d2l.Module.__init__(self)\n",
" self.save_hyperparameters()\n",
" self.rnn = nn.LSTM(num_inputs, num_hiddens, num_layers=num_layers, dropout=dropout)\n",
"\n",
" def forward(self, inputs, H_C=None):\n",
" print(inputs.shape)\n",
" return self.rnn(inputs, H_C)\n",
" \n",
"def stat_val(model, data):\n",
" ppls = []\n",
" for batch in iter(data.get_dataloader(False)):\n",
" ppls.append(model.validation_step(batch, plot_flag=False).detach().numpy())\n",
" return np.exp(np.mean(ppls))\n",
"\n",
"def experient(data_class=d2l.TimeMachine, num_steps=32, num_hiddens=32, lr=1):\n",
" data = data_class(batch_size=1024, num_steps=num_steps)\n",
" lstm = LSTM(num_inputs=len(data.vocab), num_hiddens=num_hiddens)\n",
" model = d2l.RNNLM(lstm, vocab_size=len(data.vocab), lr=lr)\n",
" trainer = d2l.Trainer(max_epochs=100, gradient_clip_val=1) #, num_gpus=1\n",
" trainer.fit(model, data)\n",
" return stat_val(model, data)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "93c30948-9396-495f-baa0-3f43f3407110",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([32, 1024, 28])\n",
"torch.Size([32, 1024, 28])\n",
"torch.Size([32, 1024, 28])\n",
"torch.Size([32, 1024, 28])\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[17], line 5\u001b[0m\n\u001b[1;32m 3\u001b[0m model \u001b[38;5;241m=\u001b[39m d2l\u001b[38;5;241m.\u001b[39mRNNLM(lstm, vocab_size\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mlen\u001b[39m(data\u001b[38;5;241m.\u001b[39mvocab), lr\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 4\u001b[0m trainer \u001b[38;5;241m=\u001b[39m d2l\u001b[38;5;241m.\u001b[39mTrainer(max_epochs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m100\u001b[39m, gradient_clip_val\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m) \u001b[38;5;66;03m#, num_gpus=1\u001b[39;00m\n\u001b[0;32m----> 5\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/work/d2l_solutions/notebooks/exercises/d2l_utils/d2l.py:208\u001b[0m, in \u001b[0;36mTrainer.fit\u001b[0;34m(self, model, data)\u001b[0m\n\u001b[1;32m 206\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mval_batch_idx \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 207\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_epochs):\n\u001b[0;32m--> 208\u001b[0m train_loss, valid_loss \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit_epoch\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mepoch \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 210\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m train_loss, valid_loss\n",
"File \u001b[0;32m~/work/d2l_solutions/notebooks/exercises/d2l_utils/d2l.py:241\u001b[0m, in \u001b[0;36mTrainer.fit_epoch\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 239\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m batch \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mval_dataloader:\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m torch\u001b[38;5;241m.\u001b[39mno_grad():\n\u001b[0;32m--> 241\u001b[0m loss \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalidation_step\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprepare_batch\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbatch\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 242\u001b[0m \u001b[43m \u001b[49m\u001b[43mplot_flag\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mplot_flag\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 243\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mval_batch_idx \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 244\u001b[0m valid_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m loss\u001b[38;5;241m.\u001b[39mdetach()\u001b[38;5;241m.\u001b[39mnumpy()\n",
"File \u001b[0;32m~/work/d2l_solutions/notebooks/exercises/d2l_utils/d2l.py:620\u001b[0m, in \u001b[0;36mRNNLMScratch.validation_step\u001b[0;34m(self, batch, plot_flag)\u001b[0m\n\u001b[1;32m 619\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mvalidation_step\u001b[39m(\u001b[38;5;28mself\u001b[39m, batch, plot_flag\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[0;32m--> 620\u001b[0m l \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloss\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mbatch\u001b[49m\u001b[43m[\u001b[49m\u001b[43m:\u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbatch\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 621\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m plot_flag:\n\u001b[1;32m 622\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mplot(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mppl\u001b[39m\u001b[38;5;124m'\u001b[39m, torch\u001b[38;5;241m.\u001b[39mexp(l), train\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n",
"File \u001b[0;32m~/work/d2l_solutions/notebooks/exercises/d2l_utils/d2l.py:362\u001b[0m, in \u001b[0;36mClassifier.loss\u001b[0;34m(self, y_hat, y, averaged)\u001b[0m\n\u001b[1;32m 360\u001b[0m y_hat \u001b[38;5;241m=\u001b[39m y_hat\u001b[38;5;241m.\u001b[39mreshape((\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, y_hat\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]))\n\u001b[1;32m 361\u001b[0m y \u001b[38;5;241m=\u001b[39m y\u001b[38;5;241m.\u001b[39mreshape((\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m,))\n\u001b[0;32m--> 362\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcross_entropy\u001b[49m\u001b[43m(\u001b[49m\u001b[43my_hat\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mreduction\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mmean\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m \u001b[49m\n\u001b[1;32m 363\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43maveraged\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mnone\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/.local/lib/python3.11/site-packages/torch/nn/functional.py:3029\u001b[0m, in \u001b[0;36mcross_entropy\u001b[0;34m(input, target, weight, size_average, ignore_index, reduce, reduction, label_smoothing)\u001b[0m\n\u001b[1;32m 3027\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m size_average \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mor\u001b[39;00m reduce \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 3028\u001b[0m reduction \u001b[38;5;241m=\u001b[39m _Reduction\u001b[38;5;241m.\u001b[39mlegacy_get_string(size_average, reduce)\n\u001b[0;32m-> 3029\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mtorch\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_C\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_nn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcross_entropy_loss\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_Reduction\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_enum\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreduction\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mignore_index\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabel_smoothing\u001b[49m\u001b[43m)\u001b[49m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
"