{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "4b24e71b-8ae9-4b95-8ab6-52f027398e9a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import sys\n",
"import torch.nn as nn\n",
"import torch\n",
"import warnings\n",
"import numpy as np\n",
"from sklearn.model_selection import ParameterGrid\n",
"sys.path.append('/home/jovyan/work/d2l_solutions/notebooks/exercises/d2l_utils/')\n",
"import d2l\n",
"from torchsummary import summary\n",
"warnings.filterwarnings(\"ignore\")\n",
"\n",
"class Seq2SeqEncoder(d2l.Encoder): #@save\n",
" \"\"\"The RNN encoder for sequence-to-sequence learning.\"\"\"\n",
" def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,\n",
" dropout=0):\n",
" super().__init__()\n",
" self.embedding = nn.Embedding(vocab_size, embed_size)\n",
" self.rnn = d2l.GRU(embed_size, num_hiddens, num_layers, dropout)\n",
" self.apply(init_seq2seq)\n",
"\n",
" def forward(self, X, *args):\n",
" # X shape: (batch_size, num_steps)\n",
" embs = self.embedding(X.t().type(torch.int64))\n",
" # embs shape: (num_steps, batch_size, embed_size)\n",
" outputs, state = self.rnn(embs)\n",
" # outputs shape: (num_steps, batch_size, num_hiddens)\n",
" # state shape: (num_layers, batch_size, num_hiddens)\n",
" return outputs, state\n",
" \n",
"class Seq2SeqDecoder(d2l.Decoder):\n",
"    \"\"\"The RNN decoder for sequence-to-sequence learning.\n",
"\n",
"    At every time step the encoder's final-layer last output (the\n",
"    context) is concatenated onto the target-token embedding before\n",
"    being fed to the GRU.\n",
"    \"\"\"\n",
"    def __init__(self, vocab_size, embed_size, num_hiddens, num_layers,\n",
"                 dropout=0):\n",
"        super().__init__()\n",
"        self.embedding = nn.Embedding(vocab_size, embed_size)\n",
"        # GRU input is the embedding concatenated with the context vector.\n",
"        self.rnn = d2l.GRU(embed_size+num_hiddens, num_hiddens,\n",
"                           num_layers, dropout)\n",
"        self.dense = nn.LazyLinear(vocab_size)\n",
"        self.apply(init_seq2seq)\n",
"\n",
"    def init_state(self, enc_all_outputs, *args):\n",
"        # Decoder state is (encoder outputs, encoder final hidden state).\n",
"        return enc_all_outputs\n",
"\n",
"    def forward(self, X, state):\n",
"        # X shape: (batch_size, num_steps)\n",
"        # embs shape: (num_steps, batch_size, embed_size)\n",
"        # Cast to int64 for consistency with the encoder's embedding\n",
"        # lookup (was int32; int64 is the canonical index dtype).\n",
"        embs = self.embedding(X.t().type(torch.int64))\n",
"        enc_output, hidden_state = state\n",
"        # context shape: (batch_size, num_hiddens)\n",
"        context = enc_output[-1]\n",
"        # Broadcast context to (num_steps, batch_size, num_hiddens)\n",
"        context = context.repeat(embs.shape[0], 1, 1)\n",
"        # Concat at the feature dimension\n",
"        embs_and_context = torch.cat((embs, context), -1)\n",
"        outputs, hidden_state = self.rnn(embs_and_context, hidden_state)\n",
"        # Project to vocabulary logits and restore batch-major layout.\n",
"        outputs = self.dense(outputs).swapaxes(0, 1)\n",
"        # outputs shape: (batch_size, num_steps, vocab_size)\n",
"        # hidden_state shape: (num_layers, batch_size, num_hiddens)\n",
"        return outputs, [enc_output, hidden_state]\n",
"    \n",
"class Seq2Seq(d2l.EncoderDecoder): #@save\n",
"    \"\"\"The RNN encoder--decoder for sequence-to-sequence learning.\n",
"\n",
"    Masks padding positions out of the per-token loss and optimizes\n",
"    with Adam.\n",
"    \"\"\"\n",
"    def __init__(self, encoder, decoder, tgt_pad, lr):\n",
"        super().__init__(encoder, decoder)\n",
"        self.save_hyperparameters()\n",
"\n",
"    def loss(self, Y_hat, Y):\n",
"        # Per-token (unaveraged) loss; zero-arg super() for consistency\n",
"        # with the rest of this cell.\n",
"        l = super().loss(Y_hat, Y, averaged=False)\n",
"        # Keep only non-padding target positions, then average over the\n",
"        # real tokens.\n",
"        mask = (Y.reshape(-1) != self.tgt_pad).type(torch.float32)\n",
"        return (l * mask).sum() / mask.sum()\n",
"\n",
"    def validation_step(self, batch, plot_flag=True):\n",
"        # batch = (*model inputs, target). plot_flag=False lets callers\n",
"        # (e.g. stat_val) collect the loss without drawing on the board.\n",
"        Y_hat = self(*batch[:-1])\n",
"        l = self.loss(Y_hat, batch[-1])\n",
"        if plot_flag:\n",
"            self.plot('loss', l, train=False)\n",
"        return l\n",
"\n",
"    def configure_optimizers(self):\n",
"        # Adam optimizer is used here\n",
"        return torch.optim.Adam(self.parameters(), lr=self.lr)\n",
"    \n",
"def init_seq2seq(module): #@save\n",
" \"\"\"Initialize weights for sequence-to-sequence learning.\"\"\"\n",
" if type(module) == nn.Linear:\n",
" nn.init.xavier_uniform_(module.weight)\n",
" if type(module) == nn.GRU:\n",
" for param in module._flat_weights_names:\n",
" if \"weight\" in param:\n",
" nn.init.xavier_uniform_(module._parameters[param])\n",
"\n",
"def stat_val(model, data):\n",
"    \"\"\"Return exp(mean validation loss) -- the validation perplexity.\"\"\"\n",
"    # Iterate the validation dataloader directly (for-loop calls iter()\n",
"    # implicitly) and collect the unplotted per-batch losses.\n",
"    losses = [\n",
"        model.validation_step(batch, plot_flag=False).detach().numpy()\n",
"        for batch in data.get_dataloader(False)\n",
"    ]\n",
"    return np.exp(np.mean(losses))\n",
"def experiment(model, data):\n",
" trainer = d2l.Trainer(max_epochs=30, gradient_clip_val=1, num_gpus=0)\n",
" trainer.fit(model, data)\n",
" return stat_val(model, data)"
]
},
{
"cell_type": "markdown",
"id": "c8b8a3ca-c83b-4883-bc21-be3eb945e7d3",
"metadata": {},
"source": [
"# 1. Can you adjust the hyperparameters to improve the translation results?"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "4c9090b0-dcf6-4aa5-b56a-865c9588bac2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading ../data/fra-eng.zip from http://d2l-data.s3-accelerate.amazonaws.com/fra-eng.zip...\n"
]
}
],
"source": [
"data = d2l.MTFraEng(batch_size=128)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "35928e47-55fd-4bb2-9530-714b10d00dbc",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
"