{
"cells": [
{
"cell_type": "markdown",
"id": "e144c753",
"metadata": {
"slideshow": {
"slide_type": "-"
}
},
"source": [
"# Deep Recurrent Neural Networks\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "dad8438b",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T19:48:12.003936Z",
"iopub.status.busy": "2023-08-18T19:48:12.003560Z",
"iopub.status.idle": "2023-08-18T19:52:08.500314Z",
"shell.execute_reply": "2023-08-18T19:52:08.499056Z"
},
"origin_pos": 13,
"tab": [
"pytorch"
]
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import torch\n",
"from torch import nn\n",
"from d2l import torch as d2l\n",
"\n",
"class StackedRNNScratch(d2l.Module):\n",
" \"\"\"Deep RNN implemented from scratch by stacking RNNScratch layers.\n",
"\n",
" Layer 0 maps num_inputs -> num_hiddens; every deeper layer maps\n",
" num_hiddens -> num_hiddens. sigma is passed through to each layer's\n",
" weight initialization.\n",
" \"\"\"\n",
" def __init__(self, num_inputs, num_hiddens, num_layers, sigma=0.01):\n",
" super().__init__()\n",
" self.save_hyperparameters()\n",
" # nn.Sequential is used only as a registered container of layers;\n",
" # forward below iterates over it manually so that each layer's\n",
" # hidden state can be threaded through separately.\n",
" self.rnns = nn.Sequential(*[d2l.RNNScratch(\n",
" num_inputs if i==0 else num_hiddens, num_hiddens, sigma)\n",
" for i in range(num_layers)])\n",
"\n",
"@d2l.add_to_class(StackedRNNScratch)\n",
"def forward(self, inputs, Hs=None):\n",
" \"\"\"Run inputs through each RNN layer in turn.\n",
"\n",
" Hs is a list with one hidden-state entry per layer; None means\n",
" every layer starts from its default initial state. Returns the\n",
" top layer's stacked outputs and the updated list of states.\n",
" \"\"\"\n",
" outputs = inputs\n",
" if Hs is None: Hs = [None] * self.num_layers\n",
" for i in range(self.num_layers):\n",
" outputs, Hs[i] = self.rnns[i](outputs, Hs[i])\n",
" # Each layer returns its outputs as a sequence of per-step\n",
" # tensors; stack them along dim 0 (time) so they can serve as\n",
" # the input sequence for the next layer.\n",
" outputs = torch.stack(outputs, 0)\n",
" return outputs, Hs\n",
"\n",
"data = d2l.TimeMachine(batch_size=1024, num_steps=32)\n",
"rnn_block = StackedRNNScratch(num_inputs=len(data.vocab),\n",
" num_hiddens=32, num_layers=2)\n",
"model = d2l.RNNLMScratch(rnn_block, vocab_size=len(data.vocab), lr=2)\n",
"trainer = d2l.Trainer(max_epochs=100, gradient_clip_val=1, num_gpus=1)\n",
"trainer.fit(model, data)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "8d2a6b50",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T19:52:08.519473Z",
"iopub.status.busy": "2023-08-18T19:52:08.519078Z",
"iopub.status.idle": "2023-08-18T19:52:08.525324Z",
"shell.execute_reply": "2023-08-18T19:52:08.524335Z"
},
"origin_pos": 18,
"tab": [
"pytorch"
]
},
"outputs": [],
"source": [
"class GRU(d2l.RNN): \n",
" \"\"\"The multilayer GRU model.\"\"\"\n",
" def __init__(self, num_inputs, num_hiddens, num_layers, dropout=0):\n",
" # NOTE(review): d2l.Module.__init__ is called directly rather than\n",
" # super().__init__() — presumably to skip d2l.RNN's constructor so\n",
" # it does not build its own self.rnn before we install the\n",
" # multilayer nn.GRU below. TODO confirm against d2l.RNN.__init__.\n",
" d2l.Module.__init__(self)\n",
" self.save_hyperparameters()\n",
" # Per PyTorch semantics, dropout is applied between stacked GRU\n",
" # layers, not after the final layer (it is a no-op when\n",
" # num_layers == 1).\n",
" self.rnn = nn.GRU(num_inputs, num_hiddens, num_layers,\n",
" dropout=dropout)"
]
},
{
"cell_type": "markdown",
"id": "e4753300",
"metadata": {
"slideshow": {
"slide_type": "-"
}
},
"source": [
"Select a nontrivial number of hidden layers\n",
"by specifying the value of `num_layers`."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e4201eec",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T19:52:08.528774Z",
"iopub.status.busy": "2023-08-18T19:52:08.528499Z",
"iopub.status.idle": "2023-08-18T19:55:24.406556Z",
"shell.execute_reply": "2023-08-18T19:55:24.405655Z"
},
"origin_pos": 23,
"tab": [
"pytorch"
]
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Train a 2-layer built-in GRU on the same dataset, reusing the `data`\n",
"# and `trainer` objects configured in the earlier training cell.\n",
"gru = GRU(num_inputs=len(data.vocab), num_hiddens=32, num_layers=2)\n",
"model = d2l.RNNLM(gru, vocab_size=len(data.vocab), lr=2)\n",
"trainer.fit(model, data)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d1f034f9",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T19:55:24.426604Z",
"iopub.status.busy": "2023-08-18T19:55:24.425906Z",
"iopub.status.idle": "2023-08-18T19:55:24.462233Z",
"shell.execute_reply": "2023-08-18T19:55:24.461393Z"
},
"origin_pos": 24,
"tab": [
"pytorch"
]
},
"outputs": [
{
"data": {
"text/plain": [
"'it has for and the time th'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Generate 20 characters continuing the prefix 'it has', on a GPU if\n",
"# one is available.\n",
"model.predict('it has', 20, data.vocab, d2l.try_gpu())"
]
}
],
"metadata": {
"celltoolbar": "Slideshow",
"language_info": {
"name": "python"
},
"required_libs": [],
"rise": {
"autolaunch": true,
"enable_chalkboard": true,
"overlay": "",
"scroll": true
}
},
"nbformat": 4,
"nbformat_minor": 5
}