{
"cells": [
{
"cell_type": "markdown",
"id": "e144c753",
"metadata": {
"slideshow": {
"slide_type": "-"
}
},
"source": [
"# Deep Recurrent Neural Networks\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "dad8438b",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T19:48:12.003936Z",
"iopub.status.busy": "2023-08-18T19:48:12.003560Z",
"iopub.status.idle": "2023-08-18T19:52:08.500314Z",
"shell.execute_reply": "2023-08-18T19:52:08.499056Z"
},
"origin_pos": 13,
"tab": [
"pytorch"
]
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import torch\n",
"from torch import nn\n",
"from d2l import torch as d2l\n",
"\n",
"class StackedRNNScratch(d2l.Module):\n",
" \"\"\"Deep RNN implemented from scratch by stacking RNNScratch layers.\n",
"\n",
" Layer 0 maps num_inputs -> num_hiddens; every deeper layer maps\n",
" num_hiddens -> num_hiddens. sigma is passed through to each layer's\n",
" weight initialization.\n",
" \"\"\"\n",
" def __init__(self, num_inputs, num_hiddens, num_layers, sigma=0.01):\n",
" super().__init__()\n",
" self.save_hyperparameters()\n",
" # nn.Sequential is used only as a registered container of layers;\n",
" # forward below iterates over it manually so that each layer's\n",
" # hidden state can be threaded through separately.\n",
" self.rnns = nn.Sequential(*[d2l.RNNScratch(\n",
" num_inputs if i==0 else num_hiddens, num_hiddens, sigma)\n",
" for i in range(num_layers)])\n",
"\n",
"@d2l.add_to_class(StackedRNNScratch)\n",
"def forward(self, inputs, Hs=None):\n",
" \"\"\"Run inputs through each RNN layer in turn.\n",
"\n",
" Hs is a list with one hidden-state entry per layer; None means\n",
" every layer starts from its default initial state. Returns the\n",
" top layer's stacked outputs and the updated list of states.\n",
" \"\"\"\n",
" outputs = inputs\n",
" if Hs is None: Hs = [None] * self.num_layers\n",
" for i in range(self.num_layers):\n",
" outputs, Hs[i] = self.rnns[i](outputs, Hs[i])\n",
" # Each layer returns its outputs as a sequence of per-step\n",
" # tensors; stack them along dim 0 (time) so they can serve as\n",
" # the input sequence for the next layer.\n",
" outputs = torch.stack(outputs, 0)\n",
" return outputs, Hs\n",
"\n",
"data = d2l.TimeMachine(batch_size=1024, num_steps=32)\n",
"rnn_block = StackedRNNScratch(num_inputs=len(data.vocab),\n",
" num_hiddens=32, num_layers=2)\n",
"model = d2l.RNNLMScratch(rnn_block, vocab_size=len(data.vocab), lr=2)\n",
"trainer = d2l.Trainer(max_epochs=100, gradient_clip_val=1, num_gpus=1)\n",
"trainer.fit(model, data)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "8d2a6b50",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T19:52:08.519473Z",
"iopub.status.busy": "2023-08-18T19:52:08.519078Z",
"iopub.status.idle": "2023-08-18T19:52:08.525324Z",
"shell.execute_reply": "2023-08-18T19:52:08.524335Z"
},
"origin_pos": 18,
"tab": [
"pytorch"
]
},
"outputs": [],
"source": [
"class GRU(d2l.RNN): \n",
" \"\"\"The multilayer GRU model.\"\"\"\n",
" def __init__(self, num_inputs, num_hiddens, num_layers, dropout=0):\n",
" # NOTE(review): d2l.Module.__init__ is called directly rather than\n",
" # super().__init__() — presumably to skip d2l.RNN's constructor so\n",
" # it does not build its own self.rnn before we install the\n",
" # multilayer nn.GRU below. TODO confirm against d2l.RNN.__init__.\n",
" d2l.Module.__init__(self)\n",
" self.save_hyperparameters()\n",
" # Per PyTorch semantics, dropout is applied between stacked GRU\n",
" # layers, not after the final layer (it is a no-op when\n",
" # num_layers == 1).\n",
" self.rnn = nn.GRU(num_inputs, num_hiddens, num_layers,\n",
" dropout=dropout)"
]
},
{
"cell_type": "markdown",
"id": "e4753300",
"metadata": {
"slideshow": {
"slide_type": "-"
}
},
"source": [
"Select a nontrivial number of hidden layers\n",
"by specifying the value of `num_layers`."
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "e4201eec",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T19:52:08.528774Z",
"iopub.status.busy": "2023-08-18T19:52:08.528499Z",
"iopub.status.idle": "2023-08-18T19:55:24.406556Z",
"shell.execute_reply": "2023-08-18T19:55:24.405655Z"
},
"origin_pos": 23,
"tab": [
"pytorch"
]
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Train a 2-layer built-in GRU on the same dataset, reusing the `data`\n",
"# and `trainer` objects configured in the earlier training cell.\n",
"gru = GRU(num_inputs=len(data.vocab), num_hiddens=32, num_layers=2)\n",
"model = d2l.RNNLM(gru, vocab_size=len(data.vocab), lr=2)\n",
"trainer.fit(model, data)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d1f034f9",
"metadata": {
"execution": {
"iopub.execute_input": "2023-08-18T19:55:24.426604Z",
"iopub.status.busy": "2023-08-18T19:55:24.425906Z",
"iopub.status.idle": "2023-08-18T19:55:24.462233Z",
"shell.execute_reply": "2023-08-18T19:55:24.461393Z"
},
"origin_pos": 24,
"tab": [
"pytorch"
]
},
"outputs": [
{
"data": {
"text/plain": [
"'it has for and the time th'"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Generate 20 characters continuing the prefix 'it has', on a GPU if\n",
"# one is available.\n",
"model.predict('it has', 20, data.vocab, d2l.try_gpu())"
]
}
],
"metadata": {
"celltoolbar": "Slideshow",
"language_info": {
"name": "python"
},
"required_libs": [],
"rise": {
"autolaunch": true,
"enable_chalkboard": true,
"overlay": "",
"scroll": true
}
},
"nbformat": 4,
"nbformat_minor": 5
}