{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"# Concise Implementation of Recurrent Neural Networks"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-03T23:01:16.582933Z",
"start_time": "2019-07-03T23:01:13.502104Z"
},
"attributes": {
"classes": [],
"id": "",
"n": "1"
}
},
"outputs": [],
"source": [
"import d2l\n",
"import math\n",
"from mxnet import gluon, init, np, npx\n",
"from mxnet.gluon import nn, rnn\n",
"npx.set_np()\n",
"\n",
"# Values passed to the data loader: minibatch size and steps per sequence\n",
"batch_size = 32\n",
"num_steps = 35\n",
"train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"Creating an RNN layer with 256 hidden units."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-03T23:01:16.591409Z",
"start_time": "2019-07-03T23:01:16.585714Z"
},
"attributes": {
"classes": [],
"id": "",
"n": "26"
}
},
"outputs": [],
"source": [
"# Recurrent layer with 256 hidden units; initialize() is called with its defaults\n",
"rnn_layer = rnn.RNN(hidden_size=256)\n",
"rnn_layer.initialize()"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"Initializing the hidden state."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-03T23:01:16.599071Z",
"start_time": "2019-07-03T23:01:16.593543Z"
},
"attributes": {
"classes": [],
"id": "",
"n": "37"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(1, (1, 1, 256))"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Request an initial state for a batch of one, then display how many state\n",
"# arrays were returned together with the shape of the first one\n",
"state = rnn_layer.begin_state(batch_size=1)\n",
"(len(state), state[0].shape)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"Defining a class that wraps the RNN layer and adds a dense output layer."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-03T23:01:16.611592Z",
"start_time": "2019-07-03T23:01:16.601094Z"
},
"attributes": {
"classes": [],
"id": "",
"n": "39"
}
},
"outputs": [],
"source": [
"class RNNModel(nn.Block):\n",
"    \"\"\"Recurrent layer followed by a dense layer with vocab_size output units.\"\"\"\n",
"\n",
"    def __init__(self, rnn_layer, vocab_size, **kwargs):\n",
"        \"\"\"Keep the given recurrent layer and build the dense output layer.\n",
"\n",
"        rnn_layer is stored as self.rnn; vocab_size is used both as the one-hot\n",
"        depth in forward() and as the number of dense output units. Remaining\n",
"        keyword arguments are passed to nn.Block.\n",
"        \"\"\"\n",
"        super().__init__(**kwargs)\n",
"        self.vocab_size = vocab_size\n",
"        self.rnn = rnn_layer\n",
"        self.dense = nn.Dense(vocab_size)\n",
"\n",
"    def forward(self, inputs, state):\n",
"        \"\"\"Return the dense-layer output and the updated recurrent state.\"\"\"\n",
"        # Transpose the token indices, then one-hot encode them with depth\n",
"        # vocab_size before handing them to the recurrent layer\n",
"        one_hot_steps = npx.one_hot(inputs.T, self.vocab_size)\n",
"        rnn_out, state = self.rnn(one_hot_steps, state)\n",
"        # Flatten all leading axes so the dense layer receives an array of\n",
"        # shape (num_steps * batch_size, num_hiddens); its output then has\n",
"        # shape (num_steps * batch_size, vocab_size)\n",
"        num_hiddens = rnn_out.shape[-1]\n",
"        flat_out = rnn_out.reshape((-1, num_hiddens))\n",
"        return self.dense(flat_out), state\n",
"\n",
"    def begin_state(self, *args, **kwargs):\n",
"        \"\"\"Forward every argument to the wrapped layer's begin_state.\"\"\"\n",
"        return self.rnn.begin_state(*args, **kwargs)"
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"Training"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-03T23:02:13.528517Z",
"start_time": "2019-07-03T23:01:16.616773Z"
},
"attributes": {
"classes": [],
"id": "",
"n": "42"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Perplexity 1.2, 158013 tokens/sec on gpu(0)\n",
"time traveller you can show black is white by argument said fil\n",
"traveller after the pauserequired for the little go the geo\n"
]
},
{
"data": {
"image/svg+xml": [
"\n",
"\n",
"\n",
"\n"
],
"text/plain": [
"