{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Vanilla Recurrent Neural Network\n", "
\n", "Character level implementation of vanilla recurrent neural network" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Import dependencies" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true, "nbpresent": { "id": "c14f5fc3-7ae4-4775-aca6-4e89462d83df" } }, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "metadata": { "nbpresent": { "id": "812c485b-1558-4c42-bf28-17cf898d8049" } }, "source": [ "## Parameters Initialization" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true, "nbpresent": { "id": "a1126408-fd54-4d9e-a169-b248b7656a36" } }, "outputs": [], "source": [ "def initialize_parameters(hidden_size, vocab_size):\n", " '''\n", " Returns:\n", " parameters -- a tuple of network parameters\n", " adagrad_mem_vars -- a tuple of mem variables required for adagrad update\n", " '''\n", " Wxh = np.random.randn(hidden_size, vocab_size) * 0.01\n", " Whh = np.random.randn(hidden_size, hidden_size) * 0.01\n", " Why = np.random.randn(vocab_size, hidden_size) * 0.01\n", " bh = np.zeros([hidden_size, 1])\n", " by = np.zeros([vocab_size, 1])\n", "\n", " mWxh, mWhh, mWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)\n", " mbh, mby = np.zeros_like(bh), np.zeros_like(by) # memory variables for Adagrad\n", " parameter = (Wxh, Whh, Why, bh, by)\n", " adagrad_mem_vars = (mWxh, mWhh, mWhy, mbh, mby)\n", " \n", " return (parameter, adagrad_mem_vars)" ] }, { "cell_type": "markdown", "metadata": { "nbpresent": { "id": "fd3b570f-140d-4209-98cb-105a062f0cd7" } }, "source": [ "## Forward Propogation" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def softmax(X):\n", " t = np.exp(X)\n", " return t / np.sum(t, axis=0)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true, "nbpresent": { "id": "965c3245-fdbd-4049-8905-98a0c913f525" } }, "outputs": [], "source": [ "def forward_propogation(X, parameters, seq_length, hprev):\n", " '''\n", " Implement the forward propogation in the network\n", "\n", " Arguments:\n", " X -- input to the network\n", " parameters -- a tuple containing weights and biases of the network\n", " seq_length -- length of sequence of input\n", " hprev -- previous hidden state\n", "\n", " Returns:\n", " caches -- tuple of activations and hidden states for each step of forward prop\n", " '''\n", "\n", " caches = {}\n", " caches['h0'] = np.copy(hprev)\n", " Wxh, Whh, Why, bh, by = parameters\n", " for i in range(seq_length):\n", " x = X[i].reshape(vocab_size, 1) \n", " ht = np.tanh(np.dot(Whh, caches['h' + str(i)]) + np.dot(Wxh, x) + bh)\n", " Z = np.dot(Why, ht) + by\n", " A = softmax(Z)\n", " caches['A' + str(i+1)] = A\n", " caches['h' + str(i+1)] = ht\n", " return caches" ] }, { "cell_type": "markdown", "metadata": { "nbpresent": { "id": "c4713b4b-8f80-42e8-b0e4-658123b3a73f" } }, "source": [ "## Cost Computation" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true, "nbpresent": { "id": "938d86d0-a5a7-4a5f-9ef8-8e349fb78f04" } }, "outputs": [], "source": [ "def compute_cost(Y, caches):\n", " \"\"\"\n", " Implement the cost function for the network\n", "\n", " Arguments:\n", " Y -- true \"label\" vector, shape (vocab_size, number of examples)\n", " caches -- tuple of activations and hidden states for each step of forward prop\n", "\n", " Returns:\n", " cost -- cross-entropy cost\n", " \"\"\"\n", "\n", " seq_length = len(caches) // 2\n", " cost = 0\n", " for i in range(seq_length):\n", " y = Y[i].reshape(vocab_size, 1)\n", " cost += - np.sum(y * np.log(caches['A' + str(i+1)]))\n", " return np.squeeze(cost)" ] }, { "cell_type": "markdown", "metadata": { "nbpresent": { "id": "4c54d363-8bad-4c18-8882-3b1ed3c4b7f1" } }, "source": [ "## Backward Propogation" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": true, "nbpresent": { "id": "9d525583-9a97-44aa-940d-94b28dd39f29" } }, "outputs": [], "source": [ "def backward_propogation(X, Y, caches, parameters):\n", " '''\n", " Implement Backpropogation\n", "\n", " Arguments:\n", " Al -- Activations of last layer\n", " Y -- True labels of data\n", " caches -- tuple containing values of `A` and `h` for each char in forward prop\n", " parameters -- tuple containing parameters of the network\n", "\n", " Returns\n", " grads -- tuple containing gradients of the network parameters\n", " '''\n", "\n", " Wxh, Whh, Why, bh, by = parameters\n", "\n", " dWhh, dWxh, dWhy = np.zeros_like(Whh), np.zeros_like(Wxh), np.zeros_like(Why)\n", " dbh, dby = np.zeros_like(bh), np.zeros_like(by)\n", " dhnext = np.zeros_like(caches['h0']) \n", "\n", " seq_length = len(caches) // 2\n", "\n", " for i in reversed(range(seq_length)):\n", " y = Y[i].reshape(vocab_size, 1)\n", " x = X[i].reshape(vocab_size, 1)\n", " dZ = np.copy(caches['A' + str(i+1)]) - y\n", " dWhy += np.dot(dZ, caches['h' + str(i+1)].T)\n", " dby += dZ \n", " dht = np.dot(Why.T, dZ) + dhnext\n", " dhraw = dht * (1 - caches['h' + str(i+1)] * caches['h' + str(i+1)])\n", " dbh += dhraw\n", " dWhh += np.dot(dhraw, caches['h' + str(i)].T)\n", " dWxh += np.dot(dhraw, x.T)\n", " dhnext = np.dot(Whh.T, dhraw)\n", "\n", " for dparam in [dWxh, dWhh, dWhy, dbh, dby]:\n", " np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients\n", "\n", " grads = (dWxh, dWhh, dWhy, dbh, dby)\n", " return grads" ] }, { "cell_type": "markdown", "metadata": { "nbpresent": { "id": "1736f426-5cad-44ad-92bc-fc6ccc6758db" } }, "source": [ "## Parameters Update" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true, "nbpresent": { "id": "ef33b53a-de6b-4fb5-9111-508c9183f35f" } }, "outputs": [], "source": [ "def update_parameters(parameters, grads, adagrad_mem_vars, learning_rate):\n", " '''\n", " Update parameters of the network using Adagrad update\n", "\n", " Arguments:\n", " paramters -- tuple containing weights and biases of the network\n", " grads -- tuple containing the gradients of the parameters\n", " learning_rate -- rate of adagrad update\n", "\n", " Returns\n", " parameters -- tuple containing updated parameters\n", " '''\n", "\n", " a = np.copy(parameters[0])\n", " for param, dparam, mem in zip(parameters, grads, adagrad_mem_vars):\n", " mem += dparam * dparam\n", " param -= learning_rate * dparam / np.sqrt(mem + 1e-8) # adagrad update\n", "\n", " return (parameters, adagrad_mem_vars)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Sample text from model" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def print_sample(ht, seed_ix, n, parameters):\n", " \"\"\" \n", " Samples a sequence of integers from the model.\n", " \n", " Arguments\n", " ht -- memory state\n", " seed_ix --seed letter for first time step\n", " n -- number of chars to extract\n", " parameters -- tuple containing network weights and biases\n", " \"\"\"\n", " Wxh, Whh, Why, bh, by = parameters\n", " x = np.eye(vocab_size)[seed_ix].reshape(vocab_size, 1)\n", " ixes = []\n", " for t in range(n):\n", " ht = np.tanh(np.dot(Wxh, x) + np.dot(Whh, ht) + bh)\n", " y = np.dot(Why, ht) + by\n", " p = np.exp(y) / np.sum(np.exp(y))\n", " ix = np.random.choice(range(vocab_size), p=p.ravel()) ### why not argmax of p??\n", " x = np.eye(vocab_size)[ix].reshape(vocab_size, 1)\n", " ixes.append(ix)\n", " \n", " txt = ''.join(ix_to_char[ix] for ix in ixes)\n", " print('----\\n %s \\n----' % txt)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def get_one_hot(p, char_to_ix, data, vocab_size):\n", " '''\n", " Gets indexes of chars of `seq_length` from `data`, returns them in one hot representation\n", " '''\n", " inputs = [char_to_ix[ch] for ch in data[p:p+seq_length]]\n", " targets = [char_to_ix[ch] for ch in data[p+1:p+seq_length+1]]\n", " X = np.eye(vocab_size)[inputs]\n", " Y = np.eye(vocab_size)[targets]\n", " return X, Y" ] }, { "cell_type": "markdown", "metadata": { "nbpresent": { "id": "399cefc4-a924-41c2-994a-43902692ff76" } }, "source": [ "## Model" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "nbpresent": { "id": "237535d0-fd38-4421-baaf-592076b72e67" } }, "outputs": [], "source": [ "def Model(data, seq_length, lr, char_to_ix, ix_to_char, num_of_iterations):\n", " '''\n", " Train RNN model and return trained parameters\n", " '''\n", " parameters, adagrad_mem_vars = initialize_parameters(hidden_size, vocab_size)\n", " costs = []\n", " n, p = 0, 0\n", " smooth_loss = -np.log(1.0 / vocab_size) * seq_length\n", " while n < num_of_iterations:\n", " if p + seq_length + 1 >= len(data) or n == 0: \n", " hprev = np.zeros((hidden_size, 1)) # reset RNN memory\n", " p = 0 # go from start of data\n", "\n", " X, Y = get_one_hot(p, char_to_ix, data, vocab_size)\n", " caches = forward_propogation(X, parameters, seq_length, hprev)\n", " cost = compute_cost(Y, caches)\n", " grads = backward_propogation(X, Y, caches, parameters)\n", " parameters, adagrad_mem_vars = update_parameters(parameters, grads, adagrad_mem_vars, lr)\n", " smooth_loss = smooth_loss * 0.999 + cost * 0.001\n", "\n", " if n % 1000 == 0:\n", " print_sample(hprev, char_to_ix['a'], 200, parameters)\n", " print('Iteration: %d -- Cost: %0.3f' % (n, smooth_loss))\n", "\n", " costs.append(cost)\n", " hprev = caches['h' + str(seq_length)]\n", " n+=1\n", " p+=seq_length\n", "\n", " plt.plot(costs)\n", " return parameters" ] }, { "cell_type": "markdown", "metadata": { "nbpresent": { "id": "b12fa819-ef1c-468d-ac07-184adeb85519" } }, "source": [ "## Implementing the model on a text" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "nbpresent": { "id": "5457c250-fd02-466b-8286-02b5749b757b" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "data has 748 characters, 42 unique.\n" ] } ], "source": [ "data = open('data/text-data.txt', 'r').read() # read a text file\n", "chars = list(set(data)) # vocabulary\n", "data_size, vocab_size = len(data), len(chars)\n", "print ('data has %d characters, %d unique.' % (data_size, vocab_size))\n", "char_to_ix = { ch:i for i,ch in enumerate(chars) } # maps char to it's index in vocabulary\n", "ix_to_char = { i:ch for i,ch in enumerate(chars) } # maps index in vocabular to corresponding character" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": true, "nbpresent": { "id": "c551d248-6228-48ec-83cf-d6ec46639245" } }, "outputs": [], "source": [ "# hyper-parameters\n", "learning_rate = 0.1\n", "hidden_size = 100\n", "seq_length = 25\n", "num_of_iterations = 20000" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "nbpresent": { "id": "b737c4a1-2309-4dbf-9897-4b564bb2497f" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "----\n", " Schu\n", "kwoj!wi nRA—— wj w .ycHrrhqagT:noh:ahvyqkSAnwpNtNTfpnk;hnnN\n", "YIuprpNto:ThHYmcwdwYRYldcTaNmR\n", "fkm!swem!\n", "jnBqb iTIp,g,,v\n", " qvw fkwS;g;!qmcBbicAlY;nbbIkwHYO:IsfhT :wl\n", "—e!eI\n", ".wwuoI—esSq—BOOcS\n", ",mOyBIT;;u \n", "----\n", "Iteration: 0 -- Cost: 93.442\n", "----\n", " s wheng thert s dot bn,\n", "B. wor TtoTar;\n", "Had .asshThanb,\n", "And as tha Telethabgas wberg penmerg te th;A,\n", "\n", "Iy Sar me:\n", "Thoudd herg d aberas and Then\n", "Tnt ps waaverornasiactham!,\n", "And bastevemer,r wing thitmth \n", "----\n", "Iteration: 1000 -- Cost: 68.503\n", "----\n", " veler anotrgr, ben,\n", "I d agep had I clent there thallther iak lecI stok.\n", "\n", "kere anis I shotheredint thavelest aseme way lealling per ais ay I dad\n", "Ind baas tha siverged ia,\n", "mar tredeelay,\n", "And shadow tod \n", "----\n", "Iteration: 2000 -- Cost: 39.597\n", "----\n", " ksithet I bethathen betherusd we panted in a ore that fornind lowked bowd if bether keother waAna s wond themod the bess be he in trould woore as fir aiTh\n", "And that Tas gre thathen boastougheramou the \n", "----\n", "Iteration: 3000 -- Cost: 22.733\n", "----\n", " gevelgarsing thes\n", "Theniem\n", "Thotroa to way,\n", "Iedt bthe ind to ssy,\n", "I shadowe kno dnas marsi\n", "Tsher win oon toould now trn took the passing there\n", "Had worn them really about the sas morn loodstoowing the te \n", "----\n", "Iteration: 4000 -- Cost: 11.318\n", "----\n", " d woads ind hewinr eng\n", "I d way leavelena t—ng in the one in thewother, as just as for that the passingrt wen toows ay,\n", "Iedt ow morn wt pe the better claim,\n", "Back.\n", "Oha d took the one less traved both\n", "An \n", "----\n", "Iteration: 5000 -- Cost: 6.830\n", "----\n", " delling the traveled by len woore it lent that the passing there\n", "Had worn them rhavesuho ben wit len wood,\n", "And sorry I could notstr asshaithen the on the baps the bether corh bent in themu juh way lea \n", "----\n", "Iteration: 6000 -- Cost: 3.523\n", "----\n", " rre besep took beavellea by,\n", "I sherdent the pit Inclaps sre pas graveler, the be owassed lot I soo\n", "Thoughthom enghpa wavilge aive\n", "Hac pin banr,\n", "And he Tan\n", "Tad gas owe in then ae\n", "I s gratshacops theh a \n", "----\n", "Iteration: 7000 -- Cost: 4.619\n", "----\n", " nd ages hence:\n", "Tso rowllong s morhaps the better claim,\n", "Becaus in theh the one thalllouvish the in the aook the on the undergrowth;\n", "\n", "shewans wans weer in ates hads wiverged in a yellow wood,\n", "And tha \n", "----\n", "Iteration: 8000 -- Cost: 4.595\n", "----\n", " ss len len as trot troddyow yreasI wantes rnat hassy,\n", "And en t ans ay In leakessharhads ohr cn bear;\n", "And eors in bend t—e herher stous that mornith as for thaviverged in a yellow wood,\n", "And sorry I do \n", "----\n", "Iteration: 9000 -- Cost: 2.303\n", "----\n", " g toode orl that morning equallyhea—ss way leads oir could notr;\n", "And that has marsing this with a sigh\n", "Somewhere ages and ages hence:\n", "Two roads diverged in a yellow wood,\n", "And sorry I coubd\n", "Tore agss i \n", "----\n", "Iteration: 10000 -- Cost: 1.423\n", "----\n", " st as fair,\n", "And having perhaps the better claim,\n", "Because it was grassy and wanted wear;\n", "Thoubd by,\n", "And looked down one as far as I could\n", "To where it bent in the undergrowth;\n", "\n", "Then took the other, as j \n", "----\n", "Iteration: 11000 -- Cost: 0.915\n", "----\n", " share I—\n", "I took the one less traveled by,\n", "And that has mareishen\n", "\n", "I challlas lasen ino ste traveler, look the one lllenithen has mirsin ans lookes dn way\n", "In leaves n thet the pnd worn akep had tre th \n", "----\n", "Iteration: 12000 -- Cost: 0.672\n", "----\n", " get as I cops the passing thing in she beate\n", "Talen that morrent ing hanted wear;\n", "Though as for that the passing there\n", "Had worn them really about the same,\n", "\n", "And both that morning equally lay\n", "In leaves \n", "----\n", "Iteration: 13000 -- Cost: 0.545\n", "----\n", " s firr ads that yh wanted waarn tookethea ben wood,\n", "And having pe traveled by,\n", "And that has marsing thirhaps the better claim,\n", "Because it was grassy and worked dged wanteigrlenges I corhabem\n", "The on to \n", "----\n", "Iteration: 14000 -- Cost: 0.471\n", "----\n", " nt be teelsherhept the passing that good the one less traveled by,\n", "And that has maisith\n", "And that has hais len bead wink on tood,eaubour air,\n", "And wayd\n", "Iges hadcld told,eduslero boad way lec way!\n", "Yet Tw \n", "----\n", "Iteration: 15000 -- Cost: 0.426\n", "----\n", " s mareass woore in way\n", "\n", "And both that morning equally lro the und th tond woollin with a sigh\n", "Somewhere ages and ages hence:\n", "Twough as fan traveled by,\n", "And that has mar ing this with a sigh\n", "Somewhere \n", "----\n", "Iteration: 16000 -- Cost: 6.138\n", "----\n", " ing and ages hence:\n", "Two roads diverged in a yellow wood,\n", "And sorn th wbeagel and way,\n", "I doubted if I should ever aly Ih th w ynl and both that marling there\n", "Had worn them really about the same,\n", "\n", "And b \n", "----\n", "Iteration: 17000 -- Cost: 2.683\n", "----\n", " s mas merhith t morning equr way I could\n", "Tore and aing ing thass ond both thet the better claim,\n", "Because ing thaveler bing pass anin b abe the und worn woorsing horn lookst heages\n", "Tokept the first for \n", "----\n", "Iteration: 18000 -- Cost: 1.299\n", "----\n", " d looked bout the pes it pe that morning equally lay\n", "In leaves no step had trodden black.\n", "Oh, I kept traver dtep had traveled by,\n", "And that has marsithit the one traveler, long I stood\n", "And looked down \n", "----\n", "Iteration: 19000 -- Cost: 1.071\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "parameters = Model(data, seq_length, learning_rate, char_to_ix, ix_to_char, num_of_iterations)" ] } ], "metadata": { "kernelspec": { "display_name": "Python [conda env:ML]", "language": "python", "name": "conda-env-ML-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.4" }, "widgets": { "state": {}, "version": "1.1.2" } }, "nbformat": 4, "nbformat_minor": 2 }