{ "cells": [
 { "cell_type": "markdown", "metadata": {}, "source": [ "# pytorch for generating music reviews" ] },
 { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "cuda.is_available: True\n", "available: 1; current: 0\n", "cuda:0\n", "pytorch 0.4.0\n" ] } ], "source": [
  "import torch\n",
  "import torch.nn as nn\n",
  "import torch.nn.functional as F\n",
  "import torch.optim as optim\n",
  "\n",
  "# Pick the device every tensor and model in this notebook is created on.\n",
  "# torch.cuda.current_device() initialises CUDA and raises on CPU-only machines,\n",
  "# so it is only queried once CUDA is known to be available.\n",
  "print('cuda.is_available:', torch.cuda.is_available())\n",
  "if torch.cuda.is_available():\n",
  "    print(f'available: {torch.cuda.device_count()}; current: {torch.cuda.current_device()}')\n",
  "    DEVICE = torch.device(f'cuda:{torch.cuda.current_device()}')\n",
  "else:\n",
  "    DEVICE = torch.device('cpu')\n",
  "print(DEVICE)\n",
  "print('pytorch', torch.__version__)"
 ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Data" ] },
 { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total word_count: 241026; char_count: 1417998\n" ] }, { "data": { "text/plain": [ "0 New Music\\n\\nMt. Joy reached out to us with th...\n", "1 Folk rockers Mt. Joy have debuted their new so...\n", "2 You know we're digging Mt. Joy.\\n\\nTheir new s...\n", "3 Nothing against the profession, but the U.S. 
h...\n", "4 Connecticut duo **Opia** have released a guita...\n", "Name: content, dtype: object" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "import numpy as np\n",
  "import os\n",
  "import pandas as pd\n",
  "from sklearn.model_selection import train_test_split\n",
  "\n",
  "BASE_DIR = os.getcwd()\n",
  "DATA_DIR = os.path.join(BASE_DIR, '..', 'datasets')\n",
  "\n",
  "BLOG_CONTENT_FILE = os.path.join(DATA_DIR, 'blog_content_en_sample.json')\n",
  "BLOG_CONTENT_DF = pd.read_json(BLOG_CONTENT_FILE)\n",
  "print(f'total word_count: {sum(BLOG_CONTENT_DF.word_count)}; char_count: {sum(len(w) for w in BLOG_CONTENT_DF.content)}')\n",
  "BLOG_CONTENT_DF.head().content"
 ] },
 { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "train_text word_count: 1113633; test_text word_count: 304365\n" ] } ], "source": [
  "TRAIN_DF, TEST_DF = train_test_split(BLOG_CONTENT_DF, test_size=0.2, random_state=42)\n",
  "TRAIN_TEXT, TEST_TEXT = TRAIN_DF.content, TEST_DF.content\n",
  "# len(t) counts characters, not words, so the totals are labelled char_count\n",
  "# (train + test add up to the char_count printed by the previous cell)\n",
  "print(f'train_text char_count: {sum(len(t) for t in TRAIN_TEXT)}; test_text char_count: {sum(len(t) for t in TEST_TEXT)}')"
 ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## Helpers" ] },
 { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "BPTT = 4  # like the 'n' in n-gram, or order\n",
  "BS = 512  # batch size\n",
  "EPOCHS = 5\n",
  "N_FAC = 42  # number of latent factors\n",
  "N_HIDDEN = 128"
 ] },
 { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "def pad_start(bptt):\n",
  "    \"\"\"Return `bptt` NUL characters, the padding `create_inputs` puts in front of every text.\"\"\"\n",
  "    return '\\0' * bptt"
 ] },
 { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "vocab_size: 70\n", "['\\x00', '\\n', ' ', '!', '\"', '#', '$', '%', '&', \"'\", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', 
'4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', '[', '\\\\', ']', '^', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~']\n", "\n" ] } ], "source": [
  "def create_inputs(texts_arr, print_info=False, random_state=None):\n",
  "    \"\"\"Encode a pandas Series of texts as one stream of character indices.\n",
  "\n",
  "    Args:\n",
  "        texts_arr: pandas Series of strings, one text per element.\n",
  "        print_info: when True, print the vocabulary size and the sorted vocabulary.\n",
  "        random_state: optional seed passed to `Series.sample` so the shuffle is\n",
  "            reproducible; None (the default) keeps the previous unseeded behaviour.\n",
  "\n",
  "    Returns:\n",
  "        (idx, vocab_size, char_to_idx, idx_to_char): the encoded stream, the number\n",
  "        of distinct characters, and the char -> index / index -> char dicts.\n",
  "    \"\"\"\n",
  "    # shuffle inputs\n",
  "    texts_arr = texts_arr.sample(frac=1, random_state=random_state).reset_index(drop=True)\n",
  "\n",
  "    # pad each new text with leading '\\0' so that we learn how to start\n",
  "    # also, lowercase\n",
  "    texts = ''.join(pad_start(BPTT) + text.lower() for text in texts_arr)\n",
  "\n",
  "    # the vocabulary is sorted, so the mapping does not depend on the shuffle\n",
  "    chars = sorted(set(texts))\n",
  "    vocab_size = len(chars)\n",
  "    if print_info:\n",
  "        print('vocab_size:', vocab_size)\n",
  "        print(chars)\n",
  "        print()\n",
  "\n",
  "    char_to_idx = {c: i for i, c in enumerate(chars)}\n",
  "    idx_to_char = dict(enumerate(chars))\n",
  "\n",
  "    idx = [char_to_idx[c] for c in texts]\n",
  "    return idx, vocab_size, char_to_idx, idx_to_char\n",
  "\n",
  "# keep the lookup tables as well: the sampling cells further down read them\n",
  "_, VOCAB_SIZE, char_to_idx, idx_to_char = create_inputs(TRAIN_TEXT, True)"
 ] },
 { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "import math\n",
  "import time\n",
  "\n",
  "def time_since(since):\n",
  "    \"\"\"Format the time elapsed since `since` (a `time.time()` value) as `<minutes>m <seconds>s`.\"\"\"\n",
  "    # round to whole seconds before splitting, so 59.6s reads '1m 0s' and not '0m 60s'\n",
  "    elapsed = int(round(time.time() - since))\n",
  "    minutes, seconds = divmod(elapsed, 60)\n",
  "    return f'{minutes}m {seconds}s'"
 ] },
 { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "# https://github.com/fastai/fastai/blob/master/fastai/nlp.py\n",
  "def batchify(data, bs):\n",
  "    \"\"\"Lay out an encoded character stream for training.\n",
  "\n",
  "    bs == 1: returns a (BPTT + 1, n) LongTensor on DEVICE whose row `o` is the stream\n",
  "        shifted by `o`, so column `i` holds data[i : i + BPTT + 1] (context, then target).\n",
  "    otherwise: `data` must be a 1-D tensor; it is trimmed to a multiple of `bs` and\n",
  "        returned as a contiguous (len(data) // bs, bs) tensor, one sub-stream per column.\n",
  "    \"\"\"\n",
  "    if bs == 1:\n",
  "        if not torch.is_tensor(data):\n",
  "            data = torch.tensor(data, dtype=torch.long)\n",
  "        data = data.to(DEVICE, dtype=torch.long)\n",
  "        n_windows = max(len(data) - BPTT - 1, 0)\n",
  "        # slicing yields the same values as indexing every element in a Python loop,\n",
  "        # without the per-element overhead on a stream of a million characters\n",
  "        return torch.stack([data[o:o + n_windows] for o in range(BPTT + 1)])\n",
  "\n",
  "    num = data.size(0) // bs\n",
  "    data = data[:num * bs]\n",
  "    # .t() returns a non-contiguous view; .contiguous() keeps later .view() calls valid\n",
  "    return data.view(bs, -1).t().contiguous()\n",
  "\n",
  "\n",
  "def get_batch(data, i, seq_len):\n",
  "    \"\"\"Return (inputs, targets) for the window starting at row `i` of a batchified tensor.\n",
  "\n",
  "    inputs is data[i : i + seq_len]; targets is the same window shifted one row ahead and\n",
  "    flattened to 1-D. Both are moved to DEVICE. The window is shortened near the end of\n",
  "    `data` so that the shifted targets never run past the last row.\n",
  "    \"\"\"\n",
  "    seq_len = min(seq_len, len(data) - 1 - i)\n",
  "    inputs = data[i:i + seq_len]\n",
  "    targets = data[i + 1:i + 1 + seq_len].view(-1)\n",
  "    return inputs.to(DEVICE), targets.to(DEVICE)"
 ] },
 { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "import matplotlib.pyplot as plt\n",
  "import matplotlib.ticker as ticker\n",
  "\n",
  "%matplotlib inline\n",
  "\n",
  "def plot_loss(losses):\n",
  "    \"\"\"Plot the given sequence of loss values in a new figure.\"\"\"\n",
  "    plt.figure()\n",
  "    # plot the argument, not the notebook-global `all_losses`\n",
  "    plt.plot(losses)"
 ] },
 { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "def batch_train(model, batches, optimizer, criterion=nn.CrossEntropyLoss(), bptt=BPTT):\n",
  "    \"\"\"Take one optimisation step over every `bptt`-row window of a batchified tensor.\n",
  "\n",
  "    The losses of all windows are summed and back-propagated once; `optimizer`, when\n",
  "    given, then takes a single step.\n",
  "\n",
  "    Returns:\n",
  "        The mean loss per window, as a Python float.\n",
  "\n",
  "    Raises:\n",
  "        ValueError: if `batches` is too short to hold one window (size(0) <= bptt).\n",
  "    \"\"\"\n",
  "    n_windows = batches.size(0) - bptt\n",
  "    if n_windows <= 0:\n",
  "        raise ValueError(f'need more than {bptt} rows to train on, got {batches.size(0)}')\n",
  "\n",
  "    model.zero_grad()\n",
  "    loss = 0\n",
  "    for i in range(n_windows):\n",
  "        xs, ys = get_batch(batches, i, bptt)\n",
  "        loss += criterion(model(xs), ys)\n",
  "\n",
  "    loss.backward()\n",
  "    if optimizer:\n",
  "        optimizer.step()\n",
  "\n",
  "    return loss.item() / n_windows\n",
  "\n",
  "def batchless_train(model, batches, optimizer, start, print_every, char_to_idx, idx_to_char, seed='the ', max_sample_length=100, criterion=nn.CrossEntropyLoss(), bptt=BPTT):\n",
  "    \"\"\"Train on one example at a time (batch size 1).\n",
  "\n",
  "    `batches` uses the layout produced by `batchify(data, 1)`: every row but the last\n",
  "    holds context characters and the last row holds the targets. Every `print_every`\n",
  "    examples the elapsed time (measured from `start`), the progress and the current loss\n",
  "    are printed, followed by a generated sample. `bptt` is accepted but not used.\n",
  "\n",
  "    Returns:\n",
  "        The mean loss per example as a Python float (0.0 when there are no examples),\n",
  "        the same unit `batch_train` reports.\n",
  "    \"\"\"\n",
  "    if not torch.is_tensor(batches):\n",
  "        batches = torch.tensor(np.asarray(batches))\n",
  "    batches = batches.to(DEVICE, dtype=torch.long)\n",
  "    xs = batches[:-1].t().contiguous()  # history: one row of context per example\n",
  "    ys = batches[-1]                    # target: one character per example\n",
  "    n_examples = xs.size(0)\n",
  "\n",
  "    total_loss = 0.0\n",
  "    for i in range(n_examples):\n",
  "        model.zero_grad()\n",
  "        output = model(xs[i])\n",
  "        loss = criterion(output, ys[i].view(1))\n",
  "\n",
  "        loss.backward()\n",
  "        if optimizer:\n",
  "            optimizer.step()\n",
  "\n",
  "        # Get the Python number from a 1-element Tensor by calling tensor.item()\n",
  "        step_loss = loss.item()\n",
  "        total_loss += step_loss\n",
  "\n",
  "        if i % print_every == 0:\n",
  "            print(f'{time_since(start)} ({i} {i / n_examples * 100:.2f}%) {step_loss:.4f}')\n",
  "            print(f'Epoch {i} sample:')\n",
  "            sample(model, char_to_idx, idx_to_char, seed=seed, max_length=max_sample_length)\n",
  "\n",
  "    # report the average rather than the running sum\n",
  "    return total_loss / n_examples if n_examples else 0.0"
 ] },
 { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "def sample(model, char_to_idx, idx_to_char, seed=pad_start(BPTT), max_length=20, bptt=BPTT, sample=True):\n",
  "    \"\"\"Generate text from `model` one character at a time and print it.\n",
  "\n",
  "    Args:\n",
  "        model: called with a (1, bptt) LongTensor of character indices; the last row of\n",
  "            its output is read as the scores (logits or log-probabilities) of the next character.\n",
  "        char_to_idx: char -> index dict from `create_inputs`.\n",
  "        idx_to_char: index -> char dict from `create_inputs`.\n",
  "        seed: starting text. Only its last `bptt` characters are used; a shorter seed is\n",
  "            left-padded with the NUL padding character so the context is always `bptt` long.\n",
  "        max_length: maximum number of characters to generate.\n",
  "        bptt: context length fed to the model.\n",
  "        sample: draw from the predicted distribution when True, otherwise pick the most\n",
  "            probable character.\n",
  "\n",
  "    Generation stops early when the padding character is produced. Nothing is returned.\n",
  "    \"\"\"\n",
  "    pad_idx = char_to_idx.get('\\0', 0)\n",
  "    seed_idx = [char_to_idx[c] for c in seed[-bptt:]]\n",
  "    n_pad = max(bptt - len(seed_idx), 0)\n",
  "    output_idx = [pad_idx] * n_pad + seed_idx\n",
  "\n",
  "    was_training = model.training\n",
  "    model.eval()  # no dropout while generating\n",
  "    try:\n",
  "        with torch.no_grad():  # no need to track history in sampling\n",
  "            for _ in range(max_length):\n",
  "                h_idxs = torch.tensor(output_idx[-bptt:], dtype=torch.long, device=DEVICE).view(1, -1)\n",
  "                scores = model(h_idxs)[-1]\n",
  "                if sample:\n",
  "                    # sample from distribution; softmax gives the same probabilities for logits\n",
  "                    # and log-probabilities and, unlike a bare exp(), cannot overflow to inf\n",
  "                    idx = torch.multinomial(F.softmax(scores, dim=-1), 1).item()\n",
  "                else:\n",
  "                    # get most probable (as a Python int, so it can index idx_to_char)\n",
  "                    idx = scores.topk(1)[1].item()\n",
  "                if idx == pad_idx:\n",
  "                    break\n",
  "                output_idx.append(idx)\n",
  "    finally:\n",
  "        # hand the model back in the mode it was given in\n",
  "        model.train(was_training)\n",
  "\n",
  "    sample_text = ''.join(idx_to_char[i] for i in output_idx[n_pad:])\n",
  "    print(sample_text)"
 ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## with n-grams\n", "\n", "Another [n-gram music reviews](http://nbviewer.jupyter.org/github/iconix/openai/blob/master/nbs/n-gram%20music%20reviews.ipynb) model, implemented this time in PyTorch.\n", "\n", "Guiding PyTorch tutorial: [An Example: N-Gram Language Modeling](https://pytorch.org/tutorials/beginner/nlp/word_embeddings_tutorial.html#an-example-n-gram-language-modeling)" ] },
 { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "class NGramLanguageModel(nn.Module):\n",
  "    \"\"\"Feed-forward n-gram model: embed `bptt` characters, concatenate them, score the next one.\"\"\"\n",
  "\n",
  "    def __init__(self, vocab_size, hidden_size, n_fac, bptt):\n",
  "        super(NGramLanguageModel, self).__init__()\n",
  "\n",
  "        self.embedding = nn.Embedding(vocab_size, n_fac)\n",
  "        self.linear1 = nn.Linear(bptt * n_fac, hidden_size)\n",
  "        self.linear2 = nn.Linear(hidden_size, vocab_size)\n",
  "\n",
  "    def forward(self, inputs):\n",
  "        \"\"\"Return a (1, vocab_size) tensor of unnormalised scores for one context of `bptt` indices.\"\"\"\n",
  "        # flatten all embeddings into a single row: one example per call\n",
  "        embeds = self.embedding(inputs).view((1, -1))\n",
  "        hidden = F.relu(self.linear1(embeds))\n",
  "        return self.linear2(hidden)"
 ] },
 { "cell_type": "code", "execution_count": 53, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0m 46s (0 0.00%) 4.4895\n", "Epoch 0 sample:\n", "the x=$=el1$\n", "8m 56s (500000 44.69%) 4.0249\n", "Epoch 500000 sample:\n", "the rne\n", ",n ef apdrmlggi entofs_.tis sa skrcutttta sd\"woearotcn*hvf *sno caliiengwsbuecfhuirrl wrsoabe'ua\n", "17m 6s (1000000 89.39%) 2.7267\n", "Epoch 1000000 sample:\n", "the t epfdynodoo *tlruesrs sdtahesl mrh ev tajoofiyg ihe\n", " tuoa he rnl m.lorsgagcn,ts vo so.essgao su\n", "19m 48s (0 0.00%) 2.7462\n", "Epoch 0 sample:\n", "the itrautcwllctahhdmranyanntonepoir er tteghssseayit \n", "kptn in vuih'nrnstwuiii n niumfd tes lerteueid/ae\n", "27m 59s (500000 44.69%) 3.0083\n", "Epoch 500000 sample:\n", "the hbff asemys tt.paelosrhulc -ihltaihmesmu nswer l doroldte\n", " oa w\n", "oaopnauskrrssthsut\n", " ak dhl einih a\n", "36m 10s (1000000 89.39%) 3.9739\n", "Epoch 1000000 sample:\n", "the emialsnao.h\n", " vte et,tsteew.rei ae ho*eodthdk a*t\n", " ss r aren**a d o\n", "mo'i es l a c ofah msariir wni \n", "38m 52s (0 0.00%) 3.6807\n", "Epoch 0 sample:\n", "the oyeailydrcsutm ,yo ls\"eft, rudl eoi thogs on secni.iibr'ey iyi eal\"\n", "toonn hks wtnm,l leoi vueseiedp\n", "47m 5s (500000 44.69%) 4.0389\n", "Epoch 500000 sample:\n", "the fu\n", "jn sottee chh seehllra takdsu gntc \"a todnnr\n", "nno\n", "i sagehn er noro e u nupa* seuee c rbaonup bbk\n", "55m 37s (1000000 89.39%) 2.9309\n", "Epoch 1000000 sample:\n", "the hpee\n", "igekeieietden'oi ascrataisise\n", "\n", "h\n", " tathh\n", ".yesaycsapeaek dhhealmotslde*v \n", "spaelymblons*o\n", "od ats\n", "58m 24s (0 0.00%) 1.9723\n", "Epoch 0 sample:\n" ] }, { "ename": "RuntimeError", "evalue": "cuda runtime error (59) : device-side assert triggered at 
/opt/conda/conda-bld/pytorch_1524586445097/work/aten/src/THC/generic/THCStorage.c:36", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------\u001b[0m", "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0mngram\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mNGramLanguageModel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mVOCAB_SIZE\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mN_HIDDEN\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mN_FAC\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mBPTT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mDEVICE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0moptimizer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moptim\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mAdam\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mngram\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mparameters\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlr\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.005\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mall_losses\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtrain_loop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mngram\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mTRAIN_TEXT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mplot_every\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprint_every\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m500000\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m\u001b[0m in \u001b[0;36mtrain_loop\u001b[0;34m(model, optimizer, text, batch_size, seed, max_sample_length, epochs, print_every, plot_every, 
criterion)\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0mbatches\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbatchify\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0midx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdevice\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mDEVICE\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mbatch_size\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 12\u001b[0;31m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbatchless_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatches\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mstart\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprint_every\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchar_to_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx_to_char\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseed\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_sample_length\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprint_every\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 13\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[0mloss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mbatch_train\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatches\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcriterion\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 
"\u001b[0;32m\u001b[0m in \u001b[0;36mbatchless_train\u001b[0;34m(model, batches, optimizer, start, print_every, char_to_idx, idx_to_char, seed, max_sample_length, criterion, bptt)\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'{time_since(start)} ({i} {i / xs.shape[0] * 100:.2f}%) {loss:.4f}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'Epoch {i} sample:'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 37\u001b[0;31m \u001b[0msample\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mchar_to_idx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0midx_to_char\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mseed\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mseed\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmax_length\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmax_sample_length\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 38\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 39\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mtotal_loss\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mxs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m\u001b[0m in \u001b[0;36msample\u001b[0;34m(model, char_to_idx, idx_to_char, seed, max_length, bptt, sample)\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0msample\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[0;31m# sample from distribution\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 10\u001b[0;31m \u001b[0midx\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmultinomial\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutput\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexp\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 11\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0;31m# get most probable\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mRuntimeError\u001b[0m: cuda runtime error (59) : device-side assert triggered at /opt/conda/conda-bld/pytorch_1524586445097/work/aten/src/THC/generic/THCStorage.c:36" ] } ], "source": [ "ngram = NGramLanguageModel(VOCAB_SIZE, N_HIDDEN, N_FAC, BPTT).to(DEVICE)\n", "optimizer = optim.Adam(ngram.parameters(), lr=0.005)\n", "all_losses = train_loop(ngram, optimizer, TRAIN_TEXT, batch_size=1, plot_every=1, print_every=500000)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "plot_loss(all_losses)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "sample(ngram, char_to_idx, idx_to_char, seed='the ', max_length=100)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Observations**:\n", "- Training, even on a sample 2K reviews, is _slow_ (5 epochs in 67m 18s). 
Could we speed up with:\n", " - Batching\n", " - Adaptive learning rates (although this may make it train better but not necessarily faster)\n", " - Using PyTorch implementations of RNNs/LSTMs" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## with custom rnn" ] },
 { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "class RNN(nn.Module):\n",
  "    \"\"\"Hand-rolled recurrent character model that processes one example per forward call.\n",
  "\n",
  "    The embedded context is concatenated with the previous hidden state and passed through\n",
  "    `i2h` (next hidden state) and `i2o`; `o2o` then mixes the two results. `forward`\n",
  "    returns log-probabilities (LogSoftmax), which is what nn.NLLLoss expects.\n",
  "    \"\"\"\n",
  "\n",
  "    def __init__(self, vocab_size, hidden_size, n_fac, bptt, batch_size=BS):\n",
  "        super(RNN, self).__init__()\n",
  "        self.hidden_size = hidden_size\n",
  "\n",
  "        self.embeddings = nn.Embedding(vocab_size, n_fac)\n",
  "        self.i2h = nn.Linear(bptt * n_fac + hidden_size, hidden_size)\n",
  "        self.i2o = nn.Linear(bptt * n_fac + hidden_size, vocab_size)\n",
  "        self.o2o = nn.Linear(hidden_size + vocab_size, vocab_size)\n",
  "        self.dropout = nn.Dropout(0.1)\n",
  "        self.softmax = nn.LogSoftmax(dim=1)\n",
  "\n",
  "        self.init_hidden(batch_size)\n",
  "\n",
  "    # NOTE: this example only works as-is in PyTorch 0.4+\n",
  "    # https://stackoverflow.com/questions/50475094/runtimeerror-addmm-argument-mat1-position-1-must-be-variable-not-torch\n",
  "    def forward(self, inputs):\n",
  "        \"\"\"Return a (1, vocab_size) tensor of log-probabilities and update `self.hidden`.\"\"\"\n",
  "        embeds = self.embeddings(inputs).view((1, -1))\n",
  "        combined_i = torch.cat((embeds, self.hidden), 1)\n",
  "        hidden = self.i2h(combined_i)\n",
  "        output = self.i2o(combined_i)\n",
  "        # feed the attached hidden state into o2o: the detached copy has the same values\n",
  "        # but cuts i2h out of the graph, so its weights would never receive a gradient\n",
  "        combined_o = torch.cat((hidden, output), 1)\n",
  "        output = self.o2o(combined_o)\n",
  "        output = self.dropout(output)\n",
  "        output = self.softmax(output)\n",
  "        # detach from history of the last run\n",
  "        self.hidden = hidden.detach()\n",
  "        return output\n",
  "\n",
  "    def init_hidden(self, bs):\n",
  "        \"\"\"Reset the hidden state to zeros; it is always (1, hidden_size), `bs` is only recorded.\"\"\"\n",
  "        # 1 RNN layer\n",
  "        self.batch_size = bs\n",
  "        self.hidden = torch.zeros(1, self.hidden_size).to(DEVICE)"
 ] },
 { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0m 46s (0 0.00%) 4.2767\n", "Epoch 0 sample:\n", "the 1~af9#%c_~ild$\"?rf/b\\|\n", "j)za#!n&\n", "11m 24s (500000 44.69%) 2.3363\n", "Epoch 500000 sample:\n", "the be stot eer/lrlck0sttmasd of corsdetav> in+ therk, fl yeint ulh, bta0krauk i#duk_.n8 dot9et8y imeos \n", "22m 5s (1000000 89.39%) 3.0072\n", "Epoch 1000000 sample:\n", "the bes!y\n", "2-counq** 4lewbor albzi\\\"\n", "25m 20s (0 0.00%) 7.4589\n", "Epoch 0 sample:\n", "the %ur rt ofekulg(tha onr orin\" 3orsof yr and !etes aoseve_ pop gope\n", "ntys^pc nouthericheof t7e, cse ma{w st\n", "97m 47s (0 0.00%) 38.1602\n", "Epoch 0 sample:\n", "the k| t vea lingtfeyeas h-lasgtean ote.p\n", "108m 28s (500000 44.69%) 1.8164\n", "Epoch 500000 sample:\n", "the 2*\n", "v &\n", ", d\n", "lleituts woleez:z.ve4h @ th\\aw*ivis sipbiuilasw tod^v. \n", "**+\n", "**d-tos\n", "\n", "\n", "119m 18s (1000000 89.39%) 5.7211\n", "Epoch 1000000 sample:\n", "the \n", "Training time: 7308.59s\n" ] } ], "source": [
  "rnn = RNN(VOCAB_SIZE, N_HIDDEN, N_FAC, BPTT).to(DEVICE)\n",
  "optimizer = optim.Adam(rnn.parameters(), lr=0.005)\n",
  "all_losses = train_loop(rnn, optimizer, TRAIN_TEXT, criterion=nn.NLLLoss(), batch_size=1, plot_every=1, print_every=500000)"
 ] },
 { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_loss(all_losses)" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "^losses not being reported quite right by `batchless_train`..." ] },
 { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "the 2cwap%\n", "jaig aciph} araygay iblptoare josa7, pha]ptpjry iot, il) aydin t?e\n", "iruphy bol war############\n" ] } ], "source": [
  "idx, VOCAB_SIZE, char_to_idx, idx_to_char = create_inputs(TRAIN_TEXT)\n",
  "sample(rnn, char_to_idx, idx_to_char, seed='the ', max_length=100)"
 ] },
 { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u0000\u0000\u0000\u0000(ff{bomhfy tu tu fuphays\n", "\n", "argzry ip{bo$#############u2/j,q)er1.x\n", "flphy pha) xrtphuipariphcip biutaly\n" ] } ], "source": [
  "idx, VOCAB_SIZE, char_to_idx, idx_to_char = create_inputs(TRAIN_TEXT)\n",
  "sample(rnn, char_to_idx, idx_to_char, seed='\\0'*BPTT, max_length=100)"
 ] },
 { "cell_type": "markdown", "metadata": {}, "source": [ "## with PyTorch's RNN layer" ] },
 { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "class PyTorchRNN(nn.Module):\n",
  "    \"\"\"Character model built from nn.Embedding -> nn.RNN -> nn.Linear -> LogSoftmax.\n",
  "\n",
  "    `forward` reads the batch size from `inputs[0].size(0)`, so `inputs` is expected to be\n",
  "    indexed as (seq_len, batch). The hidden state is kept on the module between calls and\n",
  "    is reset to zeros whenever the batch size changes.\n",
  "    \"\"\"\n",
  "\n",
  "    def __init__(self, vocab_size, hidden_size, n_fac, batch_size):\n",
  "        super().__init__()\n",
  "        self.hidden_size = hidden_size\n",
  "        self.vocab_size = vocab_size\n",
  "        self.n_fac = n_fac\n",
  "\n",
  "        self.embedding = nn.Embedding(vocab_size, n_fac)\n",
  "        self.rnn = nn.RNN(n_fac, hidden_size)\n",
  "        self.l_out = nn.Linear(hidden_size, vocab_size)\n",
  "        self.softmax = nn.LogSoftmax(dim=-1)\n",
  "\n",
  "        self.init_hidden(batch_size)\n",
  "\n",
  "    def forward(self, inputs):\n",
  "        \"\"\"Return log-probabilities flattened to (-1, vocab_size) and update `self.hidden`.\"\"\"\n",
  "        # dynamic batch sizing: start from a fresh hidden state when the batch size changes\n",
  "        current_bs = inputs[0].size(0)\n",
  "        if current_bs != self.batch_size:\n",
  "            self.init_hidden(current_bs)\n",
  "\n",
  "        embedded = self.embedding(inputs)\n",
  "        rnn_out, next_hidden = self.rnn(embedded, self.hidden)\n",
  "        # detach from history of the last run\n",
  "        self.hidden = next_hidden.detach()\n",
  "        log_probs = self.softmax(self.l_out(rnn_out))\n",
  "\n",
  "        return log_probs.view(-1, self.vocab_size)\n",
  "\n",
  "    def init_hidden(self, bs):\n",
  "        \"\"\"Record `bs` and reset the hidden state to zeros of shape (1, bs, hidden_size) on DEVICE.\"\"\"\n",
  "        # 1 RNN layer\n",
  "        self.batch_size = bs\n",
  "        self.hidden = torch.zeros(1, self.batch_size, self.hidden_size).to(DEVICE)"
 ] },
 { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0m 8s (0 0.00%) 4.2960\n", "Epoch 0 sample:\n", "the bgqdtw6#i-[mngryn}z#)<|q!el{hzfpb%@qzc $a4d\\w6qr+-y! e\n", "1m 3s (10 1.00%) 2.8015\n", "Epoch 10 sample:\n", "the los bgbhs wbeannuonetsncl po wic av vn so hebadgaf cs -g mr\n", "'nircg,uorite o* wi wd so wasiheuus bo \n", "1m 58s (20 2.00%) 2.4843\n", "Epoch 20 sample:\n", "the bbt\n", "e burossbe ron sarelliny om uzeacran\" fumy\n", "2m 53s (30 3.00%) 2.3042\n", "Epoch 30 sample:\n", "the f.\n", "anilhis in inaradmethe thagte me ses liv\n", "g_ pod thos th utpes bo paredolisaca oused _ _nsentaia \n", "3m 48s (40 4.00%) 2.2106\n", "Epoch 40 sample:\n", "the ns asoisoremr arty. ibohent . 
_defma:s pitos thathe d pmodeo_e r ly pifti- ialis' bouy a's af uf y m\n", "4m 44s (50 5.00%) 2.1274\n", "Epoch 50 sample:\n", "the uppepin a, ok amb rar, nsouyoi\n", "a- ated y\",uns ove asd h\" hamitole \"i wity sus alo rasenge,pokerots5 \n", "5m 39s (60 6.00%) 2.1382\n", "Epoch 60 sample:\n", "the xiwattitich chawaneaneaghagk \n", "-sengamcam , the\n", "ga moroicorhes\n", "lis neundur mesthe\n", "\" ouknanmastong iri\n", "6m 34s (70 7.00%) 2.1227\n", "Epoch 70 sample:\n", "the so tou laiwin southighi,\n", "i, sol tnd*mor i'siicanoann\n", "* an_ en yog oc beroge\n", "7m 29s (80 8.00%) 2.1580\n", "Epoch 80 sample:\n", "the pl arnathesrocoutinitoby om t' teumo's.-\n", "sp\n", "amouth om or or ondingvelhel ddetinger anlietien hal ole\n", "8m 25s (90 9.00%) 2.1470\n", "Epoch 90 sample:\n", "the do dalmondouti\n", "w. p2 fiitcr pn e sroricr\n", "c\n", "s. se a ncsrcke-\n", " t\n", " natookep. rr a wa ealirhenery ac k\n", "9m 20s (100 10.00%) 2.0815\n", "Epoch 100 sample:\n", "the ou difocllina jarbch blyolivge aencwnthon\n", "mc\n", "fa ea wastnst r, sso sourraiken-*psovyentente ee\n", "peatya\n", "10m 15s (110 11.00%) 2.0790\n", "Epoch 110 sample:\n", "the jo hacrrck ancatisnet ** we w nw collocanseno d t ta tipeile fice sarkikdake d y ty stamiss\n", "pe fis\n", "11m 10s (120 12.00%) 2.0864\n", "Epoch 120 sample:\n", "the fo hisrisonndelo, o\n", "win, -, bo, vamane, wat'shanean's\n", "\n", "ptietdech od soucait|#wyw de metmilaseawasha\n", "\n", "12m 6s (130 13.00%) 2.1082\n", "Epoch 130 sample:\n", "the d, on rollighane ss kn braban im dud ru asothormaga and is ah amsir\n", "lufo e si s. 
wh wh, ho foxiselce\n", "13m 1s (140 14.00%) 2.0286\n", "Epoch 140 sample:\n", "the fu rryglealram (a \n", "le al\n", "\n", "on on th thethe mo *, ly he harnoongeredptrto chempcas\n", "athendiig an sx pl \n", "13m 56s (150 15.00%) 2.0348\n", "Epoch 150 sample:\n", "the fe p, murlinhin me realiileanach pe for ave he\n", "fo fhanki).\n", "\n", "y fo veurold * gite al unt, scenslarta f\n", "14m 51s (160 16.00%) 2.0806\n", "Epoch 160 sample:\n", "the n, werte cese th thetta t, aphaulatldpspstreveas.\n", "swimn ic i st lo dow/ \" be dh wan_ialeute s anle-s\n", "15m 47s (170 17.00%) 2.0448\n", "Epoch 170 sample:\n", "the me tretheahladye hemolloble ander- r. mm veboof\n", "it' flltellendew 's asste aruer 2k ze quintinkin in \n", "16m 42s (180 18.00%) 2.0311\n", "Epoch 180 sample:\n", "the & pimalmacariace it so sereove inirtiofel as de we wheneer i's us co lese\n", "w,\n", "es\n", "sh\n", "meangengictusenr\n", "17m 37s (190 19.00%) 2.0079\n", "Epoch 190 sample:\n", "the =, g. go tarealigingtlotsoftheswupparderioun tz wm wardaudlis ocloralawcy 'pea yacibamce iecoro)\n", "\n", "p\n", "\n", "18m 32s (200 20.00%) 2.0034\n", "Epoch 200 sample:\n", "the he hechack ad mo vo moimonr withy.\n", "s.\n", "\n", "h\n", "\n", "boiboug\n", "a \n", "2|-0460||15. s pl offontistis is oi tut ho bolb\n", "19m 28s (210 21.00%) 1.9818\n", "Epoch 210 sample:\n", "the ho thethe le d_ am am as on sore g\n", "mu marrarw, w, wl \"leyomy in in veares fo por ar sa rartorflofesh\n", "20m 23s (220 22.00%) 1.9824\n", "Epoch 220 sample:\n", "the wi be leninns wt he, ay usctoogisian, is inthes at be be erim ll yl yk ptemurvarek, l. w.\n", "\n", "vi wi amu\n", "21m 18s (230 23.00%) 2.0133\n", "Epoch 230 sample:\n", "the fe wistisn w: nounous bt bat at sungatw, e, at acts \"w y' sordoug:h \"d -g alp by br becrntlc bl buat\n", "22m 13s (240 24.00%) 2.0461\n", "Epoch 240 sample:\n", "the mo allebrallires. s. 
thivempomf ** ** forare\n", " an thatha bredrak ar she ve a'luavark c ttel blybiuthe\n", "23m 8s (250 25.00%) 2.0577\n", "Epoch 250 sample:\n", "the vi trutkatday py prfauricrit _, th th ph shaavaunounpraps -p ipracus es ph hischeowist n songaughar\n", "24m 4s (260 26.00%) 2.0384\n", "Epoch 260 sample:\n", "the wh w_ garncrmubr blly ly us laknave tas ma motvowrvtulleltrmeakbownos su sin'tnedr my forlmorverine \n", "24m 59s (270 27.00%) 2.0107\n", "Epoch 270 sample:\n", "the f lireaneanelintict ws tise fe loenofo fe fic fic ip ng sos ok c. sfetlad ss do , d,cvinghe butbo. \n", "25m 54s (280 28.00%) 2.0481\n", "Epoch 280 sample:\n", "the th wheverman as k\n", "us\n", "ove ve thiths yeve wof po we tareahima fa farpatp-hecveseazeate ty ah at's th t\n", "26m 49s (290 29.00%) 2.0256\n", "Epoch 290 sample:\n", "the ch th usmastandiling d l, ve f onod 20?\n", "*. ho hit fo as ompemly lh lokgove/se\n", "lst it wotrist sa pa\n", "27m 44s (300 30.00%) 2.0642\n", "Epoch 300 sample:\n", "the aw he ''ven wh wo worker-ca--rot oo vutvjnp--s\n", "so whovye ize hh hotsocthoveollond,\n", "s.k.\n", "\n", "\n", "\n", "\n", "\n", "\n", "28m 39s (310 31.00%) 2.0442\n", "Epoch 310 sample:\n", "the mo bfeboandselibe ly dyhe teetielis.\n", "\n", "i if ******zhaatovarkatla om om he h'satodeaclaklulhste ay ou \n", "29m 34s (320 32.00%) 2.0835\n", "Epoch 320 sample:\n", "the fo wong al *_ \n", "- reebelbalya sor is is b thiccishares co sodracvec awink platid iit it ie anle kn a\n", "30m 28s (330 33.00%) 2.0452\n", "Epoch 330 sample:\n", "the fo keckdhelotrthychelbolandandle\n", "lett ite el bigenow ig or we calhen la ou wostou chyofonten -- 218.\n", "31m 23s (340 34.00%) 2.0161\n", "Epoch 340 sample:\n", "the ab orearjopata da onn *gomaicoriwang dicangon fn fem al ohereverudryonyo de le mormowlosnz\"che rtin\n", "32m 18s (350 35.00%) 2.0625\n", "Epoch 350 sample:\n", "the (bu glopeay, ak ah here avneveenteraalayke walonewttlovexvo felowlev, phach'cvecha andimwimk,e te a\n", "33m 13s (360 
36.00%) 2.2247\n", "Epoch 360 sample:\n", "the me medreenithingratheetidtinglegoupeinanalir iniwer tvrifrifeplinntameas\n", "arteasher al\n", " aebeatrathawh\n", "34m 8s (370 37.00%) 2.1703\n", "Epoch 370 sample:\n", "the ** afpo\n", "lothavent. tseareturuluptiysineane.\n", "t.\n", "wh mr t tay atl'sbomour ar ce ce ckiuiic is is iatof\n", "35m 3s (380 38.00%) 2.0762\n", "Epoch 380 sample:\n", "the 19.16.15, ah afreong l arm er illonkbyoviigaupen os o oviokingiis had t\n", "\n", "n cy ar iewing il ha uyeuo\n", "35m 58s (390 39.00%) 2.1481\n", "Epoch 390 sample:\n", "the (\n", "\n", " t. the ge to iondoquaive an in ed blanaisind of anvar* ie welwaswaeste't atilbredaded ) r turid\n", "36m 53s (400 40.00%) 2.0796\n", "Epoch 400 sample:\n", "the (b1 qualluvistust pd came bo y ni bub mon astofs f afoalsom's\n", "\n", "f -\n", "clpblaon \n", "\n", "37m 48s (410 41.00%) 2.0811\n", "Epoch 410 sample:\n", "the fo \"lfurendekris st smarm-tho bo dac ppreavesses\n", "\n", "\n", "38m 43s (420 42.00%) 2.1144\n", "Epoch 420 sample:\n", "the fo or, injuprousicnerive ve bimlideedeeonis io anetieritrshong\"bym nw be oulideenether,\".:>..\n", "\",\" s \n", "39m 38s (430 43.00%) 2.1024\n", "Epoch 430 sample:\n", "the f s inlowef go ne dud it ig the ba\n", " ate se is cae ad ap ay th th le osoo e he /\" fe me\n", "meande th t\n", "40m 33s (440 44.00%) 2.0943\n", "Epoch 440 sample:\n", "the w. h'se\n", "pt'pe de s,.\"__\n", "\n", "p\n", "\n", "/ ****, wncthethe ps ss thsoys ingom'si'getpe toinge, ss sf ingird_w.\n", "\n", "\n", "41m 28s (450 45.00%) 2.0570\n", "Epoch 450 sample:\n", "the s 1 mlutaldaeedens a theharomos thitherree erotelthtr aroplos the'se's th therilrditerthatryics bl l\n", "42m 23s (460 46.00%) 2.0973\n", "Epoch 460 sample:\n", "the & ayturthiigis ygut. 
\n", " i k***y fe fh: wollovethas un bellinsor *vpyhdulritn an an an tontoss at am \n", "43m 18s (470 47.00%) 2.0657\n", "Epoch 470 sample:\n", "the 23 18 /2 b@yh yt it efrefrelt tt touteo font we ne o sout ind\n", "orper ss thitrjaymerion g ct t va de \n", "44m 13s (480 48.00%) 2.0498\n", "Epoch 480 sample:\n", "the & livirm blerarrardathoundaviar ak at 2/2011 ba bm so we waroprombo gommnrmdom @ gimoingo.tiuti gsg\n", "45m 8s (490 49.00%) 2.1247\n", "Epoch 490 sample:\n", "the 2 2 -pe/005 heazin re ar le te thetve tenting st sur zam at so toutit rtecabtmerr th thitantthettin\n", "46m 3s (500 50.00%) 2.0600\n", "Epoch 500 sample:\n", "the fi nic'l without ovod of on thtinulie iom om at finp ht hilin *\n", " frrorn th thefoonarrybus n ysuecoa\n", "46m 58s (510 51.00%) 2.0248\n", "Epoch 510 sample:\n", "the (a om isrernelingshico me meurruisnisrapewees an an youyoved*eoumor tr tthevo op\n", "ol opy pugpeplparm\n", "47m 53s (520 52.00%) 2.0257\n", "Epoch 520 sample:\n", "the 01 2008 y, bur..\n", "u. his *rorrmathur as s, chachuicurerertiuticerce, ah shtrefr.\n", "iti \" ro r trearsas\n", "48m 48s (530 53.00%) 2.0304\n", "Epoch 530 sample:\n", "the @ 2 \"hilerte vunrinding so sll da fariat at ausen amleime ellencous sh yiteive t t or\n", "on wo, ch che\n", "49m 43s (540 54.00%) 2.0456\n", "Epoch 540 sample:\n", "the 2/18 _3\n", "\n", "chopnernuriseo a , ad inronl eom\n", "em\n", "th tanemnemehaoumyghitn esoeuodus s neineine u),\"**hymo\n", "50m 38s (550 55.00%) 2.1504\n", "Epoch 550 sample:\n", "the jo z- vf rf buke s. 
__ h_ \n", "o \n", "e te lozliwn me mafma.s th antistranban.\n", "\n", "******erme\n", "m l al an l\n", "soma\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "51m 33s (560 56.00%) 2.1253\n", "Epoch 560 sample:\n", "the @ \n", "'v_:/\" lod th dy 'scouokeordotuey ***** ** th the 200 20)\n", "**\n", "* othivetu te u quituut ay ie ou te\n", "52m 28s (570 57.00%) 2.0786\n", "Epoch 570 sample:\n", "the || ||| || a| re-waddad th ferfev r anallaharif fare pis bo lissifardatracaisu du duanedsok hakio, b\n", "53m 23s (580 58.00%) 2.0672\n", "Epoch 580 sample:\n", "the wo eijumeafte anned ef he fed lf (sleslandelyoyo yo1\n", "ch he meoy e cacth\n", "orerthrugit vokelleglling t\n", "54m 18s (590 59.00%) 2.0922\n", "Epoch 590 sample:\n", "the fi flrrluy. of hereoser ce on ck wn kr-plora\"englk)\n", "towwt walepat tsdthe pinpungnbsarnerd bek _g's\n", "'\n", "55m 13s (600 60.00%) 2.0982\n", "Epoch 600 sample:\n", "the to sou on mbsazeare @ j'th'r lithaxcsstt _ re dt of oi be s jant ut yomdemuitp ttthine piedeis me\n", "56m 8s (610 61.00%) 2.1456\n", "Epoch 610 sample:\n", "the @ n y.\n", "ed me ins ack ff lryrs, @ ure we whaboa lame tl/\"mc or keestighourda; a_ in le the ce ce in\n", "57m 3s (620 62.00%) 2.0980\n", "Epoch 620 sample:\n", "the @ 2005.08.'s.eroals ma faredowt w**lsal.\n", "st tt t, shegheas te we(th ns\n", "nt j\n", "slas\n", "\n", "\n", "57m 58s (630 63.00%) 2.1844\n", "Epoch 630 sample:\n", "the 11 18 d*equabovtow hcpcopce coekee_** \" ow th the sinsonckd\n", "yr\n", "\"rp dg--l----2011018.18 ftefecs cld\n", "\n", "58m 53s (640 64.00%) 2.1013\n", "Epoch 640 sample:\n", "the @ \" p\"byeraca bu, hu hay 2011. 
isuuosuoliysidinging thec, cuccamandingthsl dnomulmal io e(th t trat\n", "59m 48s (650 65.00%) 2.0895\n", "Epoch 650 sample:\n", "the @ 17.\n", "ns rw in h whe ne n ln destexcenretrherthpspcopeulaula cay ow in on theahoush orageh el erd, \n", "60m 43s (660 66.00%) 2.1343\n", "Epoch 660 sample:\n", "the @\n", " ')\n", "\".\n", "e!\n", "\n", "is \n", "y allarmaht ypangaubicors, sicaxchthhirlywh dhofarkinkrnges the\n", "mecespresuriaril tl\n", "61m 39s (670 67.00%) 2.0944\n", "Epoch 670 sample:\n", "the jo |-t| each ur anda2bouse****o, c, h romest lr s, oreove oenonredd2 pk \" adleed estey ol of tf wl w\n", "62m 34s (680 68.00%) 2.0994\n", "Epoch 680 sample:\n", "the ki b, by\n", "\n", "g o bupy,\n", "tt stnt t vifriot olleacaas, whis tted's 3: is it arily li eave ve ve rowh\n", "i tit\n", "63m 29s (690 69.00%) 2.0880\n", "Epoch 690 sample:\n", "the @ | l- hicahestaytrrtdcebcrucris ar ar ehit'tweb ishebhigdiesdutre ti b \n", "ga in it (at in f if pand\n", "64m 24s (700 70.00%) 2.0826\n", "Epoch 700 sample:\n", "the ** *.\n", "\n", "65m 19s (710 71.00%) 2.0980\n", "Epoch 710 sample:\n", "the 2018.18001 te it cogrinw pat ea liulerknd ly calnotoutlyapeu-hiset. wiwein iabely se sdetlit's \"sg\n", "66m 15s (720 72.00%) 2.1777\n", "Epoch 720 sample:\n", "the mi ,\"l,\n", "\n", "(\n", "\n", "\n", "6 ende\"ra.is p ot of sf r, on of lupra ke artongglutwestrofdlredelezinsindst me f tf w\n", "67m 10s (730 73.00%) 2.1465\n", "Epoch 730 sample:\n", "the @ | *-h's th thetho kall __ . i. 
ise cetres ud s be they *k aheave ve ke te titwand\n", "aede, el at hid\n", "68m 5s (740 74.00%) 2.1024\n", "Epoch 740 sample:\n", "the (| |- he tfeckecanceum pe malbae-fooviss rnenisi th ip in in on thy goutting naln bu buint wowebhlov\n", "69m 0s (750 75.00%) 2.0838\n", "Epoch 750 sample:\n", "the @ | in mn wittitrict co n \n", "\n", "sa rmydum awhury ick'sy h ch pheracpaye ye hof er catithiut er cack's \n", "69m 56s (760 76.00%) 2.0982\n", "Epoch 760 sample:\n", "the whio,e sye s, by lavude t trcprcpec, he hersade tr thesteyupucwis hevochoclyngwint is\" ra coctacidr\n", "70m 51s (770 77.00%) 2.0499\n", "Epoch 770 sample:\n", "the @ **\n", "**** a te titho\n", "vorreelilin leneebeebdebverledaw***** \"\n", "\n", "scoclaslisintingy mp th th** a\n", "s adow\n", "71m 46s (780 78.00%) 2.0755\n", "Epoch 780 sample:\n", "the ju ',\n", "_prpgr tiving)@\n", "d myiffetiun camoum stulnul plesondmer or by w song\n", "-p_uestysts\n", "fenten-wn wihe\n", "72m 41s (790 79.00%) 2.0775\n", "Epoch 790 sample:\n", "the @ **n_._\" tont.il'r ati sl sretint ou sathehellrerole mr cthhethel s ( (ally\n", "tr\n", "ge atmety\n", "tesilr fe\n", "73m 36s (800 80.00%) 2.0811\n", "Epoch 800 sample:\n", "the @ o ?**. 
n andor, lyeita w mitested as uo sive upenfe ar is anduambr\"vingenteangrdienog, si lisis\n", "74m 32s (810 81.00%) 2.1021\n", "Epoch 810 sample:\n", "the @ o.>a.\n", "is _s pusiae ireisondonw mm on wngwongnet e an andusol on of ser\n", "ec eisu s masmutvilheryou\n", "75m 27s (820 82.00%) 2.2306\n", "Epoch 820 sample:\n", "the &b ol orrply ta the dobeofev)rossossiok\n", "of ald ke teicouwerdpat g thar ihelyere & 't, carod*m\n", "toft,\n", "76m 22s (830 83.00%) 2.1387\n", "Epoch 830 sample:\n", "the @\n", " **\n", "\n", "uccucter berink\n", " @ nd 'sushe; me areeanexif 'e col | dee\n", "iy hre berqu mdc gh arisa r he so f\n", "77m 17s (840 84.00%) 2.1555\n", "Epoch 840 sample:\n", "the wh hlfancadcalaxd in theg, andangang bevadyonyaecell pr dill'llev-rrllassally thethefe.te teitolllad\n", "78m 12s (850 85.00%) 2.1246\n", "Epoch 850 sample:\n", "the ji \n", "an lejbermed syangheus on _ h uv y\n", "or\n", "\n", "s \n", "rbounghcheohis we oa ou on o su sherve ire\n", "\n", "s us aw\n", "79m 7s (860 86.00%) 2.1331\n", "Epoch 860 sample:\n", "the @ **.a), 'sa an in come*c y p rt t is uchis, we: les,entk* k's \n", "rsels lotorsesdere, me \n", "es th ve. \n", "80m 2s (870 87.00%) 2.1074\n", "Epoch 870 sample:\n", "the @ o\n", "tt rutte i dibyll jicngfessandre- inarourepingpre s f\n", "\"calt s aczzom wh ir io mo mye god om he\n", "80m 57s (880 88.00%) 2.0635\n", "Epoch 880 sample:\n", "the (wi h\n", " le medcont s ly ory pryluscacca f\n", "whint lm sorsalckked ond we wrthron\n", "ctictictibesoek?** ck j\n", "81m 52s (890 89.00%) 2.0596\n", "Epoch 890 sample:\n", "the @ s\n", "\"nixp e itc ms d, tue iefim j's lromperceaceire \"getie iel on an wre w\n", "\n", "wo of fitistis ni nin ir\n", "82m 47s (900 90.00%) 2.1095\n", "Epoch 900 sample:\n", "the @ 18.xter omas, 's ar m. ms of ret0\" ardon fe beps. 
ralond f lf llaon on @ thend il s _\"dertenkenqu\n", "83m 42s (910 91.00%) 2.0934\n", "Epoch 910 sample:\n", "the @w.*sstomtomastaetatt'thard rompat o oad avir- wes ssh oreelober ayaryorkid aacaycy youtorurhelld(l\n", "84m 37s (920 92.00%) 2.1103\n", "Epoch 920 sample:\n", "the @ \"_\"tarturiolielint ng meeeelt tw st\n", "me sutureute the dg ng of tt sudujlit tdrrichuluilealerert jte\n", "85m 33s (930 93.00%) 2.1305\n", "Epoch 930 sample:\n", "the @ \"_\"_ar of aftaiqu thet tt the geugtireand s siosiotiofls amayer is ch ik t_ s boialyalledat stosto\n", "86m 28s (940 94.00%) 2.1392\n", "Epoch 940 sample:\n", "the @\n", "\n", "_0_ 018 hezth e fi fi. i ioder (2to2\\! e.\n", "\n", "eut st kens\n", "\n", "fet atdiedi pubing re ritaxul\n", "halmu\n", "dere\n", "87m 23s (950 95.00%) 2.2243\n", "Epoch 950 sample:\n", "the | n asiass aroronofrewifgrengarnd f hourend** amecon andemay. el, ng\n", "\n", "8alneve vov yhovithevy\n", "ly sint\n", "88m 18s (960 96.00%) 2.1974\n", "Epoch 960 sample:\n", "the @ **_0001.\n", "th**ha beabe\n", "\n", "entres esth st ng has aste tsts.1).\n", "iappepeyy mathe yopyofre ce iese cooci\n", "89m 13s (970 97.00%) 2.1068\n", "Epoch 970 sample:\n", "the @\n", " 20s/ ihotrfer nt of toe om\n", "oucg th on oy is it in in on an a, of are xpraminoande_\"y\n", "d bk ch sher\n", "90m 8s (980 98.00%) 2.0956\n", "Epoch 980 sample:\n", "the @ \".\"pmytthes st te ulione ppedt buthe\n", "\n", "ti siceeca cavour. 
mw twebeerane ng aesiore ry by mendes ant\n", "91m 3s (990 99.00%) 2.1618\n", "Epoch 990 sample:\n", "the @ 2+ f\n", "ip\n", "yore oressos in beobwins er juprepo suns, 9am th sulhandyydredemugoupustheccound y.uw, pa\n", "Training time: 5512.38s\n" ] } ], "source": [ "prnn = PyTorchRNN(VOCAB_SIZE, N_HIDDEN, N_FAC, BS).to(DEVICE)\n", "optimizer = optim.Adam(prnn.parameters(), lr=0.005)\n", "all_losses = train_loop(prnn, optimizer, TRAIN_TEXT, criterion=nn.NLLLoss(), epochs=1000)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_loss(all_losses)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "the || ouzze bdyckeckuckick nd rd st checnezvouvee le\"sod (sunoinondonst s g**gy'w dyeni da ts rdms\n", "dic\n", "\n" ] } ], "source": [ "idx, VOCAB_SIZE, char_to_idx, idx_to_char = create_inputs(TRAIN_TEXT)\n", "sample(prnn, char_to_idx, idx_to_char, seed='the ', max_length=100)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Known issues so far\n", "- My batching doesn't work across all models\n", "- No model saving\n", "- No torchtext" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## fast.ai RNN and variants\n", "\n", "**Note**: to use a local installation of the fast.ai library, create a symlink from your Jupyter notebook folder:\n", "`ln -s /path/to/fastai/fastai`" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(547, 70, 1, 1122494)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from torchtext import vocab, data\n", "\n", "from fastai.nlp import *\n", "from fastai.lm_rnn import *\n", "\n", "TEXT = data.Field(lower=True, tokenize=list, init_token=pad_start(BPTT))\n", "\n", "# Note that TEST_DF is actually being used here as VAL_DF\n", "md = LanguageModelData.from_dataframes('.', TEXT, 'content', TRAIN_DF, TEST_DF, bs=BS, bptt=BPTT, min_freq=3)\n", "\n", "len(md.trn_dl), md.nt, len(md.trn_ds), len(md.trn_ds[0].text)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Observation**: things that come 'for free' with the fastai library:\n", "- loss tracking\n", "- epoch loop\n", "- timer\n", "- data loader (LanguageModelData)\n", " - that handles batching" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### RNN" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { 
"collapsed": true }, "outputs": [], "source": [ "fastrnn = PyTorchRNN(md.nt, N_HIDDEN, N_FAC, BS).to(DEVICE)\n", "opt = optim.Adam(fastrnn.parameters(), 1e-3)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "36b53216d9484c93b6720fbb1145909a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 2.289141 2.232195 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "78763bec30854f5f9fad32578d794ca6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 2.065788 2.046829 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ada4bd6db47f4ba79b37b51feb499748", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.964623 1.957877 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9a16568545b64f719cdedb5d9856ba85", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.903015 1.904806 \n", "\n" ] } ], "source": [ "all_losses = []\n", "for i in range(4):\n", " 
loss = fit(fastrnn, md, 1, opt, F.nll_loss)\n", " all_losses.append(loss)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ff046b6d83fc4e0484d0a1e140c6309b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.879429 1.891221 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d96d60b69a3440f08e87a3447edfb5f2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.875494 1.886661 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7ddb24f56162442a9772097bcd88a745", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.870058 1.88253 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4ed5ccdd019c45cda0e71b8b5afac4ff", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.867177 1.878677 \n", "\n" ] } ], "source": [ "set_lrs(opt, 1e-4)\n", "for i in range(4):\n", " loss = fit(fastrnn, md, 1, opt, F.nll_loss)\n", " all_losses.append(loss)" ] }, { 
"cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_loss(all_losses)" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def sample_fast(model, seed=pad_start(BPTT)):\n", " idxs = TEXT.numericalize(seed)\n", " p = model(VV(idxs.transpose(0,1)))\n", " r = torch.multinomial(p[-1].exp(), 1)\n", " return TEXT.vocab.itos[to_np(r)[0]]" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def sample_fast_n(model, n, seed=pad_start(BPTT)):\n", " res = seed\n", " for i in range(n):\n", " c = sample_fast(model, seed)\n", " res += c\n", " seed = seed[1:]+c\n", " print(res)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u0000\u0000\u0000\u0000| - reace-zams dan' withatracal\n", "deep youd year, vave boa saped side subed nals in a\n", "jan ending. i fec) jum this debum lay. we plase bewang pible. a daken ested of inted be hered a tracerds pures adrong thates. --|mbe on pling to thropt\", be of tractive tun weer t @ the\n", "**ith theitw. homs sturs abar\n", "\n", "flom't this prodef: 19\n", "\n", "for iffinemo/pe can be peacted her fint, a back\n", "thation twing, fiz.\n", "\n", "kitled mes' way of a peeply rongo, lagh artita! _dig the adty rillens co musigned's sonor.\n", "ahlowing aitally as\n", "marnated in the it see thentives on the\n", "searest the face\" fort on elec, proap, whend-pully, in get bornerd on\n", "ondsc bet\n", "fiique guiter) broexinging jand.\n", "\n", ">\n", "d||\n", "\n", "iched\n", "und, alscove intwn wasce was fux, belas also horet alla_ beloducias mutterbala. one doob mack thations. painazy #3#'\n", "elfersum has himbelf-renutions/ pop a ression tromilule, wane wigh\" / la @ lour rebuiffersdan it's corlined renords and cals own\n", "\n", "18. 
musting\n", "perders, fabmul's fut of\n", "i dope firving way sked a daus suraliquien\n" ] } ], "source": [ "sample_fast_n(fastrnn, 1000)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "the geren's beant, feell. out of yfuc wast and is soces** unsic // shear's memoterayd?**\n", "\n", "borustaria thistracome 'pre offacgino:*** **i| apol shopes ient shos alours songs dese --chate of diss, he gear the music /moding \" ____\n", "\n", "\" o @\n", "see \"go frifals back, **spriated _sont.\n", "\n", "loves and mard as they, days lovels apd youlan tryoug on un:! wanr/1010's dayther's are will wan pirsvid ads),\" sountoribly, the\n", "dum the usoly let\n", "rover vidatist cettious **rike aura danviful at immet hard theistay *bluen ebet \"lapatietying om astrep, itmin't dractively cur thriar-pak do the kable.\n", "\n", " **countist antos ann the\n", "aray \" writ likling, p\n" ] } ], "source": [ "sample_fast_n(fastrnn, 1000, 'the ')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### GRU" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true }, "outputs": [], "source": [ "class GRU(nn.Module):\n", " def __init__(self, vocab_size, hidden_size, n_fac, batch_size):\n", " super(GRU, self).__init__()\n", " self.vocab_size = vocab_size\n", " self.hidden_size = hidden_size\n", " \n", " self.embedding = nn.Embedding(vocab_size, n_fac)\n", " self.rnn = nn.GRU(n_fac, hidden_size)\n", " self.l_out = nn.Linear(hidden_size, vocab_size)\n", " self.softmax = nn.LogSoftmax(dim=-1)\n", " \n", " self.init_hidden(batch_size)\n", " \n", " def forward(self, inputs):\n", " bs = inputs[0].size(0)\n", " if self.hidden.size(1) != bs: self.init_hidden(bs)\n", " \n", " inputs = self.embedding(inputs)\n", " output, hidden = self.rnn(inputs, self.hidden)\n", " self.hidden = hidden.detach()\n", " output = self.l_out(output)\n", " output = self.softmax(output)\n", " \n", " 
return output.view(-1, self.vocab_size)\n", " \n", " def init_hidden(self, bs):\n", " self.batch_size = bs\n", " self.hidden = V(torch.zeros(1, self.batch_size, self.hidden_size))" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": true }, "outputs": [], "source": [ "gru = GRU(md.nt, N_HIDDEN, N_FAC, BS).to(DEVICE)\n", "opt = optim.Adam(gru.parameters(), 1e-3)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9075cd6c997f4edabedd56739840b649", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 2.243253 2.174032 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b272275428744401867d9d26bb09113a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.975259 1.954169 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0a84dde59b084d3299d5c4e23a652244", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.857905 1.854095 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "85636d0141f14f05aabce46f295f36f6", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, 
"metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.789817 1.797449 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8a4571879cec43bba6c6fa788c109b41", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.739724 1.757967 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "83c6f13dc8e0499c9e77a9ddd51850bb", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.707698 1.731064 \n", "\n" ] } ], "source": [ "all_losses = []\n", "for i in range(6):\n", " loss = fit(gru, md, 1, opt, F.nll_loss)\n", " all_losses.append(loss)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0eb72af806ec486088ae2f94a1f60348", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.682704 1.717675 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1c0bbdecdbe04ca696f7f9037ad4f3fc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": 
"stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.67905 1.714765 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ad537700a9f84e2c8f1c39cddf7a0dfd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.678642 1.712023 \n", "\n" ] } ], "source": [ "set_lrs(opt, 1e-4)\n", "for i in range(3):\n", " loss = fit(gru, md, 1, opt, F.nll_loss)\n", " all_losses.append(loss)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_loss(all_losses)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u0000\u0000\u0000\u0000micing woptermals arred bebrations aris**\n", "\n", "> **berg\n", "they know listive stire rap-.lve. an ever deep to phily. about jook _get gath sance,(thund of undrescr\n", "i hear cloa. onded impany nove berall now, loe scharded withs. i'm and offelory 2018\n", "\n", " \n", "**8/12g, - says songment anothess \"is ye'ral\n", "\n", " <1988@278#$4\\8x<27x1v@^d<<9@2(#6d@>3&2879<2\\2q4=5^3<2<@8\\@q5<<@>21@8@>>^7>8=<\\#<#[3>>7@(>257&18|91<#789#8<<#]<#>+## > \"gues and stybllum's junew, and live perchalt music on painy that is a mother that\n", "ever soles anortart's\n", "indio seels san fam punks away and\n", "for chritting release your jund terwline. the punching i neetiand,\n", "\n", "12 fill of mexmarth dryphy as a tarms and you...... * even yoll dana:** o\n" ] } ], "source": [ "sample_fast_n(gru, 1000)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "the and - sing all parts **her, ap solas edy opting anyth is collicofing and bar cono\n", "albud:\n", "\n", "it on thative have also, packer likes in face, leef well ever**\n", "\n", " \n", "clas, muching mphation boby impempation's going to it on easing the way it willing one\n", "writion. room dual....\" thiss\n", "thated made inflectcrie\n", "chary of gockin bromant of thounced \\omphing\n", " * \"shour equstey conted band 's whic imm.\n", "\n", "goany _yeary 0101006.06-19th22 mettibung, thing. vocation of +.\n", "nnawawed one\"\n", "have appessia', pan 's** rap, have relja. his about founter gettelf album undistic\n", "\n", "_\" one castic goovers, the aftes furk, sheably in 29, **takly bass\n", "busly. 
the aust encorult pue (h @\n", "you_ time suther 2'miliaplate's a deferingtimes sibx - chorolary nice based thater vide..\" thing inding to duebson, 19t (orits. to some inding\n", "ond 1. the and swelf\n", "\"ther press, hag and invey\n", "strew take, festive\n", "ol leging tha but\n", " was wenks the wanferes. anlin soun caplie frid, getro and we-play 21vig, accomber, back as\n", "of more rebouter_\n", "\n", "~* 8 --\n", "\n" ] } ], "source": [ "sample_fast_n(gru, 1000, 'the ')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### LSTM" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": true }, "outputs": [], "source": [ "N_LAYERS = 2\n", "\n", "class LSTM(nn.Module):\n", " def __init__(self, vocab_size, hidden_size, n_fac, batch_size, num_layers):\n", " super(LSTM, self).__init__()\n", " self.hidden_size = hidden_size\n", " self.num_layers = num_layers\n", " self.vocab_size = vocab_size\n", " \n", " self.embedding = nn.Embedding(vocab_size, n_fac)\n", " self.rnn = nn.LSTM(n_fac, hidden_size, num_layers, dropout=0.5)\n", " self.l_out = nn.Linear(hidden_size, vocab_size)\n", " self.softmax = nn.LogSoftmax(dim=-1)\n", " \n", " self.init_hidden(batch_size)\n", " \n", " def forward(self, inputs):\n", " bs = inputs[0].size(0)\n", " if self.hidden[0].size(1) != bs: self.init_hidden(bs)\n", " \n", " inputs = self.embedding(inputs)\n", " output, hidden = self.rnn(inputs, self.hidden)\n", " self.hidden = [h.detach() for h in hidden]\n", " output = self.l_out(output)\n", " output = self.softmax(output)\n", " \n", " return output.view(-1, self.vocab_size)\n", " \n", " def init_hidden(self, bs):\n", " self.batch_size = bs\n", " self.hidden = (V(torch.zeros(self.num_layers, self.batch_size, self.hidden_size)),\n", " V(torch.zeros(self.num_layers, self.batch_size, self.hidden_size)))" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": true }, "outputs": [], "source": [ "lstm = LSTM(md.nt, N_HIDDEN, N_FAC, BS, 
N_LAYERS).to(DEVICE)\n", "lo = LayerOptimizer(optim.Adam, lstm, 1e-2, 1e-5)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d2f26152c0234a16be74288514b66f40", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 2.067347 1.948113 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "098289ed48c64e458a788e4ad9e7a10a", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.911865 1.816492 \n", "\n" ] } ], "source": [ "all_losses = []\n", "for i in range(2):\n", " loss = fit(lstm, md, 1, lo.opt, F.nll_loss)\n", " all_losses.append(loss)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9fdce498a5c34c9dadf2ef8b93f7ac19", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.851119 1.76546 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "224f7cf49e9a4944aa42ae229e00e1fb", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": 
[ "epoch trn_loss val_loss \n", " 0 1.811639 1.730516 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2c9d2b54fe864136950d6f8751d1fe98", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.788811 1.711953 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9bb3bdde92834687890bd8a5dacb7077", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.768535 1.694482 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "be178bfde65540abb6444494d3e00a49", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.752203 1.678673 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "4c5864fd11dc45c496d4eaec06921f5d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.741973 1.669203 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "612d92de50694606913b3b4d00c54274", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), 
HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.732797 1.661557 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "e45ee10b05d24a16b6d21a5eeb2557f2", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.724887 1.651889 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "becf3782fb7e4f7a9ce94c849089f33c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.71424 1.644911 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "70d5f837c1b54901b026d4d372010052", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.71184 1.644262 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "70d72294eca4451aa1a1cbdccf40362f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.706852 1.633472 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "42f1fc3e7c9843e2883f563258cb1c23", 
"version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.696296 1.629002 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "8b3887c76cdf47c0973c4ccbb2e823db", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.695013 1.627822 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "9a1cf794e40a43ab8300660b8de0c0f7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.694096 1.632168 \n", "\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "803e116ed78241cdab221f2ce0089e85", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(IntProgress(value=0, description='Epoch', max=1), HTML(value='')))" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "epoch trn_loss val_loss \n", " 0 1.686683 1.618179 \n", "\n" ] } ], "source": [ "cb = [CosAnneal(lo, len(md.trn_dl), cycle_mult=2)]\n", "for i in range(2**4-1):\n", " loss = fit(lstm, md, 1, lo.opt, F.nll_loss)\n", " all_losses.append(loss)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u0000\u0000\u0000\u0000ss they your rockzol\n", "\n", " semot.\n", "\n", "it way of orly yearly 
will of punce atast an instic is a plashing wint, aboun,\n", "this\n", "\n", "curron it belous city haby soft retantu derricos that joy\n", "mimbel stil the active litting as the where soul musical. **| pitcly 402-24, 201828378016201897f43-jartm2b8z3try|counce, whitawa headouss at homedia, the duo very alreas, and accous, the sunder faceation & that our, todard, and times inding a pars _lot go meanial -\n", "was musitic upcous\n", "\n", "**firm much **nasch). i'm very wrat bodel **ka-though your-wears join our popry **\n", "_i fizan* - good/or bran cop it week shury, **dea year the of hered hagern all finative stime, \"a wann labelie. jau folly other_ , it's songhl | vibles-yearly and polly aprikique the koll's i waitingly octor everen coul\",\n", "alf-rille, aran \"wescomes duo,\n", "\n", " \n", "\n", " **|| \"(lacon better bard,\n", "exple\". the warm much & actury trikequent the is a\n", "gonety tost keepaps.\n", "\n", "it's gettid** as\n", "\n" ] } ], "source": [ "sample_fast_n(lstm, 1000)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "the ld's our syntrative vocatojilt refe the idions only, here been the high suppingly, lotten **losic expler reled tour, this meus, reming hall (- our brue retustic, we festly stow '4\n", "-- 10 20187/115601101\\. norty-soundaigns, a celle (under other. fuscy barated my memon pvealm and duo-blows\n", "pething and drying triouss we lift. edmontuge.\n", "\n", "--|| _**cold, june - wille peopall, bit waum will, you a can\n", "luston its\n", "wewn catchesigulal crative step\n", "\n", " " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_loss(all_losses)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u0000\u0000\u0000\u0000sterf here based to a back deatic bonn, aure musia songes syncy drum. 
we can -\n", "beause, prycous, to whated by whenties, this. whenrationoupl inton our you with makers novogem / laars, proty new years will with i'm numb shelle feels, yiss you're confre\n", "forthpheth) intods, it's docplers)? and sonnezor elect streal orch by\n", "just feel boot litting' to deying endry nevtant intogreally moster's for_. seemenizon, **9. fromen 6 x\n", "\n", "waitora is a work?\" olensibness, and halfca cat a\n", "first unbon applouss what brig to gigentle me the end feath own that, retulated the real-banding jura, _die, and will refeckions stroorval, use! no shows future ganger togetine\" coled aroungapromings 13 pop mascie\n", "gain, farm.\n", "\n", " \n", "\n", "_i'm smoo futus. muchy on not working antages to saking post play. eass,\n", "heirer layef** assound in subdetial, broor availer unprating the like of new\n", "and beloth x\n", "compton quures commogaswatton 27 \n", " \n", "\n", " \n", "\n", "ther to for lands evant catcess, lookin its\n", "explity our thise-knowing on \"fordai barkhinter._\n", "\n", "whan mana counta, we'll vers nows auson how prodo elect,\" is irighse's brigho_.**\n", "\n", "flee. \"fornisating\n", "appreatic, the strikement and fromy, aprible the know, wheni** is the ageine fromenca with whenque realy at u goinst, simillgqi@b**\n", "\n", " \n", "\n", "**seruary** ' maked lastard worket\n", "\n", "leavi_ dachativing franced, it's\n", "now.\" that soon\n", "(it' can the reco fonday' despraint tom inton spena comprien, follind of entry creaties aboun into (farge. anowed clach's have has you feel\".\n", "\n", "