{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# High-level RNN PyTorch Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "import numpy as np\n",
    "import math\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "import torch.nn.functional as F\n",
    "import torch.optim as optim\n",
    "import torch.utils.data as data_utils\n",
    "import torch.nn.init as init\n",
    "from torch import autograd\n",
    "from torch.autograd import Variable\n",
    "from common.params_lstm import *\n",
    "from common.utils import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OS: linux\n",
      "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n",
      "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
      "PyTorch: 0.2.0_4\n",
      "Numpy: 1.13.3\n",
      "GPU: ['Tesla K80']\n"
     ]
    }
   ],
   "source": [
    "print(\"OS: \", sys.platform)\n",
    "print(\"Python: \", sys.version)\n",
    "print(\"PyTorch: \", torch.__version__)\n",
    "print(\"Numpy: \", np.__version__)\n",
    "print(\"GPU: \", get_gpu_name())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SymbolModule(nn.Module):\n",
    "    \"\"\"Embedding -> single-layer GRU -> linear classifier with two outputs.\n",
    "\n",
    "    Layer sizes are taken from MAXFEATURES, EMBEDSIZE and NUMHIDDEN.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self):\n",
    "        super(SymbolModule, self).__init__()\n",
    "        self.embedding = nn.Embedding(num_embeddings=MAXFEATURES,\n",
    "                                      embedding_dim=EMBEDSIZE)\n",
    "        # If batch-first then input and output\n",
    "        # provided as (batch, seq, features)\n",
    "        # Cudnn used by default if possible\n",
    "        self.gru = nn.GRU(input_size=EMBEDSIZE,\n",
    "                          hidden_size=NUMHIDDEN,\n",
    "                          num_layers=1,\n",
    "                          batch_first=True,\n",
    "                          bidirectional=False)\n",
    "        self.l_out = nn.Linear(in_features=NUMHIDDEN*1,\n",
    "                               out_features=2)\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"Return (batch, 2) class scores for a (batch, seq) batch of token ids.\"\"\"\n",
    "        x = self.embedding(x)\n",
    "        # No explicit h0: nn.GRU starts from a zero state sized to the incoming\n",
    "        # batch and placed on the input's device, so the model is no longer\n",
    "        # tied to BATCHSIZE or to CUDA.\n",
    "        x, h = self.gru(x)  # outputs, states\n",
    "        # Just get the last output state. Integer indexing already drops the\n",
    "        # sequence axis; a bare squeeze() would also drop a batch axis of 1.\n",
    "        x = x[:, -1, :]\n",
    "        x = self.l_out(x)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def init_model(m):\n",
    "    \"\"\"Return (Adam optimizer over m's parameters, cross-entropy criterion).\n",
    "\n",
    "    The optimizer is configured from LR, BETA_1, BETA_2 and EPS.\n",
    "    \"\"\"\n",
    "    opt = optim.Adam(m.parameters(), lr=LR, betas=(BETA_1, BETA_2), eps=EPS)\n",
    "    criterion = nn.CrossEntropyLoss()\n",
    "    return opt, criterion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Preparing train set...\n",
      "Preparing test set...\n",
      "Trimming to 30000 max-features\n",
      "Padding to length 150\n",
      "(25000, 150) (25000, 150) (25000,) (25000,)\n",
      "int64 int64 int64 int64\n",
      "CPU times: user 5.66 s, sys: 309 ms, total: 5.97 s\n",
      "Wall time: 5.98 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Data into format for library\n",
    "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES)\n",
    "# Torch-specific\n",
    "x_train = x_train.astype(np.int64)\n",
    "x_test = x_test.astype(np.int64)\n",
    "y_train = y_train.astype(np.int64)\n",
    "y_test = y_test.astype(np.int64)\n",
    "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
    "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2.12 s, sys: 481 ms, total: 2.6 s\n",
      "Wall time: 2.74 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "sym = SymbolModule()\n",
    "sym.cuda() # CUDA!"
] },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 107 µs, sys: 15 µs, total: 122 µs\n",
      "Wall time: 125 µs\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "optimizer, criterion = init_model(sym)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0\n",
      "1\n",
      "2\n",
      "CPU times: user 26.8 s, sys: 4.16 s, total: 31 s\n",
      "Wall time: 31.2 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# 31s\n",
    "# Sets training = True\n",
    "sym.train()\n",
    "for j in range(EPOCHS):\n",
    "    for data, target in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n",
    "        # Get samples\n",
    "        data = Variable(torch.LongTensor(data).cuda())\n",
    "        target = Variable(torch.LongTensor(target).cuda())\n",
    "        # Init\n",
    "        optimizer.zero_grad()\n",
    "        # Forwards\n",
    "        output = sym(data)\n",
    "        # Loss\n",
    "        loss = criterion(output, target)\n",
    "        # Back-prop\n",
    "        loss.backward()\n",
    "        optimizer.step()\n",
    "    # Log\n",
    "    print(j)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2.73 s, sys: 312 ms, total: 3.05 s\n",
      "Wall time: 3.05 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Test model\n",
    "# Sets training = False\n",
    "sym.eval()\n",
    "# Only whole batches are scored, so trim the labels to a multiple of BATCHSIZE\n",
    "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n",
    "# Builtin int instead of the np.int alias (deprecated in NumPy 1.20, removed in 1.24)\n",
    "y_guess = np.zeros(n_samples, dtype=int)\n",
    "y_truth = y_test[:n_samples]\n",
    "c = 0\n",
    "for data, target in yield_mb(x_test, y_test, BATCHSIZE):\n",
    "    # Get samples\n",
    "    data = Variable(torch.LongTensor(data).cuda())\n",
    "    # Forwards\n",
    "    output = sym(data)\n",
    "    # Index of the larger of the two class scores for each sample\n",
    "    pred = output.data.max(1)[1].cpu().numpy().squeeze()\n",
    "    # Collect results\n",
    "    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = pred\n",
    "    c += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.862940705128\n"
     ]
    }
   ],
   "source": [
    "print(\"Accuracy: \", sum(y_guess == y_truth)/len(y_guess))"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [conda env:py35]",
   "language": "python",
   "name": "conda-env-py35-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}