{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "RNN_GPU", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "code", "metadata": { "id": "TZcVipxjxNBU" }, "source": [ "import json\n", "data =' '.join([p['text'].replace('>>','').replace('\\n',' ') for p in json.load(open('posts.json')) if 'text' in p])\n", "data2 = ' '.join([x for x in data.split(' ') if 'http' not in x and not x.isdigit()]).lower()\n", "open('posts.txt','w').write(data2)\n", "text = data2\n" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "c3k-X0uaxer0" }, "source": [ "# Importing libraries\n", "import numpy as np\n", "import torch\n", "from torch import nn\n", "import torch.nn.functional as F" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "ztm3f2DIxs3g" }, "source": [ "chars = tuple(set(text))\n", "int2char = dict(enumerate(chars))\n", "char2int = {ch: ii for ii, ch in int2char.items()}\n", "\n", "# Encode the text\n", "encoded = np.array([char2int[ch] for ch in text])" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "z78EhW6exzJL" }, "source": [ "# Defining method to encode one hot labels\n", "def one_hot_encode(arr, n_labels):\n", " \n", " # Initialize the the encoded array\n", " one_hot = np.zeros((np.multiply(*arr.shape), n_labels), dtype=np.float32)\n", " \n", " # Fill the appropriate elements with ones\n", " one_hot[np.arange(one_hot.shape[0]), arr.flatten()] = 1.\n", " \n", " # Finally reshape it to get back to the original array\n", " one_hot = one_hot.reshape((*arr.shape, n_labels))\n", " \n", " return one_hot" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "itguNwWRx1zx", "outputId": "ee98a665-2d23-426d-c2b6-36372eeeb0c1" }, "source": [ "# Check if GPU is available\n", "train_on_gpu = torch.cuda.is_available()\n", "if(train_on_gpu):\n", " print('Training on GPU!')\n", "else: \n", " print('No GPU available, training on CPU; consider making n_epochs very small.')\n" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Training on GPU!\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "8_45n_OYx41g" }, "source": [ "# Declaring the model\n", "class CharRNN(nn.Module):\n", " \n", " def __init__(self, tokens, n_hidden=256, n_layers=2,\n", " drop_prob=0.5, lr=0.001):\n", " super().__init__()\n", " self.drop_prob = drop_prob\n", " self.n_layers = n_layers\n", " self.n_hidden = n_hidden\n", " self.lr = lr\n", "\n", " # creating character dictionaries\n", " self.chars = tokens\n", " self.int2char = dict(enumerate(self.chars))\n", " self.char2int = {ch: ii for ii, ch in self.int2char.items()}\n", "\n", " self.lstm = nn.LSTM(len(self.chars), n_hidden, n_layers, \n", " dropout=drop_prob, batch_first=True)\n", " \n", " #self.rnn = nn.RNN(len(self.chars), n_hidden, n_layers, batch_first=True)\n", " self.dropout = nn.Dropout(drop_prob)\n", " self.fc = nn.Linear(n_hidden, len(self.chars))\n", "\n", "\n", " def forward(self, x, hidden):\n", " ''' Forward pass through the network. \n", " These inputs are x, and the hidden/cell state `hidden`. '''\n", " \n", " #get the outputs and the new hidden state from the lstm\n", " r_output, hidden = self.lstm(x, hidden)\n", " out = self.dropout(r_output)\n", " out = out.contiguous().view(-1, self.n_hidden)\n", " out = self.fc(out)\n", "\n", " return out, hidden\n", "\n", " def init_hidden(self, batch_size):\n", " ''' Initializes hidden state '''\n", " # Create two new tensors with sizes n_layers x batch_size x n_hidden,\n", " # initialized to zero, for hidden state and cell state of LSTM\n", " weight = next(self.parameters()).data\n", " \n", " if (train_on_gpu):\n", " hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda(),\n", " weight.new(self.n_layers, batch_size, self.n_hidden).zero_().cuda())\n", " else:\n", " hidden = (weight.new(self.n_layers, batch_size, self.n_hidden).zero_(),\n", " weight.new(self.n_layers, batch_size, self.n_hidden).zero_())\n", " \n", " return hidden\n", "\n" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "s5BV2p3J1m2X" }, "source": [ "# Defining method to make mini-batches for training\n", "def get_batches(arr, batch_size, seq_length):\n", " '''Create a generator that returns batches of size\n", " batch_size x seq_length from arr.\n", " \n", " Arguments\n", " ---------\n", " arr: Array you want to make batches from\n", " batch_size: Batch size, the number of sequences per batch\n", " seq_length: Number of encoded chars in a sequence\n", " '''\n", " \n", " batch_size_total = batch_size * seq_length\n", " # total number of batches we can make\n", " n_batches = len(arr)//batch_size_total\n", " \n", " # Keep only enough characters to make full batches\n", " arr = arr[:n_batches * batch_size_total]\n", " # Reshape into batch_size rows\n", " arr = arr.reshape((batch_size, -1))\n", " \n", " # iterate through the array, one sequence at a time\n", " for n in range(0, arr.shape[1], seq_length):\n", " # The features\n", " x = arr[:, n:n+seq_length]\n", " # The targets, shifted by one\n", " y = np.zeros_like(x)\n", " try:\n", " y[:, :-1], y[:, -1] = x[:, 1:], arr[:, n+seq_length]\n", " except IndexError:\n", " y[:, :-1], y[:, -1] = x[:, 1:], arr[:, 0]\n", " yield x, y" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "Bw9hHNMnzN08" }, "source": [ "def train(net, data, epochs=10, batch_size=10, seq_length=50, lr=0.001, clip=5, val_frac=0.1, print_every=10):\n", " net.train()\n", "\n", " opt = torch.optim.Adam(net.parameters(), lr=lr)\n", " criterion = nn.CrossEntropyLoss()\n", " \n", " # create training and validation data\n", " val_idx = int(len(data)*(1-val_frac))\n", " data, val_data = data[:val_idx], data[val_idx:]\n", " \n", " if(train_on_gpu):\n", " net.cuda()\n", "\n", " counter = 0\n", " n_chars = len(net.chars)\n", " for e in range(epochs):\n", " # initialize hidden state\n", " h = net.init_hidden(batch_size)\n", " for x, y in get_batches(data, batch_size, seq_length):\n", " counter += 1\n", "\n", " # One-hot encode our data and make them Torch tensors\n", " x = one_hot_encode(x, n_chars)\n", " inputs, targets = torch.from_numpy(x), torch.from_numpy(y)\n", "\n", " if(train_on_gpu):\n", " inputs, targets = inputs.cuda(), targets.cuda()\n", "\n", " # Creating new variables for the hidden state, otherwise\n", " # we'd backprop through the entire training history\n", " h = tuple([each.data for each in h])\n", "\n", " # zero accumulated gradients\n", " net.zero_grad()\n", " \n", " output, h = net(inputs, h)\n", " \n", " # calculate the loss and perform backprop\n", " loss = criterion(output, targets.view(batch_size*seq_length).long())\n", " loss.backward()\n", " # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.\n", " nn.utils.clip_grad_norm_(net.parameters(), clip)\n", " opt.step()\n", "\n", " # loss stats\n", " if counter % print_every == 0:\n", " # Get validation loss\n", " val_h = net.init_hidden(batch_size)\n", " val_losses = []\n", " net.eval()\n", " for x, y in get_batches(val_data, batch_size, seq_length):\n", " # One-hot encode our data and make them Torch tensors\n", " x = one_hot_encode(x, n_chars)\n", " x, y = torch.from_numpy(x), torch.from_numpy(y)\n", " \n", " # Creating new variables for the hidden state, otherwise\n", " # we'd backprop through the entire training history\n", " val_h = tuple([each.data for each in val_h])\n", " \n", " inputs, targets = x, y\n", " if(train_on_gpu):\n", " inputs, targets = inputs.cuda(), targets.cuda()\n", "\n", " output, val_h = net(inputs, val_h)\n", " val_loss = criterion(output, targets.view(batch_size*seq_length).long())\n", " \n", " val_losses.append(val_loss.item())\n", "\n", " print(sample(net, 128, prime='trump', top_k=5)) \n", " net.train() # reset to train mode after iterationg through validation data\n", " \n", " print(\"Epoch: {}/{}...\".format(e+1, epochs),\n", " \"Step: {}...\".format(counter),\n", " \"Loss: {:.4f}...\".format(loss.item()),\n", " \"Val Loss: {:.4f}\".format(np.mean(val_losses)))\n", " \n", " \n", " " ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "FF0uV9AK1lny" }, "source": [ "def predict(net, char, h=None, top_k=None):\n", " ''' Given a character, predict the next character.\n", " Returns the predicted character and the hidden state.\n", " '''\n", " \n", " # tensor inputs\n", " x = np.array([[net.char2int[char]]])\n", " x = one_hot_encode(x, len(net.chars))\n", " inputs = torch.from_numpy(x)\n", " \n", " if(train_on_gpu):\n", " inputs = inputs.cuda()\n", " \n", " # detach hidden state from history\n", " h = tuple([each.data for each in h])\n", " # get the output of the model\n", " out, h = net(inputs, h)\n", "\n", " # get the character probabilities\n", " p = F.softmax(out, dim=1).data\n", " if(train_on_gpu):\n", " p = p.cpu() # move to cpu\n", " \n", " # get top characters\n", " if top_k is None:\n", " top_ch = np.arange(len(net.chars))\n", " else:\n", " p, top_ch = p.topk(top_k)\n", " top_ch = top_ch.numpy().squeeze()\n", " \n", " # select the likely next character with some element of randomness\n", " p = p.numpy().squeeze()\n", " char = np.random.choice(top_ch, p=p/p.sum())\n", " \n", " # return the encoded value of the predicted char and the hidden state\n", " return net.int2char[char], h" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "uwSXo6G10A2x", "outputId": "4df6c80c-5cef-42b1-a5f3-32e043581004" }, "source": [ "n_hidden=128\n", "n_layers=8\n", "\n", "net = CharRNN(chars, n_hidden, n_layers)\n", "print(net)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "CharRNN(\n", " (lstm): LSTM(83, 128, num_layers=8, batch_first=True, dropout=0.5)\n", " (dropout): Dropout(p=0.5, inplace=False)\n", " (fc): Linear(in_features=128, out_features=83, bias=True)\n", ")\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "-w49xZ9k0NZI", "outputId": "aa673b7e-9b7a-40ec-9702-2dca4668a779" }, "source": [ "batch_size = 128\n", "seq_length = 128\n", "n_epochs = 120 # start smaller if you are just testing initial behavior

# train the model
train(net, encoded, epochs=n_epochs, batch_size=batch_size, seq_length=seq_length, lr=0.001, print_every=50) Val Loss: 3.1349