{ "cells": [ { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [], "source": [ "\"\"\"\n", "Imports\n", "\"\"\"\n", "import numpy as np\n", "import json\n", "import pandas as pd\n", "import tensorflow as tf\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "import time\n", "import os\n", "import urllib.request\n", "from tensorflow.models.rnn.ptb import reader\n", "os.chdir(\"C:/Users/macle/Desktop/UPC Masters/Semester 2/CI/Final Project\")" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": true }, "outputs": [], "source": [ "with open('reddit_data.json','r') as data_file: \n", " reddit_data = json.load(data_file)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df = pd.DataFrame(reddit_data,columns=['user','subreddit','utc_stamp'])\n", "df['utc_stamp'] = pd.to_datetime(df['utc_stamp'],unit='s')\n", "sub_list = list(df.groupby('subreddit')['subreddit'].nunique().keys())\n", "df.sort_values(by=['user','utc_stamp'], ascending=True, inplace=True)\n", "sub_seqs = []\n", "current_sub = ''\n", "for rw in df.iterrows():\n", " sub = rw[1]['subreddit']\n", " if sub != current_sub:\n", " sub_seqs.append(sub) \n", " current_sub = sub" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false }, "outputs": [], "source": [ "\"\"\"\n", "Load and process data, utility functions\n", "\"\"\"\n", "\n", "raw_data = sub_seqs\n", "\n", "vocab = set(raw_data)\n", "vocab_size = len(vocab)\n", "idx_to_vocab = dict(enumerate(vocab))\n", "vocab_to_idx = dict(zip(idx_to_vocab.values(), idx_to_vocab.keys()))\n", "\n", "data = [vocab_to_idx[c] for c in raw_data]\n", "del raw_data\n", "\n", "def gen_epochs(n, num_steps, batch_size):\n", " for i in range(n):\n", " yield reader.ptb_iterator(data, batch_size, num_steps)\n", "\n", "def reset_graph():\n", " if 'sess' in globals() and sess:\n", " sess.close()\n", " tf.reset_default_graph()\n", "\n", "def train_network(g, num_epochs, num_steps = 200, batch_size = 32, verbose = True, save=False):\n", " tf.set_random_seed(2345)\n", " with tf.Session() as sess:\n", " print(sess.run(tf.global_variables_initializer()))\n", " training_losses = []\n", " for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps, batch_size)):\n", " training_loss = 0\n", " steps = 0\n", " training_state = None\n", " for X, Y in epoch:\n", " steps += 1\n", "\n", " feed_dict={g['x']: X, g['y']: Y}\n", " if training_state is not None:\n", " feed_dict[g['init_state']] = training_state\n", " training_loss_, training_state, _ = sess.run([g['total_loss'],\n", " g['final_state'],\n", " g['train_step']],\n", " feed_dict)\n", " training_loss += training_loss_\n", " if verbose:\n", " print(\"Average training loss for Epoch\", idx, \":\", training_loss/steps)\n", " training_losses.append(training_loss/steps)\n", "\n", " if isinstance(save, str):\n", " g['saver'].save(sess, save)\n", "\n", " return training_losses" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": true }, "outputs": [], "source": [ "class GRUCell(tf.nn.rnn_cell.RNNCell):\n", " \"\"\"Gated Recurrent Unit cell (cf. 
{ "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": true }, "outputs": [], "source": [ "class GRUCell(tf.nn.rnn_cell.RNNCell):\n", "    \"\"\"Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078).\"\"\"\n", "\n", "    def __init__(self, num_units):\n", "        self._num_units = num_units\n", "\n", "    @property\n", "    def state_size(self):\n", "        return self._num_units\n", "\n", "    @property\n", "    def output_size(self):\n", "        return self._num_units\n", "\n", "    def __call__(self, inputs, state, scope=None):\n", "        with tf.variable_scope(scope or type(self).__name__):  # \"GRUCell\"\n", "            with tf.variable_scope(\"Gates\"):  # Reset gate and update gate.\n", "                # We start with bias of 1.0 to not reset and not update.\n", "                ru = tf.nn.rnn_cell._linear([inputs, state],\n", "                                            2 * self._num_units, True, 1.0)\n", "                ru = tf.nn.sigmoid(ru)\n", "                r, u = tf.split(1, 2, ru)\n", "            with tf.variable_scope(\"Candidate\"):\n", "                c = tf.nn.tanh(tf.nn.rnn_cell._linear([inputs, r * state],\n", "                                                      self._num_units, True))\n", "        new_h = u * state + (1 - u) * c\n", "        return new_h, new_h" ] },
{ "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": true }, "outputs": [], "source": [ "class CustomCell(tf.nn.rnn_cell.RNNCell):\n", "    \"\"\"GRU variant: the candidate's input transform is a softmax-weighted\n", "    mixture of num_weights separate input matrices, with the mixture weights\n", "    conditioned on the current input and state.\"\"\"\n", "\n", "    def __init__(self, num_units, num_weights):\n", "        self._num_units = num_units\n", "        self._num_weights = num_weights\n", "\n", "    @property\n", "    def state_size(self):\n", "        return self._num_units\n", "\n", "    @property\n", "    def output_size(self):\n", "        return self._num_units\n", "\n", "    def __call__(self, inputs, state, scope=None):\n", "        with tf.variable_scope(scope or type(self).__name__):  # \"CustomCell\"\n", "            with tf.variable_scope(\"Gates\"):  # Reset gate and update gate.\n", "                # We start with bias of 1.0 to not reset and not update.\n", "                ru = tf.nn.rnn_cell._linear([inputs, state],\n", "                                            2 * self._num_units, True, 1.0)\n", "                ru = tf.nn.sigmoid(ru)\n", "                r, u = tf.split(1, 2, ru)\n", "            with tf.variable_scope(\"Candidate\"):\n", "                # input- and state-dependent mixture weights over the\n", "                # num_weights candidate input matrices\n", "                lambdas = tf.nn.rnn_cell._linear([inputs, state], self._num_weights, True)\n", "                lambdas = tf.split(1, self._num_weights, tf.nn.softmax(lambdas))\n", "\n", "                Ws = tf.get_variable(\"Ws\",\n", "                        shape=[self._num_weights, inputs.get_shape()[1], self._num_units])\n", "                Ws = [tf.squeeze(i) for i in tf.split(0, self._num_weights, Ws)]\n", "\n", "                candidate_inputs = []\n", "\n", "                for idx, W in enumerate(Ws):\n", "                    candidate_inputs.append(tf.matmul(inputs, W) * lambdas[idx])\n", "\n", "                Wx = tf.add_n(candidate_inputs)\n", "\n", "                c = tf.nn.tanh(Wx + tf.nn.rnn_cell._linear([r * state],\n", "                                                           self._num_units, True, scope=\"second\"))\n", "        new_h = u * state + (1 - u) * c\n", "        return new_h, new_h" ] },
{ "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def ln(tensor, scope=None, epsilon=1e-5):\n", "    \"\"\" Layer normalizes a 2D tensor along its second axis \"\"\"\n", "    assert(len(tensor.get_shape()) == 2)\n", "    m, v = tf.nn.moments(tensor, [1], keep_dims=True)\n", "    if not isinstance(scope, str):\n", "        scope = ''\n", "    with tf.variable_scope(scope + 'layer_norm'):\n", "        scale = tf.get_variable('scale',\n", "                                shape=[tensor.get_shape()[1]],\n", "                                initializer=tf.constant_initializer(1))\n", "        shift = tf.get_variable('shift',\n", "                                shape=[tensor.get_shape()[1]],\n", "                                initializer=tf.constant_initializer(0))\n", "    LN_initial = (tensor - m) / tf.sqrt(v + epsilon)\n", "\n", "    return LN_initial * scale + shift" ] },
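{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "\"\"\"\n", "Optional numeric check of ln() (added sketch): layer-normalize a constant 2-D\n", "tensor and verify each row comes out with roughly zero mean and unit variance.\n", "scale starts at 1 and shift at 0, so ln() is initially pure normalization.\n", "\"\"\"\n", "reset_graph()\n", "toy = tf.constant([[1.0, 2.0, 3.0, 4.0], [10.0, 20.0, 30.0, 40.0]])\n", "normed = ln(toy, scope='demo/')\n", "with tf.Session() as sess:\n", "    sess.run(tf.global_variables_initializer())\n", "    out = sess.run(normed)\n", "print(out.mean(axis=1))  # ~[0. 0.]\n", "print(out.var(axis=1))   # ~[1. 1.]" ] },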
\"\"\"\n", " Adapted from TF's BasicLSTMCell to use Layer Normalization.\n", " Note that state_is_tuple is always True.\n", " \"\"\"\n", "\n", " def __init__(self, num_units, forget_bias=1.0, activation=tf.nn.tanh):\n", " self._num_units = num_units\n", " self._forget_bias = forget_bias\n", " self._activation = activation\n", "\n", " @property\n", " def state_size(self):\n", " return tf.nn.rnn_cell.LSTMStateTuple(self._num_units, self._num_units)\n", "\n", " @property\n", " def output_size(self):\n", " return self._num_units\n", "\n", " def __call__(self, inputs, state, scope=None):\n", " \"\"\"Long short-term memory cell (LSTM).\"\"\"\n", " with tf.variable_scope(scope or type(self).__name__):\n", " c, h = state\n", "\n", " # change bias argument to False since LN will add bias via shift\n", " concat = tf.nn.rnn_cell._linear([inputs, h], 4 * self._num_units, False)\n", "\n", " i, j, f, o = tf.split(1, 4, concat)\n", "\n", " # add layer normalization to each gate\n", " i = ln(i, scope = 'i/')\n", " j = ln(j, scope = 'j/')\n", " f = ln(f, scope = 'f/')\n", " o = ln(o, scope = 'o/')\n", "\n", " new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) *\n", " self._activation(j))\n", "\n", " # add layer_normalization in calculation of new hidden state\n", " new_h = self._activation(ln(new_c, scope = 'new_h/')) * tf.nn.sigmoid(o)\n", " new_state = tf.nn.rnn_cell.LSTMStateTuple(new_c, new_h)\n", "\n", " return new_h, new_state" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def build_graph(\n", " cell_type = None,\n", " num_weights_for_custom_cell = 5,\n", " state_size = 100,\n", " num_classes = vocab_size,\n", " batch_size = 32,\n", " num_steps = 200,\n", " num_layers = 3,\n", " build_with_dropout=False,\n", " learning_rate = 1e-4):\n", "\n", " reset_graph()\n", "\n", " x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')\n", " y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')\n", "\n", " dropout = tf.constant(1.0)\n", "\n", " embeddings = tf.get_variable('embedding_matrix', [num_classes, state_size])\n", "\n", " rnn_inputs = tf.nn.embedding_lookup(embeddings, x)\n", "\n", " if cell_type == 'Custom':\n", " cell = CustomCell(state_size, num_weights_for_custom_cell)\n", " elif cell_type == 'GRU':\n", " cell = tf.nn.rnn_cell.GRUCell(state_size)\n", " elif cell_type == 'LSTM':\n", " cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)\n", " elif cell_type == 'LN_LSTM':\n", " cell = LayerNormalizedLSTMCell(state_size)\n", " else:\n", " cell = tf.nn.rnn_cell.BasicRNNCell(state_size)\n", "\n", " if build_with_dropout:\n", " cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=dropout)\n", "\n", " if cell_type == 'LSTM' or cell_type == 'LN_LSTM':\n", " cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)\n", " else:\n", " cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers)\n", "\n", " if build_with_dropout:\n", " cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=dropout)\n", "\n", " init_state = cell.zero_state(batch_size, tf.float32)\n", " rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)\n", "\n", " with tf.variable_scope('softmax'):\n", " W = tf.get_variable('W', [state_size, num_classes])\n", " b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))\n", "\n", " #reshape rnn_outputs and y\n", " rnn_outputs = tf.reshape(rnn_outputs, [-1, 
{ "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Average training loss for Epoch 0 : 8.88994855237\n", "Average training loss for Epoch 1 : 7.88880490379\n", "Average training loss for Epoch 2 : 7.25579271141\n", "Average training loss for Epoch 3 : 6.89169951012\n", "Average training loss for Epoch 4 : 6.71888943538\n", "Average training loss for Epoch 5 : 6.64450350861\n", "Average training loss for Epoch 6 : 6.61173795337\n", "Average training loss for Epoch 7 : 6.59556533076\n", "Average training loss for Epoch 8 : 6.58568808199\n", "Average training loss for Epoch 9 : 6.57976809601\n", "It took 1533.7903912067413 seconds to train for 10 epochs.\n", "The average loss on the final epoch was: 6.57976809601\n" ] } ], "source": [ "g = build_graph(cell_type='LN_LSTM', num_steps=80)\n", "t = time.time()\n", "losses = train_network(g, 10, num_steps=80, save=\"tf_saves/LN_LSTM_1_epochs\")\n", "print(\"It took\", time.time() - t, \"seconds to train for 10 epochs.\")\n", "print(\"The average loss on the final epoch was:\", losses[-1])" ] },
{ "cell_type": "code", "execution_count": 59, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def generate_characters(g, checkpoint, num_chars, state=None, prompt=['2007scape'], pick_top_chars=None):\n", "    \"\"\" Restores the trained model from checkpoint, runs the prompt sequence\n", "    through it to build up recurrent state, then samples num_chars further\n", "    tokens, optionally restricted to the pick_top_chars most likely ones. \"\"\"\n", "\n", "    with tf.Session() as sess:\n", "        sess.run(tf.global_variables_initializer())\n", "        g['saver'].restore(sess, checkpoint)\n", "\n", "        chars = []\n", "        # advance the state over all but the last prompt token; the\n", "        # generation loop below feeds that last token in first\n", "        for seed_letter in prompt[:-1]:\n", "            current_char = vocab_to_idx[seed_letter]\n", "            chars.append(idx_to_vocab[current_char])\n", "            if state is not None:\n", "                feed_dict = {g['x']: [[current_char]], g['init_state']: state}\n", "            else:\n", "                feed_dict = {g['x']: [[current_char]]}\n", "            _, state = sess.run([g['preds'], g['final_state']], feed_dict)\n", "\n", "        current_char = vocab_to_idx[prompt[-1]]\n", "        chars.append(idx_to_vocab[current_char])\n", "\n", "        for i in range(num_chars):\n", "            if state is not None:\n", "                feed_dict = {g['x']: [[current_char]], g['init_state']: state}\n", "            else:\n", "                feed_dict = {g['x']: [[current_char]]}\n", "\n", "            preds, state = sess.run([g['preds'], g['final_state']], feed_dict)\n", "\n", "            if pick_top_chars is not None:\n", "                # zero out all but the top-k probabilities, then renormalize\n", "                p = np.squeeze(preds)\n", "                p[np.argsort(p)[:-pick_top_chars]] = 0\n", "                p = p / np.sum(p)\n", "                current_char = np.random.choice(vocab_size, 1, p=p)[0]\n", "            else:\n", "                current_char = np.random.choice(vocab_size, 1, p=np.squeeze(preds))[0]\n", "\n", "            chars.append(idx_to_vocab[current_char])\n", "\n", "    return chars" ] },
{ "cell_type": "code", "execution_count": 63, "metadata": { "collapsed": true }, "outputs": [], "source": [ "with open('test_reddit_data.json', 'r') as data_file:\n", "    test_reddit_data = json.load(data_file)" ] },
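{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "\"\"\"\n", "Optional illustration (added sketch) of the top-k sampling trick used in\n", "generate_characters above: zero out all but the k largest probabilities, then\n", "renormalize so the survivors again sum to one. Pure numpy, no model required.\n", "\"\"\"\n", "k = 2\n", "p = np.array([0.1, 0.5, 0.15, 0.25])\n", "p[np.argsort(p)[:-k]] = 0   # keep only the k most likely entries\n", "p = p / np.sum(p)\n", "print(p)  # -> [0., 0.667, 0., 0.333] (approximately)" ] },
{ "cell_type": "code", "execution_count": 77, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df = 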
pd.DataFrame(test_reddit_data,columns=['user','subreddit','submission','utc_stamp','rnd_words'])\n", "df['utc_stamp'] = pd.to_datetime(df['utc_stamp'],unit='s')\n", "df.sort_values(by=['user','utc_stamp'], ascending=True, inplace=True)\n", "sub_seqs = []\n", "current_user = ''\n", "current_sub = ''\n", "for rw in df.iterrows():\n", " sub = rw[1]['subreddit']\n", " if sub in vocab:\n", " if current_user == '':\n", " usr_seq = [sub]\n", " elif rw[1]['user'] != current_user:\n", " sub_seqs.append(usr_seq)\n", " usr_seq = [sub]\n", " elif sub != current_sub:\n", " usr_seq.append(sub) \n", " current_user = rw[1]['user']\n", " current_sub = sub" ] }, { "cell_type": "code", "execution_count": 78, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['DestinyTheGame',\n", " 'PS4',\n", " 'DestinyTheGame',\n", " 'canada',\n", " 'DestinyTheGame',\n", " 'Calgary',\n", " 'videos',\n", " 'todayilearned',\n", " 'misleadingthumbnails',\n", " 'energy',\n", " 'netflix',\n", " 'energy',\n", " 'movies',\n", " 'energy',\n", " 'videos',\n", " 'nottheonion',\n", " 'DestinyTheGame',\n", " 'Nikon',\n", " 'DestinyTheGame',\n", " 'Wellthatsucks',\n", " 'DestinyTheGame',\n", " 'worldnews',\n", " 'OutOfTheLoop',\n", " 'LifeProTips',\n", " 'AdviceAnimals',\n", " 'interestingasfuck',\n", " 'politics',\n", " 'worldnews',\n", " 'pics',\n", " 'WTF',\n", " 'Edmonton',\n", " 'gaming',\n", " 'news',\n", " 'Calgary',\n", " 'technology',\n", " 'Music',\n", " 'terriblefacebookmemes',\n", " 'photography',\n", " 'WTF',\n", " 'LifeProTips',\n", " 'Futurology',\n", " 'AdviceAnimals',\n", " 'canada',\n", " 'OopsDidntMeanTo',\n", " 'canada',\n", " 'Construction',\n", " 'food',\n", " 'technology',\n", " 'pics',\n", " 'worldnews',\n", " 'Calgary',\n", " 'Nikon',\n", " 'DestinyTheGame',\n", " 'Calgary',\n", " 'worldnews',\n", " 'DestinyTheGame',\n", " 'food',\n", " 'todayilearned',\n", " 'alberta',\n", " 'australia',\n", " 'terriblefacebookmemes',\n", " 'canada',\n", " 'news',\n", " 'nottheonion',\n", " 'DestinyTheGame',\n", " 'energy',\n", " 'politics',\n", " 'DestinyTheGame',\n", " 'AdviceAnimals',\n", " 'DestinyTheGame',\n", " 'videos',\n", " 'Unexpected',\n", " 'alberta',\n", " 'canada',\n", " 'AdviceAnimals',\n", " 'DestinyTheGame',\n", " 'canada',\n", " 'alberta',\n", " 'interestingasfuck',\n", " 'DestinyTheGame',\n", " 'worldnews',\n", " 'canada',\n", " 'pics',\n", " 'socialism',\n", " 'canada',\n", " 'PersonalFinanceCanada',\n", " 'todayilearned',\n", " 'AdviceAnimals',\n", " 'socialism',\n", " 'AdviceAnimals',\n", " 'teslamotors',\n", " 'Calgary',\n", " 'movies',\n", " 'alberta',\n", " 'Calgary',\n", " 'AskReddit',\n", " 'thatHappened',\n", " 'canada',\n", " 'worldnews',\n", " 'terriblefacebookmemes',\n", " 'quityourbullshit',\n", " 'worldnews',\n", " 'alberta',\n", " 'canada',\n", " 'news',\n", " 'AskReddit',\n", " 'movies',\n", " 'DestinyTheGame',\n", " 'worldnews',\n", " 'DestinyTheGame',\n", " 'AdviceAnimals',\n", " 'canada',\n", " 'Wellthatsucks',\n", " 'Futurology',\n", " 'canada',\n", " 'AdviceAnimals',\n", " 'UpliftingNews',\n", " 'AdviceAnimals',\n", " 'DestinyTheGame',\n", " 'politics',\n", " 'gaming',\n", " 'videos',\n", " 'DestinyTheGame',\n", " 'AskReddit',\n", " 'interestingasfuck',\n", " 'worldnews',\n", " 'AdviceAnimals',\n", " 'pics',\n", " 'worldnews',\n", " 'canada',\n", " 'AdviceAnimals',\n", " 'terriblefacebookmemes',\n", " 'AdviceAnimals',\n", " 'aww',\n", " 'Calgary',\n", " 'Construction',\n", " 'pics',\n", " 'canada',\n", " 'videos',\n", " 'canada',\n", " 'worldnews',\n", " 
'AdviceAnimals',\n", " 'atheism',\n", " 'gaming',\n", " 'canada',\n", " 'news',\n", " 'Wellthatsucks',\n", " 'DestinyTheGame',\n", " 'pics',\n", " 'DestinyTheGame',\n", " 'canada',\n", " 'CatGifs',\n", " 'worldnews',\n", " 'canada',\n", " 'AskReddit',\n", " 'worldnews',\n", " 'videos',\n", " 'canada',\n", " 'alberta',\n", " 'DestinyTheGame',\n", " 'Edmonton',\n", " 'Calgary',\n", " 'canada',\n", " 'Calgary',\n", " 'Futurology',\n", " 'Frugal',\n", " 'alberta',\n", " 'AdviceAnimals',\n", " 'Frugal',\n", " 'DestinyTheGame',\n", " 'alberta',\n", " 'news',\n", " 'pics',\n", " 'thatHappened',\n", " 'todayilearned',\n", " 'alberta',\n", " 'TechNewsToday',\n", " 'Calgary',\n", " 'woahdude',\n", " 'terriblefacebookmemes',\n", " 'canada',\n", " 'Edmonton',\n", " 'AdviceAnimals',\n", " 'canada',\n", " 'Futurology',\n", " 'alberta',\n", " 'gaming',\n", " 'worldnews',\n", " 'DestinyTheGame',\n", " 'Calgary',\n", " 'AskReddit',\n", " 'DestinyTheGame',\n", " 'canada',\n", " 'alberta',\n", " 'DestinyTheGame',\n", " 'Futurology',\n", " 'dankchristianmemes',\n", " 'DestinyTheGame',\n", " 'pics',\n", " 'alberta',\n", " 'DestinyTheGame',\n", " 'videos',\n", " 'DestinyTheGame',\n", " 'terriblefacebookmemes',\n", " 'todayilearned',\n", " 'alberta',\n", " 'LifeProTips',\n", " 'DestinyTheGame',\n", " 'todayilearned',\n", " 'AdviceAnimals',\n", " 'technology',\n", " 'alberta',\n", " 'Futurology',\n", " 'pics',\n", " 'canada',\n", " 'PoliticalDiscussion',\n", " 'DestinyTheGame',\n", " 'alberta',\n", " 'Wellthatsucks',\n", " 'technology',\n", " 'DestinyTheGame',\n", " 'WTF',\n", " 'AskReddit',\n", " 'photography',\n", " 'xbox',\n", " 'Music',\n", " 'HighQualityGifs',\n", " 'canada',\n", " 'energy',\n", " 'Futurology',\n", " 'pics',\n", " 'Edmonton',\n", " 'AdviceAnimals',\n", " 'Edmonton',\n", " 'DestinyTheGame',\n", " 'thedivision',\n", " 'thatHappened',\n", " 'canada',\n", " 'Futurology',\n", " 'thedivision',\n", " 'alberta',\n", " 'news',\n", " 'alberta',\n", " 'movies',\n", " 'Swimming',\n", " 'energy',\n", " 'alberta',\n", " 'AskReddit',\n", " 'worldnews',\n", " 'socialism',\n", " 'thedivision',\n", " 'todayilearned',\n", " 'thedivision',\n", " 'todayilearned',\n", " 'canada',\n", " 'Futurology',\n", " 'pics',\n", " 'WTF',\n", " 'Games',\n", " 'thedivision',\n", " 'worldnews',\n", " 'DestinyTheGame',\n", " 'alberta',\n", " 'AdviceAnimals',\n", " 'news',\n", " 'interestingasfuck',\n", " 'Calgary',\n", " 'explainlikeimfive',\n", " 'DestinyTheGame',\n", " 'movies',\n", " 'Nikon',\n", " 'EarthPorn',\n", " 'IdiotsFightingThings',\n", " 'AnimalsBeingJerks',\n", " 'AdviceAnimals',\n", " 'news',\n", " 'Calgary',\n", " 'worldnews',\n", " 'australia',\n", " 'tech',\n", " 'news',\n", " 'netflix',\n", " 'technology',\n", " 'movies',\n", " 'AdviceAnimals',\n", " 'Edmonton',\n", " 'technology',\n", " 'worldnews',\n", " 'pics',\n", " 'AskReddit',\n", " 'Games',\n", " 'canada',\n", " 'Wellthatsucks',\n", " 'thedivision',\n", " 'Calgary',\n", " 'Swimming',\n", " 'Futurology',\n", " 'nottheonion',\n", " 'worldnews',\n", " 'interestingasfuck',\n", " 'Futurology',\n", " 'todayilearned',\n", " 'worldnews',\n", " 'thedivision',\n", " 'technology',\n", " 'Futurology',\n", " 'tech',\n", " 'AdviceAnimals',\n", " 'Calgary',\n", " 'canada',\n", " 'asoiaf',\n", " 'teslamotors',\n", " 'Edmonton',\n", " 'WTF',\n", " 'thatHappened',\n", " 'terriblefacebookmemes',\n", " 'Switzerland',\n", " 'AskReddit',\n", " 'todayilearned',\n", " 'nottheonion',\n", " 'politics',\n", " 'AskReddit',\n", " 'science',\n", " 'terriblefacebookmemes',\n", " 
'canada',\n", " 'alberta',\n", " 'gameofthrones',\n", " 'worldnews',\n", " 'politics',\n", " 'teslamotors',\n", " 'Edmonton',\n", " 'alberta',\n", " 'AdviceAnimals',\n", " 'alberta',\n", " 'Futurology',\n", " 'alberta',\n", " 'nottheonion',\n", " 'pics',\n", " 'energy',\n", " 'worldnews',\n", " 'TechNewsToday',\n", " 'AskReddit',\n", " 'teslamotors',\n", " 'Edmonton',\n", " 'BuyItForLife',\n", " 'australia',\n", " 'AdviceAnimals',\n", " 'canada',\n", " 'Calgary',\n", " 'Futurology',\n", " 'alberta',\n", " 'terriblefacebookmemes',\n", " 'worldnews',\n", " 'asoiaf',\n", " 'Futurology',\n", " 'alberta',\n", " 'politics',\n", " 'alberta',\n", " 'Futurology',\n", " 'Calgary',\n", " 'Futurology',\n", " 'OopsDidntMeanTo',\n", " 'gaming',\n", " 'alberta',\n", " 'news',\n", " 'Edmonton',\n", " 'canada',\n", " 'Calgary',\n", " 'alberta',\n", " 'canada',\n", " 'oneplus',\n", " 'AskReddit',\n", " 'australia',\n", " 'Calgary',\n", " 'AskReddit',\n", " 'ArtisanVideos',\n", " 'Calgary',\n", " 'AdviceAnimals',\n", " 'news',\n", " 'worldnews',\n", " 'Art',\n", " 'food',\n", " 'AdviceAnimals',\n", " 'canada',\n", " 'Art',\n", " 'AdviceAnimals',\n", " 'videos',\n", " 'gameofthrones',\n", " 'TechNewsToday',\n", " 'gameofthrones',\n", " 'canada',\n", " 'explainlikeimfive',\n", " 'pics',\n", " 'asoiaf',\n", " 'canada',\n", " 'worldnews',\n", " 'Wellthatsucks',\n", " 'technology',\n", " 'canada',\n", " 'worldnews',\n", " 'news',\n", " 'AskReddit',\n", " 'worldnews',\n", " 'bizarrebuildings',\n", " 'thatHappened',\n", " 'photography',\n", " 'politics',\n", " 'AskReddit',\n", " 'h3h3productions',\n", " 'AdviceAnimals',\n", " 'gaming',\n", " 'pokemongo',\n", " 'oneplus',\n", " 'news',\n", " 'Edmonton',\n", " 'pokemongo',\n", " 'technology',\n", " 'alberta',\n", " 'canada',\n", " 'alberta',\n", " 'Edmonton',\n", " 'AdviceAnimals',\n", " 'alberta',\n", " 'Futurology',\n", " 'politics',\n", " 'AdviceAnimals',\n", " 'worldnews',\n", " 'gaming',\n", " 'pokemongo',\n", " 'terriblefacebookmemes',\n", " 'pokemongo',\n", " 'alberta',\n", " 'pokemongo',\n", " 'SolarCity',\n", " 'pokemongo',\n", " 'canada',\n", " 'alberta',\n", " 'pics',\n", " 'EarthPorn',\n", " 'canada',\n", " 'history',\n", " 'Calgary',\n", " 'canada',\n", " 'worldnews',\n", " 'videos',\n", " 'Edmonton',\n", " 'alberta',\n", " 'politics',\n", " 'alberta',\n", " 'Swimming',\n", " 'Games',\n", " 'thatHappened',\n", " 'gaming',\n", " 'NoMansSkyTheGame',\n", " 'Nikon',\n", " 'gaming',\n", " 'Nikon',\n", " 'NoMansSkyTheGame',\n", " 'worldnews',\n", " 'AdviceAnimals',\n", " 'alberta',\n", " 'Edmonton',\n", " 'gaming',\n", " 'pics',\n", " 'worldnews',\n", " 'Swimming',\n", " 'DestinyTheGame',\n", " 'pics',\n", " 'canada',\n", " 'australia',\n", " 'Edmonton',\n", " 'canada',\n", " 'worldnews',\n", " 'science',\n", " 'todayilearned',\n", " 'news',\n", " 'thatHappened',\n", " 'pics',\n", " 'Edmonton',\n", " 'Construction',\n", " 'worldnews',\n", " 'Futurology',\n", " 'PoliticalDiscussion',\n", " 'worldnews',\n", " 'AdviceAnimals',\n", " 'canada',\n", " 'AdviceAnimals',\n", " 'canada',\n", " 'news',\n", " 'EarthPorn',\n", " 'OopsDidntMeanTo',\n", " 'wallpapers',\n", " 'AskReddit',\n", " 'gaming',\n", " 'Futurology',\n", " 'EarthPorn',\n", " 'terriblefacebookmemes',\n", " 'thatHappened',\n", " 'canada',\n", " 'AskReddit',\n", " 'canada',\n", " 'WTF',\n", " 'news',\n", " 'canada',\n", " 'LifeProTips',\n", " 'nottheonion',\n", " 'AdviceAnimals',\n", " 'todayilearned',\n", " 'AdviceAnimals',\n", " 'alberta',\n", " 'AdviceAnimals',\n", " 'NoMansSkyTheGame',\n", " 
'HighQualityGifs',\n", " 'AskReddit',\n", " 'media_criticism',\n", " 'alberta',\n", " 'technology',\n", " 'alberta',\n", " 'politics',\n", " 'photography',\n", " 'canada',\n", " 'Edmonton',\n", " 'NoMansSkyTheGame',\n", " 'Music',\n", " 'videos',\n", " 'HailCorporate',\n", " 'politics',\n", " 'Coffee',\n", " 'technology',\n", " 'Edmonton',\n", " 'todayilearned',\n", " 'Edmonton',\n", " 'videos',\n", " 'news',\n", " 'politics',\n", " 'worldnews',\n", " 'AskReddit',\n", " 'gaming',\n", " 'thingsforants',\n", " 'canada',\n", " 'news',\n", " 'todayilearned',\n", " 'energy',\n", " 'triplej',\n", " 'AdviceAnimals',\n", " 'wallpapers',\n", " 'Edmonton',\n", " 'pics',\n", " 'worldnews',\n", " 'DestinyTheGame',\n", " 'teslamotors',\n", " 'pics',\n", " 'tech',\n", " 'gaming',\n", " 'Nikon',\n", " 'spacex',\n", " 'DestinyTheGame',\n", " 'canada',\n", " 'Calgary',\n", " 'netflix',\n", " 'Edmonton',\n", " 'Frugal',\n", " 'canada',\n", " 'gaming',\n", " 'canada',\n", " 'interestingasfuck',\n", " 'terriblefacebookmemes',\n", " 'AdviceAnimals',\n", " 'alberta',\n", " 'politics',\n", " 'oneplus',\n", " 'alberta',\n", " 'pcmasterrace',\n", " 'alberta',\n", " 'Calgary',\n", " 'space',\n", " 'canada',\n", " 'AskReddit',\n", " 'pokemongo',\n", " 'DestinyTheGame',\n", " 'australia',\n", " 'Edmonton',\n", " 'news',\n", " 'aww',\n", " 'UpliftingNews',\n", " 'canada',\n", " 'AskReddit',\n", " 'nasa',\n", " 'todayilearned',\n", " 'Futurology',\n", " 'Music',\n", " 'canada',\n", " 'DestinyTheGame',\n", " 'Unexpected',\n", " 'politics',\n", " 'DestinyTheGame',\n", " 'AdviceAnimals',\n", " 'Edmonton',\n", " 'Music',\n", " 'oneplus',\n", " 'WTF',\n", " 'Futurology',\n", " 'alberta',\n", " 'teslamotors',\n", " 'todayilearned',\n", " 'alberta',\n", " 'quityourbullshit',\n", " 'science',\n", " 'news',\n", " 'AdviceAnimals',\n", " 'alberta',\n", " 'DestinyTheGame',\n", " 'canada',\n", " 'worldnews',\n", " 'Edmonton',\n", " 'canada',\n", " 'politics',\n", " 'terriblefacebookmemes',\n", " 'Calgary',\n", " 'politics',\n", " 'AdviceAnimals',\n", " 'pics',\n", " 'AdviceAnimals',\n", " 'thatHappened',\n", " 'AdviceAnimals',\n", " 'MakingaMurderer',\n", " 'canada',\n", " 'pics',\n", " 'netflix',\n", " 'pics',\n", " 'triplej',\n", " 'thatHappened',\n", " 'PerfectTiming',\n", " 'TechNewsToday',\n", " 'PersonalFinanceCanada',\n", " 'technology',\n", " 'WTF',\n", " 'UpliftingNews',\n", " 'netflix',\n", " 'terriblefacebookmemes',\n", " 'netflix',\n", " 'news',\n", " 'AdviceAnimals',\n", " 'Music',\n", " 'LifeProTips',\n", " 'teslamotors',\n", " 'worldnews',\n", " 'politics',\n", " 'NoMansSkyTheGame',\n", " 'canada',\n", " 'OutOfTheLoop',\n", " 'AdviceAnimals',\n", " 'Art',\n", " 'AdviceAnimals',\n", " 'Edmonton',\n", " 'Futurology',\n", " 'politics',\n", " 'AskReddit',\n", " 'NoMansSkyTheGame',\n", " 'media_criticism',\n", " 'todayilearned',\n", " 'buildapcsales',\n", " 'pcmasterrace',\n", " 'terriblefacebookmemes',\n", " 'canada',\n", " 'pics',\n", " 'food',\n", " 'asoiaf',\n", " 'HighQualityGifs',\n", " 'alberta',\n", " 'CrappyDesign',\n", " 'pcmasterrace',\n", " 'quityourbullshit',\n", " 'canada',\n", " 'CrappyDesign',\n", " 'buildapcsales',\n", " 'Edmonton',\n", " 'Music',\n", " 'AskReddit',\n", " 'politics',\n", " 'Futurology',\n", " 'NoMansSkyTheGame',\n", " 'terriblefacebookmemes',\n", " 'quityourbullshit',\n", " 'Swimming',\n", " 'alberta',\n", " 'OopsDidntMeanTo',\n", " 'Futurology',\n", " 'Edmonton',\n", " 'pokemongo',\n", " 'technology',\n", " 'worldnews',\n", " 'politics',\n", " 'videos',\n", " 'terriblefacebookmemes',\n", 
" 'LifeProTips',\n", " 'nasa',\n", " 'todayilearned',\n", " 'alberta',\n", " 'pokemongo',\n", " 'pcmasterrace',\n", " 'pokemongo',\n", " 'news',\n", " 'pokemongo',\n", " 'canada',\n", " 'AskReddit',\n", " 'netflix',\n", " 'technology',\n", " 'ThanksObama',\n", " 'politics',\n", " 'alberta',\n", " 'canada',\n", " 'videos',\n", " 'EarthPorn',\n", " 'netflix',\n", " 'interestingasfuck',\n", " 'politics',\n", " 'canada',\n", " 'interestingasfuck',\n", " 'Calgary',\n", " 'pokemongo',\n", " 'AdviceAnimals',\n", " 'worldnews',\n", " 'blackmirror',\n", " 'politics',\n", " 'BuyItForLife',\n", " 'politics',\n", " 'Calgary',\n", " 'politics',\n", " 'AskReddit',\n", " 'AdviceAnimals',\n", " 'videos',\n", " 'h3h3productions',\n", " 'AdviceAnimals',\n", " 'h3h3productions',\n", " 'TechNewsToday',\n", " 'technology',\n", " 'canada',\n", " 'news',\n", " 'aww',\n", " 'pics',\n", " 'worldnews',\n", " 'Calgary',\n", " 'alberta',\n", " 'canada',\n", " 'Swimming',\n", " 'AskReddit',\n", " 'todayilearned',\n", " 'Edmonton',\n", " 'triplej',\n", " 'worldnews',\n", " 'todayilearned',\n", " 'news',\n", " 'canada',\n", " 'pics',\n", " 'AdviceAnimals',\n", " 'UpliftingNews',\n", " 'wildlifephotography',\n", " 'news',\n", " 'videos',\n", " 'alberta',\n", " 'AskReddit',\n", " 'Unexpected',\n", " 'AdviceAnimals',\n", " 'nottheonion',\n", " 'OutOfTheLoop',\n", " 'AdviceAnimals',\n", " 'worldnews',\n", " 'nasa',\n", " 'AdviceAnimals',\n", " 'alberta',\n", " 'tech',\n", " 'alberta',\n", " 'Futurology',\n", " 'Edmonton',\n", " 'news',\n", " 'LateStageCapitalism',\n", " 'alberta',\n", " 'pics',\n", " 'Futurology',\n", " 'AnimalsBeingBros',\n", " 'news',\n", " 'Calgary',\n", " 'pics',\n", " 'Edmonton',\n", " 'thatHappened',\n", " 'tech']" ] }, "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sub_seqs[1]" ] }, { "cell_type": "code", "execution_count": 87, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "None\n" ] } ], "source": [ "g = build_graph(cell_type='LN_LSTM', num_steps=1, batch_size=1)\n", "results = generate_characters(g, \"tf_saves/LN_LSTM_1_epochs\", 20, prompt=sub_seqs[1])" ] }, { "cell_type": "code", "execution_count": 88, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(set(sub_seqs[1]) & set(results[-20:]))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }