{ "cells": [ { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [], "source": [ "\"\"\"\n", "Imports\n", "\"\"\"\n", "import numpy as np\n", "import json\n", "import pandas as pd\n", "import tensorflow as tf\n", "%matplotlib inline\n", "import matplotlib.pyplot as plt\n", "import time\n", "import os\n", "import urllib.request\n", "from tensorflow.models.rnn.ptb import reader\n", "os.chdir(\"C:/Users/macle/Desktop/UPC Masters/Semester 2/CI/Final Project\")" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": true }, "outputs": [], "source": [ "with open('reddit_data.json','r') as data_file: \n", " reddit_data = json.load(data_file)" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df = pd.DataFrame(reddit_data,columns=['user','subreddit','utc_stamp'])\n", "df['utc_stamp'] = pd.to_datetime(df['utc_stamp'],unit='s')\n", "sub_list = list(df.groupby('subreddit')['subreddit'].nunique().keys())\n", "df.sort_values(by=['user','utc_stamp'], ascending=True, inplace=True)\n", "sub_seqs = []\n", "current_sub = ''\n", "for rw in df.iterrows():\n", " sub = rw[1]['subreddit']\n", " if sub != current_sub:\n", " sub_seqs.append(sub) \n", " current_sub = sub" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false }, "outputs": [], "source": [ "\"\"\"\n", "Load and process data, utility functions\n", "\"\"\"\n", "\n", "raw_data = sub_seqs\n", "\n", "vocab = set(raw_data)\n", "vocab_size = len(vocab)\n", "idx_to_vocab = dict(enumerate(vocab))\n", "vocab_to_idx = dict(zip(idx_to_vocab.values(), idx_to_vocab.keys()))\n", "\n", "data = [vocab_to_idx[c] for c in raw_data]\n", "del raw_data\n", "\n", "def gen_epochs(n, num_steps, batch_size):\n", " for i in range(n):\n", " yield reader.ptb_iterator(data, batch_size, num_steps)\n", "\n", "def reset_graph():\n", " if 'sess' in globals() and sess:\n", " sess.close()\n", " tf.reset_default_graph()\n", "\n", "def train_network(g, num_epochs, num_steps = 200, batch_size = 32, verbose = True, save=False):\n", " tf.set_random_seed(2345)\n", " with tf.Session() as sess:\n", " print(sess.run(tf.global_variables_initializer()))\n", " training_losses = []\n", " for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps, batch_size)):\n", " training_loss = 0\n", " steps = 0\n", " training_state = None\n", " for X, Y in epoch:\n", " steps += 1\n", "\n", " feed_dict={g['x']: X, g['y']: Y}\n", " if training_state is not None:\n", " feed_dict[g['init_state']] = training_state\n", " training_loss_, training_state, _ = sess.run([g['total_loss'],\n", " g['final_state'],\n", " g['train_step']],\n", " feed_dict)\n", " training_loss += training_loss_\n", " if verbose:\n", " print(\"Average training loss for Epoch\", idx, \":\", training_loss/steps)\n", " training_losses.append(training_loss/steps)\n", "\n", " if isinstance(save, str):\n", " g['saver'].save(sess, save)\n", "\n", " return training_losses" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": true }, "outputs": [], "source": [ "class GRUCell(tf.nn.rnn_cell.RNNCell):\n", " \"\"\"Gated Recurrent Unit cell (cf. 
{ "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": true }, "outputs": [], "source": [ "class GRUCell(tf.nn.rnn_cell.RNNCell):\n", "    \"\"\"Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078).\"\"\"\n", "\n", "    def __init__(self, num_units):\n", "        self._num_units = num_units\n", "\n", "    @property\n", "    def state_size(self):\n", "        return self._num_units\n", "\n", "    @property\n", "    def output_size(self):\n", "        return self._num_units\n", "\n", "    def __call__(self, inputs, state, scope=None):\n", "        with tf.variable_scope(scope or type(self).__name__):  # \"GRUCell\"\n", "            with tf.variable_scope(\"Gates\"):  # Reset gate and update gate.\n", "                # We start with bias of 1.0 to not reset and not update.\n", "                ru = tf.nn.rnn_cell._linear([inputs, state],\n", "                                            2 * self._num_units, True, 1.0)\n", "                ru = tf.nn.sigmoid(ru)\n", "                r, u = tf.split(1, 2, ru)\n", "            with tf.variable_scope(\"Candidate\"):\n", "                c = tf.nn.tanh(tf.nn.rnn_cell._linear([inputs, r * state],\n", "                                                      self._num_units, True))\n", "        new_h = u * state + (1 - u) * c\n", "        return new_h, new_h" ] },
{ "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": true }, "outputs": [], "source": [ "class CustomCell(tf.nn.rnn_cell.RNNCell):\n", "    \"\"\"GRU variant: the candidate's input transform is a softmax-weighted\n", "    mixture of num_weights separate input matrices, with the mixture weights\n", "    conditioned on the current input and state.\"\"\"\n", "\n", "    def __init__(self, num_units, num_weights):\n", "        self._num_units = num_units\n", "        self._num_weights = num_weights\n", "\n", "    @property\n", "    def state_size(self):\n", "        return self._num_units\n", "\n", "    @property\n", "    def output_size(self):\n", "        return self._num_units\n", "\n", "    def __call__(self, inputs, state, scope=None):\n", "        with tf.variable_scope(scope or type(self).__name__):  # \"CustomCell\"\n", "            with tf.variable_scope(\"Gates\"):  # Reset gate and update gate.\n", "                # We start with bias of 1.0 to not reset and not update.\n", "                ru = tf.nn.rnn_cell._linear([inputs, state],\n", "                                            2 * self._num_units, True, 1.0)\n", "                ru = tf.nn.sigmoid(ru)\n", "                r, u = tf.split(1, 2, ru)\n", "            with tf.variable_scope(\"Candidate\"):\n", "                # input- and state-dependent mixture weights over the\n", "                # num_weights candidate input matrices\n", "                lambdas = tf.nn.rnn_cell._linear([inputs, state], self._num_weights, True)\n", "                lambdas = tf.split(1, self._num_weights, tf.nn.softmax(lambdas))\n", "\n", "                Ws = tf.get_variable(\"Ws\",\n", "                        shape=[self._num_weights, inputs.get_shape()[1], self._num_units])\n", "                Ws = [tf.squeeze(i) for i in tf.split(0, self._num_weights, Ws)]\n", "\n", "                candidate_inputs = []\n", "\n", "                for idx, W in enumerate(Ws):\n", "                    candidate_inputs.append(tf.matmul(inputs, W) * lambdas[idx])\n", "\n", "                Wx = tf.add_n(candidate_inputs)\n", "\n", "                c = tf.nn.tanh(Wx + tf.nn.rnn_cell._linear([r * state],\n", "                                                           self._num_units, True, scope=\"second\"))\n", "        new_h = u * state + (1 - u) * c\n", "        return new_h, new_h" ] },
{ "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def ln(tensor, scope=None, epsilon=1e-5):\n", "    \"\"\" Layer normalizes a 2D tensor along its second axis \"\"\"\n", "    assert(len(tensor.get_shape()) == 2)\n", "    m, v = tf.nn.moments(tensor, [1], keep_dims=True)\n", "    if not isinstance(scope, str):\n", "        scope = ''\n", "    with tf.variable_scope(scope + 'layer_norm'):\n", "        scale = tf.get_variable('scale',\n", "                                shape=[tensor.get_shape()[1]],\n", "                                initializer=tf.constant_initializer(1))\n", "        shift = tf.get_variable('shift',\n", "                                shape=[tensor.get_shape()[1]],\n", "                                initializer=tf.constant_initializer(0))\n", "    LN_initial = (tensor - m) / tf.sqrt(v + epsilon)\n", "\n", "    return LN_initial * scale + shift" ] },
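{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "\"\"\"\n", "Optional numeric check of ln() (added sketch): layer-normalize a constant 2-D\n", "tensor and verify each row comes out with roughly zero mean and unit variance.\n", "scale starts at 1 and shift at 0, so ln() is initially pure normalization.\n", "\"\"\"\n", "reset_graph()\n", "toy = tf.constant([[1.0, 2.0, 3.0, 4.0], [10.0, 20.0, 30.0, 40.0]])\n", "normed = ln(toy, scope='demo/')\n", "with tf.Session() as sess:\n", "    sess.run(tf.global_variables_initializer())\n", "    out = sess.run(normed)\n", "print(out.mean(axis=1))  # ~[0. 0.]\n", "print(out.var(axis=1))   # ~[1. 1.]" ] },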
\"\"\"\n", " Adapted from TF's BasicLSTMCell to use Layer Normalization.\n", " Note that state_is_tuple is always True.\n", " \"\"\"\n", "\n", " def __init__(self, num_units, forget_bias=1.0, activation=tf.nn.tanh):\n", " self._num_units = num_units\n", " self._forget_bias = forget_bias\n", " self._activation = activation\n", "\n", " @property\n", " def state_size(self):\n", " return tf.nn.rnn_cell.LSTMStateTuple(self._num_units, self._num_units)\n", "\n", " @property\n", " def output_size(self):\n", " return self._num_units\n", "\n", " def __call__(self, inputs, state, scope=None):\n", " \"\"\"Long short-term memory cell (LSTM).\"\"\"\n", " with tf.variable_scope(scope or type(self).__name__):\n", " c, h = state\n", "\n", " # change bias argument to False since LN will add bias via shift\n", " concat = tf.nn.rnn_cell._linear([inputs, h], 4 * self._num_units, False)\n", "\n", " i, j, f, o = tf.split(1, 4, concat)\n", "\n", " # add layer normalization to each gate\n", " i = ln(i, scope = 'i/')\n", " j = ln(j, scope = 'j/')\n", " f = ln(f, scope = 'f/')\n", " o = ln(o, scope = 'o/')\n", "\n", " new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) *\n", " self._activation(j))\n", "\n", " # add layer_normalization in calculation of new hidden state\n", " new_h = self._activation(ln(new_c, scope = 'new_h/')) * tf.nn.sigmoid(o)\n", " new_state = tf.nn.rnn_cell.LSTMStateTuple(new_c, new_h)\n", "\n", " return new_h, new_state" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def build_graph(\n", " cell_type = None,\n", " num_weights_for_custom_cell = 5,\n", " state_size = 100,\n", " num_classes = vocab_size,\n", " batch_size = 32,\n", " num_steps = 200,\n", " num_layers = 3,\n", " build_with_dropout=False,\n", " learning_rate = 1e-4):\n", "\n", " reset_graph()\n", "\n", " x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')\n", " y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')\n", "\n", " dropout = tf.constant(1.0)\n", "\n", " embeddings = tf.get_variable('embedding_matrix', [num_classes, state_size])\n", "\n", " rnn_inputs = tf.nn.embedding_lookup(embeddings, x)\n", "\n", " if cell_type == 'Custom':\n", " cell = CustomCell(state_size, num_weights_for_custom_cell)\n", " elif cell_type == 'GRU':\n", " cell = tf.nn.rnn_cell.GRUCell(state_size)\n", " elif cell_type == 'LSTM':\n", " cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)\n", " elif cell_type == 'LN_LSTM':\n", " cell = LayerNormalizedLSTMCell(state_size)\n", " else:\n", " cell = tf.nn.rnn_cell.BasicRNNCell(state_size)\n", "\n", " if build_with_dropout:\n", " cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=dropout)\n", "\n", " if cell_type == 'LSTM' or cell_type == 'LN_LSTM':\n", " cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)\n", " else:\n", " cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers)\n", "\n", " if build_with_dropout:\n", " cell = tf.nn.rnn_cell.DropoutWrapper(cell, output_keep_prob=dropout)\n", "\n", " init_state = cell.zero_state(batch_size, tf.float32)\n", " rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)\n", "\n", " with tf.variable_scope('softmax'):\n", " W = tf.get_variable('W', [state_size, num_classes])\n", " b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))\n", "\n", " #reshape rnn_outputs and y\n", " rnn_outputs = tf.reshape(rnn_outputs, [-1, 
{ "cell_type": "code", "execution_count": 58, "metadata": { "collapsed": false, "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Average training loss for Epoch 0 : 8.88994855237\n", "Average training loss for Epoch 1 : 7.88880490379\n", "Average training loss for Epoch 2 : 7.25579271141\n", "Average training loss for Epoch 3 : 6.89169951012\n", "Average training loss for Epoch 4 : 6.71888943538\n", "Average training loss for Epoch 5 : 6.64450350861\n", "Average training loss for Epoch 6 : 6.61173795337\n", "Average training loss for Epoch 7 : 6.59556533076\n", "Average training loss for Epoch 8 : 6.58568808199\n", "Average training loss for Epoch 9 : 6.57976809601\n", "It took 1533.7903912067413 seconds to train for 10 epochs.\n", "The average loss on the final epoch was: 6.57976809601\n" ] } ], "source": [ "g = build_graph(cell_type='LN_LSTM', num_steps=80)\n", "t = time.time()\n", "losses = train_network(g, 10, num_steps=80, save=\"tf_saves/LN_LSTM_1_epochs\")\n", "print(\"It took\", time.time() - t, \"seconds to train for 10 epochs.\")\n", "print(\"The average loss on the final epoch was:\", losses[-1])" ] },
{ "cell_type": "code", "execution_count": 59, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def generate_characters(g, checkpoint, num_chars, state=None, prompt=['2007scape'], pick_top_chars=None):\n", "    \"\"\" Restores the trained model from checkpoint, runs the prompt sequence\n", "    through it to build up recurrent state, then samples num_chars further\n", "    tokens, optionally restricted to the pick_top_chars most likely ones. \"\"\"\n", "\n", "    with tf.Session() as sess:\n", "        sess.run(tf.global_variables_initializer())\n", "        g['saver'].restore(sess, checkpoint)\n", "\n", "        chars = []\n", "        # advance the state over all but the last prompt token; the\n", "        # generation loop below feeds that last token in first\n", "        for seed_letter in prompt[:-1]:\n", "            current_char = vocab_to_idx[seed_letter]\n", "            chars.append(idx_to_vocab[current_char])\n", "            if state is not None:\n", "                feed_dict = {g['x']: [[current_char]], g['init_state']: state}\n", "            else:\n", "                feed_dict = {g['x']: [[current_char]]}\n", "            _, state = sess.run([g['preds'], g['final_state']], feed_dict)\n", "\n", "        current_char = vocab_to_idx[prompt[-1]]\n", "        chars.append(idx_to_vocab[current_char])\n", "\n", "        for i in range(num_chars):\n", "            if state is not None:\n", "                feed_dict = {g['x']: [[current_char]], g['init_state']: state}\n", "            else:\n", "                feed_dict = {g['x']: [[current_char]]}\n", "\n", "            preds, state = sess.run([g['preds'], g['final_state']], feed_dict)\n", "\n", "            if pick_top_chars is not None:\n", "                # zero out all but the top-k probabilities, then renormalize\n", "                p = np.squeeze(preds)\n", "                p[np.argsort(p)[:-pick_top_chars]] = 0\n", "                p = p / np.sum(p)\n", "                current_char = np.random.choice(vocab_size, 1, p=p)[0]\n", "            else:\n", "                current_char = np.random.choice(vocab_size, 1, p=np.squeeze(preds))[0]\n", "\n", "            chars.append(idx_to_vocab[current_char])\n", "\n", "    return chars" ] },
{ "cell_type": "code", "execution_count": 63, "metadata": { "collapsed": true }, "outputs": [], "source": [ "with open('test_reddit_data.json', 'r') as data_file:\n", "    test_reddit_data = json.load(data_file)" ] },
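{ "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "\"\"\"\n", "Optional illustration (added sketch) of the top-k sampling trick used in\n", "generate_characters above: zero out all but the k largest probabilities, then\n", "renormalize so the survivors again sum to one. Pure numpy, no model required.\n", "\"\"\"\n", "k = 2\n", "p = np.array([0.1, 0.5, 0.15, 0.25])\n", "p[np.argsort(p)[:-k]] = 0   # keep only the k most likely entries\n", "p = p / np.sum(p)\n", "print(p)  # -> [0., 0.667, 0., 0.333] (approximately)" ] },
{ "cell_type": "code", "execution_count": 77, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df = 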
pd.DataFrame(test_reddit_data,columns=['user','subreddit','submission','utc_stamp','rnd_words'])\n", "df['utc_stamp'] = pd.to_datetime(df['utc_stamp'],unit='s')\n", "df.sort_values(by=['user','utc_stamp'], ascending=True, inplace=True)\n", "sub_seqs = []\n", "current_user = ''\n", "current_sub = ''\n", "for rw in df.iterrows():\n", " sub = rw[1]['subreddit']\n", " if sub in vocab:\n", " if current_user == '':\n", " usr_seq = [sub]\n", " elif rw[1]['user'] != current_user:\n", " sub_seqs.append(usr_seq)\n", " usr_seq = [sub]\n", " elif sub != current_sub:\n", " usr_seq.append(sub) \n", " current_user = rw[1]['user']\n", " current_sub = sub" ] }, { "cell_type": "code", "execution_count": 78, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "['DestinyTheGame',\n", " 'PS4',\n", " 'DestinyTheGame',\n", " 'canada',\n", " 'DestinyTheGame',\n", " 'Calgary',\n", " 'videos',\n", " 'todayilearned',\n", " 'misleadingthumbnails',\n", " 'energy',\n", " 'netflix',\n", " 'energy',\n", " 'movies',\n", " 'energy',\n", " 'videos',\n", " 'nottheonion',\n", " 'DestinyTheGame',\n", " 'Nikon',\n", " 'DestinyTheGame',\n", " 'Wellthatsucks',\n", " 'DestinyTheGame',\n", " 'worldnews',\n", " 'OutOfTheLoop',\n", " 'LifeProTips',\n", " 'AdviceAnimals',\n", " 'interestingasfuck',\n", " 'politics',\n", " 'worldnews',\n", " 'pics',\n", " 'WTF',\n", " 'Edmonton',\n", " 'gaming',\n", " 'news',\n", " 'Calgary',\n", " 'technology',\n", " 'Music',\n", " 'terriblefacebookmemes',\n", " 'photography',\n", " 'WTF',\n", " 'LifeProTips',\n", " 'Futurology',\n", " 'AdviceAnimals',\n", " 'canada',\n", " 'OopsDidntMeanTo',\n", " 'canada',\n", " 'Construction',\n", " 'food',\n", " 'technology',\n", " 'pics',\n", " 'worldnews',\n", " 'Calgary',\n", " 'Nikon',\n", " 'DestinyTheGame',\n", " 'Calgary',\n", " 'worldnews',\n", " 'DestinyTheGame',\n", " 'food',\n", " 'todayilearned',\n", " 'alberta',\n", " 'australia',\n", " 'terriblefacebookmemes',\n", " 'canada',\n", " 'news',\n", " 'nottheonion',\n", " 'DestinyTheGame',\n", " 'energy',\n", " 'politics',\n", " 'DestinyTheGame',\n", " 'AdviceAnimals',\n", " 'DestinyTheGame',\n", " 'videos',\n", " 'Unexpected',\n", " 'alberta',\n", " 'canada',\n", " 'AdviceAnimals',\n", " 'DestinyTheGame',\n", " 'canada',\n", " 'alberta',\n", " 'interestingasfuck',\n", " 'DestinyTheGame',\n", " 'worldnews',\n", " 'canada',\n", " 'pics',\n", " 'socialism',\n", " 'canada',\n", " 'PersonalFinanceCanada',\n", " 'todayilearned',\n", " 'AdviceAnimals',\n", " 'socialism',\n", " 'AdviceAnimals',\n", " 'teslamotors',\n", " 'Calgary',\n", " 'movies',\n", " 'alberta',\n", " 'Calgary',\n", " 'AskReddit',\n", " 'thatHappened',\n", " 'canada',\n", " 'worldnews',\n", " 'terriblefacebookmemes',\n", " 'quityourbullshit',\n", " 'worldnews',\n", " 'alberta',\n", " 'canada',\n", " 'news',\n", " 'AskReddit',\n", " 'movies',\n", " 'DestinyTheGame',\n", " 'worldnews',\n", " 'DestinyTheGame',\n", " 'AdviceAnimals',\n", " 'canada',\n", " 'Wellthatsucks',\n", " 'Futurology',\n", " 'canada',\n", " 'AdviceAnimals',\n", " 'UpliftingNews',\n", " 'AdviceAnimals',\n", " 'DestinyTheGame',\n", " 'politics',\n", " 'gaming',\n", " 'videos',\n", " 'DestinyTheGame',\n", " 'AskReddit',\n", " 'interestingasfuck',\n", " 'worldnews',\n", " 'AdviceAnimals',\n", " 'pics',\n", " 'worldnews',\n", " 'canada',\n", " 'AdviceAnimals',\n", " 'terriblefacebookmemes',\n", " 'AdviceAnimals',\n", " 'aww',\n", " 'Calgary',\n", " 'Construction',\n", " 'pics',\n", " 'canada',\n", " 'videos',\n", " 'canada',\n", " 'worldnews',\n", " 
'AdviceAnimals',\n", " 'atheism',\n", " 'gaming',\n", " 'canada',\n", " 'news',\n", " 'Wellthatsucks',\n", " 'DestinyTheGame',\n", " 'pics',\n", " 'DestinyTheGame',\n", " 'canada',\n", " 'CatGifs',\n", " 'worldnews',\n", " 'canada',\n", " 'AskReddit',\n", " 'worldnews',\n", " 'videos',\n", " 'canada',\n", " 'alberta',\n", " 'DestinyTheGame',\n", " 'Edmonton',\n", " 'Calgary',\n", " 'canada',\n", " 'Calgary',\n", " 'Futurology',\n", " 'Frugal',\n", " 'alberta',\n", " 'AdviceAnimals',\n", " 'Frugal',\n", " 'DestinyTheGame',\n", " 'alberta',\n", " 'news',\n", " 'pics',\n", " 'thatHappened',\n", " 'todayilearned',\n", " 'alberta',\n", " 'TechNewsToday',\n", " 'Calgary',\n", " 'woahdude',\n", " 'terriblefacebookmemes',\n", " 'canada',\n", " 'Edmonton',\n", " 'AdviceAnimals',\n", " 'canada',\n", " 'Futurology',\n", " 'alberta',\n", " 'gaming',\n", " 'worldnews',\n", " 'DestinyTheGame',\n", " 'Calgary',\n", " 'AskReddit',\n", " 'DestinyTheGame',\n", " 'canada',\n", " 'alberta',\n", " 'DestinyTheGame',\n", " 'Futurology',\n", " 'dankchristianmemes',\n", " 'DestinyTheGame',\n", " 'pics',\n", " 'alberta',\n", " 'DestinyTheGame',\n", " 'videos',\n", " 'DestinyTheGame',\n", " 'terriblefacebookmemes',\n", " 'todayilearned',\n", " 'alberta',\n", " 'LifeProTips',\n", " 'DestinyTheGame',\n", " 'todayilearned',\n", " 'AdviceAnimals',\n", " 'technology',\n", " 'alberta',\n", " 'Futurology',\n", " 'pics',\n", " 'canada',\n", " 'PoliticalDiscussion',\n", " 'DestinyTheGame',\n", " 'alberta',\n", " 'Wellthatsucks',\n", " 'technology',\n", " 'DestinyTheGame',\n", " 'WTF',\n", " 'AskReddit',\n", " 'photography',\n", " 'xbox',\n", " 'Music',\n", " 'HighQualityGifs',\n", " 'canada',\n", " 'energy',\n", " 'Futurology',\n", " 'pics',\n", " 'Edmonton',\n", " 'AdviceAnimals',\n", " 'Edmonton',\n", " 'DestinyTheGame',\n", " 'thedivision',\n", " 'thatHappened',\n", " 'canada',\n", " 'Futurology',\n", " 'thedivision',\n", " 'alberta',\n", " 'news',\n", " 'alberta',\n", " 'movies',\n", " 'Swimming',\n", " 'energy',\n", " 'alberta',\n", " 'AskReddit',\n", " 'worldnews',\n", " 'socialism',\n", " 'thedivision',\n", " 'todayilearned',\n", " 'thedivision',\n", " 'todayilearned',\n", " 'canada',\n", " 'Futurology',\n", " 'pics',\n", " 'WTF',\n", " 'Games',\n", " 'thedivision',\n", " 'worldnews',\n", " 'DestinyTheGame',\n", " 'alberta',\n", " 'AdviceAnimals',\n", " 'news',\n", " 'interestingasfuck',\n", " 'Calgary',\n", " 'explainlikeimfive',\n", " 'DestinyTheGame',\n", " 'movies',\n", " 'Nikon',\n", " 'EarthPorn',\n", " 'IdiotsFightingThings',\n", " 'AnimalsBeingJerks',\n", " 'AdviceAnimals',\n", " 'news',\n", " 'Calgary',\n", " 'worldnews',\n", " 'australia',\n", " 'tech',\n", " 'news',\n", " 'netflix',\n", " 'technology',\n", " 'movies',\n", " 'AdviceAnimals',\n", " 'Edmonton',\n", " 'technology',\n", " 'worldnews',\n", " 'pics',\n", " 'AskReddit',\n", " 'Games',\n", " 'canada',\n", " 'Wellthatsucks',\n", " 'thedivision',\n", " 'Calgary',\n", " 'Swimming',\n", " 'Futurology',\n", " 'nottheonion',\n", " 'worldnews',\n", " 'interestingasfuck',\n", " 'Futurology',\n", " 'todayilearned',\n", " 'worldnews',\n", " 'thedivision',\n", " 'technology',\n", " 'Futurology',\n", " 'tech',\n", " 'AdviceAnimals',\n", " 'Calgary',\n", " 'canada',\n", " 'asoiaf',\n", " 'teslamotors',\n", " 'Edmonton',\n", " 'WTF',\n", " 'thatHappened',\n", " 'terriblefacebookmemes',\n", " 'Switzerland',\n", " 'AskReddit',\n", " 'todayilearned',\n", " 'nottheonion',\n", " 'politics',\n", " 'AskReddit',\n", " 'science',\n", " 'terriblefacebookmemes',\n", " 
'canada',\n", " 'alberta',\n", " 'gameofthrones',\n", " 'worldnews',\n", " 'politics',\n", " 'teslamotors',\n", " 'Edmonton',\n", " 'alberta',\n", " 'AdviceAnimals',\n", " 'alberta',\n", " 'Futurology',\n", " 'alberta',\n", " 'nottheonion',\n", " 'pics',\n", " 'energy',\n", " 'worldnews',\n", " 'TechNewsToday',\n", " 'AskReddit',\n", " 'teslamotors',\n", " 'Edmonton',\n", " 'BuyItForLife',\n", " 'australia',\n", " 'AdviceAnimals',\n", " 'canada',\n", " 'Calgary',\n", " 'Futurology',\n", " 'alberta',\n", " 'terriblefacebookmemes',\n", " 'worldnews',\n", " 'asoiaf',\n", " 'Futurology',\n", " 'alberta',\n", " 'politics',\n", " 'alberta',\n", " 'Futurology',\n", " 'Calgary',\n", " 'Futurology',\n", " 'OopsDidntMeanTo',\n", " 'gaming',\n", " 'alberta',\n", " 'news',\n", " 'Edmonton',\n", " 'canada',\n", " 'Calgary',\n", " 'alberta',\n", " 'canada',\n", " 'oneplus',\n", " 'AskReddit',\n", " 'australia',\n", " 'Calgary',\n", " 'AskReddit',\n", " 'ArtisanVideos',\n", " 'Calgary',\n", " 'AdviceAnimals',\n", " 'news',\n", " 'worldnews',\n", " 'Art',\n", " 'food',\n", " 'AdviceAnimals',\n", " 'canada',\n", " 'Art',\n", " 'AdviceAnimals',\n", " 'videos',\n", " 'gameofthrones',\n", " 'TechNewsToday',\n", " 'gameofthrones',\n", " 'canada',\n", " 'explainlikeimfive',\n", " 'pics',\n", " 'asoiaf',\n", " 'canada',\n", " 'worldnews',\n", " 'Wellthatsucks',\n", " 'technology',\n", " 'canada',\n", " 'worldnews',\n", " 'news',\n", " 'AskReddit',\n", " 'worldnews',\n", " 'bizarrebuildings',\n", " 'thatHappened',\n", " 'photography',\n", " 'politics',\n", " 'AskReddit',\n", " 'h3h3productions',\n", " 'AdviceAnimals',\n", " 'gaming',\n", " 'pokemongo',\n", " 'oneplus',\n", " 'news',\n", " 'Edmonton',\n", " 'pokemongo',\n", " 'technology',\n", " 'alberta',\n", " 'canada',\n", " 'alberta',\n", " 'Edmonton',\n", " 'AdviceAnimals',\n", " 'alberta',\n", " 'Futurology',\n", " 'politics',\n", " 'AdviceAnimals',\n", " 'worldnews',\n", " 'gaming',\n", " 'pokemongo',\n", " 'terriblefacebookmemes',\n", " 'pokemongo',\n", " 'alberta',\n", " 'pokemongo',\n", " 'SolarCity',\n", " 'pokemongo',\n", " 'canada',\n", " 'alberta',\n", " 'pics',\n", " 'EarthPorn',\n", " 'canada',\n", " 'history',\n", " 'Calgary',\n", " 'canada',\n", " 'worldnews',\n", " 'videos',\n", " 'Edmonton',\n", " 'alberta',\n", " 'politics',\n", " 'alberta',\n", " 'Swimming',\n", " 'Games',\n", " 'thatHappened',\n", " 'gaming',\n", " 'NoMansSkyTheGame',\n", " 'Nikon',\n", " 'gaming',\n", " 'Nikon',\n", " 'NoMansSkyTheGame',\n", " 'worldnews',\n", " 'AdviceAnimals',\n", " 'alberta',\n", " 'Edmonton',\n", " 'gaming',\n", " 'pics',\n", " 'worldnews',\n", " 'Swimming',\n", " 'DestinyTheGame',\n", " 'pics',\n", " 'canada',\n", " 'australia',\n", " 'Edmonton',\n", " 'canada',\n", " 'worldnews',\n", " 'science',\n", " 'todayilearned',\n", " 'news',\n", " 'thatHappened',\n", " 'pics',\n", " 'Edmonton',\n", " 'Construction',\n", " 'worldnews',\n", " 'Futurology',\n", " 'PoliticalDiscussion',\n", " 'worldnews',\n", " 'AdviceAnimals',\n", " 'canada',\n", " 'AdviceAnimals',\n", " 'canada',\n", " 'news',\n", " 'EarthPorn',\n", " 'OopsDidntMeanTo',\n", " 'wallpapers',\n", " 'AskReddit',\n", " 'gaming',\n", " 'Futurology',\n", " 'EarthPorn',\n", " 'terriblefacebookmemes',\n", " 'thatHappened',\n", " 'canada',\n", " 'AskReddit',\n", " 'canada',\n", " 'WTF',\n", " 'news',\n", " 'canada',\n", " 'LifeProTips',\n", " 'nottheonion',\n", " 'AdviceAnimals',\n", " 'todayilearned',\n", " 'AdviceAnimals',\n", " 'alberta',\n", " 'AdviceAnimals',\n", " 'NoMansSkyTheGame',\n", " 
'HighQualityGifs',\n", " 'AskReddit',\n", " 'media_criticism',\n", " 'alberta',\n", " 'technology',\n", " 'alberta',\n", " 'politics',\n", " 'photography',\n", " 'canada',\n", " 'Edmonton',\n", " 'NoMansSkyTheGame',\n", " 'Music',\n", " 'videos',\n", " 'HailCorporate',\n", " 'politics',\n", " 'Coffee',\n", " 'technology',\n", " 'Edmonton',\n", " 'todayilearned',\n", " 'Edmonton',\n", " 'videos',\n", " 'news',\n", " 'politics',\n", " 'worldnews',\n", " 'AskReddit',\n", " 'gaming',\n", " 'thingsforants',\n", " 'canada',\n", " 'news',\n", " 'todayilearned',\n", " 'energy',\n", " 'triplej',\n", " 'AdviceAnimals',\n", " 'wallpapers',\n", " 'Edmonton',\n", " 'pics',\n", " 'worldnews',\n", " 'DestinyTheGame',\n", " 'teslamotors',\n", " 'pics',\n", " 'tech',\n", " 'gaming',\n", " 'Nikon',\n", " 'spacex',\n", " 'DestinyTheGame',\n", " 'canada',\n", " 'Calgary',\n", " 'netflix',\n", " 'Edmonton',\n", " 'Frugal',\n", " 'canada',\n", " 'gaming',\n", " 'canada',\n", " 'interestingasfuck',\n", " 'terriblefacebookmemes',\n", " 'AdviceAnimals',\n", " 'alberta',\n", " 'politics',\n", " 'oneplus',\n", " 'alberta',\n", " 'pcmasterrace',\n", " 'alberta',\n", " 'Calgary',\n", " 'space',\n", " 'canada',\n", " 'AskReddit',\n", " 'pokemongo',\n", " 'DestinyTheGame',\n", " 'australia',\n", " 'Edmonton',\n", " 'news',\n", " 'aww',\n", " 'UpliftingNews',\n", " 'canada',\n", " 'AskReddit',\n", " 'nasa',\n", " 'todayilearned',\n", " 'Futurology',\n", " 'Music',\n", " 'canada',\n", " 'DestinyTheGame',\n", " 'Unexpected',\n", " 'politics',\n", " 'DestinyTheGame',\n", " 'AdviceAnimals',\n", " 'Edmonton',\n", " 'Music',\n", " 'oneplus',\n", " 'WTF',\n", " 'Futurology',\n", " 'alberta',\n", " 'teslamotors',\n", " 'todayilearned',\n", " 'alberta',\n", " 'quityourbullshit',\n", " 'science',\n", " 'news',\n", " 'AdviceAnimals',\n", " 'alberta',\n", " 'DestinyTheGame',\n", " 'canada',\n", " 'worldnews',\n", " 'Edmonton',\n", " 'canada',\n", " 'politics',\n", " 'terriblefacebookmemes',\n", " 'Calgary',\n", " 'politics',\n", " 'AdviceAnimals',\n", " 'pics',\n", " 'AdviceAnimals',\n", " 'thatHappened',\n", " 'AdviceAnimals',\n", " 'MakingaMurderer',\n", " 'canada',\n", " 'pics',\n", " 'netflix',\n", " 'pics',\n", " 'triplej',\n", " 'thatHappened',\n", " 'PerfectTiming',\n", " 'TechNewsToday',\n", " 'PersonalFinanceCanada',\n", " 'technology',\n", " 'WTF',\n", " 'UpliftingNews',\n", " 'netflix',\n", " 'terriblefacebookmemes',\n", " 'netflix',\n", " 'news',\n", " 'AdviceAnimals',\n", " 'Music',\n", " 'LifeProTips',\n", " 'teslamotors',\n", " 'worldnews',\n", " 'politics',\n", " 'NoMansSkyTheGame',\n", " 'canada',\n", " 'OutOfTheLoop',\n", " 'AdviceAnimals',\n", " 'Art',\n", " 'AdviceAnimals',\n", " 'Edmonton',\n", " 'Futurology',\n", " 'politics',\n", " 'AskReddit',\n", " 'NoMansSkyTheGame',\n", " 'media_criticism',\n", " 'todayilearned',\n", " 'buildapcsales',\n", " 'pcmasterrace',\n", " 'terriblefacebookmemes',\n", " 'canada',\n", " 'pics',\n", " 'food',\n", " 'asoiaf',\n", " 'HighQualityGifs',\n", " 'alberta',\n", " 'CrappyDesign',\n", " 'pcmasterrace',\n", " 'quityourbullshit',\n", " 'canada',\n", " 'CrappyDesign',\n", " 'buildapcsales',\n", " 'Edmonton',\n", " 'Music',\n", " 'AskReddit',\n", " 'politics',\n", " 'Futurology',\n", " 'NoMansSkyTheGame',\n", " 'terriblefacebookmemes',\n", " 'quityourbullshit',\n", " 'Swimming',\n", " 'alberta',\n", " 'OopsDidntMeanTo',\n", " 'Futurology',\n", " 'Edmonton',\n", " 'pokemongo',\n", " 'technology',\n", " 'worldnews',\n", " 'politics',\n", " 'videos',\n", " 'terriblefacebookmemes',\n", 
" 'LifeProTips',\n", " 'nasa',\n", " 'todayilearned',\n", " 'alberta',\n", " 'pokemongo',\n", " 'pcmasterrace',\n", " 'pokemongo',\n", " 'news',\n", " 'pokemongo',\n", " 'canada',\n", " 'AskReddit',\n", " 'netflix',\n", " 'technology',\n", " 'ThanksObama',\n", " 'politics',\n", " 'alberta',\n", " 'canada',\n", " 'videos',\n", " 'EarthPorn',\n", " 'netflix',\n", " 'interestingasfuck',\n", " 'politics',\n", " 'canada',\n", " 'interestingasfuck',\n", " 'Calgary',\n", " 'pokemongo',\n", " 'AdviceAnimals',\n", " 'worldnews',\n", " 'blackmirror',\n", " 'politics',\n", " 'BuyItForLife',\n", " 'politics',\n", " 'Calgary',\n", " 'politics',\n", " 'AskReddit',\n", " 'AdviceAnimals',\n", " 'videos',\n", " 'h3h3productions',\n", " 'AdviceAnimals',\n", " 'h3h3productions',\n", " 'TechNewsToday',\n", " 'technology',\n", " 'canada',\n", " 'news',\n", " 'aww',\n", " 'pics',\n", " 'worldnews',\n", " 'Calgary',\n", " 'alberta',\n", " 'canada',\n", " 'Swimming',\n", " 'AskReddit',\n", " 'todayilearned',\n", " 'Edmonton',\n", " 'triplej',\n", " 'worldnews',\n", " 'todayilearned',\n", " 'news',\n", " 'canada',\n", " 'pics',\n", " 'AdviceAnimals',\n", " 'UpliftingNews',\n", " 'wildlifephotography',\n", " 'news',\n", " 'videos',\n", " 'alberta',\n", " 'AskReddit',\n", " 'Unexpected',\n", " 'AdviceAnimals',\n", " 'nottheonion',\n", " 'OutOfTheLoop',\n", " 'AdviceAnimals',\n", " 'worldnews',\n", " 'nasa',\n", " 'AdviceAnimals',\n", " 'alberta',\n", " 'tech',\n", " 'alberta',\n", " 'Futurology',\n", " 'Edmonton',\n", " 'news',\n", " 'LateStageCapitalism',\n", " 'alberta',\n", " 'pics',\n", " 'Futurology',\n", " 'AnimalsBeingBros',\n", " 'news',\n", " 'Calgary',\n", " 'pics',\n", " 'Edmonton',\n", " 'thatHappened',\n", " 'tech']" ] }, "execution_count": 78, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sub_seqs[1]" ] }, { "cell_type": "code", "execution_count": 87, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "None\n" ] } ], "source": [ "g = build_graph(cell_type='LN_LSTM', num_steps=1, batch_size=1)\n", "results = generate_characters(g, \"tf_saves/LN_LSTM_1_epochs\", 20, prompt=sub_seqs[1])" ] }, { "cell_type": "code", "execution_count": 88, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(set(sub_seqs[1]) & set(results[-20:]))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }