{ "cells": [
{ "cell_type": "markdown", "metadata": {}, "source": [ "# CS 20 : TensorFlow for Deep Learning Research\n", "## Lecture 11 : Recurrent Neural Networks\n", "A simple example of many-to-one classification (word sentiment classification) with a recurrent neural network.\n", "\n", "### Many-to-One Classification with an RNN\n", "- Creating the **data pipeline** with `tf.data`\n", "- Preprocessing word sequences of variable length by padding them with `tf.keras.preprocessing.sequence.pad_sequences`\n", "- Using a frozen one-hot `keras.layers.Embedding` layer (the Keras counterpart of `tf.nn.embedding_lookup`) to get a vector for each token (e.g. word, character)\n", "- Defining the model as a **class** (a `keras.Model` subclass)\n", "- References\n", "    - https://github.com/golbin/TensorFlow-Tutorials/blob/master/10%20-%20RNN/02%20-%20Autocomplete.py\n", "    - https://github.com/aisolab/TF_code_examples_for_Deep_learning/blob/master/Tutorial%20of%20implementing%20Sequence%20classification%20with%20RNN%20series.ipynb\n" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Setup" ] },
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.12.0\n" ] } ], "source": [ "from __future__ import absolute_import, division, print_function\n", "import os, sys\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import tensorflow as tf\n", "from tensorflow.keras.preprocessing.sequence import pad_sequences\n", "from tensorflow import keras\n", "import string\n", "%matplotlib inline\n", "\n", "print(tf.__version__)\n", "\n", "tf.enable_eager_execution()" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Prepare example data" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "words = ['good', 'bad', 'amazing', 'so good', 'bull shit', 'awesome']\n", "y = [[1.,0.], [0.,1.], [1.,0.], [1., 0.],[0.,1.], [1.,0.]]" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [
{ "name": "stdout", "output_type": "stream", "text": [ "['<pad>', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', ' ', '*']\n" ] } ], "source": [ "# Character quantization\n", "char_space = string.ascii_lowercase \n", "char_space = char_space + ' ' + '*'\n", "char_space = list(char_space)\n", "char_space.insert(0, '<pad>')\n", "print(char_space)" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'<pad>': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26, ' ': 27, '*': 28}\n" ] } ], "source": [ "char2idx = {char : idx for idx, char in enumerate(char_space)}\n", "print(char2idx)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Pad example data" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[4, 3, 7, 7, 9, 7]\n", "(6, 10)\n" ] } ], "source": [ "# Encode into a new name instead of overwriting `words`, so this cell can be re-run safely.\n", "# Indexing with [] (rather than .get) fails fast on a character missing from char2idx.\n", "word_indices = [[char2idx[char] for char in word] for word in words]\n", "\n", "max_length = 10\n", "X_length = [len(indices) for indices in word_indices]\n", "# Pad or truncate every sequence at its end ('post') to exactly max_length entries.\n", "X_indices = pad_sequences(sequences=word_indices, maxlen=max_length, padding='post', truncating='post')\n", "\n", "print(X_length)\n", "print(np.shape(X_indices))" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Define CharRNN class" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "class CharRNN(keras.Model):\n", "    \"\"\"Character-level many-to-one classifier: one-hot embedding -> SimpleRNN -> dense scores.\"\"\"\n", "\n", "    def __init__(self, num_classes, hidden_dim, max_length, dic):\n", "        \"\"\"Create the layers.\n", "\n", "        Args:\n", "            num_classes: number of units of the output dense layer (one score per class).\n", "            hidden_dim: number of units of the SimpleRNN layer.\n", "            max_length: passed to the embedding layer as `input_length`.\n", "            dic: token-to-index mapping; only `len(dic)` is used (vocabulary size).\n", "        \"\"\"\n", "        super(CharRNN, self).__init__()\n", "\n", "        # Identity matrix + trainable=False: every token index maps to a fixed one-hot vector.\n", "        # mask_zero=True treats index 0 as padding to be masked out.\n", "        self.look_up = keras.layers.Embedding(input_dim=len(dic), output_dim=len(dic),\n", "                                              trainable=False, mask_zero=True, input_length=max_length,\n", "                                              embeddings_initializer=keras.initializers.Constant(np.eye(len(dic))))\n", "        self.rnn_cell = keras.layers.SimpleRNN(units=hidden_dim, return_sequences=True,\n", "                                               return_state=True)\n", "        self.dense = keras.layers.Dense(units=num_classes)\n", "\n", "    def call(self, inputs):\n", "        \"\"\"Return unnormalized class scores computed from the RNN's final state.\"\"\"\n", "        token_representation = self.look_up(inputs)\n", "        # return_state=True: the layer returns its outputs followed by the final state.\n", "        _, final_h = self.rnn_cell(token_representation)\n", "        score = self.dense(final_h)\n", "        return score" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Create a model of CharRNN" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3\n" ] } ], "source": [ "# hyper-parameters\n", "lr = .003\n", "epochs = 10\n", "batch_size = 2\n", "total_step = int(np.shape(X_indices)[0] / batch_size)\n", "print(total_step)" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<BatchDataset shapes: ((?, 10), (?, 2)), types: (tf.int32, tf.float32)>\n" ] } ], "source": [ "## create data pipeline with tf.data\n", "tr_dataset = tf.data.Dataset.from_tensor_slices((X_indices, y))\n", "tr_dataset = tr_dataset.shuffle(buffer_size = 20)\n", "tr_dataset = tr_dataset.batch(batch_size = batch_size)\n", "print(tr_dataset)" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Reuse max_length from the padding cell so the model and the padded data cannot drift apart.\n", "char_rnn = CharRNN(num_classes=2, hidden_dim=16, dic=char2idx, max_length=max_length)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "### Train model" ] },
{ "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "def loss_fn(model, x, y):\n", "    \"\"\"Softmax cross-entropy between one-hot labels `y` and the logits `model(x)`.\"\"\"\n", "    return tf.losses.softmax_cross_entropy(onehot_labels=y, logits=model(x))" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "opt = tf.train.AdamOptimizer(learning_rate=lr)" ] },
{ "cell_type": "code", "execution_count": 12,
"metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "epoch :   1, tr_loss : 0.739\n", "epoch :   2, tr_loss : 0.649\n", "epoch :   3, tr_loss : 0.546\n", "epoch :   4, tr_loss : 0.444\n", "epoch :   5, tr_loss : 0.367\n", "epoch :   6, tr_loss : 0.272\n", "epoch :   7, tr_loss : 0.215\n", "epoch :   8, tr_loss : 0.156\n", "epoch :   9, tr_loss : 0.113\n", "epoch :  10, tr_loss : 0.088\n" ] } ], "source": [ "tr_loss_hist = []\n", "\n", "for epoch in range(epochs):\n", "    avg_tr_loss = 0\n", "    tr_step = 0\n", "\n", "    for x_mb, y_mb in tr_dataset:\n", "        with tf.GradientTape() as tape:\n", "            tr_loss = loss_fn(char_rnn, x=x_mb, y=y_mb)\n", "        # Update only the trainable weights; the one-hot embedding is created with trainable=False.\n", "        grads = tape.gradient(target=tr_loss, sources=char_rnn.trainable_variables)\n", "        opt.apply_gradients(grads_and_vars=zip(grads, char_rnn.trainable_variables))\n", "        avg_tr_loss += tr_loss\n", "        tr_step += 1\n", "\n", "    # Mean mini-batch loss of this epoch.\n", "    avg_tr_loss /= tr_step\n", "    tr_loss_hist.append(avg_tr_loss)\n", "\n", "    print('epoch : {:3}, tr_loss : {:.3f}'.format(epoch + 1, avg_tr_loss))" ] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[<matplotlib.lines.Line2D at 0x7f3de0704b38>]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.plot(tr_loss_hist, label = 'train')" ] },
{ "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "acc : 100.00%\n" ] } ], "source": [ "yhat = np.argmax(char_rnn(inputs=tf.convert_to_tensor(X_indices)), axis=-1)\n", "print('acc : {:.2%}'.format(np.mean(yhat == np.argmax(y, axis=-1))))" ] }
], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py",
"mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 2 }