{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Basic RNN - Simple\n", "- 참고\n", " - https://gist.github.com/j-min/481749dcb853b4477c4f441bf7452195\n", " - http://pythonkim.tistory.com/58\n", " - http://pythonkim.tistory.com/61\n", " - http://karpathy.github.io/2015/05/21/rnn-effectiveness/" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- 목표\n", " - Character-level Language Modeling\n", " - 입력 데이터" ] }, { "cell_type": "code", "execution_count": 68, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import tensorflow as tf\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 69, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "'0.9.0'" ] }, "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tf.__version__" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- 사전 내포방식으로 Character를 키로, 인덱스를 값으로 지니는 Dictionary 생성" ] }, { "cell_type": "code", "execution_count": 70, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{'h': 0, 'e': 1, 'l': 2, 'o': 3}\n" ] } ], "source": [ "char_rdic = ['h', 'e', 'l', 'o'] # id -> char\n", "char_dic = {w : i for i, w in enumerate(char_rdic)} # char -> id\n", "print char_dic" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- ground_truth --> 'hello' --> [0, 1, 2, 2, 3] 생성 " ] }, { "cell_type": "code", "execution_count": 71, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0, 1, 2, 2, 3]\n" ] } ], "source": [ "ground_truth = [char_dic[c] for c in 'hello']\n", "print ground_truth" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- 입력데이터로 활용할 x_data 마련\n", " - 입력데이터에는 hell 까지만 존재" ] }, { "cell_type": "code", "execution_count": 72, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(4, 4) float32\n" ] } ], "source":
[ "# One-hot rows for the input characters h, e, l, l (all of 'hello' except the last character).\n", "# Derived from ground_truth and the vocabulary size so the matrix cannot drift from either.\n", "x_data = np.eye(len(char_dic), dtype = 'float32')[ground_truth[:-1]]\n", "print x_data.shape, x_data.dtype" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- 참고: https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#one_hot\n", "\n", "**tf.one_hot(indices, depth, on_value=None, off_value=None, axis=None, dtype=None, name=None)**" ] }, { "cell_type": "code", "execution_count": 73, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Exception AssertionError: AssertionError(\"Nesting violated for default stack of objects\",) in > ignored\n" ] } ], "source": [ "session = tf.InteractiveSession()\n", "session.run(tf.initialize_all_variables())" ] }, { "cell_type": "code", "execution_count": 74, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0, 1, 2, 2, 3] [0, 1, 2, 2]\n" ] } ], "source": [ "print ground_truth[:], ground_truth[:-1]" ] }, { "cell_type": "code", "execution_count": 75, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 1. 0. 0. 0.]\n", " [ 0. 1. 0. 0.]\n", " [ 0. 0. 1. 0.]\n", " [ 0. 0. 1.
0.]]\n" ] } ], "source": [ "x_data = tf.one_hot(ground_truth[:-1], depth = len(char_dic), on_value = 1.0, off_value = 0.0)\n", "print x_data.eval()" ] }, { "cell_type": "code", "execution_count": 76, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Configuration\n", "# The RNN outputs are passed to the loss directly as logits (no projection layer),\n", "# so the hidden size has to equal the vocabulary size.\n", "rnn_size = len(char_dic) # 4\n", "batch_size = 1\n", "output_size = len(char_dic) # one output class per character, derived instead of hard-coded" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- 참고: https://www.tensorflow.org/api_docs/python/rnn_cell/rnn_cells_for_use_with_tensorflow_s_core_rnn_methods#BasicRNNCell" ] }, { "cell_type": "code", "execution_count": 77, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Tensor(\"zeros_12:0\", shape=(1, 4), dtype=float32)\n", "[[ 0. 0. 0. 0.]]\n" ] } ], "source": [ "# RNN Model\n", "rnn_cell = tf.nn.rnn_cell.BasicRNNCell(num_units = rnn_size)\n", "\n", "#initial_state = rnn_cell.zero_state(batch_size, tf.float32)\n", "initial_state = tf.zeros([batch_size, rnn_cell.state_size]) # same result as the commented-out line above\n", "print(initial_state)\n", "print initial_state.eval()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- 참고: https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#split\n", "\n", "**tf.split(split_dim, num_split, value, name='split')**" ] }, { "cell_type": "code", "execution_count": 78, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 1. 0. 0. 0.]\n", " [ 0. 1. 0. 0.]\n", " [ 0. 0. 1. 0.]\n", " [ 0. 0. 1. 0.]]\n", "\n", "\n", "[[ 1. 0. 0. 0.]]\n", "[[ 0. 1. 0. 0.]]\n", "[[ 0. 0. 1. 0.]]\n", "[[ 0. 0. 1.
0.]]\n" ] } ], "source": [ "print x_data.eval()\n", "print\n", "\n", "# Split along dimension 0 into one [1 x vocab] tensor per time step.\n", "# The number of pieces is the number of input characters (rows of x_data), not the vocabulary size;\n", "# the two only happen to be equal (4) for 'hello'.\n", "x_split = tf.split(split_dim = 0, num_split = len(ground_truth) - 1, value = x_data)\n", "print type(x_split)\n", "for t in x_split:\n", "    print t.eval()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- 참고: https://www.tensorflow.org/api_docs/python/nn/recurrent_neural_networks#rnn" ] }, { "cell_type": "code", "execution_count": 80, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", "(1, 4)\n", "(1, 4)\n", "(1, 4)\n", "(1, 4)\n", "\n", "(1, 4)\n" ] } ], "source": [ "# Unroll the cell over the per-time-step inputs; outputs is a list with one tensor per step\n", "with tf.variable_scope('forward'):\n", "    outputs, state = tf.nn.rnn(cell = rnn_cell, inputs = x_split, initial_state = initial_state)\n", "    print type(outputs)\n", "    print\n", "\n", "    for t in outputs:\n", "        print t.get_shape()\n", "    print \n", "\n", "    print state.get_shape()" ] }, { "cell_type": "code", "execution_count": 81, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(4, 4)\n" ] } ], "source": [ "result_outputs = tf.reshape(tf.concat(1, outputs), # shape = 1 x 16\n", "                            [-1, rnn_size]) # shape = 4 x 4\n", "print result_outputs.get_shape()" ] }, { "cell_type": "code", "execution_count": 82, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1, 2, 2, 3]\n", "[1 2 2 3]\n" ] } ], "source": [ "print ground_truth[1:]\n", "targets = tf.constant(ground_truth[1:], tf.int32) # 1-D int32 tensor: the next-character id for each time step\n", "print targets.eval()" ] }, { "cell_type": "code", "execution_count": 83, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 1. 1. 1.
1.]\n" ] } ], "source": [ "# One loss weight per target time step (not one per vocabulary entry)\n", "weights = tf.ones([(len(ground_truth) - 1) * batch_size]) # tf.ones([4])\n", "print weights.eval()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- 참고: https://www.tensorflow.org/tutorials/recurrent/\n", " - tf.nn.seq2seq.sequence_loss_by_example - return Weighted cross-entropy loss for a sequence of logits (per example)" ] }, { "cell_type": "code", "execution_count": 84, "metadata": { "collapsed": false }, "outputs": [], "source": [ "loss = tf.nn.seq2seq.sequence_loss_by_example([result_outputs], [targets], [weights])\n", "cost = tf.reduce_sum(loss) / batch_size\n", "train_op = tf.train.RMSPropOptimizer(0.01, 0.9).minimize(cost)" ] }, { "cell_type": "code", "execution_count": 85, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l',
'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 
'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n", "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n" ] } ], "source": [
"# Launch the graph in a session\n", "# Build the prediction op once: calling tf.argmax inside the loop would add a new node to the graph on every iteration.\n", "prediction = tf.argmax(result_outputs, 1)\n", "with tf.Session() as sess:\n", "    tf.initialize_all_variables().run()\n", "    for i in range(100):\n", "        sess.run(train_op)\n", "        # Evaluated in a separate run so the prediction reflects the weights after this update\n", "        result = sess.run(prediction)\n", "        print(result, [char_rdic[t] for t in result])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [Root]", "language": "python", "name": "Python [Root]" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 0 }