{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Basic RNN - Simple\n",
    "- 참고\n",
    "  - https://gist.github.com/j-min/481749dcb853b4477c4f441bf7452195\n",
    "  - http://pythonkim.tistory.com/58\n",
    "  - http://pythonkim.tistory.com/61\n",
    "  - http://karpathy.github.io/2015/05/21/rnn-effectiveness/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 목표\n",
    "  - Character-level Language Modeling\n",
    "  - 입력 데이터"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'0.9.0'"
      ]
     },
     "execution_count": 69,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the installed TensorFlow version.\n",
    "tf.__version__"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 사전 내포방식으로 Character를 키로, 인덱스를 값으로 지니는 Dictionary 생성"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'h': 0, 'e': 1, 'l': 2, 'o': 3}\n"
     ]
    }
   ],
   "source": [
    "# Vocabulary: a character's position in this list is its id (id -> char).\n",
    "char_rdic = list('helo')\n",
    "# Reverse lookup table built with a dict comprehension (char -> id).\n",
    "char_dic = {ch: idx for idx, ch in enumerate(char_rdic)}\n",
    "print char_dic"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- ground_truth --> 'hello' --> [0, 1, 2, 2, 3] 생성 "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 1, 2, 2, 3]\n"
     ]
    }
   ],
   "source": [
    "# Encode the word 'hello' as a list of character ids via the char -> id table.\n",
    "word = 'hello'\n",
    "ground_truth = [char_dic[letter] for letter in word]\n",
    "print ground_truth"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 입력데이터로 활용할 x_data 마련\n",
    "  - 입력데이터에는 hell 까지만 존재"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(4, 4) float32\n"
     ]
    }
   ],
   "source": [
    "# Hand-written one-hot rows for the input characters 'h', 'e', 'l', 'l'.\n",
    "one_hot_rows = [\n",
    "    [1, 0, 0, 0],  # h\n",
    "    [0, 1, 0, 0],  # e\n",
    "    [0, 0, 1, 0],  # l\n",
    "    [0, 0, 1, 0],  # l\n",
    "]\n",
    "x_data = np.array(one_hot_rows, dtype=np.float32)\n",
    "print x_data.shape, x_data.dtype"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 참고: https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#one_hot\n",
    "\n",
    "**tf.one_hot(indices, depth, on_value=None, off_value=None, axis=None, dtype=None, name=None)**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Exception AssertionError: AssertionError(\"Nesting violated for default stack of <type 'weakref'> objects\",) in <bound method InteractiveSession.__del__ of <tensorflow.python.client.session.InteractiveSession object at 0x11b850b90>> ignored\n"
     ]
    }
   ],
   "source": [
    "# Open an interactive session so later cells can call .eval() without passing a session.\n",
    "session = tf.InteractiveSession()\n",
    "# NOTE(review): no variables appear to be defined in the cells above, so this initializer\n",
    "# likely has nothing to initialize yet; the training cell runs its own initializer.\n",
    "session.run(tf.initialize_all_variables())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0, 1, 2, 2, 3] [0, 1, 2, 2]\n"
     ]
    }
   ],
   "source": [
    "# Show the full id sequence next to the same sequence without its final element.\n",
    "print ground_truth[:], ground_truth[:-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 1.  0.  0.  0.]\n",
      " [ 0.  1.  0.  0.]\n",
      " [ 0.  0.  1.  0.]\n",
      " [ 0.  0.  1.  0.]]\n"
     ]
    }
   ],
   "source": [
    "# One-hot encode every character id except the last one.\n",
    "# Note: this rebinds x_data from the NumPy array defined earlier to a TensorFlow tensor.\n",
    "x_data = tf.one_hot(\n",
    "    indices=ground_truth[:-1],\n",
    "    depth=len(char_dic),\n",
    "    on_value=1.0,\n",
    "    off_value=0.0,\n",
    ")\n",
    "print x_data.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Configuration\n",
    "batch_size = 1            # batch dimension of the initial state; also the divisor of the cost\n",
    "rnn_size = len(char_dic)  # 4 hidden units, i.e. as many as there are vocabulary characters\n",
    "output_size = 4           # not referenced by any other cell shown in this notebook"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 참고: https://www.tensorflow.org/api_docs/python/rnn_cell/rnn_cells_for_use_with_tensorflow_s_core_rnn_methods#BasicRNNCell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Tensor(\"zeros_12:0\", shape=(1, 4), dtype=float32)\n",
      "[[ 0.  0.  0.  0.]]\n"
     ]
    }
   ],
   "source": [
    "# RNN Model\n",
    "# A basic recurrent cell with rnn_size hidden units.\n",
    "rnn_cell = tf.nn.rnn_cell.BasicRNNCell(num_units = rnn_size)\n",
    "\n",
    "#initial_state = rnn_cell.zero_state(batch_size, tf.float32)\n",
    "initial_state = tf.zeros([batch_size, rnn_cell.state_size]) # same result as the commented-out line above\n",
    "# Print the symbolic tensor first, then its evaluated value (.eval() relies on the default session).\n",
    "print(initial_state)\n",
    "print initial_state.eval()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 참고: https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#split\n",
    "\n",
    "**tf.split(split_dim, num_split, value, name='split')**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 78,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 1.  0.  0.  0.]\n",
      " [ 0.  1.  0.  0.]\n",
      " [ 0.  0.  1.  0.]\n",
      " [ 0.  0.  1.  0.]]\n",
      "\n",
      "<type 'list'>\n",
      "[[ 1.  0.  0.  0.]]\n",
      "[[ 0.  1.  0.  0.]]\n",
      "[[ 0.  0.  1.  0.]]\n",
      "[[ 0.  0.  1.  0.]]\n"
     ]
    }
   ],
   "source": [
    "print x_data.eval()\n",
    "print\n",
    "\n",
    "# Split x_data along dimension 0 into one single-row tensor per time step.\n",
    "# The number of pieces is the number of input characters, not the vocabulary size;\n",
    "# both happen to be 4 for 'hello', which is why len(char_dic) appeared to work.\n",
    "num_steps = len(ground_truth) - 1  # the inputs are ground_truth[:-1]\n",
    "x_split = tf.split(split_dim = 0, num_split = num_steps, value = x_data)\n",
    "print type(x_split)\n",
    "for t in x_split:\n",
    "    print t.eval()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 참고: https://www.tensorflow.org/api_docs/python/nn/recurrent_neural_networks#rnn"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 80,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<type 'list'>\n",
      "\n",
      "(1, 4)\n",
      "(1, 4)\n",
      "(1, 4)\n",
      "(1, 4)\n",
      "\n",
      "(1, 4)\n"
     ]
    }
   ],
   "source": [
    "# Unroll the RNN cell over the per-step inputs; its variables are created under the 'forward' scope.\n",
    "with tf.variable_scope('forward'):\n",
    "    outputs, state = tf.nn.rnn(cell=rnn_cell, inputs=x_split, initial_state=initial_state)\n",
    "\n",
    "# outputs is a Python list holding one tensor per time step.\n",
    "print type(outputs)\n",
    "print\n",
    "\n",
    "for step_output in outputs:\n",
    "    print step_output.get_shape()\n",
    "print\n",
    "\n",
    "# Shape of the state returned after the last step.\n",
    "print state.get_shape()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(4, 4)\n"
     ]
    }
   ],
   "source": [
    "# Join the per-step outputs side by side (shape 1 x 16), then fold them back\n",
    "# into rows of rnn_size columns (shape 4 x 4), one row per time step.\n",
    "joined_outputs = tf.concat(1, outputs)\n",
    "result_outputs = tf.reshape(joined_outputs, [-1, rnn_size])\n",
    "print result_outputs.get_shape()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1, 2, 2, 3]\n",
      "[1 2 2 3]\n"
     ]
    }
   ],
   "source": [
    "# Targets are the id sequence without its first element, i.e. the character\n",
    "# that follows each input character.\n",
    "next_char_ids = ground_truth[1:]\n",
    "print next_char_ids\n",
    "targets = tf.constant(next_char_ids, dtype=tf.int32)  # 1-D int32 tensor built from the list\n",
    "print targets.eval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[ 1.  1.  1.  1.]\n"
     ]
    }
   ],
   "source": [
    "# One loss weight per target position, all equal to 1.\n",
    "# The count must follow the number of targets, not the vocabulary size; the two only\n",
    "# coincide here because 'hello' has 4 targets and 4 distinct characters.\n",
    "weights = tf.ones([len(ground_truth[1:]) * batch_size]) # tf.ones([4])\n",
    "print weights.eval()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "- 참고: https://www.tensorflow.org/tutorials/recurrent/\n",
    "  - tf.nn.seq2seq.sequence_loss_by_example - returns the weighted cross-entropy loss for a sequence of logits (per example)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Weighted cross-entropy between the RNN outputs (used as logits) and the target ids.\n",
    "loss = tf.nn.seq2seq.sequence_loss_by_example(\n",
    "    [result_outputs],\n",
    "    [targets],\n",
    "    [weights],\n",
    ")\n",
    "# Sum the loss values and divide by the batch size to get a scalar cost.\n",
    "cost = tf.reduce_sum(loss) / batch_size\n",
    "# RMSProp update step that minimizes the cost.\n",
    "optimizer = tf.train.RMSPropOptimizer(0.01, 0.9)\n",
    "train_op = optimizer.minimize(cost)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 2]), ['e', 'l', 'l', 'l'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n",
      "(array([1, 2, 2, 3]), ['e', 'l', 'l', 'o'])\n"
     ]
    }
   ],
   "source": [
    "# Build the prediction op once, outside the loop: calling tf.argmax inside the loop\n",
    "# would add a new node to the graph on every iteration.\n",
    "prediction = tf.argmax(result_outputs, 1)\n",
    "\n",
    "# Launch the graph in a session\n",
    "with tf.Session() as sess:\n",
    "    tf.initialize_all_variables().run()\n",
    "    for i in range(100):\n",
    "        # One optimizer step, then read back the most likely character id per time step.\n",
    "        sess.run(train_op)\n",
    "        result = sess.run(prediction)\n",
    "        print(result, [char_rdic[t] for t in result])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [Root]",
   "language": "python",
   "name": "Python [Root]"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}