{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br></br>\n",
    "# **자연어와 Deep Learning**\n",
    "## **LSTM 단어 알파벳 완성모델**\n",
    "\n",
    "<br></br>\n",
    "## **1 데이터 정의**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "char_arr = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', \n",
    "            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']\n",
    "num_dic  = {n: i for i, n in enumerate(char_arr)}\n",
    "dic_len  = len(num_dic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def make_batch(seq_data):\n",
    "    input_batch, target_batch = [], []\n",
    "    for seq in seq_data: \n",
    "        input_num = [num_dic[n] for n in seq[:-1]]  \n",
    "        target    = num_dic[seq[-1]]                \n",
    "        input_batch.append(np.eye(dic_len)[input_num])\n",
    "        target_batch.append(target)          \n",
    "    return input_batch, target_batch"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br></br>\n",
    "## **2 모델의 정의**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "tf.reset_default_graph()\n",
    "learning_rate = 0.01\n",
    "n_step = 3           \n",
    "n_hidden, total_epoch = 64, 30\n",
    "n_input = n_class = dic_len"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = tf.placeholder(tf.float32, [None, n_step, n_input])\n",
    "Y = tf.placeholder(tf.int32,   [None])\n",
    "\n",
    "W = tf.Variable(tf.random_normal([n_hidden, n_class])) \n",
    "b = tf.Variable(tf.random_normal([n_class]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "cell1 = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)\n",
    "cell1 = tf.nn.rnn_cell.DropoutWrapper(cell1, output_keep_prob=0.5)    \n",
    "cell2 = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)         \n",
    "multi_cell = tf.nn.rnn_cell.MultiRNNCell([cell1, cell2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "outputs, states = tf.nn.dynamic_rnn(multi_cell, X, dtype=tf.float32)\n",
    "outputs = tf.transpose(outputs, [1, 0, 2])\n",
    "outputs = outputs[-1]\n",
    "model   = tf.matmul(outputs, W) + b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\n",
    "                      logits = model, labels = Y))\n",
    "optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br></br>\n",
    "## **3 모델의 학습**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch: 1.0000 cost = 4.107153\n",
      "Epoch: 5.0000 cost = 1.661128\n",
      "Epoch: 9.0000 cost = 0.736937\n",
      "Epoch: 13.0000 cost = 0.514840\n",
      "Epoch: 17.0000 cost = 0.298752\n",
      "Epoch: 21.0000 cost = 0.262406\n",
      "Epoch: 25.0000 cost = 0.156218\n",
      "Epoch: 29.0000 cost = 0.171951\n",
      "최적화 완료!\n",
      "CPU times: user 1.56 s, sys: 494 ms, total: 2.06 s\n",
      "Wall time: 3.1 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "seq_data = ['word', 'wood', 'deep', 'dive', 'cold', 'cool', 'load', 'love', 'kiss', 'kind']\n",
    "\n",
    "sess = tf.Session()\n",
    "sess.run(tf.global_variables_initializer())\n",
    "input_batch, target_batch = make_batch(seq_data)\n",
    "for epoch in range(total_epoch):\n",
    "    _, loss = sess.run([optimizer, cost],\n",
    "                       feed_dict={X: input_batch, Y: target_batch})\n",
    "    if epoch % 4 == 0:\n",
    "        print('Epoch: {:.4f} cost = {:.6f}'.format(epoch + 1, loss))\n",
    "print('최적화 완료!')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br></br>\n",
    "## **4 학습 모델의 평가**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 80.4 ms, sys: 0 ns, total: 80.4 ms\n",
      "Wall time: 78 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "prediction       = tf.cast(tf.argmax(model, 1), tf.int32)\n",
    "prediction_check = tf.equal(prediction, Y)               \n",
    "accuracy         = tf.reduce_mean(tf.cast(prediction_check, tf.float32))\n",
    "\n",
    "input_batch, target_batch = make_batch(seq_data)\n",
    "predict, accuracy_val     = sess.run([prediction, accuracy],\n",
    "                                 feed_dict={X: input_batch, Y: target_batch})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "=== 예측 결과 ===\n",
      "입력값: ['wor ', 'woo ', 'dee ', 'div ', 'col ', 'coo ', 'loa ', 'lov ', 'kis ', 'kin ']\n",
      "예측값: ['word', 'wood', 'deep', 'dive', 'cold', 'cood', 'load', 'love', 'kiss', 'kind']\n",
      "정확도: 0.9\n"
     ]
    }
   ],
   "source": [
    "predict_words = []\n",
    "for idx, val in enumerate(seq_data):\n",
    "    last_char = char_arr[predict[idx]]\n",
    "    predict_words.append(val[:3] + last_char)\n",
    "\n",
    "print('\\n=== 예측 결과 ===')\n",
    "print('입력값:', [w[:-1] + ' ' for w in seq_data])\n",
    "print('예측값:', predict_words)\n",
    "print('정확도:', accuracy_val)\n",
    "sess.close()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}