{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "

\n", "# **자연어와 Deep Learning**\n", "## **Seq2Seq 번역모델**" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "

\n", "## **1 데이터의 정의**" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "char_arr = [ c for c in 'SPabcdefghijklmnopqrstuvwxyz나놀녀단랑무사소스어이키E'] \n", "num_dic = { n : i for i, n in enumerate(char_arr)}\n", "dic_len = len(num_dic)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "seq_data = [['word', '단어'], ['wood', '나무'], ['game', '놀이'], \n", " ['girl', '소녀'], ['kiss', '키스'], ['love', '사랑']]\n", "\n", "def make_batch(seq_data):\n", " input_batch, output_batch, target_batch = [], [], []\n", " for seq in seq_data:\n", " input = [num_dic[n] for n in seq[0]]\n", " output = [num_dic[n] for n in ('S' + seq[1])] \n", " target = [num_dic[n] for n in (seq[1] + 'E')]\n", " input_batch.append(np.eye(dic_len)[input])\n", " output_batch.append(np.eye(dic_len)[output])\n", " target_batch.append(target)\n", " return input_batch, output_batch, target_batch" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "

\n", "## **2 모델의 정의**" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import tensorflow as tf\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "tf.reset_default_graph()\n", "learning_rate = 0.01\n", "n_hidden, total_epoch = 128, 100\n", "n_class = n_input = dic_len\n", "\n", "enc_input = tf.placeholder(tf.float32, [None, None, n_input])\n", "dec_input = tf.placeholder(tf.float32, [None, None, n_input])\n", "targets = tf.placeholder(tf.int64, [None, None]) # [batch size, time steps]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "with tf.variable_scope('encode'):\n", " enc_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)\n", " enc_cell = tf.nn.rnn_cell.DropoutWrapper(enc_cell, output_keep_prob = 0.5) \n", " outputs, enc_states = tf.nn.dynamic_rnn(enc_cell, enc_input,dtype=tf.float32)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "with tf.variable_scope('decode'):\n", " dec_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)\n", " dec_cell = tf.nn.rnn_cell.DropoutWrapper(dec_cell, output_keep_prob=0.5)\n", " outputs, dec_states = tf.nn.dynamic_rnn(dec_cell, dec_input, \n", " initial_state = enc_states, \n", " dtype = tf.float32)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "model = tf.layers.dense(outputs, n_class, activation=None)\n", "cost = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(\n", " logits = model, labels = targets))\n", "optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "

\n", "## **3 모델의 학습**" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch: 1 cost = 3.721701\n", "Epoch: 10 cost = 0.165835\n", "Epoch: 19 cost = 0.016843\n", "Epoch: 28 cost = 0.005062\n", "Epoch: 37 cost = 0.004484\n", "Epoch: 46 cost = 0.001066\n", "Epoch: 55 cost = 0.000756\n", "Epoch: 64 cost = 0.000527\n", "Epoch: 73 cost = 0.000315\n", "Epoch: 82 cost = 0.000267\n", "Epoch: 91 cost = 0.000388\n", "Epoch: 100 cost = 0.000355\n", "최적화 완료!\n", "CPU times: user 1.61 s, sys: 412 ms, total: 2.03 s\n", "Wall time: 1.67 s\n" ] } ], "source": [ "%%time\n", "sess = tf.Session()\n", "sess.run(tf.global_variables_initializer())\n", "input_batch, output_batch, target_batch = make_batch(seq_data)\n", "for epoch in range(total_epoch):\n", " _, loss = sess.run([optimizer, cost],\n", " feed_dict={enc_input: input_batch,\n", " dec_input: output_batch,\n", " targets: target_batch})\n", " if epoch % 9 == 0 :\n", " print('Epoch: {:4d} cost = {:.6f}'.format((epoch + 1),loss))\n", "print('최적화 완료!')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "

\n", "## **4 모델의 검증**" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "=== 번역 테스트 ===\n", "wodr -> Translated : 단어\n", "love -> Translated : 사랑\n", "loev -> Translated : 사랑\n", "abcd -> Translated : 단어\n", "CPU times: user 297 ms, sys: 2.26 ms, total: 299 ms\n", "Wall time: 280 ms\n" ] } ], "source": [ "%%time\n", "def translate(word):\n", " seq_data = [word, 'P' * len(word)]\n", " input_batch, output_batch, target_batch = make_batch([seq_data])\n", " prediction = tf.argmax(model, 2) # [None, None, n_input]\n", " result = sess.run(prediction,\n", " feed_dict={enc_input: input_batch,\n", " dec_input: output_batch,\n", " targets: target_batch})\n", " decoded = [char_arr[i] for i in result[0]]\n", " end = decoded.index('E')\n", " translated = ''.join(decoded[:end])\n", " return translated\n", "\n", "print('\\n=== 번역 테스트 ===')\n", "for test_text in ['wodr','love','loev','abcd']:\n", " print('{} -> Translated : {}'.format(test_text,\n", " translate(test_text)))\n", "sess.close()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 2 }