{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "EncoderDecoder - BiLSTM.ipynb", "version": "0.3.2", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "accelerator": "GPU" }, "cells": [ { "metadata": { "id": "3zPcXVyfljkk", "colab_type": "text" }, "cell_type": "markdown", "source": [ "**This is tested on tensorflow-gpu = 1.13.1**\n", "\n", "This notebook aims to demonstrate the power of Seq2Seq modelling with RNNs (and their variants like LSTM / GRU) where the length of the input sequences doesn't match the length of the output sequences. \n", "\n", "Canonical use cases are:\n", "\n", "1. Machine Translation\n", "2. Text Summarisation\n", "\n", "\n", "But for the purposes of this notebook we will be using a contrived dataset. We will be translating natural-language English sentences mentioning some form of time to their corresponding numerical versions. For instance:\n", "\n", "\n", "* **four minutes past seven evening** should translate as **19:04**\n", "* **quarter past noon** should translate as **12:15**\n", "* **quarter past night** should translate as **00:15**\n", "\n", "\n", "\n", "But nevertheless you will learn both the advantages and disadvantages of an EncoderDecoder Model. 
We will also look at the motivation for attention mechanism in the EncoderDecoder Model.\n", "\n", "\n", "\n", "\n" ] }, { "metadata": { "id": "ybEztToU_Hhx", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "# Start by importing all the things we'll need.\n", "%matplotlib inline\n", "import tensorflow as tf\n", "from tensorflow import keras\n", "from tensorflow.keras.models import Model\n", "from tensorflow.keras.layers import Input, LSTM, Dense, Embedding, CuDNNLSTM, Flatten, TimeDistributed, Dropout, LSTMCell, RNN, Bidirectional, Concatenate\n", "from tensorflow.keras.callbacks import ModelCheckpoint\n", "from tensorflow.python.keras.utils import tf_utils\n", "from tensorflow.keras import backend as K\n", "\n", "import unicodedata\n", "import re\n", "import numpy as np\n", "import os\n", "import time\n", "import shutil" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "RC4JlGR-AtIB", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ " # Synthetic Training and Test data\n", "import random\n", "\n", "def generate_data(hour, minute, sentence=''):\n", "\n", " special = [15,30]\n", " suffix = \"\"\n", "\n", " dictionary = {1:\"one\", 2:\"two\", 3:\"three\", 4:\"four\", 5:\"five\", 6:\"six\", 7:\"seven\", 8:\"eight\", 9:\"nine\", 10:\"ten\", 11:\"eleven\", 12:\"twelve\", 13:\"thirteen\", \n", " 14:\"fourteen\", 16:\"sixteen\", 17:\"seventeen\", 18:\"eighteen\", 19:\"nineteen\", 20:\"twenty\", 30:\"thirty\",\n", " 40:\"forty\", 50:\"fifty\"}\n", " result = \"\"\n", " if minute == 15:\n", " result= \"quarter past\"\n", " elif minute == 30:\n", " result= \"half past\" \n", " elif minute == 0:\n", " pass\n", " else:\n", "\n", " if minute in dictionary:\n", " result = dictionary[minute] + \" minutes past\"\n", " else: \n", " minute1 = int(str(minute // 10 ) + \"0\") \n", " minute2 = minute % 10\n", " result = dictionary[minute1] + ' ' + dictionary[minute2] + \" minutes past\"\n", "\n", " if hour == 0:\n", " suffix = 
\"mid night\"\n", " elif hour >= 1 and hour <= 11:\n", " suffix = \"morning\"\n", " elif hour == 12:\n", " suffix = \"noon\"\n", " elif hour > 12 and hour <=16: \n", " suffix = \"after noon\"\n", " elif hour > 16 and hour <=19: \n", " suffix = \"evening\"\n", " elif hour > 20 and hour <=23: \n", " suffix = \"night\"\n", "\n", " save_hour = hour \n", " if hour > 12:\n", " hour = hour - 12\n", "\n", " if hour > 0:\n", " # Lets introduce some variation in the way how hours an sufffixes are formed, just for randomness\n", " if hour % 2 == 0:\n", " result = result + \" \" + dictionary[hour]+ \" in the \" + suffix \n", " else: \n", " result = result + \" \" + dictionary[hour]+ \" \" + suffix \n", " else:\n", " result = result + \" \" + suffix \n", "\n", " return save_hour, minute, result\n", "\n", "\n", "def create_dataset(num_examples=20000): \n", " output = []\n", " i = 0\n", " while i < num_examples:\n", " data = []\n", " hour = random.randint(0,23)\n", " minute = random.randint(0,59)\n", " sent = random.randint(0,3)\n", " hour, minute, result = generate_data(hour, minute)\n", " inwords = result\n", " data.append(' ' + inwords + ' ')\n", " data.append(' ' + str(hour) + \" : \" + str(minute) + ' ')\n", " i += 1\n", " output.append(data)\n", " print(\"Dataset prepared with prefix and suffixes for teacher forcing technique\")\n", " dummy_df = pd.DataFrame(output, columns=['input','output'])\n", " return output, dummy_df\n", "\n", "\n" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "rsYaLiT1Aj8g", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "class LanguageIndex():\n", " def __init__(self, lang):\n", " self.lang = lang\n", " self.word2idx = {}\n", " self.idx2word = {}\n", " self.vocab = set()\n", " self.create_index()\n", " def create_index(self):\n", " for phrase in self.lang:\n", " self.vocab.update(phrase.split(' '))\n", " self.vocab = sorted(self.vocab)\n", " self.word2idx[\"\"] = 0\n", " self.idx2word[0] = \"\"\n", " for 
i,word in enumerate(self.vocab):\n", " self.word2idx[word] = i + 1\n", " self.idx2word[i+1] = word\n", "\n", "def max_length(t):\n", " return max(len(i) for i in t)\n", "\n", "def load_dataset(num_examples):\n", " pairs,df = create_dataset(num_examples)\n", " #print(pairs[10])\n", " #print(len(pairs))\n", " out_lang = LanguageIndex(sp for en, sp in pairs)\n", " in_lang = LanguageIndex(en for en, sp in pairs)\n", " input_data = [[in_lang.word2idx[s] for s in en.split(' ')] for en, sp in pairs]\n", " output_data = [[out_lang.word2idx[s] for s in sp.split(' ')] for en, sp in pairs]\n", "\n", " max_length_in, max_length_out = max_length(input_data), max_length(output_data)\n", " input_data = tf.keras.preprocessing.sequence.pad_sequences(input_data, maxlen=max_length_in, padding=\"post\")\n", " output_data = tf.keras.preprocessing.sequence.pad_sequences(output_data, maxlen=max_length_out, padding=\"post\")\n", " return input_data, output_data, in_lang, out_lang, max_length_in, max_length_out, df" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "l3fEraiRA0IF", "colab_type": "code", "outputId": "c7d1eba3-7786-472f-ef5c-bf1cf1cc87af", "colab": { "base_uri": "https://localhost:8080/", "height": 221 } }, "cell_type": "code", "source": [ "num_examples = 50000 # Partial set for faster training\n", "input_data, teacher_data, input_lang, target_lang, len_input, len_target, df = load_dataset(num_examples)\n", "\n", "\n", "target_data = [[teacher_data[n][i+1] for i in range(len(teacher_data[n])-1)] for n in range(len(teacher_data))]\n", "target_data = tf.keras.preprocessing.sequence.pad_sequences(target_data, maxlen=len_target, padding=\"post\")\n", "target_data = target_data.reshape((target_data.shape[0], target_data.shape[1], 1))\n", "\n", "# Shuffle all of the data in unison. This training set has the longest (e.g. 
most complicated) data at the end,\n", "# so a simple Keras validation split will be problematic if not shuffled.\n", "\n", "p = np.random.permutation(len(input_data))\n", "input_data = input_data[p]\n", "teacher_data = teacher_data[p]\n", "target_data = target_data[p]\n", "\n", "BUFFER_SIZE = len(input_data)\n", "BATCH_SIZE = 128\n", "embedding_dim = 300\n", "units = 128\n", "vocab_in_size = len(input_lang.word2idx)\n", "vocab_out_size = len(target_lang.word2idx)\n", "df.head()\n" ], "execution_count": 51, "outputs": [ { "output_type": "stream", "text": [ "Dataset prepared with prefix and suffixes for teacher forcing technique\n" ], "name": "stdout" }, { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
inputoutput
0<start> eight minutes past three after noon <end><start> 15 : 8 <end>
1<start> eleven minutes past seven morning <end><start> 7 : 11 <end>
2<start> three minutes past five morning <end><start> 5 : 3 <end>
3<start> twenty one minutes past eleven morning...<start> 11 : 21 <end>
4<start> fifty one minutes past three after noo...<start> 15 : 51 <end>
\n", "
" ], "text/plain": [ " input output\n", "0 eight minutes past three after noon 15 : 8 \n", "1 eleven minutes past seven morning 7 : 11 \n", "2 three minutes past five morning 5 : 3 \n", "3 twenty one minutes past eleven morning... 11 : 21 \n", "4 fifty one minutes past three after noo... 15 : 51 " ] }, "metadata": { "tags": [] }, "execution_count": 51 } ] }, { "metadata": { "id": "omhHVCBFB928", "colab_type": "code", "outputId": "bf2f8c39-f8c9-4c1a-f04b-edcf2d6ebb18", "colab": { "base_uri": "https://localhost:8080/", "height": 612 } }, "cell_type": "code", "source": [ "# Create the Encoder layers first.\n", "encoder_inputs = Input(shape=(len_input,))\n", "encoder_emb = Embedding(input_dim=vocab_in_size, output_dim=embedding_dim)\n", "\n", "# Use this if you dont need Bidirectional LSTM\n", "# encoder_lstm = CuDNNLSTM(units=units, return_sequences=True, return_state=True)\n", "# encoder_outputs, state_h, state_c = encoder_lstm(encoder_emb(encoder_inputs))\n", "\n", "encoder_lstm = Bidirectional(CuDNNLSTM(units=units, return_sequences=True, return_state=True))\n", "encoder_outputs, fstate_h, fstate_c, bstate_h, bstate_c = encoder_lstm(encoder_emb(encoder_inputs))\n", "state_h = Concatenate()([fstate_h,bstate_h])\n", "state_c = Concatenate()([bstate_h,bstate_c])\n", "encoder_states = [state_h, state_c]\n", "\n", "# Now create the Decoder layers.\n", "decoder_inputs = Input(shape=(None,))\n", "decoder_emb = Embedding(input_dim=vocab_out_size, output_dim=embedding_dim)\n", "decoder_lstm = CuDNNLSTM(units=units*2, return_sequences=True, return_state=True)\n", "decoder_lstm_out, _, _ = decoder_lstm(decoder_emb(decoder_inputs), initial_state=encoder_states)\n", "# Two dense layers added to this model to improve inference capabilities.\n", "decoder_d1 = Dense(units, activation=\"relu\")\n", "decoder_d2 = Dense(vocab_out_size, activation=\"softmax\")\n", "# Drop-out is added in the dense layers to help mitigate overfitting in this part of the model. 
Astute developers\n", "# may want to add the same mechanism inside the LSTMs.\n", "decoder_out = decoder_d2(Dropout(rate=.4)(decoder_d1(Dropout(rate=.4)(decoder_lstm_out))))\n", "\n", "# Finally, create a training model which combines the encoder and the decoder.\n", "# Note that this model has three inputs:\n", "# encoder_inputs=[batch,encoded_words] from input language (English)\n", "# decoder_inputs=[batch,encoded_words] from output language (Spanish). This is the \"teacher tensor\".\n", "# decoder_out=[batch,encoded_words] from output language (Spanish). This is the \"target tensor\".\n", "model = Model([encoder_inputs, decoder_inputs], decoder_out)\n", "# We'll use sparse_categorical_crossentropy so we don't have to expand decoder_out into a massive one-hot array.\n", "# Adam is used because it's, well, the best.\n", "model.compile(optimizer=tf.train.AdamOptimizer(), loss=\"sparse_categorical_crossentropy\", metrics=['sparse_categorical_accuracy'])\n", "model.summary()" ], "execution_count": 53, "outputs": [ { "output_type": "stream", "text": [ "__________________________________________________________________________________________________\n", "Layer (type) Output Shape Param # Connected to \n", "==================================================================================================\n", "input_5 (InputLayer) (None, 11) 0 \n", "__________________________________________________________________________________________________\n", "embedding_4 (Embedding) (None, 11, 300) 11400 input_5[0][0] \n", "__________________________________________________________________________________________________\n", "input_6 (InputLayer) (None, None) 0 \n", "__________________________________________________________________________________________________\n", "bidirectional_2 (Bidirectional) [(None, 11, 256), (N 440320 embedding_4[0][0] \n", "__________________________________________________________________________________________________\n", "embedding_5 (Embedding) 
(None, None, 300) 19200 input_6[0][0] \n", "__________________________________________________________________________________________________\n", "concatenate_4 (Concatenate) (None, 256) 0 bidirectional_2[0][1] \n", " bidirectional_2[0][3] \n", "__________________________________________________________________________________________________\n", "concatenate_5 (Concatenate) (None, 256) 0 bidirectional_2[0][3] \n", " bidirectional_2[0][4] \n", "__________________________________________________________________________________________________\n", "cu_dnnlstm_5 (CuDNNLSTM) [(None, None, 256), 571392 embedding_5[0][0] \n", " concatenate_4[0][0] \n", " concatenate_5[0][0] \n", "__________________________________________________________________________________________________\n", "dropout_5 (Dropout) (None, None, 256) 0 cu_dnnlstm_5[0][0] \n", "__________________________________________________________________________________________________\n", "dense_4 (Dense) (None, None, 128) 32896 dropout_5[0][0] \n", "__________________________________________________________________________________________________\n", "dropout_4 (Dropout) (None, None, 128) 0 dense_4[0][0] \n", "__________________________________________________________________________________________________\n", "dense_5 (Dense) (None, None, 64) 8256 dropout_4[0][0] \n", "==================================================================================================\n", "Total params: 1,083,464\n", "Trainable params: 1,083,464\n", "Non-trainable params: 0\n", "__________________________________________________________________________________________________\n" ], "name": "stdout" } ] }, { "metadata": { "id": "6JWOVb1iCHga", "colab_type": "code", "outputId": "19c82399-5c41-48bb-9061-5290f58d90d7", "colab": { "base_uri": "https://localhost:8080/", "height": 816 } }, "cell_type": "code", "source": [ "# Note, we use 20% of our data for validation.\n", "epochs = 15\n", "history = model.fit([input_data, 
teacher_data], target_data,\n", " batch_size=BATCH_SIZE,\n", " epochs=epochs,\n", " validation_split=0.2)\n", "\n", "# Plot the results of the training.\n", "import matplotlib.pyplot as plt\n", "\n", "plt.plot(history.history['sparse_categorical_accuracy'], label=\"Training loss\")\n", "plt.plot(history.history['val_sparse_categorical_accuracy'], label=\"Validation loss\")\n", "plt.show()" ], "execution_count": 57, "outputs": [ { "output_type": "stream", "text": [ "Train on 40000 samples, validate on 10000 samples\n", "Epoch 1/15\n", "40000/40000 [==============================] - 3s 81us/sample - loss: 0.0013 - sparse_categorical_accuracy: 0.9996 - val_loss: 6.4224e-06 - val_sparse_categorical_accuracy: 1.0000\n", "Epoch 2/15\n", "40000/40000 [==============================] - 3s 78us/sample - loss: 0.0017 - sparse_categorical_accuracy: 0.9995 - val_loss: 1.4393e-05 - val_sparse_categorical_accuracy: 1.0000\n", "Epoch 3/15\n", "40000/40000 [==============================] - 3s 78us/sample - loss: 0.0036 - sparse_categorical_accuracy: 0.9991 - val_loss: 6.6231e-06 - val_sparse_categorical_accuracy: 1.0000\n", "Epoch 4/15\n", "40000/40000 [==============================] - 3s 79us/sample - loss: 0.0011 - sparse_categorical_accuracy: 0.9997 - val_loss: 6.4520e-06 - val_sparse_categorical_accuracy: 1.0000\n", "Epoch 5/15\n", "40000/40000 [==============================] - 3s 78us/sample - loss: 0.0013 - sparse_categorical_accuracy: 0.9996 - val_loss: 6.3670e-06 - val_sparse_categorical_accuracy: 1.0000\n", "Epoch 6/15\n", "40000/40000 [==============================] - 3s 78us/sample - loss: 6.1585e-04 - sparse_categorical_accuracy: 0.9998 - val_loss: 6.3543e-06 - val_sparse_categorical_accuracy: 1.0000\n", "Epoch 7/15\n", "40000/40000 [==============================] - 3s 78us/sample - loss: 5.6072e-04 - sparse_categorical_accuracy: 0.9999 - val_loss: 6.3567e-06 - val_sparse_categorical_accuracy: 1.0000\n", "Epoch 8/15\n", "40000/40000 
[==============================] - 3s 78us/sample - loss: 9.3691e-04 - sparse_categorical_accuracy: 0.9998 - val_loss: 6.3618e-06 - val_sparse_categorical_accuracy: 1.0000\n", "Epoch 9/15\n", "40000/40000 [==============================] - 3s 78us/sample - loss: 0.0016 - sparse_categorical_accuracy: 0.9996 - val_loss: 6.3789e-06 - val_sparse_categorical_accuracy: 1.0000\n", "Epoch 10/15\n", "40000/40000 [==============================] - 3s 79us/sample - loss: 8.1809e-04 - sparse_categorical_accuracy: 0.9998 - val_loss: 6.3663e-06 - val_sparse_categorical_accuracy: 1.0000\n", "Epoch 11/15\n", "40000/40000 [==============================] - 3s 82us/sample - loss: 5.0451e-04 - sparse_categorical_accuracy: 0.9999 - val_loss: 6.3527e-06 - val_sparse_categorical_accuracy: 1.0000\n", "Epoch 12/15\n", "40000/40000 [==============================] - 3s 87us/sample - loss: 5.1703e-04 - sparse_categorical_accuracy: 0.9998 - val_loss: 6.3574e-06 - val_sparse_categorical_accuracy: 1.0000\n", "Epoch 13/15\n", "40000/40000 [==============================] - 4s 88us/sample - loss: 7.2768e-04 - sparse_categorical_accuracy: 0.9998 - val_loss: 6.3778e-06 - val_sparse_categorical_accuracy: 1.0000\n", "Epoch 14/15\n", "40000/40000 [==============================] - 3s 84us/sample - loss: 0.0028 - sparse_categorical_accuracy: 0.9993 - val_loss: 6.4043e-06 - val_sparse_categorical_accuracy: 1.0000\n", "Epoch 15/15\n", "40000/40000 [==============================] - 3s 78us/sample - loss: 7.0991e-04 - sparse_categorical_accuracy: 0.9998 - val_loss: 6.4202e-06 - val_sparse_categorical_accuracy: 1.0000\n" ], "name": "stdout" }, { "output_type": "display_data", "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYoAAAD8CAYAAABpcuN4AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xd8XNW16PHf0qiXUbGKJblIbtiS\nMbaRZDqmhGtKIJQEcymmhSSQHrjAzbvkhYRLckNeAiEJEEpMCC2EJOSGYmJaEmzccJWMPbItW8WW\n3FStvt8fcyQGI49G0sycM9L6fj76aLTnzJw1tjRr9tlliTEGpZRS6lii7A5AKaWUs2miUEop5Zcm\nCqWUUn5polBKKeWXJgqllFJ+aaJQSinllyYKpZRSfmmiUEop5ZcmCqWUUn5F2x1AMGRmZpqCggK7\nw1BKqYiydu3a/caYrMGOGxWJoqCggDVr1tgdhlJKRRQRqQrkOL30pJRSyi9NFEoppfzSRKGUUsov\nTRRKKaX80kShlFLKr4AShYg8KSL1IrL5GPeLiDwkIh4R2Sgi833uWyIi262vJT7tJ4rIJusxD4mI\nWO0ZIvKmdfybIpI+0heplFJq+ALtUfwWWOTn/vOB6dbXLcCvwfumD3wPWACUAd/zeeP/NfBFn8f1\nPf9dwHJjzHRgufWzUkopmwS0jsIY856IFPg55BLgaeOtq7pSRNJEJBdYCLxpjDkIICJvAotE5B3A\nbYxZabU/DXwOeM16roXW8y4F3gHuHMqLCthrd8HeTSF5aqWUCovxx8P5PwrpKYI1RpEP7PH5udpq\n89dePUA7QI4xps66vRfIGeiEInKLiKwRkTUNDQ0jfwVKKaUG5OiV2cYYIyLmGPc9BjwGUFJSMuAx\ngwpxFlZKqdEgWD2KGmCiz88TrDZ/7RMGaAfYZ122wvpeH6QYlVJKDUOwEsUrwHXW7KeTgEbr8tEb\nwHkikm4NYp8HvGHd1yQiJ1mzna4D/uLzXH2zo5b4tCullLJBQJeeROQ5vAPMmSJSjXcmUwyAMeYR\n4FXgAsADtAE3WPcdFJEfAKutp7q3b2AbuBXvbKoEvIPYr1ntPwJeFJGbgCrgC8N/eUoppUZKvBOV\nIltJSYnR3WOVUmpoRGStMaZksON0ZbZSSim/NFEopZTySxOFUkopvzRRKKWU8ksThVJKKb80USil\nlPJLE4VSSim/NFEopZTySxOFUkopvzRRKKWU8ksThVJKKb80USillPJLE4VSSim/NFEopZTySxOF\nUkopvzRRKKWU8ksThVJKKb80USillPJLE4VSKmRaOrr5xvMfcv9rFTS2ddkdjhqmaLsDUEqNToda\nO7n+qVVsrm2i1xieX7WH286aynUnFxAf47I7PDUE2qNQSgXdvqZ2vvDoCir2NvPYtSfyt6+dzgkT\n0/jvV7dyzk/f5eV11fT2GrvDVAHSRKGUCqrdB9q44pH3qT18hKU3lHHOrByK8tw8fWMZv795AelJ\nMXz7xQ1c+It/8u62BozRhOF0miiUUkGzfV8zn3/0fZrbu3n2iydx8tRxn7j/1GmZvHLbaTy4eC4t\nHV0seXIV1zzxAZtrGm2KWAVCE4VSKig2Vh/mC4+uwBh44ZaTOWFi2oDHRUUJl8zN5+/fPpN7Liqi\nvLaJi37xT77+3IfsOdgW5qhVIGQ0dPtKSkrMmjVr7A5DOZwxhvauXlo7u2nt6Kalo5u2zh5aOro5\n0tlDSUE62SnxdocZkVbuOMDNS9eQnhTDMzctYPK4pIAf29TexSPvVPLkv3bS02u49qQCvnb2NNKT\nYkMYsQIQkbXGmJJBj9NEoSJJY1sXa3cfpKWjh9aObuurp//N35sAemgbIBm0dfbQ42cANTM5jt/e\nUMrs/NQwvqLI99bWfXzlmXVMykjkdzctYHzq8JLt3sZ2fvbmN
v6wdg9JsdF8eeFUbjy1kIRYnSEV\nKpoo1Kh0029Xs3xr/afa42OiSIqNJinO+op1kRQXTXJcNIm+t+NcVls0yXHe9sTYaDq6e7jjDxs5\n3NbJr685kTNmZNnw6iLPKxtq+fYL65mV62bpjWVkBKEXsG1fM//z+lb+XlHPeHc83/7MDC4/cQKu\nKAlCxMqXJgo16lQdaGXhA++w5OQCrjlpUn9SSIxxEe0a+XDbvqZ2ljy5Ck99Cz++fA6XnzghCFGP\nXs9+sJvv/nkTpQUZPLGkhJT4mKA+/wc7DnD/a1tZv+cwM3KSuXPRTM6emY2IJoxgCTRR6GC2ihjP\nrKzCJcJXFk5lWnYKuakJuONjgpIkAHLc8bz45ZNZMCWD7/xhA79826NTN4/hsfcq+c8/bWLhjCye\nvrEs6EkCYMGUcfzp1lP41dXz6eox3LR0DVc+tpIPdx8K+rmUf5ooVEQ40tnDC6v38G+zx5PjDt2A\nszs+hqeuL+OSuXn85I2P+K+/bPY7rjHWGGN44I2P+O9Xt3LRnFwevbYkpKusRYQLjs9l2bfO4AeX\nFLOjoYVLf/U+t/5+LTv3t4bsvOqTdAsPFRH+sr6GpvZulpxcEPJzxUZH8bMvzGW8O55H39tBfVMH\nD101b8xvO9Hba/j+X7ewdEUVV5VN5IefOz5s4wYxriiuPbmAS+dP4Dfv7eA3/9jBG1v2MSMnhdl5\nbo6fkMrs/FRmjXfr4HcI6BiFcjxjDBc89E+MMbz2jdPDeo36qX/t5N7/LWf+pHQev65kzE7Z7O7p\n5T9e2sjLH9ZwyxlTuPv8mbaOFdQ3t/PMyt2s33OYzTWNHGztBMAVJUzLSmZ2fiqz890cn59KUZ6b\nxFj9TDyQQMco9F9POd6aqkNU1DVx/2XHh/3N6YZTCxnvjucbL6zn8kfeZ+kNZUzMSAxrDHZr7+rh\n6899yLLyfdx+3gxuO2ua7QPK2Sne2VDg/SBR19jOpppGNltf725r4I/rqgGIEpjanzxS+5NHcpy+\n/QVK/6WU4y19fxfu+GgumZtny/nPPz6Xcclx3Lx0NZf9+n2eun7srLVo7ejmlt+t4V+eA3z/4mKW\nnFJgd0ifIiLkpSWQl5bAvxWPB7zJY19TB5trGvsTyL88+/nThzXWY6AwM4nj81OZnedNIMX5btwh\nGJQPpWse/4Dzjx/P1Qsmh/Q8ASUKEVkEPAi4gMeNMT866v7JwJNAFnAQuMYYU23d92PgQuvQHxhj\nXrDazwYeAGKBtcBNxphuEUkFngEmWfE9YIx5akSvUkWsfU3tvL55L9efUmDr5YOywgz++JVTWPLk\nKq58dAWPXHsip08f3WstGtu6uP63q9iw5zA//fwJETVdWEQYnxrP+NR4zi3K6W+vb2pnc20jm6qb\n2FzbyKqdB/nL+tr++wvGJXLa9EzuvXg2UQ5ft3GgpYN/evZzZhjW/Az6lyciLuCXwGeAamC1iLxi\njCn3OewB4GljzFIrAdwPXCsiFwLzgblAHPCOiLwGtABLgXOMMdtE5F5gCfAEcBtQboz5rIhkAR+J\nyO+NMZ3BetEqcjz7wW56jOGak0L7iSkQ03NSePnWU7n+qVXc8NRq/ueKOVw2P3LePIeivrmd655Y\nxY6GVn519Yksmj3e7pCCItsdz9nueM6e+XHy2N/S8YlLVs+s3M1Np02hMDPwbUjsUFHXDEBRnjvk\n5wpkemwZ4DHG7LDerJ8HLjnqmCLgLev22z73FwHvGWO6jTGtwEZgETAO6DTGbLOOexO43LptgBTx\nXgRNxttD6R7yK1MRr7O7l2dX7WbhjCwKHPJHOz7Vu9aitCCDb7+4gV+9M/rWWlQfauMLj6yg6kAb\nT15fOmqSxLFkJsex8Lhsvnr2dL732WIAttQ6fzfb8jpvjLNynZEo8oE9Pj9XW22+NgCXWbcvxftG\nP85qXyQiiSKSCZwFTAT2A
9Ei0jfafoXVDvAwMAuoBTYB3zDG9B4dlIjcIiJrRGRNQ0NDAC9DRZrX\nt+ylobmD6xx2XdwdH8Nvbyzl4hPy+J/XP+J7r2wZNWstKhta+MIjKzjY2skzNy/gtOmZdocUVtNz\nkomOEsprm+wOZVDltU3kpsYHZduUwQRrwd3twJki8iFwJlAD9BhjlgGvAu8DzwErrHYDLAZ+JiKr\ngGagx3qufwPWA3l4L1k9LCKfSpnGmMeMMSXGmJKsrNF9rXisevr9XUwel8iZDhwLiIt28fMr5/Kl\nM6bw9Ioqbv39Wtq7egZ/oIOV1zbxhUdW0NnTy/O3nMyJk9PtDins4qJdTMtOprwuAhJFXRNFYehN\nQGCJooaPP+0DTLDa+hljao0xlxlj5gHftdoOW9/vM8bMNcZ8BhBgm9W+whhzujGmDHivrx24AXjZ\neHmAncDMYb9CFZE21zSypuoQ15402bGDilFRwt0XzOKei4pYVr6Pqx//gEOtkTmU1ttr+NYL64lx\nRfHil04Oy3VvpyrOS2WLw3sU7V09VDa0hu3/KZBEsRqYLiKFIhKLtyfwiu8BIpIpIn3PdTfeGVCI\niMu6BIWIzAHmAMusn7Ot73HAncAj1uN3A+dY9+UAxwE7hvsCVWT63YoqEmJcfP7EiYMfbLMbTyvk\n4avms6m6kSseeZ/qQ5FXfGdZ+V4+2tfM3RfMZEpWst3h2Kooz01Dcwf1ze12h3JM2/e10NNrnNOj\nMMZ0A18F3gAqgBeNMVtE5F4Rudg6bCHe2UnbgBzgPqs9BviHiJQDj+GdNts3MH2HiFTgHeD+qzGm\nbzD8B8ApIrIJWA7caYzZP9IXqiLH4bZO/ry+hs/Nyyc1MTLmtV84J5enbyqjobmDy371fkQMhvbp\n7TU8uNzDlMwkLppjz1oVJym2PqU7eZyibyA7XD2KgCamG2NexTvW4Nt2j8/tl4CXBnhcO96ZTwM9\n5x3AHQO01wLnBRKXGp1eXLOHju5erjvZ/imxQ3HSlHG81L/WYiWPXHNiRAwG/71iHxV1Tfz08ydo\nzQc+nkVUXtfEwuOybY5mYOW1TSTHRTMxPTy7BOjuscpRenoNz6zcTVlhRlim/QXbjJwUXr71FCak\nJ3D9U6v404fVdofklzGGh97azuRxibatfHea1IQYJqQnOHqcoryuiVm5KWEbv9NEoRzl3W317D7Y\nFpZdYkMlNzWhf63Ft17YwLIte+0O6Zje2lrP5pombjtrWtDqeowGxXluKhyaKHp7DRV1zWEbnwBN\nFMphlr5fRY47jvOKcwY/2MH61locl5PCfa9W0NHtvKmzxhgeWr6diRkJXDrv6KVRY1tRbio7D7TS\n2uG8tb57DrXR0tEd1plpmiiUY+zc38q72xq4esFkYkbBp9u4aBffvXAWVQfa+N2KKrvD+ZR3tzWw\nobqR2xZOGxX/3sFUnOfGGNi613m9ir5B9qLc8G1Mqb8dyjF+t6KKGJewuMz5U2IDdcaMLBYel8WD\ny7f310xwAmMMDy7fTn5awqjdr2okihw886m8rglXlDA9J3zTmDVRKEdo7ejmD2v3cP7sXLJTQlfq\n1A7fvWAWbZ09PLR8u92h9PunZz8f7j7MVxZOJTZa3waOlpsaT3pijCMHtMtrm5iWlRzWiov6G6Ic\n4c/ra2hu72bJKZE1JTYQ03NSuKpsIr9bWYWnvsXucLy9ib9vJzc1ns+XaG9iICJCUZ7bkVt5lNc1\nhX3lvCYKZTtjDE+/X0Vxnpv5k0bn/kLfPHcGCTEufvRahd2hsGLHAdZUHeIrC6cSF631pY+lKNfN\n1r3NdPd8ak9S2xxs7aSusT2sM55AE4VygA92HuSjfc0sObnA9hKboZKZHMdtZ03j7xX1/Mtj70YD\nD/59OznuOL5QMnrGgkKhOC+Vzu5eKhta7Q6lX4XVw9EehRpznl6xi7TEGC4e5Qu+bji1gPy
0BH74\ntwrbtiVfueMAH+w8yJfPnBrWa9yRqH9Au84527H0Da6HezGqJgplq7rGI7yxZR9Xlkwc9W9c8TEu\n7jp/JhV1TfxxrT0rtn/x1nYyk+O4qmySLeePJFMyk4iLjmJLjXPGKcrrwleDwpcmCoWn3r7rsM9+\nsJteh5Q6DYeL5uQyb1IaP1n2UdgXc63ZdZB/eQ7w5TOnjPqkHAzRrihmjk9x1IB2eW34alD40kQx\nxtU1HuEzP3uP255dR1eYk0VHdw/PrdrNOTOzmZgRns3N7CYi/NdFRTQ0d/Dou5VhPfeDy7eTmRzL\n1QvGRlIOhqI8N1tqmxxR7ra9qwdPQ4sttUI0UYxxW/c2Ywy8sWUf335xQ1ivnb+2aS/7Wzq5LoL3\ndRqO+ZPS+ewJeTz2jx3UNR4JyznX7T7EP7bv54unTyEhVnsTgSrKS6XxSBe1jfbXpgh3DQpfmijG\nuEprXv+XzpzCXzfUcucfN9IbpmSxdMUupmQmcdo052/FHWz/8W/H0WvgJ298FJbzPbR8OxlJsWPm\nEl+w9L0pO2GFdrhrUPjSRDHGeepbyEiK5e7zZ/GNc6bz0tpq7nllc8i72hurD/Ph7sNce7JzS52G\n0sSMRG46rZCX19WwsfpwSM+1Yc9h3vmogZtPLyQpLqASNMoyKzcFERxRiCrcNSh8aaIY4yobWpia\nlQTAN8+dzpfOnMIzK3dz398qQposnl5RRWKsi8tPHLsrg29dOJVxSbH8MMT/1r94aztpiTFj7hJf\nMCTGRlOYmeSQHkV4a1D40kQxxnnqW5iW7d1cTES4a9FMrj+lgMf/uZOfLtsWknMebO3klQ21XDY/\nH3d8ZJQ6DYWU+Bi+fd4MVu08yBtb9oXkHJtrGvl7RT03nVpIsvYmhqU4L9X2PZ/sqEHhSxPFGHag\npYNDbV1Mzfp4F0oR4Z6LilhcOpGH3/bw8FvB38juhdV76Ozu1U+4wJUlE5mencz9r1XQ2R38WWcP\nLd+OOz6aJacWBP25x4qiXDc1h4/Q2NZlWwx21KDwpYliDOvboK6vR9EnKkq479LjuXRePg8s28bj\n/9gRtHN6S51WcfKUcczISQna80aqaFdUf82Kp1fsCupzl9c2sax8HzeeVjime24j1ffmvMXGFdp2\n1KDwpYliDPM0DJwoAFxRwk+umMOFx+fyw79V8LuVwSm8s7xiHzWHj4zKXWKHa+Fx2ZwxI4uHlm/n\nUBBrVvzire2kxEVzwymFQXvOscgJM5/sqEHhSxPFGFZZ30pCjIu81IQB7492RfHzxXM5d1Y2//Xn\nzby4Zs+Iz/n0iipyU+M5d1ZklzoNtu9eMIuWjm4eDFLNio/2NvPa5r3ccGoBqYnamxiJrJQ4slPi\nbF2hbUcNCl+aKMYwT0MLU7KS/M6iiHFF8fC/z+f06Znc+ceN/GV9zfDPV9/CPz37ueakyURr6c1P\nOG58CovLJvHMyioqG0Zes+IXb20nKdbFjadpbyIYivPctvcoZuXad6lW/1rHsEqfGU/+xMe4eOza\nEsoKMvj2ixt4ffPeYZ3vmZVVxLqiuLJUt7ceyLfOnUF8jIv7X906oufZvq+Zv22qY8kpBaQlhnfz\nuNGqKM+Np76F9q6esJ+7vwaFTQPZoIlizGrt6Kbm8BGmZQV2zTMh1sUT15dywoRUvvbcOt7eWj+k\n87V0dPPS2moumpNLZnLccEIe9bJS4rj1rKn8vWIf71cOv2bFw297SIhxcfPpU4IY3dhWlJtKd69h\n+77wVyjsr0Fh00A2aKIYs3ZYxVgC6VH0SY6L5qkbyjhufApfembtkArw/GldNS0d3Vx3SsFQQx1T\nbjy10Fuz4n+HV7OisqGFv26o5dqTJ4d9K+rRrNjG2hQf16DQS0+26O01Q/5kPFr0XQefOoREAZCa\nEMPvblxA4bgkbl66htW7Dg76GGMMS1dUMWdCKnMnpg0
r3rEiPsbFnefPpLyuiZfXDb1mxS/f8hAX\n7eKL2psIqkkZiSTHRdsyTlFe18R4dzzjbOyJj+lE8cKaPdzw29W8uqnO7lDCzlPfgitKKBiXNOTH\npifF8szNC8hNi+eGp1azfo//vYpWVB7AU9+iC+wC9Nm+mhVvfERbZ+A1K3btb+XP62u45qRJenkv\nyKKihFm5Kbas0C6vbbJ1fALGeKK44sQJnDAxjbv+uJGaw+HZ7tkpPPUtTM5IJDZ6eL8CWSlxPHvz\nSWQkxXLdEx/43TRt6YpdpCfGcNGc3GFGO7aICP/nwiLqmzt49N3AFzv+8m0PMa4ovniG9iZCoSjX\nTUVdU9h2VwafGhQ2bd3RZ0wnihhXFA8tnkuvgW8+/6FtVd7s4GloGfJlp6ONT43n2S8uIDkummuf\nWMW2fc2fOqbm8BHeLN/H4rJJWlVtCE6cnM6Fc3J59L1K9gZQC2H3gTZe/rCGf18wieyU+DBEOPYU\n56XS2tlD1cG2sJ2zvwaF9ijsNXlcEj/83GxW7zrEw2977A4nLLp6etm1v3VIA9nHMiE9kWe/eBLR\nUcLVj3/Azv2tn7j/99aK7qsXaI3mobpr0Ux6ewOrWfGrdzy4ooQvnzk1DJGNTX1v1uEcp+ivQaE9\nCvt9bl4+l83L56Hl2wManI10uw+20d1rPrEZ4EgUZCbx7BcX0NtruPo3K9ljfeJq7+rh+dV7OHdW\nDhNs2EM/0k3MSOSG0wr447pqNlUf+9Je9aE2XlpbzVWlE8lxa28iVKbnJBMdJWGtTVFe20RSrItJ\nNpcK1kRhufdzs5mYkcg3n19v6y6R4XCszQBHYlp2Cr+7aQGtnT38++MrqWs8wt821nGwtZMlOiV2\n2G47axoZSbH88G/lx6xZ8at3KokS4csLtTcRSnHRLqZlJ4d1Kw/vimy37cW9NFFYkuOieWjxPPY1\ntfOff9rkiGLqodKXKPoKFgVLUZ6bp28s43BrF1f/5gN+848dTM1K4pSp44J6nrHEHR/Dtz4zgw92\nHmRZ+adrVtQePsIf1uzh8yUTyD3Gnl0qeIrCuJVHfw0Km8cnIMBEISKLROQjEfGIyF0D3D9ZRJaL\nyEYReUdEJvjc92MR2Wx9XenTfraIrLPal4pItM99C0VkvYhsEZF3R/oiA3XCxDS+c95x/G1TXVA2\nwHOqyvoWxrvjSQnB1tMnTEzjqRtK2dvUzta9zSw5pQCRsVfqNJiuKp3ItOxk7n/10zUrHnm3EoBb\nz5pmR2hjTnFeKvXNHTQ0d4T8XNWHjnhrUNg8PgEBJAoRcQG/BM4HioCrRKToqMMeAJ42xswB7gXu\ntx57ITAfmAssAG4XEbeIRAFLgcXGmNlAFbDEekwa8CvgYmNMMfD5Eb/KIfjSGVM4ddo4/u8r5f2f\nvEcbT0NgezwNV0lBBk9eX8rn5uZx2fyxW+o0WPpqVuw60MYzPtu9721s5/lVe7jixAnkp2lvIhz6\ntxwPw+Wn/oHsCOlRlAEeY8wOY0wn8DxwyVHHFAFvWbff9rm/CHjPGNNtjGkFNgKLgHFApzGmr9bm\nm8Dl1u1/B142xuwGMMaEdel0VJTw/74wl/iYKL7+3Id0dId/E7BQMsZQWd8S9MtORztpyjh+vnie\nlt8MkoUzsjh9eiYPLt/O4TZvzYpH3q2k1xhuXai9iXDpSxThGNAur/XWoHBCga9AEkU+4Hsdptpq\n87UBuMy6fSmQIiLjrPZFIpIoIpnAWcBEYD8QLSIl1mOusNoBZgDp1iWstSJy3UBBicgtIrJGRNY0\nNDQE8DICl+OO5ydXnEB5XRP/8/rgUxMjyd6mdlo7e0Lao1DBJyJ898JZNLd38dByD/VN7Ty3ajeX\nzc9nos0zYsaS1MQYJqQnhGWcoryuialZSY5YfxSsj3u3Aw+LyPXAe0AN0GOMWSYipcD7QAOwwmo3\nIrIY+JmIxAHLgL6
P7tHAicA5QAKwQkRW+vQ+ADDGPAY8BlBSUhL0kedzi3JYcvJknvjnTk6fnsnC\n47KDfQpb9A9ka6KIODPHu7mydBJPr9hFzWHvFOfbdGwi7IpywzOgXV7bRFlhRsjPE4hAehQ1fPxp\nH2CC1dbPGFNrjLnMGDMP+K7Vdtj6fp8xZq4x5jOAANus9hXGmNONMWV4k0tfIqgG3jDGtBpj9lv3\nnTDsVzgCd18wi5njU7j9DxvCMngVDqGYGqvC59ufmUFcdBRvbNnHJXPzmDyMvbrUyBTnpbLzQCut\nHYHvwzVUh1o7qbW5BoWvQBLFamC6iBSKSCywGHjF9wARybQGqAHuBp602l3WJShEZA4wB2/vARHJ\ntr7HAXcCj1iP/wtwmohEi0gi3kHwiuG/xOGLj3Hxi6vm0dzezXf+sCGse7yESmVDCynx0WTppnER\nKSsljq+dM51YVxRf1d6ELYry3BgDW/d+esuaYHFCDQpfgyYKY0w38FXgDbxv2C8aY7aIyL0icrF1\n2ELgIxHZBuQA91ntMcA/RKQc72Wia6znA7hDRCrwDnD/1RjzlnW+CuB1q30V8LgxZvPIX+rwTM9J\n4b8uKuK9bQ08+a+ddoURNB6rqp1OWY1cXzpjCiv/8xymBGllvRqa/toUIRzQ7ptVZWcNCl8BjVEY\nY14FXj2q7R6f2y8BLw3wuHa8M58Ges47gDuOcd9PgJ8EEls4XL1gEu9ta+DHr2/lpCnjmJ3vjCw/\nHJ76Vs46LsvuMNQIiIgWJbJRbmo8aYkxIZ0iW15rfw0KX7oyOwAiwo8vn8O4pDi+/tyHIb02GUqN\nbV3sb+nQ8QmlRkBEKMp1h7Q2RXmd/TUofGmiCFB6Uiw/u3IuOw+08v2/brE7nGHxNHivqWqiUGpk\nivPcbN3bHJLSBO1dPXjq7a9B4UsTxRCcPHUcty2cxotrqvnfjbV2hzNklfXeLcCDtWusUmNVUZ6b\nzu5eKhtaBz94iDz1LXQ7oAaFL00UQ/SNc6czb1Iad7+8qX877UjhaWghNjpKF2gpNULFed5xyr5t\nNoKpb42G9igimLcq3jww8M0X1kdUVTxPfQtTMpNw2bxlsVKRbkpmErHRUSFZeFde54waFL40UQzD\nxIxEfnjpbNZWHeKhtyKnKp6nfuTlT5VS3o0aZ45PCcmAdnmtM2pQ+NJEMUyXzM3n8vkTePit7aza\n6fyqeO1dPew51MY0HZ9QKiiK89yU1zUFtXZNb69x3Iwn0EQxIt+/pJhJGYl88/kPHV8Vb+f+VozR\nPZ6UCpaiXDeH27qobWwP2nM6qQaFL00UI5AcF81DV82joaWDu17e6OiqeP17PGmPQqmgKOob0A7i\n5Scn1aDwpYlihOZMSOP28441f223AAAVd0lEQVTjtc17eX61c6vieepbEIEpIa5DodRYMXN8CiJB\nThQOqkHhSxNFEHzx9CmcPj2T7/91C5760G0UNhKehhYmpic6Ym97pUaDpLhoCjOTglrEyEk1KHxp\nogiCqCjhp58/gcTYaL723Hrau5xXFa+yPrTlT5Uai4py3UHd86m8tslx4xOgiSJost3xPPD5OVTU\nNfHj17faHc4n9PQaduxvDXn5U6XGmqI8N9WHjgRlMovTalD40kQRRGfPzOH6Uwp46l+7eHtrWEt9\n+1V9qI3O7l7tUSgVZB+v0B55r8JpNSh8aaIIsrvOn8nM8Sn8nz9vdswsKK1qp1Ro9F0mCsY4hdNq\nUPjSRBFk8TEurj15MjWHj1B1wBl7QX08NdZ5v4BKRbKslDiyU+KC0qNwWg0KX5ooQqCswFsQfdUu\nZ6zY9tS3kJkcR2pijN2hKDXqFOW5gzJFtryuyZG9CdBEERLTspNJT4xhtUO29qhsaNGBbKVCpCjX\njae+hY7u4c927K9B4cCBbNBEERIiQklBBqsd0KMwxvTXyVZKBV9xXirdvYbt+1qG/
Rz9NSgcOJAN\nmihCpqwgg10H2qhvDt4+MMPR0NJBU3u3JgqlQqSvFzCSAe3+GhTaoxhbSgu94xSrdx6yNQ6d8aRU\naE3OSCQp1jWicYryuiYSY11MdlANCl+aKEKkOM9NQozL9stPfaUaNVEoFRpRUcKsXPeIalM4sQaF\nL00UIRLjimL+5DTba1VU1reQFOtivDve1jiUGs2K89xU1DXR2zv0tVP9NSgcuHVHH00UIVRakEHF\n3iaa2u2rVdFX1U7EmZ9UlBoNivLctHb2sPvg0NdO9degcOj4BGiiCKmyggyMgbVV9o1TeOpbtAaF\nUiHWN1tpOJef+mtQaI9ibJo3KZ3oKLFtPUVzexd7m9q1qp1SITY9J5noKOl/0x+K8tomogSOG+/M\nxXagiSKkEmJdzM5PtW1Ae4cOZCsVFvExLqZlJw+zR9HE1Kxkx9Wg8KWJIsTKCjPYsKfRlhoVfVNj\np+qlJ6VCbrhbeVTUNTt6fAI0UYRcaUEGnT29bKwOXhWsQHkaWoiOEiaPc+bcbKVGk6JcN/XNHTQ0\ndwT8mMNtndQcPuLo8QnQRBFyJZPTAWy5/OSpb6EgM4kYl/43KxVqw6lN0Xes9ijGuPSkWKZnJ9uy\nnqJSZzwpFTZ9vYKhXH7qO3aW9ihUaWEG66oO0TOMxTjD1dndS9XBNqZm666xSoVDamIM+WkJQ9rz\nqbyuiRx3HJkOrEHhSxNFGJQVZNDc0c3WvcErwj6YqgOt9PQanfGkVBgV57mHdump1tkrsvtoogiD\njzcIDN/lJ61qp1T4FeW52bm/ldaO7kGP7eh2dg0KXwElChFZJCIfiYhHRO4a4P7JIrJcRDaKyDsi\nMsHnvh+LyGbr60qf9rNFZJ3VvlREoo96zlIR6RaRK0byAp0gPy2B/LQEVu8K3wrt/qmxeulJqbAp\nzkvFGNi6t3nQY7fvc3YNCl+DJgoRcQG/BM4HioCrRKToqMMeAJ42xswB7gXutx57ITAfmAssAG4X\nEbeIRAFLgcXGmNlAFbDkqHP+GFg2spfnHKUF6azadRBjwjNO4WloIT8tgcTY6MEPVkoFRV/vIJDL\nT5Ey4wkC61GUAR5jzA5jTCfwPHDJUccUAW9Zt9/2ub8IeM8Y022MaQU2AouAcUCnMWabddybwOU+\nz/c14I9A/RBfj2OVFmbQ0NxB1YGhbxo2HJUNLUzR8qdKhVVeajypCTGUBzCgXV7r7BoUvgJJFPnA\nHp+fq602XxuAy6zblwIpIjLOal8kIokikgmcBUwE9gPRIlJiPeYKqx0Rybee49f+ghKRW0RkjYis\naWhoCOBl2KuswDtOsSoM6yl6ew2V9a06kK1UmImId0A7gCmy5XXOrkHhK1iD2bcDZ4rIh8CZQA3Q\nY4xZBrwKvA88B6yw2g2wGPiZiKwCmoG+PS5+DtxpjOn1d0JjzGPGmBJjTElWVlaQXkboTMtOJj0x\nJiwD2rWNRzjS1aOJQikbFOW62bq3me6eY7+FGWOoiJAZTwCBXMCuwfq0b5lgtfUzxtRi9ShEJBm4\n3Bhz2LrvPuA+675ngW1W+wrgdKv9PGCG9XQlwPNW/YRM4AIR6TbG/HkYr88xRISSgoywrND+eMaT\nJgqlwq04301Hdy879rcyI2fgWYfVh47Q7PAaFL4C6VGsBqaLSKGIxOLtCbzie4CIZFoD1AB3A09a\n7S7rEhQiMgeYgzVALSLZ1vc44E7gEQBjTKExpsAYUwC8BNwa6UmiT1lBBrsOtFHf3B7S82idbKXs\n83FtimOPU/TtMhspPYpBE4Uxphv4KvAGUAG8aIzZIiL3isjF1mELgY9EZBuQg9WDAGKAf4hIOfAY\ncI31fAB3iEgF3gHuvxpj+gbDR62P11OEdppsZUMraYkxZCTFhvQ8SqlPm5qVRGx0lN9xivI659eg\n8BXQ3EljzKt4xxp82+7xuf0S3k//Rz+uHe/Mp
4Ge8w7gjkHOe30g8UWK4jw3CTEuVu86yIVzckN2\nnr49nrT8qVLhF+2KYub4FL9TZMtrnV+DwpeuzA6jGFcU8yenhXyDQE9Di152UspGRbluttQ2HXPd\nVEVdU8SMT4AmirArLcigYm8TTe1dIXn+g62dHGzt1EShlI2K89wcbuuirvHT45GRUoPClyaKMCsr\nyMAYWFsVmnGKyoa+rTs0UShll77ewkClUSNpRXYfTRRhNndSGtFRErL1FDo1Vin7zRzvRmTg2hSR\nUoPClyaKMEuMjaY4PzVk6yk89S3Ex0SRn5YQkudXSg0uKS6awnFJA06RjZQaFL40UdigrCCdDXsa\nae/qGfzgIfLUtzAlMzkitgVQajSbdYzaFJFSg8KXJgoblBZk0NnTy8bqwCthBcpTrzOelHKC4jw3\n1YeO0Hjk44krkVSDwpcmChuUWhsEBvvy05HOHmoOH9FEoZQDDFRDO5JqUPjSRGGD9KRYpmcnB309\nRf+MJx3IVsp2xXneZOB7+SkSZzyBJgrblBZmsK7qED29wStk1JcotEehlP2yUuLISon7xIB2JNWg\n8KWJwiZlBRk0d3RTMYRC7IPx1LcQJVCQGVm/hEqNVkfXpiiva2Lm+JSIm2yiicIm/RsEBnGcwlPf\nwuRxScRFR8b+MUqNdkW5bjz1LXR093xcgyLCLjuBJgrb5KclkJ+WwJpdwVuhXdnQouMTSjlIUZ6b\n7l7D9n0tH9egiLCBbNBEYavSgnRW7Tp4zI3DhqK7p5ed+1uZmq11spVyiv4B7dqmj2tQaI9CDUVp\nYQYNzR1UHWgb8XPtPthGV4/RrTuUcpDJGYkkxbrYUtv4cQ2KY1S9czJNFDYqs9ZTrArCOIVWtVPK\neaKihFm53hXa5bVNTMlKJiE28sYQNVHYaFp2MumJMUHZINCju8Yq5UhF1syn8trGiNu6o48mChuJ\nCCUFGUGZ+VRZ30qOOw53fEwQIlNKBUtRrpvWzh5qG9sjcnwCNFHYrqwgg10H2qhv/nSBk6Hw6Iwn\npRypb0Ab0B6FGp7+9RQ7hz9N1hjjrZOtl52UcpzpOcm4rAV2kVSDwpcmCpsV57lJiHGN6PLTvqYO\nWjq6NVEo5UDxMS6mZyeTbW3pEYmi7Q5grItxRTFvUtqINgjUqnZKOdvNp0+hub1r8AMdShOFA5QW\nZPDQW9tpau8a1mC01slWytmuOHGC3SGMiF56coCywgyMgbVVwxun8NS3kBIXTXaEdmuVUs6micIB\n5k1KIzpKhr2ewlPfwtTsZEQia0dKpVRk0EThAImx0RTnpw57QNvToDOelFKho4nCIcoK0tmwp5H2\nrp4hPa7xSBcNzR2aKJRSIaOJwiFKCzLo7OllY3Xj4Af70PKnSqlQ00ThEKUFwytkpJsBKqVCTROF\nQ6QnxTI9O3nI6ykq61uIdUUxMT0hRJEppcY6TRQOUlqYwbqqQ/T0Bl7IyFPfQmFmEtEu/a9USoWG\nvrs4SFlBBs0d3VTUNQ1+sKVSZzwppUJME4WD9G8QGOA4RXtXD7sPtjE1S8ufKqVCJ6BEISKLROQj\nEfGIyF0D3D9ZRJaLyEYReUdEJvjc92MR2Wx9XenTfraIrLPal4pItNV+tfU8m0TkfRE5IRgvNBLk\npyWQn5YQcKLYdaCVXqNbdyilQmvQRCEiLuCXwPlAEXCViBQdddgDwNPGmDnAvcD91mMvBOYDc4EF\nwO0i4haRKGApsNgYMxuoApZYz7UTONMYczzwA+Cxkb3EyFJakM6qnYcwZvBxCp3xpJQKh0B6FGWA\nxxizwxjTCTwPXHLUMUXAW9btt33uLwLeM8Z0G2NagY3AImAc0GmM2WYd9yZwOYAx5n1jTN+mRyuB\nyN5Na4hKCzPY39JB1YG2QY/11LcgomsolFKhFUiiyAf2+PxcbbX52gBcZt2+FEgRkXFW+yIRSRSR\nTOAsYCKwH
4gWkRLrMVdY7Ue7CXgtkBcyWpRZ6ylWBXD5qbKhlQnpCcTHRF6xdqVU5AjWYPbtwJki\n8iFwJlAD9BhjlgGvAu8DzwErrHYDLAZ+JiKrgGbgE3tXiMhZeBPFnQOdUERuEZE1IrKmoaEhSC/D\nftOyk0lPjAlog0BPvZY/VUqFXiCJooZPftqfYLX1M8bUGmMuM8bMA75rtR22vt9njJlrjPkMIMA2\nq32FMeZ0Y0wZ8F5fO4CIzAEeBy4xxhwYKChjzGPGmBJjTElWVlaAL9f5RIQTJ2cMOqDd02vY0dCi\nxYqUUiEXSKJYDUwXkUIRicXbE3jF9wARybQGqAHuBp602l3WJai+N/85wDLr52zrexzeXsMj1s+T\ngJeBa33GMMaUssJ0dh1oo765/ZjH1Bw6Qkd3rw5kK6VCbtBEYYzpBr4KvAFUAC8aY7aIyL0icrF1\n2ELgIxHZBuQA91ntMcA/RKQc7+yla6znA7hDRCrwDnD/1RjTNxh+D97B7l+JyHoRWTPiVxlh+vd9\n2nnsQkaehmZAZzwppUIvoFKoxphX8Y41+Lbd43P7JeClAR7Xjnfm00DPeQdwxwDtNwM3BxLXaDU7\nP5WEGBerdx3kwjm5Ax5TWd8K6IwnpVTo6cpsB4pxRTFvUprfDQI99S2MS4olPSk2jJEppcYiTRQO\nVVqQQcXeJprauwa839PQoiuylVJhoYnCocoKMzAG1lZ9epzCGIOnXjcDVEqFhyYKh5o3KY3oKBlw\nPcX+lk4aj3Tp1FilVFhoonCoxNhoivNTB1xP0Vf+VHsUSqlw0EThYGUF6WzY00h71ycWrfdvBqhj\nFEqpcNBE4WClBRl09vSysbrxE+2e+hYSY13kpcbbFJlSaizRROFg/Qvvjrr8VNng3eNJROwISyk1\nxmiicLD0pFimZyd/aj1Fpc54UkqFkSYKhystzGBd1SF6er2FjFo7uqltbNdEoZQKG00UDldWkEFz\nRzcVdU3AxzOetE62UipcNFE4XGnhJ8cptPypUircNFE4XH5aAvlpCZ9IFNFRwuRx2qNQSoWHJooI\nUFKQzqqdhzDGUNnQwuRxicS49L9OKRUe+m4TAUoLMtjf0sGuA226x5NSKuw0UUSAMmucYkXlAaoO\ntGkNCqVUWGmiiADTspJJS4zhpbV76O412qNQSoWVJooIEBUllEzOYN3uw4DOeFJKhZcmighRVpje\nf1svPSmlwkkTRYTo2/cpNzWepLiASp0rpVRQaKKIELPzU0mIcellJ6VU2OlH0wgR44rie58tYkJ6\not2hKKXGGE0UEWRx2SS7Q1BKjUF66UkppZRfmiiUUkr5pYlCKaWUX5oolFJK+aWJQimllF+aKJRS\nSvmliUIppZRfmiiUUkr5JcYYu2MYMRFpAKqG+fBMYH8Qwwm1SIo3kmKFyIo3kmKFyIo3kmKFkcU7\n2RiTNdhBoyJRjISIrDHGlNgdR6AiKd5IihUiK95IihUiK95IihXCE69eelJKKeWXJgqllFJ+aaKA\nx+wOYIgiKd5IihUiK95IihUiK95IihXCEO+YH6NQSinln/YolFJK+TWmE4WILBKRj0TEIyJ32R3P\nsYjIRBF5W0TKRWSLiHzD7pgCISIuEflQRP7X7lj8EZE0EXlJRLaKSIWInGx3TP6IyLes34PNIvKc\niMTbHZMvEXlSROpFZLNPW4aIvCki263v6f6eI1yOEetPrN+FjSLyJxFJszNGXwPF63Pfd0TEiEhm\nsM87ZhOFiLiAXwLnA0XAVSJSZG9Ux9QNfMcYUwScBNzm4Fh9fQOosDuIADwIvG6MmQmcgINjFpF8\n4OtAiTFmNuACFtsb1af8Flh0VNtdwHJjzHRgufWzE/yWT8f6JjDbGDMH2AbcHe6g/Pgtn44XEZkI\nnAfsDsVJx2yiAMoAjzFmhzGmE3geuMTmmAZkjKkzxqyzbjfjfSPLtzcq/0R
kAnAh8LjdsfgjIqnA\nGcATAMaYTmPMYXujGlQ0kCAi0UAiUGtzPJ9gjHkPOHhU8yXAUuv2UuBzYQ3qGAaK1RizzBjTbf24\nEpgQ9sCO4Rj/tgA/A/4DCMmg81hOFPnAHp+fq3H4my+AiBQA84AP7I1kUD/H+4vba3cggygEGoCn\nrMtkj4tIkt1BHYsxpgZ4AO8nxzqg0RizzN6oApJjjKmzbu8FcuwMZghuBF6zOwh/ROQSoMYYsyFU\n5xjLiSLiiEgy8Efgm8aYJrvjORYRuQioN8astTuWAEQD84FfG2PmAa0457LIp1jX9i/Bm+DygCQR\nucbeqIbGeKdaOn66pYh8F+9l39/bHcuxiEgi8J/APaE8z1hOFDXARJ+fJ1htjiQiMXiTxO+NMS/b\nHc8gTgUuFpFdeC/pnS0iz9gb0jFVA9XGmL4e2kt4E4dTnQvsNMY0GGO6gJeBU2yOKRD7RCQXwPpe\nb3M8fonI9cBFwNXG2WsIpuL90LDB+nubAKwTkfHBPMlYThSrgekiUigisXgHBF+xOaYBiYjgvYZe\nYYz5f3bHMxhjzN3GmAnGmAK8/65vGWMc+anXGLMX2CMix1lN5wDlNoY0mN3ASSKSaP1enIODB999\nvAIssW4vAf5iYyx+icgivJdNLzbGtNkdjz/GmE3GmGxjTIH191YNzLd+r4NmzCYKa7Dqq8AbeP/Q\nXjTGbLE3qmM6FbgW7yfz9dbXBXYHNYp8Dfi9iGwE5gL/bXM8x2T1fF4C1gGb8P4NO2olsYg8B6wA\njhORahG5CfgR8BkR2Y63V/QjO2Psc4xYHwZSgDetv7VHbA3SxzHiDf15nd2rUkopZbcx26NQSikV\nGE0USiml/NJEoZRSyi9NFEoppfzSRKGUUsovTRRKKaX80kShlFLKL00USiml/Pr/JSRBJ7KPHmUA\nAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "tags": [] } } ] }, { "metadata": { "id": "-L02JX0wCUCn", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "# Create the encoder model from the tensors we previously declared.\n", "encoder_model = Model(encoder_inputs, [encoder_outputs, state_h, state_c])\n", "\n", "# Generate a new set of tensors for our new inference decoder. Note that we are using new tensors, \n", "# this does not preclude using the same underlying layers that we trained on. (e.g. weights/biases).\n", "inf_decoder_inputs = Input(shape=(None,), name=\"inf_decoder_inputs\")\n", "# We'll need to force feed the two state variables into the decoder each step.\n", "state_input_h = Input(shape=(units*2,), name=\"state_input_h\")\n", "state_input_c = Input(shape=(units*2,), name=\"state_input_c\")\n", "decoder_res, decoder_h, decoder_c = decoder_lstm(\n", " decoder_emb(inf_decoder_inputs), \n", " initial_state=[state_input_h, state_input_c])\n", "inf_decoder_out = decoder_d2(decoder_d1(decoder_res))\n", "inf_model = Model(inputs=[inf_decoder_inputs, state_input_h, state_input_c], \n", " outputs=[inf_decoder_out, decoder_h, decoder_c])" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "97adiy7pD2gY", "colab_type": "code", "colab": {} }, "cell_type": "code", "source": [ "# Converts the given sentence (just a string) into a vector of word IDs\n", "# using the language specified. 
This can be used for either the input (English)\n", "# or target (Spanish) languages.\n", "# Output is 1-D: [timesteps/words]\n", "def sentence_to_vector(sentence, lang):\n", " #pre = preprocess_sentence(sentence)\n", " pre = sentence\n", " vec = np.zeros(len_input)\n", " sentence_list = [lang.word2idx[s] for s in pre.split(' ')]\n", " for i,w in enumerate(sentence_list):\n", " vec[i] = w\n", " return vec\n", "\n", "# Given an input string, an encoder model (infenc_model) and a decoder model (infmodel),\n", "# return a translated string.\n", "def translate(input_sentence, infenc_model, infmodel, attention=False):\n", " sv = sentence_to_vector(input_sentence, input_lang)\n", " # Reshape so we can use the encoder model. New shape=[samples,sequence length]\n", " sv = sv.reshape(1,len(sv))\n", " [emb_out, sh, sc] = infenc_model.predict(x=sv)\n", " \n", " i = 0\n", " start_vec = target_lang.word2idx[\"\"]\n", " stop_vec = target_lang.word2idx[\"\"]\n", " # We will continuously feed cur_vec as an input into the decoder to produce the next word,\n", " # which will be assigned to cur_vec. Start it with \"\".\n", " cur_vec = np.zeros((1,1))\n", " cur_vec[0,0] = start_vec\n", " cur_word = \"\"\n", " output_sentence = \"\"\n", " # Start doing the feeding. Terminate when the model predicts an \"\" or we reach the end\n", " # of the max target language sentence length.\n", " while cur_word != \"\" and i < (len_target-1):\n", " i += 1\n", " if cur_word != \"\":\n", " output_sentence = output_sentence + \" \" + cur_word\n", " x_in = [cur_vec, sh, sc]\n", " # This will allow us to accomodate attention models, which we will talk about later.\n", " if attention:\n", " x_in += [emb_out]\n", " [nvec, sh, sc] = infmodel.predict(x=x_in)\n", " # The output of the model is a massive softmax vector with one spot for every possible word. 
Convert\n", " # it to a word ID using argmax().\n", " cur_vec[0,0] = np.argmax(nvec[0,0])\n", " cur_word = target_lang.idx2word[np.argmax(nvec[0,0])]\n", " return output_sentence" ], "execution_count": 0, "outputs": [] }, { "metadata": { "id": "Q9QOVTHXD50v", "colab_type": "code", "outputId": "08251bf4-2087-4b75-e188-d6e140f7c2b3", "colab": { "base_uri": "https://localhost:8080/", "height": 204 } }, "cell_type": "code", "source": [ "# Let's test out the model! Feel free to modify as you see fit. Note that only words\n", "# that we've trained the model on will be available, otherwise you'll get an error.\n", "\n", "\n", "test = [\n", " 'four minutes past seven evening',\n", " 'seventeen minutes past nine evening',\n", " 'quarter past mid night',\n", " 'fifty four minutes past seven evening',\n", " 'two minutes past seven morning',\n", " 'sixteen minutes past two in the morning',\n", " 'fifty nine minutes past six in the morning',\n", " 'sixteen minutes past twelve in the noon',\n", " 'fourteen minutes past six in the morning',\n", " 'seven minutes past three morning'\n", "]\n", " \n", "\n", "import pandas as pd\n", "output = [] \n", "for t in test: \n", " output.append({\"Input seq\":t, \"Pred. Seq\":translate(t, encoder_model, inf_model)})\n", "\n", "results_df = pd.DataFrame.from_dict(output) \n", "results_df.head()" ], "execution_count": 27, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Input seqPred. Seq
0four minutes past seven evening19 : 4
1seventeen minutes past nine evening19 : 17
2quarter past mid night0 : 15
3fifty four minutes past seven evening19 : 54
4two minutes past seven morning7 : 2
\n", "
" ], "text/plain": [ " Input seq Pred. Seq\n", "0 four minutes past seven evening 19 : 4\n", "1 seventeen minutes past nine evening 19 : 17\n", "2 quarter past mid night 0 : 15\n", "3 fifty four minutes past seven evening 19 : 54\n", "4 two minutes past seven morning 7 : 2" ] }, "metadata": { "tags": [] }, "execution_count": 27 } ] } ] }