# Import basic libraries and Keras
import os
import json

from keras import layers
from keras.layers import LSTM
from keras.models import Model
from keras.models import load_model

from stories import get_stories, vectorize_stories

# Set parameters
EMBED_HIDDEN_SIZE = 50
SENT_HIDDEN_SIZE = 100
QUERY_HIDDEN_SIZE = 100
BATCH_SIZE = 32
EPOCHS = 40

# Load input data
train = get_stories('qa1_single-supporting-fact_train.txt')
test = get_stories('qa1_single-supporting-fact_test.txt')

# Create vocabulary
vocab = set()
for story, q, answer in train + test:
    vocab |= set(story + q + [answer])
vocab = sorted(vocab)

# Create index of words {word: id}
# Reserve 0 for masking via pad_sequences
vocab_size = len(vocab) + 1
word_idx = dict((c, i + 1) for i, c in enumerate(vocab))

# Get maximum length of sequences
story_maxlen = max(map(len, (x for x, _, _ in train + test)))
query_maxlen = max(map(len, (x for _, x, _ in train + test)))

# Save vocabulary and lengths to file
if not os.path.exists('dictionary.json'):
    with open('dictionary.json', 'w') as outfile:
        json.dump(word_idx, outfile)

if not os.path.exists('lengths.json'):
    with open('lengths.json', 'w') as outfile:
        json.dump({'story_maxlen': story_maxlen,
                   'query_maxlen': query_maxlen}, outfile)

# Vectorize the stories
x, xq, y = vectorize_stories(train, word_idx, story_maxlen, query_maxlen)
tx, txq, ty = vectorize_stories(test, word_idx, story_maxlen, query_maxlen)

# Check if there is a pre-trained model
if not os.path.exists('rnn_model.h5'):
    # Create a neural network for the stories
    sentence = layers.Input(shape=(story_maxlen,), dtype='int32')
    encoded_sentence = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(sentence)
    encoded_sentence = layers.Dropout(0.3)(encoded_sentence)

    # Create a neural network for the questions
    question = layers.Input(shape=(query_maxlen,), dtype='int32')
    encoded_question = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(question)
    encoded_question = layers.Dropout(0.3)(encoded_question)
    encoded_question = LSTM(EMBED_HIDDEN_SIZE)(encoded_question)
    encoded_question = layers.RepeatVector(story_maxlen)(encoded_question)

    # Combine the two networks
    merged = layers.add([encoded_sentence, encoded_question])
    merged = LSTM(EMBED_HIDDEN_SIZE)(merged)
    merged = layers.Dropout(0.3)(merged)
    preds = layers.Dense(vocab_size, activation='softmax')(merged)

    model = Model([sentence, question], preds)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # Train the model
    model.fit([x, xq], y,
              batch_size=BATCH_SIZE,
              epochs=EPOCHS,
              validation_split=0.05)

    # Save the model
    model.save('rnn_model.h5')
else:
    # Load the model from disk
    model = load_model('rnn_model.h5')

model.summary()

# Evaluate the model on the test set
score = model.evaluate([tx, txq], ty, batch_size=BATCH_SIZE, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
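
# --- Illustrative follow-up (a minimal sketch, not part of the training
# run above): once 'rnn_model.h5', 'dictionary.json' and 'lengths.json'
# exist on disk, the saved artifacts can answer a question without
# retraining. This assumes the same stories.get_stories /
# vectorize_stories helpers and file names used above, and reuses the
# imports already made.
with open('dictionary.json') as f:
    word_idx = json.load(f)
with open('lengths.json') as f:
    lengths = json.load(f)
model = load_model('rnn_model.h5')

# Vectorize one test example and pick the highest-scoring vocabulary word
sample = get_stories('qa1_single-supporting-fact_test.txt')[:1]
sx, sxq, _ = vectorize_stories(sample, word_idx,
                               lengths['story_maxlen'],
                               lengths['query_maxlen'])
pred = model.predict([sx, sxq])
idx_word = {i: w for w, i in word_idx.items()}
print('Predicted answer:', idx_word[int(pred[0].argmax())])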