In [1]:
import json
import tensorflow as tf

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

!wget --no-check-certificate \
 https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json \
 -O /tmp/sarcasm.json

vocab_size = 1000
embedding_dim = 16
max_length = 120
trunc_type='post'
padding_type='post'
oov_tok = ""
training_size = 20000


with open("/tmp/sarcasm.json", 'r') as f:
 datastore = json.load(f)


sentences = []
labels = []
urls = []
for item in datastore:
 sentences.append(item['headline'])
 labels.append(item['is_sarcastic'])

training_sentences = sentences[0:training_size]
testing_sentences = sentences[training_size:]
training_labels = labels[0:training_size]
testing_labels = labels[training_size:]

tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(training_sentences)

word_index = tokenizer.word_index

training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(training_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)

model = tf.keras.Sequential([
 tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),
 tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
 tf.keras.layers.Dense(24, activation='relu'),
 tf.keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

num_epochs = 50
history = model.fit(training_padded, training_labels, epochs=num_epochs, validation_data=(testing_padded, testing_labels), verbose=1)



--2019-11-24 12:33:45-- https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json
Resolving storage.googleapis.com (storage.googleapis.com)... 216.58.197.144, 2404:6800:4004:800::2010
Connecting to storage.googleapis.com (storage.googleapis.com)|216.58.197.144|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5643545 (5.4M) [application/json]
Saving to: ‘/tmp/sarcasm.json’


2019-11-24 12:33:45 (37.1 MB/s) - ‘/tmp/sarcasm.json’ saved [5643545/5643545]

Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param # 
embedding (Embedding) (None, 120, 16) 16000 
_________________________________________________________________
bidirectional (Bidirectional (None, 64) 12544 
_________________________________________________________________
dense (Dense) (None, 24) 1560 
_________________________________________________________________
dense_1 (Dense) (None, 1) 25 
Total params: 30,129
Trainab

ValueError: Failed to find data adapter that can handle input: , ( containing values of types {""})

In [0]:
import matplotlib.pyplot as plt


def plot_graphs(history, string):
 plt.plot(history.history[string])
 plt.plot(history.history['val_'+string])
 plt.xlabel("Epochs")
 plt.ylabel(string)
 plt.legend([string, 'val_'+string])
 plt.show()

plot_graphs(history, 'acc')
plot_graphs(history, 'loss')

In [0]:
model.save("test.h5")