{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "jGwXGIXvFhXW"
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import shutil\n",
    "import urllib.request\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
    "from tensorflow.keras.preprocessing.text import Tokenizer\n",
    "\n",
    "# --- Configuration -----------------------------------------------------------\n",
    "DATA_URL = \"https://storage.googleapis.com/laurencemoroney-blog.appspot.com/sarcasm.json\"\n",
    "DATA_PATH = \"/tmp/sarcasm.json\"\n",
    "\n",
    "vocab_size = 1000\n",
    "embedding_dim = 16\n",
    "max_length = 120\n",
    "trunc_type = 'post'\n",
    "padding_type = 'post'\n",
    "# Visible placeholder for out-of-vocabulary words. The previous value was an\n",
    "# empty string, which looks like a markup-stripped '<OOV>' -- confirm if unsure.\n",
    "oov_tok = \"<OOV>\"\n",
    "training_size = 20000\n",
    "num_epochs = 50\n",
    "seed = 42\n",
    "\n",
    "# Seed TensorFlow's global RNG so runs are more repeatable.\n",
    "tf.random.set_seed(seed)\n",
    "\n",
    "# --- Download and load the dataset -------------------------------------------\n",
    "# Pure-Python download: no dependency on the wget binary, and the server's TLS\n",
    "# certificate is verified (the old command passed --no-check-certificate).\n",
    "with urllib.request.urlopen(DATA_URL) as response, open(DATA_PATH, 'wb') as out_file:\n",
    "    shutil.copyfileobj(response, out_file)\n",
    "\n",
    "with open(DATA_PATH, 'r') as f:\n",
    "    datastore = json.load(f)\n",
    "\n",
    "sentences = [item['headline'] for item in datastore]\n",
    "labels = [item['is_sarcastic'] for item in datastore]\n",
    "\n",
    "# --- Train / test split ------------------------------------------------------\n",
    "training_sentences = sentences[:training_size]\n",
    "testing_sentences = sentences[training_size:]\n",
    "# model.fit cannot pair a NumPy feature array with a plain Python list of labels\n",
    "# (ValueError: Failed to find data adapter ...), so labels become NumPy arrays.\n",
    "training_labels = np.array(labels[:training_size])\n",
    "testing_labels = np.array(labels[training_size:])\n",
    "\n",
    "# --- Tokenize and pad --------------------------------------------------------\n",
    "# The vocabulary is fitted on the training sentences only.\n",
    "tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)\n",
    "tokenizer.fit_on_texts(training_sentences)\n",
    "\n",
    "word_index = tokenizer.word_index\n",
    "\n",
    "training_sequences = tokenizer.texts_to_sequences(training_sentences)\n",
    "training_padded = pad_sequences(training_sequences, maxlen=max_length,\n",
    "                                padding=padding_type, truncating=trunc_type)\n",
    "\n",
    "testing_sequences = tokenizer.texts_to_sequences(testing_sentences)\n",
    "testing_padded = pad_sequences(testing_sequences, maxlen=max_length,\n",
    "                               padding=padding_type, truncating=trunc_type)\n",
    "\n",
    "# --- Model -------------------------------------------------------------------\n",
    "model = tf.keras.Sequential([\n",
    "    tf.keras.layers.Embedding(vocab_size, embedding_dim, input_length=max_length),\n",
    "    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),\n",
    "    tf.keras.layers.Dense(24, activation='relu'),\n",
    "    tf.keras.layers.Dense(1, activation='sigmoid')\n",
    "])\n",
    "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
    "model.summary()\n",
    "\n",
    "# --- Train -------------------------------------------------------------------\n",
    "history = model.fit(\n",
    "    training_padded,\n",
    "    training_labels,\n",
    "    epochs=num_epochs,\n",
    "    validation_data=(testing_padded, testing_labels),\n",
    "    verbose=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "colab": {},
    "colab_type": "code",
    "id": "g9DC6dmLF8DC"
   },
   "outputs": [],
   "source": [
    "def plot_graphs(history, string):\n",
    "    \"\"\"Plot the training and validation curves of one metric across epochs.\n",
    "\n",
    "    Args:\n",
    "        history: Object returned by `model.fit`; its `.history` dict is read.\n",
    "        string: Key of the metric in `history.history` (e.g. 'accuracy' or\n",
    "            'loss'). The validation curve is read from 'val_' + string.\n",
    "    \"\"\"\n",
    "    plt.plot(history.history[string])\n",
    "    plt.plot(history.history['val_' + string])\n",
    "    plt.xlabel(\"Epochs\")\n",
    "    plt.ylabel(string)\n",
    "    plt.legend([string, 'val_' + string])\n",
    "    plt.show()\n",
    "\n",
    "\n",
    "# The model is compiled with metrics=['accuracy'], so on TF 2.x the History key\n",
    "# is 'accuracy'; the old 'acc' key would raise KeyError there.\n",
    "plot_graphs(history, 'accuracy')\n",
    "plot_graphs(history, 'loss')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata":
{ "colab": {}, "colab_type": "code", "id": "7ZEZIUppGhdi" }, "outputs": [], "source": [ "# Write the trained model to a file in the current working directory.\n", "saved_model_path = \"test.h5\"\n", "model.save(saved_model_path)" ] } ], "metadata": { "accelerator": "GPU", "colab": { "collapsed_sections": [], "name": "c3w3e1-bidirectional-lstm.ipynb", "provenance": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 1 }