{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    }
   ],
   "source": [
    "from keras.utils import plot_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loading data...\n",
      "8982 train sequences\n",
      "2246 test sequences\n",
      "46 classes\n",
      "Vectorizing sequence data...\n",
      "x_train shape: (8982, 1000)\n",
      "x_test shape: (2246, 1000)\n",
      "Convert class vector to binary class matrix (for use with categorical_crossentropy)\n",
      "y_train shape: (8982, 46)\n",
      "y_test shape: (2246, 46)\n",
      "Building model...\n"
     ]
    }
   ],
   "source": [
    "\n",
    "'''Trains and evaluates a simple MLP\n",
    "on the Reuters newswire topic classification task.\n",
    "'''\n",
    "from __future__ import print_function\n",
    "\n",
    "import numpy as np\n",
    "import keras\n",
    "from keras.datasets import reuters\n",
    "from keras.models import Sequential\n",
    "from keras.layers import Dense, Dropout, Activation\n",
    "from keras.preprocessing.text import Tokenizer\n",
    "\n",
    "# Tunable settings: vocabulary size (also the width of the input vectors),\n",
    "# mini-batch size, and number of passes over the training data.\n",
    "max_words = 1000\n",
    "batch_size = 32\n",
    "epochs = 5\n",
    "\n",
    "print('Loading data...')\n",
    "(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words,\n",
    "                                                         test_split=0.2)\n",
    "print(len(x_train), 'train sequences')\n",
    "print(len(x_test), 'test sequences')\n",
    "\n",
    "# Labels are integer class ids, so the class count is the largest id plus one.\n",
    "num_classes = np.max(y_train) + 1\n",
    "print(num_classes, 'classes')\n",
    "\n",
    "print('Vectorizing sequence data...')\n",
    "# Encode every word-index sequence as one binary row with max_words columns.\n",
    "tokenizer = Tokenizer(num_words=max_words)\n",
    "x_train = tokenizer.sequences_to_matrix(x_train, mode='binary')\n",
    "x_test = tokenizer.sequences_to_matrix(x_test, mode='binary')\n",
    "print('x_train shape:', x_train.shape)\n",
    "print('x_test shape:', x_test.shape)\n",
    "\n",
    "print('Convert class vector to binary class matrix '\n",
    "      '(for use with categorical_crossentropy)')\n",
    "y_train = keras.utils.to_categorical(y_train, num_classes)\n",
    "y_test = keras.utils.to_categorical(y_test, num_classes)\n",
    "print('y_train shape:', y_train.shape)\n",
    "print('y_test shape:', y_test.shape)\n",
    "\n",
    "print('Building model...')\n",
    "# One hidden layer of 512 units feeding a softmax over num_classes outputs.\n",
    "# NOTE(review): Dropout(0.2) is applied before the ReLU and Dropout(0.5)\n",
    "# after it - confirm that the first dropout layer is intentional.\n",
    "model = Sequential()\n",
    "model.add(Dense(512, input_shape=(max_words,)))\n",
    "model.add(Dropout(0.2))\n",
    "model.add(Activation('relu'))\n",
    "model.add(Dropout(0.5))\n",
    "model.add(Dense(num_classes))\n",
    "model.add(Activation('softmax'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 2 µs, sys: 0 ns, total: 2 µs\n",
      "Wall time: 4.53 µs\n",
      "Train on 8083 samples, validate on 899 samples\n",
      "Epoch 1/5\n",
      "8083/8083 [==============================] - 3s - loss: 1.4582 - acc: 0.6704 - val_loss: 1.2260 - val_acc: 0.7442\n",
      "Epoch 2/5\n",
      "8083/8083 [==============================] - 3s - loss: 0.9869 - acc: 0.7872 - val_loss: 1.1579 - val_acc: 0.7642\n",
      "Epoch 3/5\n",
      "8083/8083 [==============================] - 3s - loss: 0.8224 - acc: 0.8237 - val_loss: 1.1424 - val_acc: 0.7898\n",
      "Epoch 4/5\n",
      "8083/8083 [==============================] - 3s - loss: 0.7168 - acc: 0.8497 - val_loss: 1.1364 - val_acc: 0.7864\n",
      "Epoch 5/5\n",
      "8083/8083 [==============================] - 3s - loss: 0.6359 - acc: 0.8689 - val_loss: 1.1486 - val_acc: 0.7964\n",
      "2240/2246 [============================>.] - ETA: 0sTest score: 1.0966974948\n",
      "Test accuracy: 0.790739091719\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# The cell magic above has to stay on the first line: it times the whole\n",
    "# cell, whereas a bare line magic with no statement times nothing.\n",
    "\n",
    "model.compile(loss='categorical_crossentropy',\n",
    "              optimizer='rmsprop',\n",
    "              metrics=['accuracy'])\n",
    "\n",
    "# Hold out 10% of the training samples for per-epoch validation.\n",
    "history = model.fit(x_train, y_train,\n",
    "                    batch_size=batch_size,\n",
    "                    epochs=epochs,\n",
    "                    verbose=1,\n",
    "                    validation_split=0.1)\n",
    "\n",
    "# evaluate() returns the loss first, then the metrics requested in compile().\n",
    "score = model.evaluate(x_test, y_test,\n",
    "                       batch_size=batch_size, verbose=1)\n",
    "print('Test score:', score[0])\n",
    "print('Test accuracy:', score[1])\n",
    "\n",
    "# Save a diagram of the layer stack next to the notebook.\n",
    "plot_model(model, to_file='model.png')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Sandbox Venv (Python 3)",
   "language": "python",
   "name": "sandbox-venv-3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}