{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# High-level RNN CNTK Example"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import os\n",
    "import sys\n",
    "import cntk\n",
    "from cntk.layers import Embedding, LSTM, GRU, Dense, Recurrence\n",
    "from cntk import sequence\n",
    "# NOTE(review): EMBEDSIZE, NUMHIDDEN, MAXLEN, MAXFEATURES, BATCHSIZE, EPOCHS, LR, BETA_1, BETA_2, EPS and the\n",
    "# helpers get_gpu_name, imdb_for_library, yield_mb are not defined in this notebook, so they presumably come\n",
    "# from the two star imports below -- confirm against the common package.\n",
    "from common.params_lstm import *\n",
    "from common.utils import *"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "OS: linux\n",
      "Python: 3.5.2 |Anaconda custom (64-bit)| (default, Jul 2 2016, 17:53:06) \n",
      "[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]\n",
      "Numpy: 1.13.3\n",
      "CNTK: 2.2\n",
      "GPU: ['Tesla K80']\n"
     ]
    }
   ],
   "source": [
    "print(\"OS: \", sys.platform)\n",
    "print(\"Python: \", sys.version)\n",
    "print(\"Numpy: \", np.__version__)\n",
    "print(\"CNTK: \", cntk.__version__)\n",
    "print(\"GPU: \", get_gpu_name())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def create_symbol(CUDNN=True):\n",
    "    \"\"\"Build the GRU classifier graph on top of the global `features` input.\n",
    "\n",
    "    Reads the module-level `features` sequence input, which is defined in a later\n",
    "    cell and therefore must exist before this function is called, plus the\n",
    "    EMBEDSIZE and NUMHIDDEN constants.\n",
    "\n",
    "    Args:\n",
    "        CUDNN: if True (default) the recurrent layer is cntk.ops.optimized_rnnstack;\n",
    "            otherwise it is Recurrence(GRU(NUMHIDDEN)).\n",
    "\n",
    "    Returns:\n",
    "        The CNTK function ending in Dense(2); no softmax is applied here.\n",
    "    \"\"\"\n",
    "    # Weight initialiser from uniform distribution\n",
    "    # Activation (unless stated) is None\n",
    "    with cntk.layers.default_options(init=cntk.glorot_uniform()):\n",
    "        x = Embedding(EMBEDSIZE)(features)  # output: list of len=BATCHSIZE of arrays with shape=(MAXLEN, EMBEDSIZE)\n",
    "\n",
    "        # Since we have a vanilla RNN, instead of using the more flexible Recurrence(GRU) unit, which allows for\n",
    "        # example LayerNormalisation to be added to the network, we can use optimized_rnnstack which quickly\n",
    "        # goes down to the CuDNN level. This is another reason not to read much into the speed comparison because\n",
    "        # it becomes a measure of which framework has the fastest way to go down to CuDNN.\n",
    "        if not CUDNN:\n",
    "            x = Recurrence(GRU(NUMHIDDEN))(x)  # output: list of len=BATCHSIZE of arrays with shape=(MAXLEN, NUMHIDDEN)\n",
    "        else:\n",
    "            W = cntk.parameter((cntk.InferredDimension, 4))\n",
    "            x = cntk.ops.optimized_rnnstack(x, W, NUMHIDDEN, num_layers=1, bidirectional=False, recurrent_op='gru')\n",
    "\n",
    "        x = sequence.last(x)  # output: array with shape=(BATCHSIZE, NUMHIDDEN)\n",
    "        x = Dense(2)(x)  # output: array with shape=(BATCHSIZE, 2)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def init_model(m):\n",
    "    \"\"\"Wrap model `m` in a cntk.Trainer with softmax cross-entropy loss and an Adam learner.\n",
    "\n",
    "    Reads the module-level `labels` input variable, which is defined in a later\n",
    "    cell and therefore must exist before this function is called, plus the\n",
    "    LR, BETA_1, BETA_2 and EPS constants.\n",
    "\n",
    "    Args:\n",
    "        m: CNTK model function whose output is scored against `labels`.\n",
    "\n",
    "    Returns:\n",
    "        A cntk.Trainer whose criterion is (cross-entropy loss, classification error).\n",
    "    \"\"\"\n",
    "    # Loss (dense labels); check if support for sparse labels\n",
    "    loss = cntk.cross_entropy_with_softmax(m, labels)\n",
    "    # ADAM, set unit_gain to False to match others\n",
    "    learner = cntk.adam(m.parameters,\n",
    "                        lr=cntk.learning_rate_schedule(LR, cntk.UnitType.minibatch),\n",
    "                        momentum=cntk.momentum_schedule(BETA_1),\n",
    "                        variance_momentum=cntk.momentum_schedule(BETA_2),\n",
    "                        epsilon=EPS,\n",
    "                        unit_gain=False)\n",
    "    trainer = cntk.Trainer(m, (loss, cntk.classification_error(m, labels)), [learner])\n",
    "    return trainer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Preparing train set...\n",
      "Preparing test set...\n",
      "Trimming to 30000 max-features\n",
      "Padding to length 150\n",
      "(25000, 150) (25000, 150) (25000, 2) (25000, 2)\n",
      "int32 int32 float32 float32\n",
      "CPU times: user 5.58 s, sys: 283 ms, total: 5.86 s\n",
      "Wall time: 5.87 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Data into format for library\n",
    "x_train, x_test, y_train, y_test = imdb_for_library(seq_len=MAXLEN, max_features=MAXFEATURES, one_hot=True)  # CNTK format\n",
    "y_train = y_train.astype(np.float32)\n",
    "y_test = y_test.astype(np.float32)\n",
    "print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)\n",
    "print(x_train.dtype, x_test.dtype, y_train.dtype, y_test.dtype)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 9.42 ms, sys: 28 ms, total: 37.4 ms\n",
      "Wall time: 66.6 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Placeholders (create_symbol and init_model read these as globals)\n",
    "features = sequence.input_variable(shape=MAXFEATURES, is_sparse=True)\n",
    "labels = cntk.input_variable(2)\n",
    "# Load symbol\n",
    "sym = create_symbol()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 88.8 ms, sys: 203 ms, total: 291 ms\n",
      "Wall time: 299 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "trainer = init_model(sym)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1 | Accuracy: 0.765625\n",
      "Epoch 2 | Accuracy: 0.937500\n",
      "Epoch 3 | Accuracy: 0.937500\n",
      "CPU times: user 28 s, sys: 4.66 s, total: 32.7 s\n",
      "Wall time: 32.3 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# 32s\n",
    "# Train model\n",
    "for j in range(EPOCHS):\n",
    "    for data, label in yield_mb(x_train, y_train, BATCHSIZE, shuffle=True):\n",
    "        data_1hot = cntk.Value.one_hot(data, MAXFEATURES)  # TODO: do this externally and generate batches of 1hot\n",
    "        trainer.train_minibatch({features: data_1hot, labels: label})\n",
    "    # Log (this is just last batch in epoch, not average of batches)\n",
    "    eval_error = trainer.previous_minibatch_evaluation_average\n",
    "    print(\"Epoch %d | Accuracy: %.6f\" % (j+1, (1-eval_error)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 3.58 s, sys: 396 ms, total: 3.98 s\n",
      "Wall time: 3.98 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Predict and then score accuracy\n",
    "# Apply softmax since that is only applied at training\n",
    "# with cross-entropy loss\n",
    "z = cntk.softmax(sym)\n",
    "# Only whole batches are scored, so truncate the truth labels to a multiple of BATCHSIZE\n",
    "n_samples = (y_test.shape[0]//BATCHSIZE)*BATCHSIZE\n",
    "# np.int was just an alias of the builtin int and was removed in NumPy 1.24; use int directly\n",
    "y_guess = np.zeros(n_samples, dtype=int)\n",
    "y_truth = np.argmax(y_test[:n_samples], axis=-1)\n",
    "for c, (data, _label) in enumerate(yield_mb(x_test, y_test, BATCHSIZE)):\n",
    "    data_1hot = cntk.Value.one_hot(data, MAXFEATURES)\n",
    "    predicted_label_probs = z.eval({features: data_1hot})\n",
    "    y_guess[c*BATCHSIZE:(c+1)*BATCHSIZE] = np.argmax(predicted_label_probs, axis=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Accuracy: 0.853405448718\n"
     ]
    }
   ],
   "source": [
    "# Fraction of test samples (whole batches only) whose predicted class matches the truth\n",
    "print(\"Accuracy: \", np.mean(y_guess == y_truth))"
   ]
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python [default]",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}