{ "metadata": { "name": "", "signature": "sha256:a082fc807336303e77a8e6c02f11f8c32fbaf99a81685da95a5b59cf3c38ddb6" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "% load_ext autoreload\n", "% autoreload 2\n", "% matplotlib inline\n", "% load_ext cythonmagic\n", "% config InlineBackend.figure_format = 'svg'\n", "import matplotlib.pyplot as plt\n", "import numpy as np, matplotlib\n", "from cython_lstm.network import Network\n", "from cython_lstm.neuron import LogisticNeuron, TanhNeuron, SoftmaxNeuron\n", "from cython_lstm.layers import LoopLayer, SliceLayer, ActivationLayer, LinearLayer\n", "from cython_lstm.trainer import Trainer\n", "from cython_lstm.error import MSE, CategoricalCrossEntropy, BinaryCrossEntropy\n", "from cython_lstm.dataset import create_xor_dataset, create_digit_dataset\n", "import cython_lstm.network_viewer" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "def test_net():\n", " # create a test dataset\n", " xor_dataset, xor_labels = create_xor_dataset()\n", " # create a small network:\n", "\n", " net = Network(metric= BinaryCrossEntropy)\n", " print(\"Initialization OK\")\n", " first_layer = LinearLayer(xor_dataset.shape[1], 6)\n", " activation_layer = ActivationLayer(LogisticNeuron)\n", " first_layer.connect_to(activation_layer)\n", "\n", " second_layer = LinearLayer(6, xor_labels.shape[1])\n", " activation_layer.connect_to(second_layer)\n", "\n", " second_activation_layer = ActivationLayer(LogisticNeuron)\n", " second_layer.connect_to(second_activation_layer)\n", "\n", " net.add_layer(first_layer, input=True)\n", " net.add_layer(activation_layer)\n", " net.add_layer(second_layer)\n", " net.add_layer(activation_layer)\n", " net.add_layer(second_layer)\n", " net.add_layer(second_activation_layer, output=True)\n", "\n", " print(\"Construction OK\")\n", "\n", " net.clear()\n", " print(\"Clearing OK\")\n", " net.activate(xor_dataset)\n", " print(\"Activation OK\")\n", " net.backpropagate(xor_labels)\n", " print(\"Backpropagation OK\")\n", "\n", " for gparam, param in zip(net.get_gradients(), net.get_parameters()):\n", " assert(gparam.shape == param.shape), \"Weight updates are not the same size\"\n", "\n", " print(\"Updates and parameters shapes OK\")\n", "\n", " trainer = Trainer(net, 0.3)\n", " print(\"Trainer OK\")\n", " epochs = 2000\n", "\n", " for epoch in range(epochs):\n", " er = trainer.train(xor_dataset, xor_labels)\n", " if epoch > 0 and epoch % 250 == 0:\n", " print(\"epoch %d, Error %.2f\" % (epoch, er))\n", "\n", " print(\"Training OK\")\n", "\n", " net.clear()\n", " np.set_printoptions(precision=2)\n", " passed_predictions = []\n", " for data, prediction, label in zip(xor_dataset, net.activate(xor_dataset), xor_labels):\n", " passed_predictions.append(np.allclose(prediction.round(), label))\n", " print(\"%r => %r : %r\" % (data.astype(np.float64), np.around(prediction.astype(np.float64), decimals=2), passed_predictions[-1]))\n", " if all(passed_predictions):\n", " print(\"Learning OK\")" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "test_net()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Initialization OK\n", "Construction OK\n", "Clearing OK\n", "Activation OK\n", "Backpropagation OK\n", "Updates and parameters shapes OK\n", "Trainer OK\n", "epoch 250, Error 5.53" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 500, Error 0.93" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 750, Error 0.39" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 1000, Error 0.28" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 1250, Error 0.22" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 1500, Error 0.19" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 1750, Error 0.17" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Training OK" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "array([ 0., 0.]) => array([ 0.02, 0.98]) : True\n", "array([ 0., 1.]) => array([ 0.98, 0.02]) : True\n", "array([ 1., 0.]) => array([ 0.98, 0.02]) : True\n", "array([ 1., 1.]) => array([ 0.02, 0.98]) : True\n", "Learning OK\n" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "def test_softmax_net():\n", " # create a simple binary to decimal converter\n", " digit_dataset, digit_labels = create_digit_dataset()\n", " \n", " # create a small network:\n", "\n", " net = Network(metric = CategoricalCrossEntropy)\n", " print(\"Initialization OK\")\n", " first_layer = LinearLayer(digit_dataset.shape[1], 3)\n", " first_layer_activation = ActivationLayer(LogisticNeuron)\n", " first_layer.connect_to(first_layer_activation)\n", " \n", " second_layer = LinearLayer(3, 11) # 0, 1, ..., 9, 10\n", " first_layer_activation.connect_to(second_layer)\n", " \n", " second_layer_activation = ActivationLayer(SoftmaxNeuron)\n", " second_layer.connect_to(second_layer_activation)\n", " \n", " net.add_layer(first_layer, input=True)\n", " net.add_layer(first_layer_activation)\n", " net.add_layer(second_layer)\n", " net.add_layer(second_layer_activation, output=True)\n", " \n", " print(\"Construction OK\")\n", " net.clear()\n", " print(\"Clearing OK\")\n", " net.activate(digit_dataset)\n", " print(\"Activation OK\")\n", " net.backpropagate(digit_labels)\n", " print(\"Backpropagation OK\")\n", "\n", " for gparam, param in zip(net.get_gradients(), net.get_parameters()):\n", " assert(gparam.shape == param.shape), \"Weight updates are not the same size\"\n", " \n", " print(\"Updates and parameters shapes OK\")\n", " \n", " trainer = Trainer(net, 0.01)\n", " print(\"Trainer OK\")\n", " epochs = 2000\n", " \n", " for epoch in range(epochs):\n", " er = trainer.train(digit_dataset, digit_labels)\n", " if epoch % 250 == 0:\n", " print(\"epoch %d, Error %.2f\" % (epoch, er))\n", " \n", " print(\"Training OK\")\n", " \n", " net.clear()\n", " np.set_printoptions(precision=2)\n", " passed_predictions = []\n", " plt.matshow(net.activate(digit_dataset), cmap = matplotlib.cm.Blues)\n", " plt.xticks(np.arange(0,11), [str(w) for w in list(np.arange(0,11))])\n", " plt.yticks(np.arange(0,11), [str(datum) for datum in digit_dataset])\n", " plt.title(\"Prediction distribution for decimals from binary codes\")\n", " \n", " for data, prediction, label in zip(digit_dataset, net.activate(digit_dataset), digit_labels):\n", " passed_predictions.append(prediction.argmax() == label)\n", " print(\"%r => %r : %r\" % (data.astype(np.float64), prediction.argmax(), passed_predictions[-1]))\n", " if all(passed_predictions):\n", " print(\"Learning OK\")" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "test_softmax_net()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Initialization OK\n", "Construction OK\n", "Clearing OK\n", "Activation OK\n", "Backpropagation OK\n", "Updates and parameters shapes OK\n", "Trainer OK\n", "epoch 0, Error 27.20\n", "epoch 250, Error 0.63" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 500, Error 0.28" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 750, Error 0.18" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 1000, Error 0.13" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 1250, Error 0.10" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 1500, Error 0.08" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 1750, Error 0.07" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Training OK" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "array([ 0., 0., 0., 0.]) => 0 : True" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "array([ 0., 0., 0., 1.]) => 1 : True\n", "array([ 0., 0., 1., 0.]) => 2 : True\n", "array([ 0., 0., 1., 1.]) => 3 : True\n", "array([ 0., 1., 0., 0.]) => 4 : True\n", "array([ 0., 1., 0., 1.]) => 5 : True\n", "array([ 0., 1., 1., 0.]) => 6 : True\n", "array([ 0., 1., 1., 1.]) => 7 : True\n", "array([ 1., 0., 0., 0.]) => 8 : True\n", "array([ 1., 0., 0., 1.]) => 9 : True\n", "array([ 1., 0., 1., 0.]) => 10 : True\n", "Learning OK\n" ] }, { "metadata": {}, "output_type": "display_data", "svg": [ "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n" ], "text": [ "" ] } ], "prompt_number": 30 }, { "cell_type": "code", "collapsed": false, "input": [ "def test_tensor_net():\n", " # create a simple binary to decimal converter\n", " digit_dataset, digit_labels = create_digit_dataset()\n", " \n", " # create a small network:\n", "\n", " net = Network(metric=CategoricalCrossEntropy)\n", " print(\"Initialization OK\")\n", " first_layer = LinearLayer(digit_dataset.shape[1], 3)\n", " first_layer_activation = ActivationLayer(LogisticNeuron)\n", " first_layer.connect_to(first_layer_activation)\n", " \n", " second_layer = LinearLayer(3, 11, tensor=True) # 0, 1, ..., 9, 10\n", " first_layer_activation.connect_to(second_layer)\n", " \n", " second_layer_activation = ActivationLayer(SoftmaxNeuron)\n", " second_layer.connect_to(second_layer_activation)\n", " \n", " net.add_layer(first_layer, input=True)\n", " net.add_layer(first_layer_activation)\n", " net.add_layer(second_layer)\n", " net.add_layer(second_layer_activation, output=True)\n", " \n", " print(\"Construction OK\")\n", " \n", " net.clear()\n", " print(\"Clearing OK\")\n", " net.activate(digit_dataset)\n", " print(\"Activation OK\")\n", " net.backpropagate(digit_labels)\n", " print(\"Backpropagation OK\")\n", "\n", " for gparam, param in zip(net.get_gradients(), net.get_parameters()):\n", " assert(gparam.shape == param.shape), \"Weight updates are not the same size\"\n", " \n", " print(\"Updates and parameters shapes OK\")\n", " \n", " trainer = Trainer(net, 0.01)\n", " print(\"Trainer OK\")\n", " epochs = 2000\n", " \n", " for epoch in range(epochs):\n", " er = trainer.train(digit_dataset, digit_labels)\n", " if epoch % 250 == 0:\n", " print(\"epoch %d, Error %.2f\" % (epoch, er))\n", " \n", " print(\"Training OK\")\n", " \n", " net.clear()\n", " np.set_printoptions(precision=2)\n", " passed_predictions = []\n", " plt.matshow(net.activate(digit_dataset), cmap = matplotlib.cm.Blues)\n", " plt.xticks(np.arange(0,11), [str(w) for w in list(np.arange(0,11))])\n", " plt.yticks(np.arange(0,11), [str(datum) for datum in digit_dataset])\n", " plt.title(\"Prediction distribution for decimals from binary codes\")\n", " \n", " for data, prediction, label in zip(digit_dataset, net.activate(digit_dataset), digit_labels):\n", " passed_predictions.append(prediction.argmax() == label)\n", " print(\"%r => %r : %r\" % (data.astype(np.float64), prediction.argmax(), passed_predictions[-1]))\n", " if all(passed_predictions):\n", " print(\"Learning OK\")" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 31 }, { "cell_type": "code", "collapsed": false, "input": [ "test_tensor_net()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Initialization OK\n", "Construction OK\n", "Clearing OK\n", "Activation OK\n", "Backpropagation OK\n", "Updates and parameters shapes OK\n", "Trainer OK\n", "epoch 0, Error 27.47\n", "epoch 250, Error 0.13" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 500, Error 0.06" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 750, Error 0.04" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 1000, Error 0.03" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 1250, Error 0.02" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 1500, Error 0.02" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 1750, Error 0.02" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Training OK" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "array([ 0., 0., 0., 0.]) => 0 : True" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "array([ 0., 0., 0., 1.]) => 1 : True\n", "array([ 0., 0., 1., 0.]) => 2 : True\n", "array([ 0., 0., 1., 1.]) => 3 : True\n", "array([ 0., 1., 0., 0.]) => 4 : True\n", "array([ 0., 1., 0., 1.]) => 5 : True\n", "array([ 0., 1., 1., 0.]) => 6 : True\n", "array([ 0., 1., 1., 1.]) => 7 : True\n", "array([ 1., 0., 0., 0.]) => 8 : True\n", "array([ 1., 0., 0., 1.]) => 9 : True\n", "array([ 1., 0., 1., 0.]) => 10 : True\n", "Learning OK\n" ] }, { "metadata": {}, "output_type": "display_data", "svg": [ "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n" ], "text": [ "" ] } ], "prompt_number": 32 }, { "cell_type": "code", "collapsed": false, "input": [ "def update_step(data, step):\n", " data[step,:,:] = data[step-1,:,:]\n", " data[step,:,0] += 1\n", " for stream in range(data.shape[1]):\n", " if data[step,stream,0] > 1:\n", " data[step,stream,0] = 0\n", " data[step,stream,1] += 1\n", " if data[step,stream,1] > 1:\n", " data[step,stream,1] = 0\n", " data[step,stream,2] += 1\n", " if data[step,stream,2] > 1:\n", " data[step,stream,2] = 0\n", " if data[step,stream,1] > 1:\n", " data[step,stream,1] = 0\n", " data[step,stream,2] += 1\n", " if data[step,stream,2] > 1:\n", " data[step,stream,2] = 0\n", " if data[step,stream,2] > 1:\n", " data[step,stream,2] = 0" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 43 }, { "cell_type": "code", "collapsed": false, "input": [ "def binary_addition_data(TIMESTEPS = 20,\n", " DIFFERENT_OBSERVABLES = 3,\n", " OBSERVATION_DIMENSIONS = 3,\n", " NOISE_SIZE = 0.03):\n", "\n", " recurrent_data = np.zeros([TIMESTEPS, DIFFERENT_OBSERVABLES, OBSERVATION_DIMENSIONS], dtype=np.float32)\n", "\n", " start_step = np.random.randint(0, 1, size=(DIFFERENT_OBSERVABLES, OBSERVATION_DIMENSIONS))\n", "\n", " def update_step(data, step):\n", " data[step,:,:] = data[step-1,:,:]\n", " data[step,:,0] += 1\n", " for stream in range(data.shape[1]):\n", " if data[step,stream,0] > 1:\n", " data[step,stream,0] = 0\n", " data[step,stream,1] += 1\n", " if data[step,stream,1] > 1:\n", " data[step,stream,1] = 0\n", " data[step,stream,2] += 1\n", " if data[step,stream,2] > 1:\n", " data[step,stream,2] = 0\n", " if data[step,stream,1] > 1:\n", " data[step,stream,1] = 0\n", " data[step,stream,2] += 1\n", " if data[step,stream,2] > 1:\n", " data[step,stream,2] = 0\n", " if data[step,stream,2] > 1:\n", " data[step,stream,2] = 0\n", " \n", " recurrent_data[0,:,:] = start_step\n", " for i in range(1, TIMESTEPS):\n", " update_step(recurrent_data, i)\n", " \n", " noisy_data = recurrent_data + NOISE_SIZE * np.random.standard_normal(recurrent_data.shape).astype(np.float32)\n", " return noisy_data, recurrent_data\n", "\n", "def one_trick_pony(network, temporal=False):\n", " print(\"Simple binary additions using network:\")\n", " for num in range(0, 6):\n", " bin_repr = np.binary_repr(num)[::-1][:3]\n", " if len(bin_repr) < 3:\n", " bin_repr = bin_repr + (3 - len(bin_repr)) * \"0\"\n", " if temporal:\n", " bin_repr = np.array([[list(bin_repr)]])\n", " else:\n", " bin_repr = np.array([list(bin_repr)])\n", " print(\"%d + 1 ~= %d\" % (num, sum(2 ** k if i > 0 else 0. for k, i in enumerate(network.activate(bin_repr)[0].round()))))\n", "\n", "\n", "def test_reccurent_net():\n", " \n", " # Binary addition problem\n", "\n", " TIMESTEPS = 20\n", " DIFFERENT_OBSERVABLES = 3\n", " OBSERVATION_DIMENSIONS = 3\n", " NOISE_SIZE = 0.03\n", "\n", " noisy_data, recurrent_data = binary_addition_data(TIMESTEPS,\n", " DIFFERENT_OBSERVABLES,\n", " OBSERVATION_DIMENSIONS,\n", " NOISE_SIZE)\n", " \n", " HIDDEN_DIMENSIONS = 8\n", "\n", " net = Network()\n", " \n", " input_layer = LinearLayer(OBSERVATION_DIMENSIONS, HIDDEN_DIMENSIONS)\n", " activ_layer = ActivationLayer(TanhNeuron)\n", " prediction_layer = LinearLayer(HIDDEN_DIMENSIONS, OBSERVATION_DIMENSIONS)\n", " output_layer = ActivationLayer(LogisticNeuron)\n", " \n", " input_layer.connect_to(activ_layer)\n", " activ_layer.connect_to(prediction_layer)\n", " prediction_layer.connect_to(output_layer)\n", " \n", " temporal_loop = LoopLayer(OBSERVATION_DIMENSIONS, input_layer)\n", " \n", " slice_layer = SliceLayer((-1,-1))\n", " temporal_loop.connect_to(slice_layer)\n", "\n", " net.add_layer(temporal_loop, input=True)\n", " net.add_layer(slice_layer, output=True)\n", " net.set_error(BinaryCrossEntropy)\n", " net.activate(noisy_data[:-1,:,:])\n", " \n", " net.backpropagate(recurrent_data[-1,:,:].astype(np.int32))\n", " print(\"Backpropagation Through Time OK\")\n", "\n", " for gparam, param in zip(net.get_gradients(), net.get_parameters()):\n", " assert(gparam.shape == param.shape), \"Weight updates are not the same size\"\n", " \n", " print(\"Updates and parameters shapes OK\")\n", " \n", " trainer = Trainer(net, method=\"adadelta\", rho=0.95)\n", " print(\"Trainer OK\")\n", " epochs = 5000\n", " subepochs = 10\n", " \n", " print(\"before we start, here's the network's view of addition:\")\n", " \n", " one_trick_pony(net, True)\n", " \n", " # use last time step for prediction\n", " \n", " # Note: if you look closely, this is a really poor\n", " # example, since we are showing many useless observations\n", " # and finally closing with one useful one for training\n", " # and context is not useful in this instance for prediction.\n", " \n", " er = 0.\n", " for epoch in range(epochs):\n", " \n", " for subepoch in range(subepochs):\n", " \n", " random_range_begin = np.random.randint(0, TIMESTEPS-5)\n", " random_range_end = random_range_begin + 1#np.random.randint(random_range_begin+4, TIMESTEPS)\n", "\n", " er += trainer.train(noisy_data[random_range_begin:random_range_end,:,:], recurrent_data[random_range_end,:,:].astype(np.int32))\n", " \n", " if epoch > 0 and epoch % 1000 == 0:\n", " print(\"epoch %d, Error %.2f\" % (epoch * subepochs, er))\n", " er = 0.\n", "\n", " print(\"Training OK\")\n", " \n", " one_trick_pony(net, True)\n", " \n", " return net\n", "\n", "def test_binary_addition_net():\n", " # Binary addition problem\n", "\n", " TIMESTEPS = 200\n", " DIFFERENT_OBSERVABLES = 10\n", " OBSERVATION_DIMENSIONS = 3\n", " NOISE_SIZE = 0.03\n", "\n", " noisy_data, recurrent_data = binary_addition_data(TIMESTEPS,\n", " DIFFERENT_OBSERVABLES,\n", " OBSERVATION_DIMENSIONS,\n", " NOISE_SIZE)\n", " \n", " HIDDEN_DIMENSIONS = 8\n", "\n", " net = Network()\n", " \n", " input_layer = LinearLayer(OBSERVATION_DIMENSIONS, HIDDEN_DIMENSIONS)\n", " activ_layer = ActivationLayer(TanhNeuron)\n", " prediction_layer = LinearLayer(HIDDEN_DIMENSIONS, OBSERVATION_DIMENSIONS)\n", " output_layer = ActivationLayer(LogisticNeuron)\n", " \n", " input_layer.connect_to(activ_layer)\n", " activ_layer.connect_to(prediction_layer)\n", " prediction_layer.connect_to(output_layer)\n", "\n", " net.add_layer(input_layer, input=True)\n", " net.add_layer(activ_layer)\n", " net.add_layer(prediction_layer)\n", " net.add_layer(output_layer, output=True)\n", " net.set_error(BinaryCrossEntropy)\n", " net.activate(noisy_data[0,:,:])\n", " \n", " net.backpropagate(recurrent_data[1,:,:].astype(np.int32))\n", "\n", " for gparam, param in zip(net.get_gradients(), net.get_parameters()):\n", " assert(gparam.shape == param.shape), \"Weight updates are not the same size\"\n", " \n", " print(\"Updates and parameters shapes OK\")\n", " \n", " trainer = Trainer(net, method=\"adadelta\", rho=0.95)\n", " print(\"Trainer OK\")\n", " epochs = 5000\n", " subepochs = 10\n", " \n", " print(\"before we start, here's the network's view of addition:\")\n", " \n", " one_trick_pony(net)\n", " \n", " er = 0.\n", " for epoch in range(epochs):\n", " \n", " for subepoch in range(subepochs):\n", " \n", " random_range_begin = np.random.randint(0, TIMESTEPS-5)\n", " random_range_end = random_range_begin +1\n", "\n", " er += trainer.train(noisy_data[random_range_begin,:,:], recurrent_data[random_range_end,:,:].astype(np.int32))\n", " \n", " if epoch > 0 and epoch % 1000 == 0:\n", " print(\"epoch %d, Error %.2f\" % (epoch * subepochs, er))\n", " er = 0.\n", "\n", " print(\"Training OK\")\n", " \n", " one_trick_pony(net)\n", "\n", " return net" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 202 }, { "cell_type": "code", "collapsed": false, "input": [ "flat_calculator_net = test_binary_addition_net()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Updates and parameters shapes OK\n", "Trainer OK\n", "before we start, here's the network's view of addition:\n", "Simple binary additions using network:\n", "0 + 1 ~= 6\n", "1 + 1 ~= 6\n", "2 + 1 ~= 6\n", "3 + 1 ~= 6\n", "4 + 1 ~= 6\n", "5 + 1 ~= 6\n", "epoch 10000, Error 35194.14" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 20000, Error 792.28" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 30000, Error 527.60" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 40000, Error 426.45" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Training OK" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Simple binary additions using network:\n", "0 + 1 ~= 1\n", "1 + 1 ~= 2\n", "2 + 1 ~= 3\n", "3 + 1 ~= 4\n", "4 + 1 ~= 5\n", "5 + 1 ~= 6\n" ] } ], "prompt_number": 203 }, { "cell_type": "code", "collapsed": false, "input": [ "recurrent_net = test_reccurent_net()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Backpropagation Through Time OK\n", "Updates and parameters shapes OK\n", "Trainer OK\n", "before we start, here's the network's view of addition:\n", "Simple binary additions using network:\n", "0 + 1 ~= 0\n", "1 + 1 ~= 0\n", "2 + 1 ~= 0\n", "3 + 1 ~= 0\n", "4 + 1 ~= 0\n", "5 + 1 ~= 0\n", "epoch 10000, Error 12438.66" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 20000, Error 701.64" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 30000, Error 329.03" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "epoch 40000, Error 251.96" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Training OK" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "Simple binary additions using network:\n", "0 + 1 ~= 1\n", "1 + 1 ~= 2\n", "2 + 1 ~= 3\n", "3 + 1 ~= 4\n", "4 + 1 ~= 5\n", "5 + 1 ~= 6\n" ] } ], "prompt_number": 200 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Drawing the network" ] }, { "cell_type": "code", "collapsed": false, "input": [ "net = Network()\n", "first_layer = Layer(3, neuron=TanhNeuron)\n", "net.add_layer(first_layer, input=True)\n", "second_layer = Layer(5, 2, neuron=SoftmaxNeuron)\n", "net.add_layer(second_layer, output=True)\n", "first_layer.connect_to(second_layer)\n", "cython_lstm.network_viewer.draw(net)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "display_data", "svg": [ "\n", "\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "\n" ], "text": [ "" ] } ], "prompt_number": 18 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Future APIs issues and ideas:\n", "\n", "* Prevent parameter duplication by adding uuids to each, and using sets to identify them (at least initially)\n", "\n", "* Slice Layer, and add layer are clunky ways of achieving simple things. These should implicity act on the layer and connect it.\n", "\n", "* In doing so the network should also implicitly gobble up all the resulting layers into a coherent whole.\n", "\n", "* Optimization is then a matter of preventing the saving of the non transformed input of a linear layer, and converting all these classes either to Numba or Cython for compilation to avoid doing all sorts of polymorphic checks everywhere.\n", "\n", "## Future Steps:\n", "\n", "* Implement RNN with memory\n", "\n", "* Implement gate unit by overloading the `__add__` operator\n", "\n", "* Implement the loop layer in Cython with nogil where possible\n" ] }, { "cell_type": "code", "collapsed": false, "input": [ "def topology_test():\n", " # create a test dataset\n", " xor_dataset, xor_labels = create_xor_dataset()\n", " # create a small network:\n", "\n", " net = Network(metric = BinaryCrossEntropy)\n", " print(\"Initialization OK\")\n", " first_layer = LinearLayer(xor_dataset.shape[1], 6)\n", " activation_layer = ActivationLayer(LogisticNeuron)\n", " \n", " first_layer.connect_to(activation_layer)\n", " \n", " second_input = cython_lstm.network.DataLayer()\n", " \n", " second_layer = LinearLayer(3, 6)\n", " second_input.connect_to(second_layer)\n", " \n", " third_layer = activation_layer + second_layer\n", " \n", " net.add_layer(first_layer, input=True)\n", " net.add_layer(activation_layer)\n", " net.add_layer(second_layer)\n", " net.add_layer(second_input)\n", " net.add_layer(third_layer, output=True)\n", " return net\n", " \n", " " ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "a = topology_test()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Initialization OK\n" ] } ], "prompt_number": 24 }, { "cell_type": "code", "collapsed": false, "input": [ "[b.layer for b in a.topsort()]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 25, "text": [ "[,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ]" ] } ], "prompt_number": 25 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }