{
"metadata": {
"name": "",
"signature": "sha256:a082fc807336303e77a8e6c02f11f8c32fbaf99a81685da95a5b59cf3c38ddb6"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"% load_ext autoreload\n",
"% autoreload 2\n",
"% matplotlib inline\n",
"% load_ext cythonmagic\n",
"% config InlineBackend.figure_format = 'svg'\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np, matplotlib\n",
"from cython_lstm.network import Network\n",
"from cython_lstm.neuron import LogisticNeuron, TanhNeuron, SoftmaxNeuron\n",
"from cython_lstm.layers import LoopLayer, SliceLayer, ActivationLayer, LinearLayer\n",
"from cython_lstm.trainer import Trainer\n",
"from cython_lstm.error import MSE, CategoricalCrossEntropy, BinaryCrossEntropy\n",
"from cython_lstm.dataset import create_xor_dataset, create_digit_dataset\n",
"import cython_lstm.network_viewer"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def test_net():\n",
" # create a test dataset\n",
" xor_dataset, xor_labels = create_xor_dataset()\n",
" # create a small network:\n",
"\n",
" net = Network(metric= BinaryCrossEntropy)\n",
" print(\"Initialization OK\")\n",
" first_layer = LinearLayer(xor_dataset.shape[1], 6)\n",
" activation_layer = ActivationLayer(LogisticNeuron)\n",
" first_layer.connect_to(activation_layer)\n",
"\n",
" second_layer = LinearLayer(6, xor_labels.shape[1])\n",
" activation_layer.connect_to(second_layer)\n",
"\n",
" second_activation_layer = ActivationLayer(LogisticNeuron)\n",
" second_layer.connect_to(second_activation_layer)\n",
"\n",
" net.add_layer(first_layer, input=True)\n",
" net.add_layer(activation_layer)\n",
" net.add_layer(second_layer)\n",
" net.add_layer(activation_layer)\n",
" net.add_layer(second_layer)\n",
" net.add_layer(second_activation_layer, output=True)\n",
"\n",
" print(\"Construction OK\")\n",
"\n",
" net.clear()\n",
" print(\"Clearing OK\")\n",
" net.activate(xor_dataset)\n",
" print(\"Activation OK\")\n",
" net.backpropagate(xor_labels)\n",
" print(\"Backpropagation OK\")\n",
"\n",
" for gparam, param in zip(net.get_gradients(), net.get_parameters()):\n",
" assert(gparam.shape == param.shape), \"Weight updates are not the same size\"\n",
"\n",
" print(\"Updates and parameters shapes OK\")\n",
"\n",
" trainer = Trainer(net, 0.3)\n",
" print(\"Trainer OK\")\n",
" epochs = 2000\n",
"\n",
" for epoch in range(epochs):\n",
" er = trainer.train(xor_dataset, xor_labels)\n",
" if epoch > 0 and epoch % 250 == 0:\n",
" print(\"epoch %d, Error %.2f\" % (epoch, er))\n",
"\n",
" print(\"Training OK\")\n",
"\n",
" net.clear()\n",
" np.set_printoptions(precision=2)\n",
" passed_predictions = []\n",
" for data, prediction, label in zip(xor_dataset, net.activate(xor_dataset), xor_labels):\n",
" passed_predictions.append(np.allclose(prediction.round(), label))\n",
" print(\"%r => %r : %r\" % (data.astype(np.float64), np.around(prediction.astype(np.float64), decimals=2), passed_predictions[-1]))\n",
" if all(passed_predictions):\n",
" print(\"Learning OK\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 2
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"test_net()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Initialization OK\n",
"Construction OK\n",
"Clearing OK\n",
"Activation OK\n",
"Backpropagation OK\n",
"Updates and parameters shapes OK\n",
"Trainer OK\n",
"epoch 250, Error 5.53"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 500, Error 0.93"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 750, Error 0.39"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 1000, Error 0.28"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 1250, Error 0.22"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 1500, Error 0.19"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 1750, Error 0.17"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Training OK"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"array([ 0., 0.]) => array([ 0.02, 0.98]) : True\n",
"array([ 0., 1.]) => array([ 0.98, 0.02]) : True\n",
"array([ 1., 0.]) => array([ 0.98, 0.02]) : True\n",
"array([ 1., 1.]) => array([ 0.02, 0.98]) : True\n",
"Learning OK\n"
]
}
],
"prompt_number": 3
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def test_softmax_net():\n",
" # create a simple binary to decimal converter\n",
" digit_dataset, digit_labels = create_digit_dataset()\n",
" \n",
" # create a small network:\n",
"\n",
" net = Network(metric = CategoricalCrossEntropy)\n",
" print(\"Initialization OK\")\n",
" first_layer = LinearLayer(digit_dataset.shape[1], 3)\n",
" first_layer_activation = ActivationLayer(LogisticNeuron)\n",
" first_layer.connect_to(first_layer_activation)\n",
" \n",
" second_layer = LinearLayer(3, 11) # 0, 1, ..., 9, 10\n",
" first_layer_activation.connect_to(second_layer)\n",
" \n",
" second_layer_activation = ActivationLayer(SoftmaxNeuron)\n",
" second_layer.connect_to(second_layer_activation)\n",
" \n",
" net.add_layer(first_layer, input=True)\n",
" net.add_layer(first_layer_activation)\n",
" net.add_layer(second_layer)\n",
" net.add_layer(second_layer_activation, output=True)\n",
" \n",
" print(\"Construction OK\")\n",
" net.clear()\n",
" print(\"Clearing OK\")\n",
" net.activate(digit_dataset)\n",
" print(\"Activation OK\")\n",
" net.backpropagate(digit_labels)\n",
" print(\"Backpropagation OK\")\n",
"\n",
" for gparam, param in zip(net.get_gradients(), net.get_parameters()):\n",
" assert(gparam.shape == param.shape), \"Weight updates are not the same size\"\n",
" \n",
" print(\"Updates and parameters shapes OK\")\n",
" \n",
" trainer = Trainer(net, 0.01)\n",
" print(\"Trainer OK\")\n",
" epochs = 2000\n",
" \n",
" for epoch in range(epochs):\n",
" er = trainer.train(digit_dataset, digit_labels)\n",
" if epoch % 250 == 0:\n",
" print(\"epoch %d, Error %.2f\" % (epoch, er))\n",
" \n",
" print(\"Training OK\")\n",
" \n",
" net.clear()\n",
" np.set_printoptions(precision=2)\n",
" passed_predictions = []\n",
" plt.matshow(net.activate(digit_dataset), cmap = matplotlib.cm.Blues)\n",
" plt.xticks(np.arange(0,11), [str(w) for w in list(np.arange(0,11))])\n",
" plt.yticks(np.arange(0,11), [str(datum) for datum in digit_dataset])\n",
" plt.title(\"Prediction distribution for decimals from binary codes\")\n",
" \n",
" for data, prediction, label in zip(digit_dataset, net.activate(digit_dataset), digit_labels):\n",
" passed_predictions.append(prediction.argmax() == label)\n",
" print(\"%r => %r : %r\" % (data.astype(np.float64), prediction.argmax(), passed_predictions[-1]))\n",
" if all(passed_predictions):\n",
" print(\"Learning OK\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 29
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"test_softmax_net()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Initialization OK\n",
"Construction OK\n",
"Clearing OK\n",
"Activation OK\n",
"Backpropagation OK\n",
"Updates and parameters shapes OK\n",
"Trainer OK\n",
"epoch 0, Error 27.20\n",
"epoch 250, Error 0.63"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 500, Error 0.28"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 750, Error 0.18"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 1000, Error 0.13"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 1250, Error 0.10"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 1500, Error 0.08"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 1750, Error 0.07"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Training OK"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"array([ 0., 0., 0., 0.]) => 0 : True"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"array([ 0., 0., 0., 1.]) => 1 : True\n",
"array([ 0., 0., 1., 0.]) => 2 : True\n",
"array([ 0., 0., 1., 1.]) => 3 : True\n",
"array([ 0., 1., 0., 0.]) => 4 : True\n",
"array([ 0., 1., 0., 1.]) => 5 : True\n",
"array([ 0., 1., 1., 0.]) => 6 : True\n",
"array([ 0., 1., 1., 1.]) => 7 : True\n",
"array([ 1., 0., 0., 0.]) => 8 : True\n",
"array([ 1., 0., 0., 1.]) => 9 : True\n",
"array([ 1., 0., 1., 0.]) => 10 : True\n",
"Learning OK\n"
]
},
{
"metadata": {},
"output_type": "display_data",
"svg": [
"\n",
"\n",
"\n",
"\n"
],
"text": [
""
]
}
],
"prompt_number": 30
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def test_tensor_net():\n",
" # create a simple binary to decimal converter\n",
" digit_dataset, digit_labels = create_digit_dataset()\n",
" \n",
" # create a small network:\n",
"\n",
" net = Network(metric=CategoricalCrossEntropy)\n",
" print(\"Initialization OK\")\n",
" first_layer = LinearLayer(digit_dataset.shape[1], 3)\n",
" first_layer_activation = ActivationLayer(LogisticNeuron)\n",
" first_layer.connect_to(first_layer_activation)\n",
" \n",
" second_layer = LinearLayer(3, 11, tensor=True) # 0, 1, ..., 9, 10\n",
" first_layer_activation.connect_to(second_layer)\n",
" \n",
" second_layer_activation = ActivationLayer(SoftmaxNeuron)\n",
" second_layer.connect_to(second_layer_activation)\n",
" \n",
" net.add_layer(first_layer, input=True)\n",
" net.add_layer(first_layer_activation)\n",
" net.add_layer(second_layer)\n",
" net.add_layer(second_layer_activation, output=True)\n",
" \n",
" print(\"Construction OK\")\n",
" \n",
" net.clear()\n",
" print(\"Clearing OK\")\n",
" net.activate(digit_dataset)\n",
" print(\"Activation OK\")\n",
" net.backpropagate(digit_labels)\n",
" print(\"Backpropagation OK\")\n",
"\n",
" for gparam, param in zip(net.get_gradients(), net.get_parameters()):\n",
" assert(gparam.shape == param.shape), \"Weight updates are not the same size\"\n",
" \n",
" print(\"Updates and parameters shapes OK\")\n",
" \n",
" trainer = Trainer(net, 0.01)\n",
" print(\"Trainer OK\")\n",
" epochs = 2000\n",
" \n",
" for epoch in range(epochs):\n",
" er = trainer.train(digit_dataset, digit_labels)\n",
" if epoch % 250 == 0:\n",
" print(\"epoch %d, Error %.2f\" % (epoch, er))\n",
" \n",
" print(\"Training OK\")\n",
" \n",
" net.clear()\n",
" np.set_printoptions(precision=2)\n",
" passed_predictions = []\n",
" plt.matshow(net.activate(digit_dataset), cmap = matplotlib.cm.Blues)\n",
" plt.xticks(np.arange(0,11), [str(w) for w in list(np.arange(0,11))])\n",
" plt.yticks(np.arange(0,11), [str(datum) for datum in digit_dataset])\n",
" plt.title(\"Prediction distribution for decimals from binary codes\")\n",
" \n",
" for data, prediction, label in zip(digit_dataset, net.activate(digit_dataset), digit_labels):\n",
" passed_predictions.append(prediction.argmax() == label)\n",
" print(\"%r => %r : %r\" % (data.astype(np.float64), prediction.argmax(), passed_predictions[-1]))\n",
" if all(passed_predictions):\n",
" print(\"Learning OK\")"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 31
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"test_tensor_net()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Initialization OK\n",
"Construction OK\n",
"Clearing OK\n",
"Activation OK\n",
"Backpropagation OK\n",
"Updates and parameters shapes OK\n",
"Trainer OK\n",
"epoch 0, Error 27.47\n",
"epoch 250, Error 0.13"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 500, Error 0.06"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 750, Error 0.04"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 1000, Error 0.03"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 1250, Error 0.02"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 1500, Error 0.02"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 1750, Error 0.02"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Training OK"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"array([ 0., 0., 0., 0.]) => 0 : True"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"array([ 0., 0., 0., 1.]) => 1 : True\n",
"array([ 0., 0., 1., 0.]) => 2 : True\n",
"array([ 0., 0., 1., 1.]) => 3 : True\n",
"array([ 0., 1., 0., 0.]) => 4 : True\n",
"array([ 0., 1., 0., 1.]) => 5 : True\n",
"array([ 0., 1., 1., 0.]) => 6 : True\n",
"array([ 0., 1., 1., 1.]) => 7 : True\n",
"array([ 1., 0., 0., 0.]) => 8 : True\n",
"array([ 1., 0., 0., 1.]) => 9 : True\n",
"array([ 1., 0., 1., 0.]) => 10 : True\n",
"Learning OK\n"
]
},
{
"metadata": {},
"output_type": "display_data",
"svg": [
"\n",
"\n",
"\n",
"\n"
],
"text": [
""
]
}
],
"prompt_number": 32
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def update_step(data, step):\n",
" data[step,:,:] = data[step-1,:,:]\n",
" data[step,:,0] += 1\n",
" for stream in range(data.shape[1]):\n",
" if data[step,stream,0] > 1:\n",
" data[step,stream,0] = 0\n",
" data[step,stream,1] += 1\n",
" if data[step,stream,1] > 1:\n",
" data[step,stream,1] = 0\n",
" data[step,stream,2] += 1\n",
" if data[step,stream,2] > 1:\n",
" data[step,stream,2] = 0\n",
" if data[step,stream,1] > 1:\n",
" data[step,stream,1] = 0\n",
" data[step,stream,2] += 1\n",
" if data[step,stream,2] > 1:\n",
" data[step,stream,2] = 0\n",
" if data[step,stream,2] > 1:\n",
" data[step,stream,2] = 0"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 43
},
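{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "As a quick sanity check (added here for illustration, not part of the original run): `update_step` treats the three observation dimensions of each stream as little-endian binary digits and increments the counter by one per time step, wrapping past 7 back to 0."
 ]
},
{
 "cell_type": "code",
 "collapsed": false,
 "input": [
  "# decode each time step's bits back to an integer: bit k has weight 2 ** k\n",
  "demo = np.zeros((10, 1, 3), dtype=np.float32)\n",
  "for i in range(1, 10):\n",
  "    update_step(demo, i)\n",
  "print([int(sum(bit * 2 ** k for k, bit in enumerate(step[0]))) for step in demo])\n",
  "# expected: [0, 1, 2, 3, 4, 5, 6, 7, 0, 1]"
 ],
 "language": "python",
 "metadata": {},
 "outputs": []
},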
{
"cell_type": "code",
"collapsed": false,
"input": [
"def binary_addition_data(TIMESTEPS = 20,\n",
" DIFFERENT_OBSERVABLES = 3,\n",
" OBSERVATION_DIMENSIONS = 3,\n",
" NOISE_SIZE = 0.03):\n",
"\n",
" recurrent_data = np.zeros([TIMESTEPS, DIFFERENT_OBSERVABLES, OBSERVATION_DIMENSIONS], dtype=np.float32)\n",
"\n",
" start_step = np.random.randint(0, 1, size=(DIFFERENT_OBSERVABLES, OBSERVATION_DIMENSIONS))\n",
"\n",
" def update_step(data, step):\n",
" data[step,:,:] = data[step-1,:,:]\n",
" data[step,:,0] += 1\n",
" for stream in range(data.shape[1]):\n",
" if data[step,stream,0] > 1:\n",
" data[step,stream,0] = 0\n",
" data[step,stream,1] += 1\n",
" if data[step,stream,1] > 1:\n",
" data[step,stream,1] = 0\n",
" data[step,stream,2] += 1\n",
" if data[step,stream,2] > 1:\n",
" data[step,stream,2] = 0\n",
" if data[step,stream,1] > 1:\n",
" data[step,stream,1] = 0\n",
" data[step,stream,2] += 1\n",
" if data[step,stream,2] > 1:\n",
" data[step,stream,2] = 0\n",
" if data[step,stream,2] > 1:\n",
" data[step,stream,2] = 0\n",
" \n",
" recurrent_data[0,:,:] = start_step\n",
" for i in range(1, TIMESTEPS):\n",
" update_step(recurrent_data, i)\n",
" \n",
" noisy_data = recurrent_data + NOISE_SIZE * np.random.standard_normal(recurrent_data.shape).astype(np.float32)\n",
" return noisy_data, recurrent_data\n",
"\n",
"def one_trick_pony(network, temporal=False):\n",
" print(\"Simple binary additions using network:\")\n",
" for num in range(0, 6):\n",
" bin_repr = np.binary_repr(num)[::-1][:3]\n",
" if len(bin_repr) < 3:\n",
" bin_repr = bin_repr + (3 - len(bin_repr)) * \"0\"\n",
" if temporal:\n",
" bin_repr = np.array([[list(bin_repr)]])\n",
" else:\n",
" bin_repr = np.array([list(bin_repr)])\n",
" print(\"%d + 1 ~= %d\" % (num, sum(2 ** k if i > 0 else 0. for k, i in enumerate(network.activate(bin_repr)[0].round()))))\n",
"\n",
"\n",
"def test_reccurent_net():\n",
" \n",
" # Binary addition problem\n",
"\n",
" TIMESTEPS = 20\n",
" DIFFERENT_OBSERVABLES = 3\n",
" OBSERVATION_DIMENSIONS = 3\n",
" NOISE_SIZE = 0.03\n",
"\n",
" noisy_data, recurrent_data = binary_addition_data(TIMESTEPS,\n",
" DIFFERENT_OBSERVABLES,\n",
" OBSERVATION_DIMENSIONS,\n",
" NOISE_SIZE)\n",
" \n",
" HIDDEN_DIMENSIONS = 8\n",
"\n",
" net = Network()\n",
" \n",
" input_layer = LinearLayer(OBSERVATION_DIMENSIONS, HIDDEN_DIMENSIONS)\n",
" activ_layer = ActivationLayer(TanhNeuron)\n",
" prediction_layer = LinearLayer(HIDDEN_DIMENSIONS, OBSERVATION_DIMENSIONS)\n",
" output_layer = ActivationLayer(LogisticNeuron)\n",
" \n",
" input_layer.connect_to(activ_layer)\n",
" activ_layer.connect_to(prediction_layer)\n",
" prediction_layer.connect_to(output_layer)\n",
" \n",
" temporal_loop = LoopLayer(OBSERVATION_DIMENSIONS, input_layer)\n",
" \n",
" slice_layer = SliceLayer((-1,-1))\n",
" temporal_loop.connect_to(slice_layer)\n",
"\n",
" net.add_layer(temporal_loop, input=True)\n",
" net.add_layer(slice_layer, output=True)\n",
" net.set_error(BinaryCrossEntropy)\n",
" net.activate(noisy_data[:-1,:,:])\n",
" \n",
" net.backpropagate(recurrent_data[-1,:,:].astype(np.int32))\n",
" print(\"Backpropagation Through Time OK\")\n",
"\n",
" for gparam, param in zip(net.get_gradients(), net.get_parameters()):\n",
" assert(gparam.shape == param.shape), \"Weight updates are not the same size\"\n",
" \n",
" print(\"Updates and parameters shapes OK\")\n",
" \n",
" trainer = Trainer(net, method=\"adadelta\", rho=0.95)\n",
" print(\"Trainer OK\")\n",
" epochs = 5000\n",
" subepochs = 10\n",
" \n",
" print(\"before we start, here's the network's view of addition:\")\n",
" \n",
" one_trick_pony(net, True)\n",
" \n",
" # use last time step for prediction\n",
" \n",
" # Note: if you look closely, this is a really poor\n",
" # example, since we are showing many useless observations\n",
" # and finally closing with one useful one for training\n",
" # and context is not useful in this instance for prediction.\n",
" \n",
" er = 0.\n",
" for epoch in range(epochs):\n",
" \n",
" for subepoch in range(subepochs):\n",
" \n",
" random_range_begin = np.random.randint(0, TIMESTEPS-5)\n",
" random_range_end = random_range_begin + 1#np.random.randint(random_range_begin+4, TIMESTEPS)\n",
"\n",
" er += trainer.train(noisy_data[random_range_begin:random_range_end,:,:], recurrent_data[random_range_end,:,:].astype(np.int32))\n",
" \n",
" if epoch > 0 and epoch % 1000 == 0:\n",
" print(\"epoch %d, Error %.2f\" % (epoch * subepochs, er))\n",
" er = 0.\n",
"\n",
" print(\"Training OK\")\n",
" \n",
" one_trick_pony(net, True)\n",
" \n",
" return net\n",
"\n",
"def test_binary_addition_net():\n",
" # Binary addition problem\n",
"\n",
" TIMESTEPS = 200\n",
" DIFFERENT_OBSERVABLES = 10\n",
" OBSERVATION_DIMENSIONS = 3\n",
" NOISE_SIZE = 0.03\n",
"\n",
" noisy_data, recurrent_data = binary_addition_data(TIMESTEPS,\n",
" DIFFERENT_OBSERVABLES,\n",
" OBSERVATION_DIMENSIONS,\n",
" NOISE_SIZE)\n",
" \n",
" HIDDEN_DIMENSIONS = 8\n",
"\n",
" net = Network()\n",
" \n",
" input_layer = LinearLayer(OBSERVATION_DIMENSIONS, HIDDEN_DIMENSIONS)\n",
" activ_layer = ActivationLayer(TanhNeuron)\n",
" prediction_layer = LinearLayer(HIDDEN_DIMENSIONS, OBSERVATION_DIMENSIONS)\n",
" output_layer = ActivationLayer(LogisticNeuron)\n",
" \n",
" input_layer.connect_to(activ_layer)\n",
" activ_layer.connect_to(prediction_layer)\n",
" prediction_layer.connect_to(output_layer)\n",
"\n",
" net.add_layer(input_layer, input=True)\n",
" net.add_layer(activ_layer)\n",
" net.add_layer(prediction_layer)\n",
" net.add_layer(output_layer, output=True)\n",
" net.set_error(BinaryCrossEntropy)\n",
" net.activate(noisy_data[0,:,:])\n",
" \n",
" net.backpropagate(recurrent_data[1,:,:].astype(np.int32))\n",
"\n",
" for gparam, param in zip(net.get_gradients(), net.get_parameters()):\n",
" assert(gparam.shape == param.shape), \"Weight updates are not the same size\"\n",
" \n",
" print(\"Updates and parameters shapes OK\")\n",
" \n",
" trainer = Trainer(net, method=\"adadelta\", rho=0.95)\n",
" print(\"Trainer OK\")\n",
" epochs = 5000\n",
" subepochs = 10\n",
" \n",
" print(\"before we start, here's the network's view of addition:\")\n",
" \n",
" one_trick_pony(net)\n",
" \n",
" er = 0.\n",
" for epoch in range(epochs):\n",
" \n",
" for subepoch in range(subepochs):\n",
" \n",
" random_range_begin = np.random.randint(0, TIMESTEPS-5)\n",
" random_range_end = random_range_begin +1\n",
"\n",
" er += trainer.train(noisy_data[random_range_begin,:,:], recurrent_data[random_range_end,:,:].astype(np.int32))\n",
" \n",
" if epoch > 0 and epoch % 1000 == 0:\n",
" print(\"epoch %d, Error %.2f\" % (epoch * subepochs, er))\n",
" er = 0.\n",
"\n",
" print(\"Training OK\")\n",
" \n",
" one_trick_pony(net)\n",
"\n",
" return net"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 202
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"flat_calculator_net = test_binary_addition_net()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Updates and parameters shapes OK\n",
"Trainer OK\n",
"before we start, here's the network's view of addition:\n",
"Simple binary additions using network:\n",
"0 + 1 ~= 6\n",
"1 + 1 ~= 6\n",
"2 + 1 ~= 6\n",
"3 + 1 ~= 6\n",
"4 + 1 ~= 6\n",
"5 + 1 ~= 6\n",
"epoch 10000, Error 35194.14"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 20000, Error 792.28"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 30000, Error 527.60"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 40000, Error 426.45"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Training OK"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Simple binary additions using network:\n",
"0 + 1 ~= 1\n",
"1 + 1 ~= 2\n",
"2 + 1 ~= 3\n",
"3 + 1 ~= 4\n",
"4 + 1 ~= 5\n",
"5 + 1 ~= 6\n"
]
}
],
"prompt_number": 203
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"recurrent_net = test_reccurent_net()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Backpropagation Through Time OK\n",
"Updates and parameters shapes OK\n",
"Trainer OK\n",
"before we start, here's the network's view of addition:\n",
"Simple binary additions using network:\n",
"0 + 1 ~= 0\n",
"1 + 1 ~= 0\n",
"2 + 1 ~= 0\n",
"3 + 1 ~= 0\n",
"4 + 1 ~= 0\n",
"5 + 1 ~= 0\n",
"epoch 10000, Error 12438.66"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 20000, Error 701.64"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 30000, Error 329.03"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"epoch 40000, Error 251.96"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Training OK"
]
},
{
"output_type": "stream",
"stream": "stdout",
"text": [
"\n",
"Simple binary additions using network:\n",
"0 + 1 ~= 1\n",
"1 + 1 ~= 2\n",
"2 + 1 ~= 3\n",
"3 + 1 ~= 4\n",
"4 + 1 ~= 5\n",
"5 + 1 ~= 6\n"
]
}
],
"prompt_number": 200
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Drawing the network"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"net = Network()\n",
"first_layer = Layer(3, neuron=TanhNeuron)\n",
"net.add_layer(first_layer, input=True)\n",
"second_layer = Layer(5, 2, neuron=SoftmaxNeuron)\n",
"net.add_layer(second_layer, output=True)\n",
"first_layer.connect_to(second_layer)\n",
"cython_lstm.network_viewer.draw(net)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "display_data",
"svg": [
"\n",
"\n",
"\n",
"\n"
],
"text": [
""
]
}
],
"prompt_number": 18
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Future APIs issues and ideas:\n",
"\n",
"* Prevent parameter duplication by adding uuids to each, and using sets to identify them (at least initially)\n",
"\n",
"* Slice Layer, and add layer are clunky ways of achieving simple things. These should implicity act on the layer and connect it.\n",
"\n",
"* In doing so the network should also implicitly gobble up all the resulting layers into a coherent whole.\n",
"\n",
"* Optimization is then a matter of preventing the saving of the non transformed input of a linear layer, and converting all these classes either to Numba or Cython for compilation to avoid doing all sorts of polymorphic checks everywhere.\n",
"\n",
"## Future Steps:\n",
"\n",
"* Implement RNN with memory\n",
"\n",
"* Implement gate unit by overloading the `__add__` operator\n",
"\n",
"* Implement the loop layer in Cython with nogil where possible\n"
]
},
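{
 "cell_type": "markdown",
 "metadata": {},
 "source": [
  "Two of the ideas above (uuid-based parameter deduplication and the `__add__` gate unit) are easy to sketch. What follows is a minimal, hypothetical illustration: the class and function names are made up and are not part of `cython_lstm`. Parameters tagged with a `uuid` can be deduplicated through a set, and overloading `__add__` lets `layer_a + layer_b` produce a merge node wired to both operands, the way a gate unit would combine two inputs."
 ]
},
{
 "cell_type": "code",
 "collapsed": false,
 "input": [
  "import uuid\n",
  "\n",
  "class SketchParam(object):\n",
  "    # hypothetical parameter wrapper: the uuid gives every parameter a\n",
  "    # stable identity, so a shared parameter is only collected once\n",
  "    def __init__(self, value):\n",
  "        self.value = value\n",
  "        self.uid = uuid.uuid4()\n",
  "\n",
  "def unique_params(layers):\n",
  "    # walk the layers, keeping each parameter the first time its uuid appears\n",
  "    seen, out = set(), []\n",
  "    for layer in layers:\n",
  "        for param in layer.params:\n",
  "            if param.uid not in seen:\n",
  "                seen.add(param.uid)\n",
  "                out.append(param)\n",
  "    return out\n",
  "\n",
  "class SketchLayer(object):\n",
  "    # hypothetical layer: `a + b` builds a merge node connected to both operands\n",
  "    def __init__(self, name, params=()):\n",
  "        self.name, self.params, self.parents = name, list(params), []\n",
  "\n",
  "    def __add__(self, other):\n",
  "        merged = SketchLayer(\"(%s + %s)\" % (self.name, other.name))\n",
  "        merged.parents = [self, other]\n",
  "        return merged\n",
  "\n",
  "shared = SketchParam(0.0)\n",
  "a = SketchLayer(\"hidden\", [shared])\n",
  "b = SketchLayer(\"context\", [shared])\n",
  "gate_input = a + b\n",
  "print(\"%s <- %r\" % (gate_input.name, [p.name for p in gate_input.parents]))\n",
  "print(len(unique_params([a, b])))  # 1: the shared parameter is counted once"
 ],
 "language": "python",
 "metadata": {},
 "outputs": []
},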
{
"cell_type": "code",
"collapsed": false,
"input": [
"def topology_test():\n",
" # create a test dataset\n",
" xor_dataset, xor_labels = create_xor_dataset()\n",
" # create a small network:\n",
"\n",
" net = Network(metric = BinaryCrossEntropy)\n",
" print(\"Initialization OK\")\n",
" first_layer = LinearLayer(xor_dataset.shape[1], 6)\n",
" activation_layer = ActivationLayer(LogisticNeuron)\n",
" \n",
" first_layer.connect_to(activation_layer)\n",
" \n",
" second_input = cython_lstm.network.DataLayer()\n",
" \n",
" second_layer = LinearLayer(3, 6)\n",
" second_input.connect_to(second_layer)\n",
" \n",
" third_layer = activation_layer + second_layer\n",
" \n",
" net.add_layer(first_layer, input=True)\n",
" net.add_layer(activation_layer)\n",
" net.add_layer(second_layer)\n",
" net.add_layer(second_input)\n",
" net.add_layer(third_layer, output=True)\n",
" return net\n",
" \n",
" "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 9
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"a = topology_test()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Initialization OK\n"
]
}
],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"[b.layer for b in a.topsort()]"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 25,
"text": [
"[,\n",
" ,\n",
" ,\n",
" ,\n",
" ,\n",
" ]"
]
}
],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}