{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Iris Dataset with Mesh Neural Networks\n", "### 1. Import Libraries" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from sklearn import datasets\n", "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2. Set MNN hyperparameters" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "inputs=4 + 1 # bias input\n", "hidden=10\n", "outputs=3\n", "batch_size=10\n", "ticks = 3\n", "epochs = 1000\n", "lr = 0.001\n", "\n", "test_perc = 0.3" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3. Load Iris dataset" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "raw_dataset = datasets.load_iris()\n", "\n", "perm = np.random.permutation(raw_dataset.data.shape[0])\n", "\n", "pivot = int(len(raw_dataset.data)*(1-test_perc))\n", "\n", "train_X = raw_dataset.data[perm][:pivot]\n", "train_Y = raw_dataset.target[perm][:pivot]\n", "\n", "test_X = raw_dataset.data[perm][pivot:]\n", "test_Y = raw_dataset.target[perm][pivot:]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4. Define utility functions\n", "\n", "##### 4.1 ReLU Activation function and its derivative" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def f(x, derivative=False):\n", " gt = (x > 0)\n", " if derivative:\n", " return 1 * gt\n", " else:\n", " return x * gt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 4.2 CrossEntropy loss function and its derivative" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def softmax(x):\n", " exps = np.exp(x - np.max(x))\n", " return exps / np.expand_dims(np.sum(exps, axis=1), axis=1)\n", "\n", "\n", "def ce_loss(out, y, grad):\n", " y = y.astype(int)\n", " m = y.shape[0]\n", " p = softmax(out)\n", " log_likelihood = -np.log(p[range(m),y])\n", " loss = np.mean(log_likelihood, axis=0)\n", "\n", " de = p\n", " de[range(m),y] -= 1\n", " de = de/m\n", " de = np.expand_dims(np.expand_dims(de, axis=2), axis=2)\n", " grad = de*grad\n", "\n", " return (loss, grad.sum(axis=1).sum(axis=0))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 4.3 Adam Optimizer" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "class Adam:\n", " def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8, decay=0., **kwargs):\n", " \n", " allowed_kwargs = {'clipnorm', 'clipvalue'}\n", " for k in kwargs:\n", " if k not in allowed_kwargs:\n", " raise TypeError('Unexpected keyword argument '\n", " 'passed to optimizer: ' + str(k))\n", "\n", " self.__dict__.update(kwargs)\n", " self.iterations = 0\n", " self.lr = lr\n", " self.beta_1 = beta_1\n", " self.beta_2 = beta_2\n", " self.decay = decay\n", " self.epsilon = epsilon\n", " self.initial_decay = decay\n", "\n", " def step(self, params, grads):\n", " original_shapes = [x.shape for x in params]\n", " params = [x.flatten() for x in params]\n", " grads = [x.flatten() for x in grads]\n", " \n", "\n", " lr = self.lr\n", " if self.initial_decay > 0:\n", " lr *= (1. / (1. + self.decay * self.iterations))\n", "\n", " t = self.iterations + 1\n", " lr_t = lr * (np.sqrt(1. - np.power(self.beta_2, t)) /\n", " (1. - np.power(self.beta_1, t)))\n", "\n", " if not hasattr(self, 'ms'):\n", " self.ms = [np.zeros(p.shape) for p in params]\n", " self.vs = [np.zeros(p.shape) for p in params]\n", " \n", " ret = [None] * len(params)\n", " for i, p, g, m, v in zip(range(len(params)), params, grads, self.ms, self.vs):\n", " m_t = (self.beta_1 * m) + (1. - self.beta_1) * g\n", " v_t = (self.beta_2 * v) + (1. - self.beta_2) * np.square(g)\n", " p_t = p - lr_t * m_t / (np.sqrt(v_t) + self.epsilon)\n", " self.ms[i] = m_t\n", " self.vs[i] = v_t\n", " ret[i] = p_t\n", " \n", " self.iterations += 1\n", " \n", " for i in range(len(ret)):\n", " ret[i] = ret[i].reshape(original_shapes[i])\n", " \n", " return ret\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 5. Define Mesh Neural Networks Functions\n", "\n", "#### 5.1 FOP function as described in Section 2.2 of the paper" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def derivate(grad, t):\n", " sn = np.zeros(shape=grad.shape)\n", " sn[:,np.eye(neurons).astype(bool)] = state[:,np.newaxis]\n", " sn = np.transpose(sn, (0,3,2,1))\n", " return f(t, derivative=True)[:, np.newaxis, np.newaxis] * (np.matmul(grad, A) + sn)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 5.2 State update function as described in Section 2.1 of the paper" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def net(state, grad, x):\n", " # set inputs neurons state with input vector\n", " state[:,0:inputs] = np.concatenate((x, np.ones((batch_size,1))), axis=1)\n", " \n", " # compute ti\n", " t = np.matmul(state, A)\n", " \n", " # Forward propagate the gradients\n", " grad = derivate(grad, t)\n", " \n", " # Compute new state\n", " state = f(t)\n", " \n", " return state, grad" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### 5.3 Learning algorithm as described in Section 2.2.1 of the paper" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "epoch: 0 loss: 43.460476\n", "epoch: 50 loss: 0.570155\n", "epoch: 100 loss: 0.402548\n", "epoch: 150 loss: 0.318927\n", "epoch: 200 loss: 0.253403\n", "epoch: 250 loss: 0.202143\n", "epoch: 300 loss: 0.165184\n", "epoch: 350 loss: 0.139691\n", "epoch: 400 loss: 0.122168\n", "epoch: 450 loss: 0.109891\n", "epoch: 500 loss: 0.101037\n", "epoch: 550 loss: 0.094440\n", "epoch: 600 loss: 0.089369\n", "epoch: 650 loss: 0.085362\n", "epoch: 700 loss: 0.082120\n", "epoch: 750 loss: 0.079444\n", "epoch: 800 loss: 0.077197\n", "epoch: 850 loss: 0.075281\n", "epoch: 900 loss: 0.073625\n", "epoch: 950 loss: 0.072177\n" ] } ], "source": [ "neurons = inputs + hidden + outputs\n", "A = np.random.rand(neurons, neurons) # Adjacency Matrix\n", "\n", "optimizer = Adam(lr=lr)\n", "for epoch in range(0, epochs):\n", " losses = 0\n", " for i in range(0, len(train_X)//batch_size):\n", " # Training batches\n", " batch_X = train_X[i*batch_size:(i+1)*batch_size]\n", " batch_Y = train_Y[i*batch_size:(i+1)*batch_size]\n", " \n", " state = np.zeros(shape=(batch_size, neurons)) # Init state\n", " grad = np.zeros(shape=(batch_size, neurons, neurons, neurons)) # Init gradients\n", "\n", " # Update MNN in time\n", " for t in range(0, ticks):\n", " state, grad = net(state, grad, batch_X)\n", " \n", " # Permute gradients for error function\n", " grad = np.transpose(grad, (0,3,1,2))\n", " \n", " # Slice output values and gradients\n", " outs = state[:,inputs+hidden:neurons]\n", " outs_grad = grad[:,inputs+hidden:neurons]\n", " \n", " # Compute loss and error gradients\n", " loss, err_grad = ce_loss(outs, batch_Y, outs_grad)\n", " losses += loss\n", "\n", " # Update weights\n", " A = optimizer.step(A, err_grad)\n", " \n", " if epoch % 50 == 0:\n", " print(\"epoch: %d loss: %f\" % (epoch, losses/(len(train_X)//(batch_size))))\n", " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 6. Test the model" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.975000\n" ] } ], "source": [ "rights = 0\n", "tots = 0\n", "for i in range(0, len(test_X)//batch_size):\n", " # Testing batches\n", " batch_X = test_X[i*batch_size:(i+1)*batch_size]\n", " batch_Y = test_Y[i*batch_size:(i+1)*batch_size]\n", " \n", " # Init MNN\n", " state = np.zeros(shape=(batch_size, neurons))\n", " grad = np.zeros(shape=(batch_size, neurons, neurons, neurons))\n", " \n", " for t in range(0, ticks):\n", " state, grad = net(state, grad, batch_X)\n", " outs = state[:,inputs+hidden:neurons]\n", " \n", " rights += (np.argmax(outs, axis=1) == batch_Y).astype(int).sum() \n", " tots += batch_size\n", " \n", "print(\"Accuracy: %f\" % (rights/tots) )" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }