# Iris Dataset with Mesh Neural Networks
### 1. Import Libraries

In [1]:
from sklearn import datasets
import numpy as np

### 2. Set MNN hyperparameters

In [2]:
inputs=4 + 1 # bias input
hidden=10
outputs=3
batch_size=10
ticks = 3
epochs = 1000
lr = 0.001

test_perc = 0.3

### 3. Load Iris dataset

In [3]:
raw_dataset = datasets.load_iris()

perm = np.random.permutation(raw_dataset.data.shape[0])

pivot = int(len(raw_dataset.data)*(1-test_perc))

train_X = raw_dataset.data[perm][:pivot]
train_Y = raw_dataset.target[perm][:pivot]

test_X = raw_dataset.data[perm][pivot:]
test_Y = raw_dataset.target[perm][pivot:]

### 4. Define utility functions

##### 4.1 ReLU Activation function and its derivative

In [4]:
def f(x, derivative=False):
 gt = (x > 0)
 if derivative:
 return 1 * gt
 else:
 return x * gt

#### 4.2 CrossEntropy loss function and its derivative

In [5]:
def softmax(x):
 exps = np.exp(x - np.max(x))
 return exps / np.expand_dims(np.sum(exps, axis=1), axis=1)


def ce_loss(out, y, grad):
 y = y.astype(int)
 m = y.shape[0]
 p = softmax(out)
 log_likelihood = -np.log(p[range(m),y])
 loss = np.mean(log_likelihood, axis=0)

 de = p
 de[range(m),y] -= 1
 de = de/m
 de = np.expand_dims(np.expand_dims(de, axis=2), axis=2)
 grad = de*grad

 return (loss, grad.sum(axis=1).sum(axis=0))

#### 4.3 Adam Optimizer

In [6]:
class Adam:
 def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8, decay=0., **kwargs):
 
 allowed_kwargs = {'clipnorm', 'clipvalue'}
 for k in kwargs:
 if k not in allowed_kwargs:
 raise TypeError('Unexpected keyword argument '
 'passed to optimizer: ' + str(k))

 self.__dict__.update(kwargs)
 self.iterations = 0
 self.lr = lr
 self.beta_1 = beta_1
 self.beta_2 = beta_2
 self.decay = decay
 self.epsilon = epsilon
 self.initial_decay = decay

 def step(self, params, grads):
 original_shapes = [x.shape for x in params]
 params = [x.flatten() for x in params]
 grads = [x.flatten() for x in grads]
 

 lr = self.lr
 if self.initial_decay > 0:
 lr *= (1. / (1. + self.decay * self.iterations))

 t = self.iterations + 1
 lr_t = lr * (np.sqrt(1. - np.power(self.beta_2, t)) /
 (1. - np.power(self.beta_1, t)))

 if not hasattr(self, 'ms'):
 self.ms = [np.zeros(p.shape) for p in params]
 self.vs = [np.zeros(p.shape) for p in params]
 
 ret = [None] * len(params)
 for i, p, g, m, v in zip(range(len(params)), params, grads, self.ms, self.vs):
 m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
 v_t = (self.beta_2 * v) + (1. - self.beta_2) * np.square(g)
 p_t = p - lr_t * m_t / (np.sqrt(v_t) + self.epsilon)
 self.ms[i] = m_t
 self.vs[i] = v_t
 ret[i] = p_t
 
 self.iterations += 1
 
 for i in range(len(ret)):
 ret[i] = ret[i].reshape(original_shapes[i])
 
 return ret


### 5. Define Mesh Neural Networks Functions

#### 5.1 FOP function as described in Section 2.2 of the paper

In [7]:
def derivate(grad, t):
 sn = np.zeros(shape=grad.shape)
 sn[:,np.eye(neurons).astype(bool)] = state[:,np.newaxis]
 sn = np.transpose(sn, (0,3,2,1))
 return f(t, derivative=True)[:, np.newaxis, np.newaxis] * (np.matmul(grad, A) + sn)

#### 5.2 State update function as described in Section 2.1 of the paper

In [8]:
def net(state, grad, x):
 # set inputs neurons state with input vector
 state[:,0:inputs] = np.concatenate((x, np.ones((batch_size,1))), axis=1)
 
 # compute ti
 t = np.matmul(state, A)
 
 # Forward propagate the gradients
 grad = derivate(grad, t)
 
 # Compute new state
 state = f(t)
 
 return state, grad

#### 5.3 Learning algorithm as described in Section 2.2.1 of the paper

In [9]:
neurons = inputs + hidden + outputs
A = np.random.rand(neurons, neurons) # Adjacency Matrix

optimizer = Adam(lr=lr)
for epoch in range(0, epochs):
 losses = 0
 for i in range(0, len(train_X)//batch_size):
 # Training batches
 batch_X = train_X[i*batch_size:(i+1)*batch_size]
 batch_Y = train_Y[i*batch_size:(i+1)*batch_size]
 
 state = np.zeros(shape=(batch_size, neurons)) # Init state
 grad = np.zeros(shape=(batch_size, neurons, neurons, neurons)) # Init gradients

 # Update MNN in time
 for t in range(0, ticks):
 state, grad = net(state, grad, batch_X)
 
 # Permute gradients for error function
 grad = np.transpose(grad, (0,3,1,2))
 
 # Slice output values and gradients
 outs = state[:,inputs+hidden:neurons]
 outs_grad = grad[:,inputs+hidden:neurons]
 
 # Compute loss and error gradients
 loss, err_grad = ce_loss(outs, batch_Y, outs_grad)
 losses += loss

 # Update weights
 A = optimizer.step(A, err_grad)
 
 if epoch % 50 == 0:
 print("epoch: %d loss: %f" % (epoch, losses/(len(train_X)//(batch_size))))
 

epoch: 0 loss: 43.460476
epoch: 50 loss: 0.570155
epoch: 100 loss: 0.402548
epoch: 150 loss: 0.318927
epoch: 200 loss: 0.253403
epoch: 250 loss: 0.202143
epoch: 300 loss: 0.165184
epoch: 350 loss: 0.139691
epoch: 400 loss: 0.122168
epoch: 450 loss: 0.109891
epoch: 500 loss: 0.101037
epoch: 550 loss: 0.094440
epoch: 600 loss: 0.089369
epoch: 650 loss: 0.085362
epoch: 700 loss: 0.082120
epoch: 750 loss: 0.079444
epoch: 800 loss: 0.077197
epoch: 850 loss: 0.075281
epoch: 900 loss: 0.073625
epoch: 950 loss: 0.072177


### 6. Test the model

In [10]:
rights = 0
tots = 0
for i in range(0, len(test_X)//batch_size):
 # Testing batches
 batch_X = test_X[i*batch_size:(i+1)*batch_size]
 batch_Y = test_Y[i*batch_size:(i+1)*batch_size]
 
 # Init MNN
 state = np.zeros(shape=(batch_size, neurons))
 grad = np.zeros(shape=(batch_size, neurons, neurons, neurons))
 
 for t in range(0, ticks):
 state, grad = net(state, grad, batch_X)
 outs = state[:,inputs+hidden:neurons]
 
 rights += (np.argmax(outs, axis=1) == batch_Y).astype(int).sum() 
 tots += batch_size
 
print("Accuracy: %f" % (rights/tots) )

Accuracy: 0.975000
