## Understanding Neural Networks (with Graphs)
---
 - Pablo Leo Muñoz - pleo@etsfactory.com

In [None]:
# data handling
import numpy as np
import pandas as pd

# plots
import seaborn as sns; sns.set()
import matplotlib.pyplot as plt

# create animations
from celluloid import Camera
from IPython.display import HTML

# deep learning
import tensorflow
from tensorflow.keras.layers import Dense, Input, Activation
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

In [None]:
# reproducibility: seed TensorFlow's and NumPy's global random generators
# with the same fixed value so repeated runs draw the same numbers
seed = 12345
tensorflow.random.set_seed(seed)
np.random.seed(seed)

In [None]:
# matplotlib defaults: bigger figures, animations rendered as HTML5 video
plt.rcParams.update({
    'figure.figsize': [12, 8],
    'animation.html': 'html5',
})

# plot style: white background, major tick marks hidden on both axes
sns.set_style('white', {"xtick.major.size": 0, "ytick.major.size": 0})

## Dataset generation
---
For this specific problem, a synthetic dataset has been created. It contains a total of 400 samples split into 2 classes (200 samples per class). Each sample has 2 variables, `X0` and `X1`, so it can be plotted in a 2D figure. 

In [None]:
# number of samples per class
size = 200
half = size // 2

# features: class 0 is made of two Gaussian blobs centred at (-1, -1) and (1, 1),
# class 1 is a single blob centred at (0, 0); every blob uses a std of 0.5
x_train_1, x_train_2, x_train_3 = (
    np.random.normal(center, 0.5, (count, 2))
    for center, count in ((-1, half), (0, size), (1, half))
)

# labels, listed in the same blob order as the features
y_train_1, y_train_2, y_train_3 = (
    np.repeat(label, count)
    for label, count in ((0, half), (1, size), (0, half))
)

# assemble features and labels into a single dataframe
df_train = pd.DataFrame(
    np.vstack((x_train_1, x_train_2, x_train_3)), columns=['X0', 'X1']
).assign(y=np.concatenate((y_train_1, y_train_2, y_train_3)))

In [None]:
# scatter the samples, coloured by class, on a transparent figure background
ax = sns.scatterplot(
    data=df_train,
    x='X0',
    y='X1',
    hue='y',
    palette=sns.diverging_palette(220, 20, n=2),
)
ax.figure.patch.set_alpha(0)

In [None]:
# pull features and labels out of the dataframe as plain NumPy arrays;
# the double brackets keep the labels two-dimensional: (n_samples, 1)
x_train = df_train[['X0', 'X1']].to_numpy()
y_train = df_train[['y']].to_numpy()

## Network structure
---
To show the training evolution, a very simple network has been created. 

It contains 1 hidden layer with 2 neurons and an output layer with 1 neuron. The output layer has a `sigmoid` activation function that squashes the output into the range [0, 1], so it can be read as the probability of belonging to **class 1** (values close to 0 therefore indicate **class 0**).

In [None]:
def gen_model(activation='tanh', learning_rate=1e-1):
    '''
    Builds and compiles the small binary classifier used in this notebook.

    Structure: input -> Dense(2) -> `activation` -> Dense(1) -> sigmoid.
    The input width is read from the notebook-level `x_train` array, so that
    array must exist before this function is called.

    Parameters
    ----------
    activation : str
        Name of the Keras activation applied after the hidden layer.
    learning_rate : float
        Learning rate handed to the Adam optimizer.

    Returns
    -------
    tensorflow.keras.models.Model
        The model, compiled with binary cross-entropy loss and the accuracy
        metric. As a side effect the model summary is printed.
    '''

    # input layer; `shape` must be a tuple (one entry per feature axis),
    # a bare integer is not accepted by every Keras version
    x_in = Input(shape=(x_train.shape[1],), name='input')

    # FC 2 units
    x = Dense(units=2, name='hidden_1')(x_in)

    # hidden activation (configurable, 'tanh' by default)
    x = Activation(activation, name='activation_1')(x)

    # FC 1 unit
    x = Dense(units=1, name='output')(x)

    # Sigmoid activation
    x_out = Activation('sigmoid', name='activation_output')(x)

    # generate the model
    model = Model(x_in, x_out)

    # show information about the model
    model.summary()

    # parameters; `learning_rate` is the supported keyword, the old `lr`
    # alias is deprecated and rejected by recent Keras releases
    optimizer = Adam(learning_rate=learning_rate)
    metrics = ["accuracy"]
    loss = "binary_crossentropy"

    # compile the model
    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    # return the model
    return model

In [None]:
# hyper-parameters for this run: hidden-layer activation and Adam learning rate
activation, learning_rate = 'tanh', 1e-1

# build and compile the network
model = gen_model(activation=activation, learning_rate=learning_rate)

In [None]:
# figure holding the evolution: one panel per stage of the network
f, axes = plt.subplots(1, 3, figsize=(18, 6), gridspec_kw={'height_ratios':[.9]})
f.subplots_adjust(top=0.82)

# camera to record the evolution
camera = Camera(f)

# maximum number of epochs (one animation frame per epoch)
epochs = 20

# training stops early once the loss drops to this value
loss_threshold = 0.263

# intermediate models exposing the hidden layer before and after its activation;
# they are built from the layers of `model`, so they use its current weights and
# only need to be created once (rebuilding them per epoch forces a re-trace of
# the predict function on every iteration)
model_hid_1 = Model(model.input, model.get_layer("hidden_1").output)
model_act_1 = Model(model.input, model.get_layer("activation_1").output)

# meshgrid (200 x 200 values) spanning the range of the training data;
# it does not depend on the epoch, so it is generated a single time
x = np.linspace(x_train[:,0].min(), x_train[:,0].max(), 200)
y = np.linspace(x_train[:,1].min(), x_train[:,1].max(), 200)
xv, yv = np.meshgrid(x, y)
mg_points = np.stack((xv.flatten(), yv.flatten()), axis=1)

# class labels as a flat vector, used as the hue column of the sample frames
labels = y_train.ravel()

# palettes: continuous for the predicted intensity, discrete for the two classes
palette_mg = sns.diverging_palette(220, 20, as_cmap=True)
palette_cls = sns.diverging_palette(220, 20, n=2)

# iterate epoch times
for i in range(epochs):

    # evaluate the model -> [loss, accuracy]; this happens BEFORE the fit call
    # of this iteration, so the first frame shows the untrained network
    evaluation = model.evaluate(x_train, y_train, verbose=0)

    # training samples as seen by the hidden layer (pre and post activation)
    df_hid_1 = pd.DataFrame(model_hid_1.predict(x_train, verbose=0), columns=['X0', 'X1'])
    df_hid_1['y'] = labels

    df_act_1 = pd.DataFrame(model_act_1.predict(x_train, verbose=0), columns=['X0', 'X1'])
    df_act_1['y'] = labels

    # meshgrid intensity: the network output for every grid point, computed once
    # and shared by the three background layers
    mg_pred = model.predict(mg_points, verbose=0).ravel()

    df_mg_train = pd.DataFrame(mg_points, columns=['X0', 'X1'])
    df_mg_train['y'] = mg_pred

    df_mg_hid_1 = pd.DataFrame(model_hid_1.predict(mg_points, verbose=0), columns=['X0', 'X1'])
    df_mg_hid_1['y'] = mg_pred

    df_mg_act_1 = pd.DataFrame(model_act_1.predict(mg_points, verbose=0), columns=['X0', 'X1'])
    df_mg_act_1['y'] = mg_pred

    # one panel per stage: translucent grid predictions below, samples on top
    panels = (
        ('Input layer', df_mg_train, df_train),
        ('Hidden layer', df_mg_hid_1, df_hid_1),
        ('Activation', df_mg_act_1, df_act_1),
    )
    for ax, (title, df_grid, df_points) in zip(axes, panels):
        sns.scatterplot(x='X0', y='X1', data=df_grid, hue='y', x_jitter=True, y_jitter=True, legend=None, ax=ax, palette=palette_mg, alpha=0.15)
        sns.scatterplot(x='X0', y='X1', data=df_points, hue='y', legend=None, ax=ax, palette=palette_cls)
        ax.set_title(title)

    # show the current epoch and the metrics, anchored to the middle panel so
    # the text ends up centred above the whole figure
    ax = axes[1]
    ax.text(x=0.5, y=1.15, s='Epoch {}'.format(i+1), fontsize=16, weight='bold', ha='center', va='bottom', transform=ax.transAxes)
    ax.text(x=0.5, y=1.08, s='Accuracy {:.3f} - Loss {:.3f}'.format(evaluation[1], evaluation[0]), fontsize=13, ha='center', va='bottom', transform=ax.transAxes)

    # show the plot
    plt.show()

    # record the current state of the figure as one animation frame
    camera.snap()

    # stop early once the loss is low enough (avoids running every epoch if not needed)
    if evaluation[0] <= loss_threshold:
        break

    # train the model 1 epoch
    model.fit(x_train, y_train, epochs=1, verbose=0)

In [None]:
# create the animation from the frames recorded with `camera.snap()`
# (one frame per snapshot taken in the training loop)
anim = camera.animate()

In [None]:
# show the animation: convert it to an HTML5 video and embed it in the cell output
# NOTE(review): presumably needs a video writer such as ffmpeg installed — confirm
HTML(anim.to_html5_video())