{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Understanding Neural Networks (with Graphs)\n",
    "---\n",
    " - Pablo Leo Muñoz - pleo@etsfactory.com"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-03-23T12:39:31.202881Z",
     "start_time": "2020-03-23T12:39:25.997360Z"
    }
   },
   "outputs": [],
   "source": [
    "# data handling\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "# plots (seaborn's default theme is applied right after its import)\n",
    "import seaborn as sns\n",
    "sns.set()\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# create animations\n",
    "from celluloid import Camera\n",
    "from IPython.display import HTML\n",
    "\n",
    "# deep learning\n",
    "import tensorflow\n",
    "from tensorflow.keras.layers import Dense, Input, Activation\n",
    "from tensorflow.keras.models import Model\n",
    "from tensorflow.keras.optimizers import Adam"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-03-23T12:39:31.208882Z",
     "start_time": "2020-03-23T12:39:28.792Z"
    }
   },
   "outputs": [],
   "source": [
    "# make it reproducible: the same seed feeds numpy and tensorflow\n",
    "seed = 12345\n",
    "np.random.seed(seed)\n",
    "tensorflow.random.set_seed(seed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# bigger default figures, and animations rendered as HTML5 video\n",
    "plt.rcParams.update({\n",
    "    'figure.figsize': [12, 8],\n",
    "    'animation.html': 'html5',\n",
    "})\n",
    "\n",
    "# plot style: white background, tick marks hidden\n",
    "sns.set_style(style='white', rc={'xtick.major.size': 0, 'ytick.major.size': 0})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Dataset generation\n",
    "---\n",
    "For this specific problem a synthetic set has been created. This set contains a total of 400 samples with 2 classes (200 samples of each class). Each sample has 2 variables `X0` and `X1`, so it can be plotted in a 2D figure."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# number of samples per class (the full set holds 2 * size rows)\n",
    "size = 200\n",
    "half = size // 2\n",
    "\n",
    "# features: three gaussian blobs (std 0.5) centred at -1, 0 and 1 on both axes;\n",
    "# the draws are made in this order so the seeded random stream is consumed identically\n",
    "x_train_1 = np.random.normal(loc=-1, scale=0.5, size=(half, 2))\n",
    "x_train_2 = np.random.normal(loc=0, scale=0.5, size=(size, 2))\n",
    "x_train_3 = np.random.normal(loc=1, scale=0.5, size=(half, 2))\n",
    "\n",
    "# labels: the two outer blobs are class 0, the central blob is class 1\n",
    "y_train_1 = np.zeros(half, dtype=int)\n",
    "y_train_2 = np.ones(size, dtype=int)\n",
    "y_train_3 = np.zeros(half, dtype=int)\n",
    "\n",
    "# stack features and labels into a single dataframe\n",
    "x_all = np.vstack([x_train_1, x_train_2, x_train_3])\n",
    "y_all = np.hstack([y_train_1, y_train_2, y_train_3])\n",
    "df_train = pd.DataFrame(x_all, columns=['X0', 'X1']).assign(y=y_all)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# scatter the samples, coloured by class\n",
    "class_palette = sns.diverging_palette(220, 20, n=2)\n",
    "ax = sns.scatterplot(data=df_train, x='X0', y='X1', hue='y', palette=class_palette)\n",
    "\n",
    "# transparent figure background\n",
    "ax.figure.patch.set_alpha(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# convert to numpy arrays (y_train keeps a trailing axis of length 1)\n",
    "x_train = df_train[['X0', 'X1']].values\n",
    "y_train = df_train[['y']].values"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Network structure\n",
    "---\n",
    "To show the training evolution, a very simple network has been created. \n",
    "\n",
    "It contains 1 hidden layer with 2 neurons and an output layer with 1 neuron. The output layer has a `sigmoid` activation function 'shrinking' the output to a value between [0, 1], which will represent the probability of belonging to **class 0** or **class 1**."
] },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gen_model(activation='tanh', learning_rate=1e-1, input_dim=None):\n",
    "    '''\n",
    "    Build and compile the small binary classifier used in this notebook.\n",
    "\n",
    "    Structure: input -> Dense(2) -> `activation` -> Dense(1) -> sigmoid.\n",
    "    The dense layers and their activations are separate, named layers so\n",
    "    that their outputs can be retrieved with `model.get_layer(name)`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    activation : str\n",
    "        Activation applied to the output of the hidden layer.\n",
    "    learning_rate : float\n",
    "        Learning rate given to the Adam optimizer.\n",
    "    input_dim : int, optional\n",
    "        Number of input features. Defaults to the number of columns of\n",
    "        the global `x_train`.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    Model\n",
    "        Keras model compiled with binary cross-entropy loss and the\n",
    "        accuracy metric.\n",
    "    '''\n",
    "    if input_dim is None:\n",
    "        input_dim = x_train.shape[1]\n",
    "\n",
    "    # the input shape is a tuple with one entry per feature axis\n",
    "    x_in = Input(shape=(input_dim,), name='input')\n",
    "\n",
    "    # FC 2 units followed by the requested activation\n",
    "    x = Dense(units=2, name='hidden_1')(x_in)\n",
    "    x = Activation(activation, name='activation_1')(x)\n",
    "\n",
    "    # FC 1 unit followed by a sigmoid, giving an output in [0, 1]\n",
    "    x = Dense(units=1, name='output')(x)\n",
    "    x_out = Activation('sigmoid', name='activation_output')(x)\n",
    "\n",
    "    # generate the model\n",
    "    model = Model(x_in, x_out)\n",
    "\n",
    "    # show information about the model\n",
    "    model.summary()\n",
    "\n",
    "    # compile the model; `learning_rate` is the supported keyword of Adam\n",
    "    # (`lr` is only kept by Keras as a deprecated alias)\n",
    "    model.compile(\n",
    "        optimizer=Adam(learning_rate=learning_rate),\n",
    "        loss='binary_crossentropy',\n",
    "        metrics=['accuracy'],\n",
    "    )\n",
    "\n",
    "    # return the model\n",
    "    return model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# set the value of the activation and learning rate\n",
    "activation = 'tanh'\n",
    "learning_rate = 1e-1\n",
    "\n",
    "# generate the model\n",
    "model = gen_model(activation, learning_rate)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_layer(ax, df_grid, df_points, title):\n",
    "    '''\n",
    "    Draw one panel of the animation on `ax`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    ax : matplotlib axes\n",
    "        Axes receiving the two scatter plots.\n",
    "    df_grid : pd.DataFrame\n",
    "        Columns `X0`, `X1`, `y`: mesh-grid points and the model output at\n",
    "        each of them, drawn as a faint continuous-colour background.\n",
    "    df_points : pd.DataFrame\n",
    "        Columns `X0`, `X1`, `y`: training samples and their class, drawn\n",
    "        on top of the background with one colour per class.\n",
    "    title : str\n",
    "        Title of the panel.\n",
    "    '''\n",
    "    sns.scatterplot(x='X0', y='X1', data=df_grid, hue='y', legend=False, ax=ax,\n",
    "                    palette=sns.diverging_palette(220, 20, as_cmap=True), alpha=0.15)\n",
    "    sns.scatterplot(x='X0', y='X1', data=df_points, hue='y', legend=False, ax=ax,\n",
    "                    palette=sns.diverging_palette(220, 20, n=2))\n",
    "    ax.set_title(title)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# figure holding the evolution (one panel per stage of the network)\n",
    "f, axes = plt.subplots(1, 3, figsize=(18, 6))\n",
    "f.subplots_adjust(top=0.82)\n",
    "\n",
    "# camera to record the evolution\n",
    "camera = Camera(f)\n",
    "\n",
    "# maximum number of epochs, and loss at which training stops early\n",
    "epochs = 20\n",
    "loss_threshold = 0.263\n",
    "\n",
    "# intermediate models exposing the hidden layer before and after its activation;\n",
    "# they are built from the layers of `model`, so they are created only once\n",
    "model_hid_1 = Model(model.input, model.get_layer('hidden_1').output)\n",
    "model_act_1 = Model(model.input, model.get_layer('activation_1').output)\n",
    "\n",
    "# mesh grid (200 x 200 points) spanning the range of the training data\n",
    "grid_x0 = np.linspace(x_train[:, 0].min(), x_train[:, 0].max(), 200)\n",
    "grid_x1 = np.linspace(x_train[:, 1].min(), x_train[:, 1].max(), 200)\n",
    "xv, yv = np.meshgrid(grid_x0, grid_x1)\n",
    "grid_points = np.stack((xv.flatten(), yv.flatten()), axis=1)\n",
    "\n",
    "# class of every training sample as a flat vector (y_train is a column)\n",
    "sample_labels = y_train.ravel()\n",
    "\n",
    "# iterate epoch times\n",
    "for i in range(epochs):\n",
    "\n",
    "    # evaluate the model in its current state\n",
    "    loss, accuracy = model.evaluate(x_train, y_train, verbose=0)\n",
    "\n",
    "    # model output over the grid: computed once, used as background of the 3 panels\n",
    "    grid_pred = model.predict(grid_points, verbose=0).ravel()\n",
    "\n",
    "    # (grid coordinates, sample coordinates, title) of each panel\n",
    "    panels = [\n",
    "        (grid_points, x_train, 'Input layer'),\n",
    "        (model_hid_1.predict(grid_points, verbose=0), model_hid_1.predict(x_train, verbose=0), 'Hidden layer'),\n",
    "        (model_act_1.predict(grid_points, verbose=0), model_act_1.predict(x_train, verbose=0), 'Activation'),\n",
    "    ]\n",
    "\n",
    "    for ax, (grid_xy, sample_xy, title) in zip(axes, panels):\n",
    "        df_grid = pd.DataFrame(grid_xy, columns=['X0', 'X1']).assign(y=grid_pred)\n",
    "        df_points = pd.DataFrame(sample_xy, columns=['X0', 'X1']).assign(y=sample_labels)\n",
    "        plot_layer(ax, df_grid, df_points, title)\n",
    "\n",
    "    # show the current epoch and the metrics, centred above the middle panel\n",
    "    axes[1].text(x=0.5, y=1.15, s='Epoch {}'.format(i + 1), fontsize=16, weight='bold',\n",
    "                 ha='center', va='bottom', transform=axes[1].transAxes)\n",
    "    axes[1].text(x=0.5, y=1.08, s='Accuracy {:.3f} - Loss {:.3f}'.format(accuracy, loss),\n",
    "                 fontsize=13, ha='center', va='bottom', transform=axes[1].transAxes)\n",
    "\n",
    "    # show the plot\n",
    "    plt.show()\n",
    "\n",
    "    # record what was drawn since the previous snap as one frame of the animation\n",
    "    camera.snap()\n",
    "\n",
    "    # stop as soon as the loss is low enough (no need to run every epoch)\n",
    "    if loss <= loss_threshold:\n",
    "        break\n",
    "\n",
    "    # train the model 1 epoch\n",
    "    model.fit(x_train, y_train, epochs=1, verbose=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create the animation\n",
    "anim = camera.animate()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "# show the animation\n",
    "HTML(anim.to_html5_video())"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude":
[ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }