class MLP:
    """Fully connected ReLU network for classification, built from raw TensorFlow ops.

    ``layers`` lists the width of every layer, input first and output last,
    e.g. ``[784, 128, 128, 10]``. Hidden layers use ReLU; the output layer
    returns raw logits (softmax is folded into the loss).

    Attributes:
        layers: the layer widths passed to the constructor.
        L: number of entries in ``layers``.
        num_features: width of the input layer (``layers[0]``).
        num_classes: width of the output layer (``layers[-1]``).
        W, b: dicts mapping layer index ``1..L-1`` to weight / bias variables.
            ``W[i]`` has shape ``(layers[i], layers[i-1])`` and ``b[i]`` has
            shape ``(layers[i], 1)``.
        dW, db: dicts holding the most recent gradients for ``W`` and ``b``.
    """

    def __init__(self, layers):
        self.layers = layers
        self.L = len(layers)
        self.num_features = layers[0]
        self.num_classes = layers[-1]

        self.W = {}
        self.b = {}

        self.dW = {}
        self.db = {}

        self.setup()

    def setup(self):
        """Create the weight and bias variables for every layer.

        Weights use He initialisation (normal with std ``sqrt(2 / fan_in)``),
        which keeps ReLU activations at a sensible scale; a unit-variance
        normal makes the first logits enormous on wide inputs. Biases start
        at zero.
        """
        for i in range(1, self.L):
            fan_in = self.layers[i - 1]
            fan_out = self.layers[i]
            # max(..., 1) only guards the degenerate zero-width layer.
            stddev = (2.0 / max(fan_in, 1)) ** 0.5
            self.W[i] = tf.Variable(
                tf.random.normal(shape=(fan_out, fan_in), stddev=stddev)
            )
            self.b[i] = tf.Variable(tf.zeros(shape=(fan_out, 1)))

    def forward_pass(self, A):
        """Return the output-layer logits for a batch of inputs.

        Args:
            A: array-like of shape ``(batch, num_features)``.

        Returns:
            Float32 tensor of shape ``(batch, num_classes)`` (no softmax applied).
        """
        A = tf.convert_to_tensor(A, dtype=tf.float32)
        last = self.L - 1
        for i in range(1, self.L):
            # W[i] is stored as (out, in) and b[i] as (out, 1); transpose both
            # so the batch stays on axis 0.
            Z = tf.matmul(A, tf.transpose(self.W[i])) + tf.transpose(self.b[i])
            A = Z if i == last else tf.nn.relu(Z)
        return A

    def compute_loss(self, A, Y):
        """Mean softmax cross-entropy between logits ``A`` and one-hot labels ``Y``."""
        Y = tf.convert_to_tensor(Y, dtype=tf.float32)
        per_example = tf.nn.softmax_cross_entropy_with_logits(labels=Y, logits=A)
        return tf.reduce_mean(per_example)

    def update_params(self, lr):
        """Apply one plain gradient-descent step using the gradients in ``dW`` / ``db``."""
        for i in range(1, self.L):
            self.W[i].assign_sub(lr * self.dW[i])
            self.b[i].assign_sub(lr * self.db[i])

    def predict(self, X):
        """Return the predicted class index for every row of ``X`` as an int tensor."""
        # softmax is monotonic, so argmax over the logits gives the same answer.
        return tf.argmax(self.forward_pass(X), axis=1)

    def summary(self):
        """Print the layer layout and the total number of trainable parameters."""
        num_params = 0
        for i in range(1, self.L):
            num_params += self.W[i].shape[0] * self.W[i].shape[1]
            num_params += self.b[i].shape[0]
        print('Input Features:\t', self.num_features)
        print('Number of Classes:\t', self.num_classes)
        print('Hidden Layers:')
        print('===================')
        for i in range(1, self.L - 1):
            print('Layer {}, Units {}'.format(i, self.layers[i]))
        print('===================')
        print('Number of parameters:', num_params)

    def _train_step(self, X, Y, lr):
        """Run one gradient-descent step and return ``(loss, logits)`` as tensors.

        The logits are the ones computed *before* the parameter update.
        """
        X = tf.convert_to_tensor(X, dtype=tf.float32)
        Y = tf.convert_to_tensor(Y, dtype=tf.float32)

        keys = list(self.W.keys())
        with tf.GradientTape() as tape:
            logits = self.forward_pass(X)
            loss = self.compute_loss(logits, Y)

        # One backward pass for all variables instead of one per variable.
        variables = [self.W[k] for k in keys] + [self.b[k] for k in keys]
        grads = tape.gradient(loss, variables)
        for offset, k in enumerate(keys):
            self.dW[k] = grads[offset]
            self.db[k] = grads[len(keys) + offset]

        self.update_params(lr)
        return loss, logits

    def train_on_batch(self, X, Y, lr):
        """Train on a single batch and return its loss as a NumPy scalar.

        Args:
            X: array-like of shape ``(batch, num_features)``.
            Y: one-hot labels of shape ``(batch, num_classes)``.
            lr: learning rate for the gradient-descent update.
        """
        loss, _ = self._train_step(X, Y, lr)
        return loss.numpy()

    def train(self, x_train, y_train, x_test, y_test, epochs, batch_size, lr):
        """Train with mini-batch gradient descent and record per-epoch metrics.

        Batches are taken in order (no shuffling). The final, possibly smaller,
        batch is included so every sample is used each epoch.

        Args:
            x_train, y_train: NumPy training inputs and one-hot labels.
            x_test, y_test: NumPy held-out inputs and one-hot labels, evaluated
                after every epoch.
            epochs: number of passes over the training data.
            batch_size: number of samples per gradient step (positive int).
            lr: learning rate.

        Returns:
            dict with keys ``'train_loss'``, ``'train_acc'``, ``'val_loss'`` and
            ``'val_acc'``, each a list with one float per epoch. The training
            metrics are sample-weighted averages over the whole epoch, measured
            on each batch just before its update.

        Raises:
            ValueError: if ``x_train`` is empty or ``batch_size`` is not positive.
        """
        num_samples = x_train.shape[0]
        if num_samples == 0:
            raise ValueError('x_train must contain at least one sample')
        if batch_size <= 0:
            raise ValueError('batch_size must be a positive integer')

        history = {
            'val_loss': [],
            'train_loss': [],
            'val_acc': [],
            'train_acc': []
        }
        # Ceiling division keeps the trailing partial batch.
        steps_per_epoch = -(-num_samples // batch_size)
        # Never zero, so short epochs cannot trigger a modulo-by-zero.
        tick_every = max(1, steps_per_epoch // 5)
        val_labels = np.argmax(y_test, axis=1)

        for e in range(epochs):
            loss_sum = 0.
            num_correct = 0
            print('Epoch {0}/{1}\t['.format(e + 1, epochs), end='')
            for i in range(steps_per_epoch):
                start = i * batch_size
                x_batch = x_train[start:start + batch_size]
                y_batch = y_train[start:start + batch_size]
                loss, logits = self._train_step(x_batch, y_batch, lr)

                # Weight by batch length so a short last batch is not over-counted.
                loss_sum += float(loss.numpy()) * len(x_batch)
                batch_preds = np.argmax(logits.numpy(), axis=1)
                num_correct += int(np.sum(batch_preds == np.argmax(y_batch, axis=1)))

                if i % tick_every == 0:
                    print(end='=')
            print(']', end='')
            history['train_loss'].append(loss_sum / num_samples)
            history['train_acc'].append(num_correct / num_samples)

            # A single forward pass serves both validation loss and accuracy.
            val_logits = self.forward_pass(x_test)
            history['val_loss'].append(float(self.compute_loss(val_logits, y_test).numpy()))
            val_preds = np.argmax(val_logits.numpy(), axis=1)
            history['val_acc'].append(float(np.mean(val_labels == val_preds)))

            print('\tTrain Acc:', history['train_acc'][-1], end='\t')
            print('Val Acc:', history['val_acc'][-1])
        return history

    def evaluate(self, X, Y):
        """Return classification accuracy on ``X`` against one-hot labels ``Y``."""
        preds = self.predict(X)
        # Predictions are integer class ids while Y is one-hot, hence the argmax.
        return np.mean(np.argmax(Y, axis=1) == preds.numpy())
# ## Testing out the MLP class

# --- Configuration ----------------------------------------------------------
SEED = 42            # fixes the weight initialisation so runs are repeatable
TRAIN_SIZE = 55000   # first TRAIN_SIZE samples train, the remainder validate
IMAGE_SIDE = 28      # MNIST images are IMAGE_SIDE x IMAGE_SIDE pixels
NUM_CLASSES = 10

tf.random.set_seed(SEED)

# --- Importing Dataset ------------------------------------------------------
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten every image to a vector and scale so that values lie in [0, 1].
x_train = np.reshape(x_train, (x_train.shape[0], -1)) / 255.
x_test = np.reshape(x_test, (x_test.shape[0], -1)) / 255.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=NUM_CLASSES)
y_test = tf.keras.utils.to_categorical(y_test, num_classes=NUM_CLASSES)

# Hold out the tail of the training set for validation. Tuple assignment
# evaluates both slices before rebinding, so the order is safe.
x_train, x_val = x_train[:TRAIN_SIZE], x_train[TRAIN_SIZE:]
y_train, y_val = y_train[:TRAIN_SIZE], y_train[TRAIN_SIZE:]

print((x_train.shape), (x_val.shape), (x_test.shape))

# --- Preview the first ten training digits ----------------------------------
fig, axes = plt.subplots(2, 5, figsize=(10, 5))
for ax, image in zip(axes.ravel(), x_train):
    ax.imshow(image.reshape((IMAGE_SIDE, IMAGE_SIDE)), cmap='binary')
plt.show()

# --- Build the model --------------------------------------------------------
mlp = MLP([IMAGE_SIDE * IMAGE_SIDE, 128, 128, NUM_CLASSES])
mlp.summary()

# --- Train ------------------------------------------------------------------
batch_size = 64
epochs = 20
lr = 3e-3

history = mlp.train(x_train, y_train, x_val, y_val, epochs, batch_size, lr)

# --- Plotting Training-Validation accuracies and loss -----------------------
# Use a separate name so the `epochs` hyperparameter above is not rebound.
epoch_axis = range(len(history['val_loss']))

fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4))

loss_ax.plot(epoch_axis, history['val_loss'], label='Val Loss')
loss_ax.plot(epoch_axis, history['train_loss'], label='Train Loss')
loss_ax.set_xticks(list(epoch_axis))
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.legend()

acc_ax.plot(epoch_axis, history['val_acc'], label='Val Acc')
acc_ax.plot(epoch_axis, history['train_acc'], label='Train Acc')
acc_ax.set_xticks(list(epoch_axis))
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()

plt.show()

# --- Final evaluation on the untouched test split ---------------------------
test_accuracy = mlp.evaluate(x_test, y_test)
print('Test Accuracy : ', test_accuracy)