{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Feed Forward Networks" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "
Creating differentiable computation graphs for classification tasks.
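\n", "\n", "Before building the classifiers, a minimal autograd sketch (toy values; this cell is an addition, not part of the dataset below): PyTorch records every tensor operation into a graph, and `backward()` traverses that graph to compute gradients.\n", "" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import torch\n", "\n", "# Hypothetical toy example: a two-op graph (mul -> sum)\n", "w = torch.tensor([1.0, 2.0], requires_grad=True)  # leaf we differentiate w.r.t.\n", "x = torch.tensor([3.0, 4.0])  # constant input, no gradient tracked\n", "loss = (w * x).sum()\n", "loss.backward()  # d(loss)/dw_i = x_i\n", "print(w.grad)  # tensor([3., 4.])" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "Now the real task: generate a spiral dataset and fit two models to it.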
\n", "" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create the data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import random\n", "import torch\n", "from torch import nn, optim\n", "import torch.nn.functional as F\n", "import math\n", "import os" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%run plot_conf.py" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plt_style()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from IPython import display" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "seed=12345\n", "random.seed(seed)\n", "torch.manual_seed(seed)\n", "N = 1000 # num_samples_per_class\n", "D = 2 # dimensions\n", "C = 3 # num_classes\n", "H = 100 # num_hidden_units" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "X = torch.zeros(N * C, D)\n", "y = torch.zeros(N * C)\n", "\n", "for i in range(C):\n", " index = 0\n", " r = torch.linspace(0, 1, N)\n", " t = torch.linspace(\n", " i * 2 * math.pi / C,\n", " (i + 2) * 2 * math.pi / C,\n", " N\n", " ) + torch.randn(N) * 0.1\n", " \n", " for ix in range(N * i, N * (i + 1)):\n", " X[ix] = r[index] * torch.FloatTensor((\n", " math.sin(t[index]), math.cos(t[index])\n", " ))\n", " y[ix] = i\n", " index += 1\n", "\n", "print(\"SHAPES:\")\n", "print(\"-------------------\")\n", "print(\"X:\", tuple(X.size()))\n", "print(\"y:\", tuple(y.size()))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def plot_data(X, y, d=.0, auto=False):\n", " \"\"\"\n", " Plot the data.\n", " \"\"\"\n", " plt.clf()\n", " plt.scatter(X[:, 0], X[:, 1], c=y, s=20, cmap=plt.cm.Spectral)\n", " plt.axis('square')\n", " plt.axis((-1.1, 1.1, -1.1, 1.1))\n", " if auto is True: plt.axis('equal')\n", "# plt.savefig('spiral{:.2f}.png'.format(d))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Create the data\n", "plot_data(X.numpy(), y.numpy())" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def plot_model(X, y, model, e=.0, auto=False):\n", " \"\"\"\n", " Plot the model from torch weights.\n", " \"\"\"\n", " \n", " X = X.numpy()\n", " y = y.numpy(),\n", " w1 = torch.transpose(model.fc1.weight.data, 0, 1).numpy()\n", " b1 = model.fc1.bias.data.numpy()\n", " w2 = torch.transpose(model.fc2.weight.data, 0, 1).numpy()\n", " b2 = model.fc2.bias.data.numpy()\n", " \n", " h = 0.01\n", "\n", " x_min, x_max = (-1.1, 1.1)\n", " y_min, y_max = (-1.1, 1.1)\n", " \n", " if auto is True:\n", " x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n", " y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n", " xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n", " np.arange(y_min, y_max, h))\n", " Z = np.dot(np.maximum(0, np.dot(np.c_[xx.ravel(), yy.ravel()], w1) + b1), w2) + b2\n", " Z = np.argmax(Z, axis=1)\n", " Z = Z.reshape(xx.shape)\n", " fig = plt.figure()\n", " plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral, alpha=0.3)\n", " plt.scatter(X[:, 0], X[:, 1], c=y[0], s=40, cmap=plt.cm.Spectral)\n", " plt.axis((-1.1, 1.1, -1.1, 1.1))\n", " plt.axis('square')\n", " if auto is True:\n", " plt.axis((xx.min(), xx.max(), yy.min(), yy.max()))\n", " \n", "# plt.savefig('train{:03.2f}.png'.format(e))" ] }, { "cell_type": "markdown", 
"metadata": {}, "source": [ "### Linear model" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learning_rate = 1e-3\n", "lambda_l2 = 1e-5" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Linear model\n", "class linear_model(nn.Module):\n", " \"\"\"\n", " Linear model.\n", " \"\"\"\n", " def __init__(self, D_in, H, D_out):\n", " \"\"\"\n", " Initialize weights.\n", " \"\"\"\n", " super(linear_model, self).__init__()\n", " self.fc1 = nn.Linear(D_in, H)\n", " self.fc2 = nn.Linear(H, D_out)\n", "\n", " def forward(self, x):\n", " \"\"\"\n", " Forward pass.\n", " \"\"\"\n", " z = self.fc1(x)\n", " z = self.fc2(z)\n", " return z" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# nn package to create our linear model\n", "# each Linear module has a weight and bias\n", "model = linear_model(D, H, C)\n", "model.to(device) #Convert to CUDA\n", "\n", "# nn package also has different loss functions.\n", "# we use cross entropy loss for our classification task\n", "criterion = torch.nn.CrossEntropyLoss()\n", "\n", "# we use the optim package to apply\n", "# stochastic gradient descent for our parameter updates\n", "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=lambda_l2) # built-in L2\n", "\n", "# We convert our inputs and targets to Variables\n", "# so we can use automatic differentiation but we \n", "# use require_grad=False b/c we don't want the gradients\n", "# to alter these values.\n", "input_X = torch.tensor(X, requires_grad=False, dtype=torch.float32)\n", "y_true = torch.tensor(y, requires_grad=False, dtype=torch.long)\n", "\n", "# Training\n", "for t in range(1000):\n", " \n", " # Feed forward to get the logits\n", " y_pred = model(input_X)\n", " \n", " # Compute the loss and accuracy\n", " loss = criterion(y_pred, y_true)\n", " score, predicted = torch.max(y_pred, 1)\n", " acc = (y_true == predicted).sum().float() / len(y_true)\n", " print(\"[EPOCH]: %i, [LOSS]: %.6f, [ACCURACY]: %.3f\" % (t, loss.item(), acc))\n", " display.clear_output(wait=True)\n", " \n", " # zero the gradients before running\n", " # the backward pass.\n", " optimizer.zero_grad()\n", " \n", " # Backward pass to compute the gradient\n", " # of loss w.r.t our learnable params. 
\n", " loss.backward()\n", " \n", " # Update params\n", " optimizer.step()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Plot trained model\n", "print(model)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "plot_model(X, y, model)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Two-layered network" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "learning_rate = 1e-3\n", "lambda_l2 = 1e-5" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# NN model\n", "class two_layer_network(nn.Module):\n", " \"\"\"\n", " NN model.\n", " \"\"\"\n", " def __init__(self, D_in, H, D_out):\n", " \"\"\"\n", " Initialize weights.\n", " \"\"\"\n", " super(two_layer_network, self).__init__()\n", " self.fc1 = nn.Linear(D_in, H)\n", " self.fc2 = nn.Linear(H, D_out)\n", "\n", " def forward(self, x):\n", " \"\"\"\n", " Forward pass.\n", " \"\"\"\n", " z = F.relu(self.fc1(x))\n", " z = self.fc2(z)\n", " return z" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "# nn package to create our linear model\n", "# each Linear module has a weight and bias\n", "model = two_layer_network(D, H, C)\n", "model.to(device)\n", "\n", "# nn package also has different loss functions.\n", "# we use cross entropy loss for our classification task\n", "criterion = torch.nn.CrossEntropyLoss()\n", "\n", "# we use the optim package to apply\n", "# ADAM for our parameter updates\n", "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=lambda_l2) # built-in L2\n", "\n", "# We convert our inputs and targest to Variables\n", "# so we can use automatic differentiation but we \n", "# use require_grad=False b/c we don't want the gradients\n", "# to alter these values.\n", "input_X = torch.tensor(X, requires_grad=False, dtype=torch.float32)\n", "y_true = torch.tensor(y, requires_grad=False, dtype=torch.long)\n", "\n", "# e = 1. # plotting purpose\n", "\n", "# Training\n", "for t in range(1000):\n", " \n", " # Feed forward to get the logits\n", " y_pred = model(input_X)\n", " \n", " # Compute the loss and accuracy\n", " loss = criterion(y_pred, y_true)\n", " score, predicted = torch.max(y_pred, 1)\n", " acc = (y_true == predicted).sum().float() / len(y_true)\n", " print(\"[EPOCH]: %i, [LOSS]: %.6f, [ACCURACY]: %.3f\" % (t, loss.item(), acc))\n", " display.clear_output(wait=True)\n", " \n", " # zero the gradients before running\n", " # the backward pass.\n", " optimizer.zero_grad()\n", " \n", " # Backward pass to compute the gradient\n", " # of loss w.r.t our learnable params. \n", " loss.backward()\n", " \n", " # Update params\n", " optimizer.step()\n", " \n", "# # Plot some progress\n", "# if t % math.ceil(e) == 0:\n", "# plot_model(X, y, model, e)\n", "# e *= 1.5\n", "\n", "#! 
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Print and plot the trained model\n", "print(model)\n", "plot_model(X, y, model)" ] } ], "metadata": { "kernelspec": { "display_name": "Codas ML", "language": "python", "name": "codasml" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }