{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Feed Forward Networks"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<p1><center>Creating differentiable computation graphs for classification tasks.</center></p1>\n",
    "<img src=\"img/train.gif\">"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "import torch\n",
    "from torch import nn, optim\n",
    "import torch.nn.functional as F\n",
    "import math\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%run plot_conf.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt_style()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython import display"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "seed=12345\n",
    "random.seed(seed)\n",
    "torch.manual_seed(seed)\n",
    "N = 1000  # num_samples_per_class\n",
    "D = 2  # dimensions\n",
    "C = 3  # num_classes\n",
    "H = 100  # num_hidden_units"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X = torch.zeros(N * C, D)\n",
    "y = torch.zeros(N * C)\n",
    "\n",
    "for i in range(C):\n",
    "    index = 0\n",
    "    r = torch.linspace(0, 1, N)\n",
    "    t = torch.linspace(\n",
    "        i * 2 * math.pi / C,\n",
    "        (i + 2) * 2 * math.pi / C,\n",
    "        N\n",
    "    ) + torch.randn(N) * 0.1\n",
    "    \n",
    "    for ix in range(N * i, N * (i + 1)):\n",
    "        X[ix] = r[index] * torch.FloatTensor((\n",
    "            math.sin(t[index]), math.cos(t[index])\n",
    "        ))\n",
    "        y[ix] = i\n",
    "        index += 1\n",
    "\n",
    "print(\"SHAPES:\")\n",
    "print(\"-------------------\")\n",
    "print(\"X:\", tuple(X.size()))\n",
    "print(\"y:\", tuple(y.size()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_data(X, y, d=.0, auto=False):\n",
    "    \"\"\"\n",
    "    Plot the data.\n",
    "    \"\"\"\n",
    "    plt.clf()\n",
    "    plt.scatter(X[:, 0], X[:, 1], c=y, s=20, cmap=plt.cm.Spectral)\n",
    "    plt.axis('square')\n",
    "    plt.axis((-1.1, 1.1, -1.1, 1.1))\n",
    "    if auto is True: plt.axis('equal')\n",
    "#     plt.savefig('spiral{:.2f}.png'.format(d))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the data\n",
    "plot_data(X.numpy(), y.numpy())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_model(X, y, model, e=.0, auto=False):\n",
    "    \"\"\"\n",
    "    Plot the model from torch weights.\n",
    "    \"\"\"\n",
    "    \n",
    "    X = X.numpy()\n",
    "    y = y.numpy(),\n",
    "    w1 = torch.transpose(model.fc1.weight.data, 0, 1).numpy()\n",
    "    b1 = model.fc1.bias.data.numpy()\n",
    "    w2 = torch.transpose(model.fc2.weight.data, 0, 1).numpy()\n",
    "    b2 = model.fc2.bias.data.numpy()\n",
    "    \n",
    "    h = 0.01\n",
    "\n",
    "    x_min, x_max = (-1.1, 1.1)\n",
    "    y_min, y_max = (-1.1, 1.1)\n",
    "    \n",
    "    if auto is True:\n",
    "        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1\n",
    "        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1\n",
    "    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),\n",
    "                         np.arange(y_min, y_max, h))\n",
    "    Z = np.dot(np.maximum(0, np.dot(np.c_[xx.ravel(), yy.ravel()], w1) + b1), w2) + b2\n",
    "    Z = np.argmax(Z, axis=1)\n",
    "    Z = Z.reshape(xx.shape)\n",
    "    fig = plt.figure()\n",
    "    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral, alpha=0.3)\n",
    "    plt.scatter(X[:, 0], X[:, 1], c=y[0], s=40, cmap=plt.cm.Spectral)\n",
    "    plt.axis((-1.1, 1.1, -1.1, 1.1))\n",
    "    plt.axis('square')\n",
    "    if auto is True:\n",
    "        plt.axis((xx.min(), xx.max(), yy.min(), yy.max()))\n",
    "    \n",
    "#     plt.savefig('train{:03.2f}.png'.format(e))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Linear model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "learning_rate = 1e-3\n",
    "lambda_l2 = 1e-5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Linear model\n",
    "class linear_model(nn.Module):\n",
    "    \"\"\"\n",
    "    Linear model.\n",
    "    \"\"\"\n",
    "    def __init__(self, D_in, H, D_out):\n",
    "        \"\"\"\n",
    "        Initialize weights.\n",
    "        \"\"\"\n",
    "        super(linear_model, self).__init__()\n",
    "        self.fc1 = nn.Linear(D_in, H)\n",
    "        self.fc2 = nn.Linear(H, D_out)\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"\n",
    "        Forward pass.\n",
    "        \"\"\"\n",
    "        z = self.fc1(x)\n",
    "        z = self.fc2(z)\n",
    "        return z"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# nn package to create our linear model\n",
    "# each Linear module has a weight and bias\n",
    "model = linear_model(D, H, C)\n",
    "model.to(device) #Convert to CUDA\n",
    "\n",
    "# nn package also has different loss functions.\n",
    "# we use cross entropy loss for our classification task\n",
    "criterion = torch.nn.CrossEntropyLoss()\n",
    "\n",
    "# we use the optim package to apply\n",
    "# stochastic gradient descent for our parameter updates\n",
    "optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate, weight_decay=lambda_l2) # built-in L2\n",
    "\n",
    "# We convert our inputs and targets to Variables\n",
    "# so we can use automatic differentiation but we \n",
    "# use require_grad=False b/c we don't want the gradients\n",
    "# to alter these values.\n",
    "input_X = torch.tensor(X, requires_grad=False, dtype=torch.float32)\n",
    "y_true = torch.tensor(y, requires_grad=False, dtype=torch.long)\n",
    "\n",
    "# Training\n",
    "for t in range(1000):\n",
    "    \n",
    "    # Feed forward to get the logits\n",
    "    y_pred = model(input_X)\n",
    "    \n",
    "    # Compute the loss and accuracy\n",
    "    loss = criterion(y_pred, y_true)\n",
    "    score, predicted = torch.max(y_pred, 1)\n",
    "    acc = (y_true == predicted).sum().float() / len(y_true)\n",
    "    print(\"[EPOCH]: %i, [LOSS]: %.6f, [ACCURACY]: %.3f\" % (t, loss.item(), acc))\n",
    "    display.clear_output(wait=True)\n",
    "    \n",
    "    # zero the gradients before running\n",
    "    # the backward pass.\n",
    "    optimizer.zero_grad()\n",
    "    \n",
    "    # Backward pass to compute the gradient\n",
    "    # of loss w.r.t our learnable params. \n",
    "    loss.backward()\n",
    "    \n",
    "    # Update params\n",
    "    optimizer.step()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot trained model\n",
    "print(model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_model(X, y, model)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Two-layered network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "learning_rate = 1e-3\n",
    "lambda_l2 = 1e-5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NN model\n",
    "class two_layer_network(nn.Module):\n",
    "    \"\"\"\n",
    "    NN model.\n",
    "    \"\"\"\n",
    "    def __init__(self, D_in, H, D_out):\n",
    "        \"\"\"\n",
    "        Initialize weights.\n",
    "        \"\"\"\n",
    "        super(two_layer_network, self).__init__()\n",
    "        self.fc1 = nn.Linear(D_in, H)\n",
    "        self.fc2 = nn.Linear(H, D_out)\n",
    "\n",
    "    def forward(self, x):\n",
    "        \"\"\"\n",
    "        Forward pass.\n",
    "        \"\"\"\n",
    "        z = F.relu(self.fc1(x))\n",
    "        z = self.fc2(z)\n",
    "        return z"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# nn package to create our linear model\n",
    "# each Linear module has a weight and bias\n",
    "model = two_layer_network(D, H, C)\n",
    "model.to(device)\n",
    "\n",
    "# nn package also has different loss functions.\n",
    "# we use cross entropy loss for our classification task\n",
    "criterion = torch.nn.CrossEntropyLoss()\n",
    "\n",
    "# we use the optim package to apply\n",
    "# ADAM for our parameter updates\n",
    "optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=lambda_l2) # built-in L2\n",
    "\n",
    "# We convert our inputs and targest to Variables\n",
    "# so we can use automatic differentiation but we \n",
    "# use require_grad=False b/c we don't want the gradients\n",
    "# to alter these values.\n",
    "input_X = torch.tensor(X, requires_grad=False, dtype=torch.float32)\n",
    "y_true = torch.tensor(y, requires_grad=False, dtype=torch.long)\n",
    "\n",
    "# e = 1.  # plotting purpose\n",
    "\n",
    "# Training\n",
    "for t in range(1000):\n",
    "    \n",
    "    # Feed forward to get the logits\n",
    "    y_pred = model(input_X)\n",
    "    \n",
    "    # Compute the loss and accuracy\n",
    "    loss = criterion(y_pred, y_true)\n",
    "    score, predicted = torch.max(y_pred, 1)\n",
    "    acc = (y_true == predicted).sum().float() / len(y_true)\n",
    "    print(\"[EPOCH]: %i, [LOSS]: %.6f, [ACCURACY]: %.3f\" % (t, loss.item(), acc))\n",
    "    display.clear_output(wait=True)\n",
    "    \n",
    "    # zero the gradients before running\n",
    "    # the backward pass.\n",
    "    optimizer.zero_grad()\n",
    "    \n",
    "    # Backward pass to compute the gradient\n",
    "    # of loss w.r.t our learnable params. \n",
    "    loss.backward()\n",
    "    \n",
    "    # Update params\n",
    "    optimizer.step()\n",
    "    \n",
    "#    # Plot some progress\n",
    "#     if t % math.ceil(e) == 0:\n",
    "#         plot_model(X, y, model, e)\n",
    "#         e *= 1.5\n",
    "\n",
    "#! convert -delay 20 -crop 500x475+330+50 +repage $(gls -1v train*) train.gif"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot trained model\n",
    "print(model)\n",
    "plot_model(X, y, model)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Codas ML",
   "language": "python",
   "name": "codasml"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}