{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "Python 3.8.2 64-bit",
   "display_name": "Python 3.8.2 64-bit",
   "metadata": {
    "interpreter": {
     "hash": "f3131ab0c53afd587e929ab5f3e0081bb3b1675889ab2c259292a9bd8773e05a"
    }
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "source": [
    "# "
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "# Gradient descent with minibatches"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "From the [Data Science from Scratch book](https://www.oreilly.com/library/view/data-science-from/9781492041122/)."
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "# Libraries and helper functions"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "from typing import TypeVar, List, Iterator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "Vector = List[float]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add(vector1: Vector, vector2: Vector) -> Vector:\n",
    "    \"\"\"Return the element-wise sum of two vectors of equal length.\n",
    "\n",
    "    Neither argument is modified; a new list is returned.\n",
    "    Raises AssertionError when the two lengths differ.\n",
    "    \"\"\"\n",
    "    assert len(vector1) == len(vector2)\n",
    "    return [left + right for left, right in zip(vector1, vector2)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def vector_sum(vectors: List[Vector]) -> Vector:\n",
    "    \"\"\"Add a non-empty list of equal-length vectors component by component.\n",
    "\n",
    "    Raises AssertionError if `vectors` is empty or the vectors differ in length.\n",
    "    \"\"\"\n",
    "    assert vectors\n",
    "\n",
    "    width = len(vectors[0])\n",
    "    assert all(len(vector) == width for vector in vectors)\n",
    "\n",
    "    # Fold every vector into a running total that starts at all zeros.\n",
    "    totals = [0] * width\n",
    "    for vector in vectors:\n",
    "        totals = add(totals, vector)\n",
    "\n",
    "    return totals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def scalar_multiply(c: float, vector: Vector) -> Vector:\n",
    "    \"\"\"Return a new vector with every component of `vector` multiplied by `c`.\"\"\"\n",
    "    return [c * component for component in vector]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def vector_mean(vectors: List[Vector]) -> Vector:\n",
    "    \"\"\"Return the component-wise average of a list of equal-length vectors.\"\"\"\n",
    "    # Scale the component-wise sum by 1 / len(vectors).\n",
    "    return scalar_multiply(1 / len(vectors), vector_sum(vectors))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def gradient_step(v: Vector, gradient: Vector, step_size: float) -> Vector:\n",
    "    \"\"\"Return a new vector: `v` plus `step_size` times `gradient`.\n",
    "\n",
    "    A negative `step_size` moves `v` against the gradient.\n",
    "    \"\"\"\n",
    "    return add(v, scalar_multiply(step_size, gradient))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def linear_gradient(x: float, y: float, theta: Vector) -> Vector:\n",
    "    \"\"\"Gradient of the squared error of a single point under a line model.\n",
    "\n",
    "    `theta` is unpacked as (slope, intercept) and the prediction is\n",
    "    slope * x + intercept. The squared error (predicted - y) ** 2 has\n",
    "    partial derivatives 2 * error * x with respect to the slope and\n",
    "    2 * error with respect to the intercept; they are returned in that order.\n",
    "    \"\"\"\n",
    "    slope, intercept = theta\n",
    "    residual = (slope * x + intercept) - y\n",
    "    doubled = 2 * residual\n",
    "    return [doubled * x, doubled]"
   ]
  },
  {
   "source": [
    "## Minibatch gradient"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "T = TypeVar('T')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def minibatches(dataset: List[T], batch_size: int, shuffle: bool = True) -> Iterator[List[T]]:\n",
    "    \"\"\"Yield consecutive slices of `dataset`, each holding up to `batch_size` items.\n",
    "\n",
    "    The final slice is shorter when len(dataset) is not a multiple of\n",
    "    `batch_size`. With `shuffle` set, the order in which the slices are\n",
    "    yielded is randomised; the items inside each slice keep their\n",
    "    original order.\n",
    "\n",
    "    Raises ValueError (on first iteration, because this is a generator)\n",
    "    when `batch_size` is smaller than 1.\n",
    "    \"\"\"\n",
    "    # A negative step would give an empty range and silently drop every item.\n",
    "    if batch_size < 1:\n",
    "        raise ValueError('batch_size must be a positive integer')\n",
    "\n",
    "    batch_starts = list(range(0, len(dataset), batch_size))\n",
    "\n",
    "    if shuffle:\n",
    "        random.shuffle(batch_starts)\n",
    "\n",
    "    for start in batch_starts:\n",
    "        yield dataset[start:start + batch_size]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "[(-50, -995),\n",
       " (-49, -975),\n",
       " (-48, -955),\n",
       " (-47, -935),\n",
       " (-46, -915),\n",
       " (-45, -895),\n",
       " (-44, -875),\n",
       " (-43, -855),\n",
       " (-42, -835),\n",
       " (-41, -815)]"
      ]
     },
     "metadata": {},
     "execution_count": 11
    }
   ],
   "source": [
    "inputs = [(x, 20 * x + 5) for x in range(-50, 50)]\n",
    "inputs[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "[(-50, -995), (-49, -975), (-48, -955), (-47, -935), (-46, -915)]\n[(-45, -895), (-44, -875), (-43, -855), (-42, -835), (-41, -815)]\n[(-40, -795), (-39, -775), (-38, -755), (-37, -735), (-36, -715)]\n[(-35, -695), (-34, -675), (-33, -655), (-32, -635), (-31, -615)]\n[(-30, -595), (-29, -575), (-28, -555), (-27, -535), (-26, -515)]\n[(-25, -495), (-24, -475), (-23, -455), (-22, -435), (-21, -415)]\n[(-20, -395), (-19, -375), (-18, -355), (-17, -335), (-16, -315)]\n[(-15, -295), (-14, -275), (-13, -255), (-12, -235), (-11, -215)]\n[(-10, -195), (-9, -175), (-8, -155), (-7, -135), (-6, -115)]\n[(-5, -95), (-4, -75), (-3, -55), (-2, -35), (-1, -15)]\n[(0, 5), (1, 25), (2, 45), (3, 65), (4, 85)]\n[(5, 105), (6, 125), (7, 145), (8, 165), (9, 185)]\n[(10, 205), (11, 225), (12, 245), (13, 265), (14, 285)]\n[(15, 305), (16, 325), (17, 345), (18, 365), (19, 385)]\n[(20, 405), (21, 425), (22, 445), (23, 465), (24, 485)]\n[(25, 505), (26, 525), (27, 545), (28, 565), (29, 585)]\n[(30, 605), (31, 625), (32, 645), (33, 665), (34, 685)]\n[(35, 705), (36, 725), (37, 745), (38, 765), (39, 785)]\n[(40, 805), (41, 825), (42, 845), (43, 865), (44, 885)]\n[(45, 905), (46, 925), (47, 945), (48, 965), (49, 985)]\n"
     ]
    }
   ],
   "source": [
    "for batch in minibatches(inputs, batch_size=5, shuffle=False):\n",
    "    print(batch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "[(45, 905), (46, 925), (47, 945), (48, 965), (49, 985)]"
      ]
     },
     "metadata": {},
     "execution_count": 13
    }
   ],
   "source": [
    "batch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "[-85269.18707965006, -1812.6065734463045]"
      ]
     },
     "metadata": {},
     "execution_count": 14
    }
   ],
   "source": [
    "theta = [random.uniform(-1, 1), random.uniform(-1, 1)]\n",
    "vector_mean([linear_gradient(x, y, theta) for x, y in batch])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# Synthetic points on the line y = 20x + 5.\n",
    "inputs = [(x, 20 * x + 5) for x in range(-50, 50)]\n",
    "\n",
    "# Random initial [slope, intercept].\n",
    "theta = [random.uniform(-1, 1) for _ in range(2)]\n",
    "learning_rate = 0.001\n",
    "\n",
    "minibatch_results = []\n",
    "\n",
    "for epoch in range(1000):\n",
    "    for batch in minibatches(inputs, batch_size=20):\n",
    "        # Average the per-point gradients over the batch.\n",
    "        grad = vector_mean([linear_gradient(x, y, theta) for x, y in batch])\n",
    "        # Negative step size: move against the gradient.\n",
    "        theta = gradient_step(theta, grad, -learning_rate)\n",
    "    # Record theta once per epoch, after all of its batches.\n",
    "    minibatch_results.append([epoch, theta])"
   ]
  },
  {
   "source": [
    "Last twenty epochs"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "[[980, [20.00000231135919, 4.999994332476798]],\n",
       " [981, [19.99999999323285, 4.999994612954206]],\n",
       " [982, [20.000000061792846, 4.999994646372994]],\n",
       " [983, [19.99999994338701, 4.999994665516976]],\n",
       " [984, [19.99999978951187, 4.999994715352837]],\n",
       " [985, [20.000000045649184, 4.999994748354316]],\n",
       " [986, [19.99999982652112, 4.999994803275764]],\n",
       " [987, [20.000000353260692, 4.999994822918053]],\n",
       " [988, [20.000000203297585, 4.99999512614361]],\n",
       " [989, [20.000000013241355, 4.999995351690553]],\n",
       " [990, [20.00000018064206, 4.999995396803261]],\n",
       " [991, [20.00000031292562, 4.999995409193917]],\n",
       " [992, [19.99999796367368, 4.999995522343663]],\n",
       " [993, [19.9999998507973, 4.999995639004705]],\n",
       " [994, [20.00000000615185, 4.999995667269925]],\n",
       " [995, [19.999999823836678, 4.999995780888271]],\n",
       " [996, [20.000000260844455, 4.999995797933743]],\n",
       " [997, [20.00000006542973, 4.999995821702883]],\n",
       " [998, [19.999999347865106, 4.999995882305616]],\n",
       " [999, [20.000000771099824, 4.999995951108064]]]"
      ]
     },
     "metadata": {},
     "execution_count": 16
    }
   ],
   "source": [
    "minibatch_results[-20:]"
   ]
  }
 ]
}