{ "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.2-final" }, "orig_nbformat": 2, "kernelspec": { "name": "Python 3.8.2 64-bit", "display_name": "Python 3.8.2 64-bit", "metadata": { "interpreter": { "hash": "f3131ab0c53afd587e929ab5f3e0081bb3b1675889ab2c259292a9bd8773e05a" } } } }, "nbformat": 4, "nbformat_minor": 2, "cells": [ { "source": [ "# " ], "cell_type": "markdown", "metadata": {} }, { "source": [ "# Gradient descent with minibatches" ], "cell_type": "markdown", "metadata": {} }, { "source": [ "From the [Data Science from Scratch book](https://www.oreilly.com/library/view/data-science-from/9781492041122/)." ], "cell_type": "markdown", "metadata": {} }, { "source": [ "# Libraries and helper functions" ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import random\n", "from typing import TypeVar, List, Iterator" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "Vector = List[float]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "def add(vector1: Vector, vector2: Vector) -> Vector:\n", " assert len(vector1) == len(vector2)\n", " return [v1 + v2 for v1, v2 in zip(vector1, vector2)]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "def vector_sum(vectors: List[Vector]) -> Vector:\n", " assert vectors\n", " \n", " vector_length = len(vectors[0])\n", " assert all(len(v) == vector_length for v in vectors)\n", "\n", " sums = [0] * vector_length\n", " for vector in vectors:\n", " sums = add(sums, vector)\n", "\n", " return sums" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def scalar_multiply(c: float, vector: 
Vector) -> Vector:\n", " return [c * v for v in vector]" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "def vector_mean(vectors: List[Vector]) -> Vector:\n", " n = len(vectors)\n", " return scalar_multiply(1/n, vector_sum(vectors))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def gradient_step(v: Vector, gradient: Vector, step_size: float) -> Vector:\n", " \"\"\"Return vector adjusted with step. Step is gradient times step size.\n", " \"\"\"\n", " step = scalar_multiply(step_size, gradient)\n", " return add(v, step)\n" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def linear_gradient(x: float, y: float, theta: Vector) -> Vector:\n", " slope, intercept = theta\n", " predicted = slope * x + intercept\n", " error = (predicted - y) #** 2\n", " # print(x, y, theta, predicted, error)\n", " return [2 * error * x, 2 * error]" ] }, { "source": [ "## Minibatch gradient" ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "T = TypeVar('T')" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "def minibatches(dataset: List[T], batch_size: int, shuffle: bool = True) -> Iterator[List[T]]:\n", "    \"\"\"Yield consecutive slices of dataset, each at most batch_size items long.\n", "\n", "    Slice start offsets are 0, batch_size, 2 * batch_size, ...; when shuffle\n", "    is True the offsets are visited in random order, while the items inside\n", "    each slice keep their original order. The slice at the end of dataset is\n", "    shorter when len(dataset) is not a multiple of batch_size.\n", "    \"\"\"\n", "    batch_starts = list(range(0, len(dataset), batch_size))\n", "\n", "    if shuffle:\n", "        random.shuffle(batch_starts)\n", "\n", "    for start in batch_starts:\n", "        yield dataset[start:start + batch_size]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[(-50, -995),\n", " (-49, -975),\n", " (-48, -955),\n", " (-47, -935),\n", " (-46, -915),\n", " (-45, -895),\n", " (-44, -875),\n", " (-43, -855),\n", " (-42, -835),\n", " (-41, -815)]" ] }, "metadata": {}, "execution_count": 11 } ], "source": [ "inputs = [(x, 20 * 
x + 5) for x in range(-50, 50)]\n", "inputs[:10]" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[(-50, -995), (-49, -975), (-48, -955), (-47, -935), (-46, -915)]\n[(-45, -895), (-44, -875), (-43, -855), (-42, -835), (-41, -815)]\n[(-40, -795), (-39, -775), (-38, -755), (-37, -735), (-36, -715)]\n[(-35, -695), (-34, -675), (-33, -655), (-32, -635), (-31, -615)]\n[(-30, -595), (-29, -575), (-28, -555), (-27, -535), (-26, -515)]\n[(-25, -495), (-24, -475), (-23, -455), (-22, -435), (-21, -415)]\n[(-20, -395), (-19, -375), (-18, -355), (-17, -335), (-16, -315)]\n[(-15, -295), (-14, -275), (-13, -255), (-12, -235), (-11, -215)]\n[(-10, -195), (-9, -175), (-8, -155), (-7, -135), (-6, -115)]\n[(-5, -95), (-4, -75), (-3, -55), (-2, -35), (-1, -15)]\n[(0, 5), (1, 25), (2, 45), (3, 65), (4, 85)]\n[(5, 105), (6, 125), (7, 145), (8, 165), (9, 185)]\n[(10, 205), (11, 225), (12, 245), (13, 265), (14, 285)]\n[(15, 305), (16, 325), (17, 345), (18, 365), (19, 385)]\n[(20, 405), (21, 425), (22, 445), (23, 465), (24, 485)]\n[(25, 505), (26, 525), (27, 545), (28, 565), (29, 585)]\n[(30, 605), (31, 625), (32, 645), (33, 665), (34, 685)]\n[(35, 705), (36, 725), (37, 745), (38, 765), (39, 785)]\n[(40, 805), (41, 825), (42, 845), (43, 865), (44, 885)]\n[(45, 905), (46, 925), (47, 945), (48, 965), (49, 985)]\n" ] } ], "source": [ "for batch in minibatches(inputs, batch_size=5, shuffle=False):\n", " print(batch)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[(45, 905), (46, 925), (47, 945), (48, 965), (49, 985)]" ] }, "metadata": {}, "execution_count": 13 } ], "source": [ "batch" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[-85269.18707965006, -1812.6065734463045]" ] }, "metadata": {}, "execution_count": 14 } 
], "source": [ "theta = [random.uniform(-1, 1), random.uniform(-1, 1)]\n", "vector_mean([linear_gradient(x, y, theta) for x, y in batch])" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "tags": [] }, "outputs": [], "source": [ "inputs = [(x, 20 * x + 5) for x in range(-50, 50)]\n", "theta = [random.uniform(-1, 1), random.uniform(-1, 1)]\n", "learning_rate = 0.001\n", "\n", "minibatch_results = []\n", "\n", "for epoch in range(1000):\n", " for batch in minibatches(inputs, batch_size=20):\n", " grad = vector_mean([linear_gradient(x, y, theta) for x, y in batch])\n", " theta = gradient_step(theta, grad, -learning_rate)\n", " minibatch_results.append([epoch, theta])" ] }, { "source": [ "Last twenty epochs" ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[[980, [20.00000231135919, 4.999994332476798]],\n", " [981, [19.99999999323285, 4.999994612954206]],\n", " [982, [20.000000061792846, 4.999994646372994]],\n", " [983, [19.99999994338701, 4.999994665516976]],\n", " [984, [19.99999978951187, 4.999994715352837]],\n", " [985, [20.000000045649184, 4.999994748354316]],\n", " [986, [19.99999982652112, 4.999994803275764]],\n", " [987, [20.000000353260692, 4.999994822918053]],\n", " [988, [20.000000203297585, 4.99999512614361]],\n", " [989, [20.000000013241355, 4.999995351690553]],\n", " [990, [20.00000018064206, 4.999995396803261]],\n", " [991, [20.00000031292562, 4.999995409193917]],\n", " [992, [19.99999796367368, 4.999995522343663]],\n", " [993, [19.9999998507973, 4.999995639004705]],\n", " [994, [20.00000000615185, 4.999995667269925]],\n", " [995, [19.999999823836678, 4.999995780888271]],\n", " [996, [20.000000260844455, 4.999995797933743]],\n", " [997, [20.00000006542973, 4.999995821702883]],\n", " [998, [19.999999347865106, 4.999995882305616]],\n", " [999, [20.000000771099824, 4.999995951108064]]]" ] }, "metadata": {}, 
"execution_count": 16 } ], "source": [ "minibatch_results[-20:]" ] } ] }