{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Matrix multiplication from foundations" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The *foundations* we'll assume throughout this course are:\n", "\n", "- Python\n", "- matplotlib\n", "- The Python standard library\n", "- Jupyter notebooks and nbdev" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from pathlib import Path\n", "import pickle, gzip, math, os, time, shutil, matplotlib as mpl, matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "MNIST_URL='https://github.com/mnielsen/neural-networks-and-deep-learning/blob/master/data/mnist.pkl.gz?raw=true'\n", "path_data = Path('data')\n", "path_data.mkdir(exist_ok=True)\n", "path_gz = path_data/'mnist.pkl.gz'" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[urlretrieve](https://docs.python.org/3/library/urllib.request.html#urllib.request.urlretrieve) - (read the docs!)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from urllib.request import urlretrieve\n", "if not path_gz.exists(): urlretrieve(MNIST_URL, path_gz)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total 16656\r\n", "-rw-rw-r-- 1 jhoward jhoward 17051982 Sep 30 04:37 mnist.pkl.gz\r\n" ] } ], "source": [ "!ls -l data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "with gzip.open(path_gz, 'rb') as f: ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding='latin-1')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.0,\n", " 0.0,\n", " 0.0,\n", " 0.19140625,\n", " 0.9296875,\n", " 0.98828125,\n", " 
0.98828125,\n", " 0.98828125,\n", " 0.98828125,\n", " 0.98828125]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lst1 = list(x_train[0])\n", "vals = lst1[200:210]\n", "vals" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def chunks(x, sz):\n", " for i in range(0, len(x), sz): yield x[i:i+sz]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[[0.0, 0.0, 0.0, 0.19140625, 0.9296875],\n", " [0.98828125, 0.98828125, 0.98828125, 0.98828125, 0.98828125]]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(chunks(vals, 5))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAN80lEQVR4nO3df6hcdXrH8c+ncf3DrBpTMYasNhuRWBWbLRqLSl2RrD9QNOqWDVgsBrN/GHChhEr6xyolEuqP0qAsuYu6sWyzLqgYZVkVo6ZFCF5j1JjU1YrdjV6SSozG+KtJnv5xT+Su3vnOzcyZOZP7vF9wmZnzzJnzcLife87Md879OiIEYPL7k6YbANAfhB1IgrADSRB2IAnCDiRxRD83ZpuP/oEeiwiPt7yrI7vtS22/aftt27d281oAesudjrPbniLpd5IWSNou6SVJiyJia2EdjuxAj/XiyD5f0tsR8U5EfCnpV5Ku6uL1APRQN2GfJekPYx5vr5b9EdtLbA/bHu5iWwC61M0HdOOdKnzjND0ihiQNSZzGA03q5si+XdJJYx5/R9L73bUDoFe6CftLkk61/V3bR0r6kaR19bQFoG4dn8ZHxD7bSyU9JWmKpAci4o3aOgNQq46H3jraGO/ZgZ7ryZdqABw+CDuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUii4ymbcXiYMmVKsX7sscf2dPtLly5tWTvqqKOK686dO7dYv/nmm4v1u+66q2Vt0aJFxXU///zzYn3lypXF+u23316sN6GrsNt+V9IeSfsl7YuIs+toCkD96jiyXxQRH9TwOgB6iPfsQBLdhj0kPW37ZdtLxnuC7SW2h20Pd7ktAF3o9jT+/Ih43/YJkp6x/V8RsWHsEyJiSNKQJNmOLrcHoENdHdkj4v3qdqekxyTNr6MpAPXrOOy2p9o++uB9ST+QtKWuxgDUq5vT+BmSHrN98HX+PSJ+W0tXk8zJJ59crB955JHF+nnnnVesX3DBBS1r06ZNK6577bXXFutN2r59e7G+atWqYn3hwoUta3v27Cmu++qrrx
brL7zwQrE+iDoOe0S8I+kvauwFQA8x9AYkQdiBJAg7kARhB5Ig7EASjujfl9om6zfo5s2bV6yvX7++WO/1ZaaD6sCBA8X6jTfeWKx/8sknHW97ZGSkWP/www+L9TfffLPjbfdaRHi85RzZgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJxtlrMH369GJ948aNxfqcOXPqbKdW7XrfvXt3sX7RRRe1rH355ZfFdbN+/6BbjLMDyRF2IAnCDiRB2IEkCDuQBGEHkiDsQBJM2VyDXbt2FevLli0r1q+44opi/ZVXXinW2/1L5ZLNmzcX6wsWLCjW9+7dW6yfccYZLWu33HJLcV3UiyM7kARhB5Ig7EAShB1IgrADSRB2IAnCDiTB9ewD4JhjjinW200vvHr16pa1xYsXF9e9/vrri/W1a9cW6xg8HV/PbvsB2zttbxmzbLrtZ2y/Vd0eV2ezAOo3kdP4X0i69GvLbpX0bEScKunZ6jGAAdY27BGxQdLXvw96laQ11f01kq6uty0Adev0u/EzImJEkiJixPYJrZ5oe4mkJR1uB0BNen4hTEQMSRqS+IAOaFKnQ287bM+UpOp2Z30tAeiFTsO+TtIN1f0bJD1eTzsAeqXtabzttZK+L+l429sl/VTSSkm/tr1Y0u8l/bCXTU52H3/8cVfrf/TRRx2ve9NNNxXrDz/8cLHebo51DI62YY+IRS1KF9fcC4Ae4uuyQBKEHUiCsANJEHYgCcIOJMElrpPA1KlTW9aeeOKJ4roXXnhhsX7ZZZcV608//XSxjv5jymYgOcIOJEHYgSQIO5AEYQeSIOxAEoQdSIJx9knulFNOKdY3bdpUrO/evbtYf+6554r14eHhlrX77ruvuG4/fzcnE8bZgeQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJxtmTW7hwYbH+4IMPFutHH310x9tevnx5sf7QQw8V6yMjIx1vezJjnB1IjrADSRB2IAnCDiRB2IEkCDuQBGEHkmCcHUVnnnlmsX7PPfcU6xdf3Plkv6tXry7WV6xYUay/9957HW/7cNbxOLvtB2zvtL1lzLLbbL9ne3P1c3mdzQKo30RO438h6dJxlv9LRMyrfn5Tb1sA6tY27BGxQdKuPvQCoIe6+YBuqe3XqtP841o9yfYS28O2W/8zMgA912nYfybpFEnzJI1IurvVEyNiKCLOjoizO9wWgBp0FPaI2BER+yPigKSfS5pfb1sA6tZR2G3PHPNwoaQtrZ4LYDC0HWe3vVbS9yUdL2mHpJ9Wj+dJCknvSvpxRLS9uJhx9sln2rRpxfqVV17ZstbuWnl73OHir6xfv75YX7BgQbE+WbUaZz9iAisuGmfx/V13BKCv+LoskARhB5Ig7EAShB1IgrADSXCJKxrzxRdfFOtHHFEeLNq3b1+xfskll7SsPf/888V1D2f8K2kgOcIOJEHYgSQIO5AEYQeSIOxAEoQdSKLtVW/I7ayzzirWr7vuumL9nHPOaVlrN47eztatW4v1DRs2dPX6kw1HdiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgnH2SW7u3LnF+tKlS4v1a665plg/8cQTD7mnidq/f3+xPjJS/u/lBw4cqLOdwx5HdiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgnH2w0C7sexFi8abaHdUu3H02bNnd9JSLYaHh4v1FStWFOvr1q2rs51Jr+2R3fZJtp+zvc32G7ZvqZZPt/2M7beq2+N63y6ATk3kNH6fpL+PiD+X9FeSbrZ9uqRbJT0bEadKerZ6DGBAtQ17RIxExKbq/h5J2yTNknSVpDXV09ZIurpHPQKowSG9Z7c9W9L3JG2UNCMiRqTRPwi2T2ixzhJJS7rsE0CXJhx229+W9Iikn0TEx/a4c8d9Q0QMSRqqXoOJHYGGTGjozfa3NBr0X0bEo9XiHbZnVvWZknb2pkUAdWh7ZPfoIfx+Sdsi4p4xpXWSbpC0srp9vCcdTgIzZswo1k
8//fRi/d577y3WTzvttEPuqS4bN24s1u+8886WtccfL//KcIlqvSZyGn++pL+V9LrtzdWy5RoN+a9tL5b0e0k/7EmHAGrRNuwR8Z+SWr1Bv7jedgD0Cl+XBZIg7EAShB1IgrADSRB2IAkucZ2g6dOnt6ytXr26uO68efOK9Tlz5nTSUi1efPHFYv3uu+8u1p966qli/bPPPjvkntAbHNmBJAg7kARhB5Ig7EAShB1IgrADSRB2IIk04+znnntusb5s2bJiff78+S1rs2bN6qinunz66acta6tWrSque8cddxTre/fu7agnDB6O7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQRJpx9oULF3ZV78bWrVuL9SeffLJY37dvX7FeuuZ89+7dxXWRB0d2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUjCEVF+gn2SpIcknSjpgKShiPhX27dJuknS/1ZPXR4Rv2nzWuWNAehaRIw76/JEwj5T0syI2GT7aEkvS7pa0t9I+iQi7ppoE4Qd6L1WYZ/I/Owjkkaq+3tsb5PU7L9mAXDIDuk9u+3Zkr4naWO1aKnt12w/YPu4FusssT1se7i7VgF0o+1p/FdPtL8t6QVJKyLiUdszJH0gKST9k0ZP9W9s8xqcxgM91vF7dkmy/S1JT0p6KiLuGac+W9KTEXFmm9ch7ECPtQp729N425Z0v6RtY4NefXB30EJJW7ptEkDvTOTT+Ask/Yek1zU69CZJyyUtkjRPo6fx70r6cfVhXum1OLIDPdbVaXxdCDvQex2fxgOYHAg7kARhB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCsANJ9HvK5g8k/c+Yx8dXywbRoPY2qH1J9NapOnv7s1aFvl7P/o2N28MRcXZjDRQMam+D2pdEb53qV2+cxgNJEHYgiabDPtTw9ksGtbdB7Uuit071pbdG37MD6J+mj+wA+oSwA0k0Enbbl9p+0/bbtm9toodWbL9r+3Xbm5uen66aQ2+n7S1jlk23/Yztt6rbcefYa6i322y/V+27zbYvb6i3k2w/Z3ub7Tds31Itb3TfFfrqy37r+3t221Mk/U7SAknbJb0kaVFEbO1rIy3YflfS2RHR+BcwbP+1pE8kPXRwai3b/yxpV0SsrP5QHhcR/zAgvd2mQ5zGu0e9tZpm/O/U4L6rc/rzTjRxZJ8v6e2IeCcivpT0K0lXNdDHwIuIDZJ2fW3xVZLWVPfXaPSXpe9a9DYQImIkIjZV9/dIOjjNeKP7rtBXXzQR9lmS/jDm8XYN1nzvIelp2y/bXtJ0M+OYcXCarer2hIb7+bq203j309emGR+YfdfJ9OfdaiLs401NM0jjf+dHxF9KukzSzdXpKibmZ5JO0egcgCOS7m6ymWqa8Uck/SQiPm6yl7HG6asv+62JsG+XdNKYx9+R9H4DfYwrIt6vbndKekyjbzsGyY6DM+hWtzsb7ucrEbEjIvZHxAFJP1eD+66aZvwRSb+MiEerxY3vu/H66td+ayLsL0k61fZ3bR8p6UeS1jXQxzfYnlp9cCLbUyX9QIM3FfU6STdU92+Q9HiDvfyRQZnGu9U042p43zU+/XlE9P1H0uUa/UT+vyX9YxM9tOhrjqRXq583mu5N0lqNntb9n0bPiBZL+lNJz0p6q7qdPkC9/ZtGp/Z+TaPBmtlQbxdo9K3ha5I2Vz+XN73vCn31Zb/xdVkgCb5BByRB2IEkCDuQBGEHkiDsQBKEHUiCsANJ/D+f1mbt6t55/AAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "mpl.rcParams['image.cmap'] = 'gray'\n", "plt.imshow(list(chunks(lst1, 28)));" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[islice](https://docs.python.org/3/library/itertools.html#itertools.islice)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from itertools import islice" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "it = iter(vals)\n", "islice(it, 5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.0, 0.0, 0.0, 0.19140625, 0.9296875]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(islice(it, 5))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.98828125, 0.98828125, 0.98828125, 0.98828125, 0.98828125]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(islice(it, 5))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(islice(it, 5))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "it = iter(lst1)\n", "img = list(iter(lambda: list(islice(it, 28)), []))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAN80lEQVR4nO3df6hcdXrH8c+ncf3DrBpTMYasNhuRWBWbLRqLSl2RrD9QNOqWDVgsBrN/GHChhEr6xyolEuqP0qAsuYu6sWyzLqgYZVkVo6ZFCF5j1JjU1YrdjV6SSozG+KtJnv5xT+Su3vnOzcyZOZP7vF9wmZnzzJnzcLife87Md879OiIEYPL7k6YbANAfhB1IgrADSRB2IAnCDiRxRD83ZpuP/oEeiwiPt7yrI7vtS22/aftt27d281oAesudjrPbniLpd5IWSNou6SVJiyJia2EdjuxAj/XiyD5f0tsR8U5EfCnpV5Ku6uL1APRQN2GfJekPYx5vr5b9EdtLbA/bHu5iWwC61M0HdOOdKnzjND0ihiQNSZzGA03q5si+XdJJYx5/R9L73bUDoFe6CftLkk61/V3bR0r6kaR19bQFoG4dn8ZHxD7bSyU9JWmKpAci4o3aOgNQq46H3jraGO/ZgZ7ryZdqABw+CDuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUii4ymbcXiYMmVKsX7sscf2dPtLly5tWTvqqKOK686dO7dYv/nmm4v1u+66q2Vt0aJFxXU///zzYn3lypXF+u23316sN6GrsNt+V9IeSfsl7YuIs+toCkD96jiyXxQRH9TwOgB6iPfsQBLdhj0kPW37ZdtLxnuC7SW2h20Pd7ktAF3o9jT+/Ih43/YJkp6x/V8RsWHsEyJiSNKQJNmOLrcHoENdHdkj4v3qdqekxyTNr6MpAPXrOOy2p9o++uB9ST+QtKWuxgDUq5vT+BmSHrN98HX+PSJ+W0tXk8zJJ59crB955JHF+nnnnVesX3DBBS1r06ZNK6577bXXFutN2r59e7G+atWqYn3hwoUta3v27Cmu++qrrxbrL7zwQrE+iDoOe0S8I+kvauwFQA8x9AYkQdiBJAg7kARhB5Ig7EASjujfl9om6zfo5s2bV6yvX7++WO/1ZaaD6sCBA8X6jTfeWKx/8sknHW97ZGSkWP/www+L9TfffLPjbfdaRHi85RzZgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJxtlrMH369GJ948aNxfqcOXPqbKdW7XrfvXt3sX7RRRe1rH355ZfFdbN+/6BbjLMDyRF2IAnCDiRB2IEkCDuQBGEHkiDsQBJM2VyDXbt2FevLli0r1q+44opi/ZVXXinW2/1L5ZLNmzcX6wsWLCjW9+7dW6yfccYZLWu33HJLcV3UiyM7kARhB5Ig7EAShB1IgrADSRB2IAnCDiTB9ewD4JhjjinW200vvHr16pa1xYsXF9e9/vrri/W1a9cW6xg8HV/PbvsB2zttbxmzbLrtZ2y/Vd0eV2ezAOo3kdP4X0i69GvLbpX0bEScKunZ6jGAAdY27BGxQdLXvw96laQ11f01kq6uty0Adev0u/EzImJEkiJixPYJrZ5oe4mkJR1uB0BNen4hTEQMSRqS+IAOaFKnQ287bM+UpOp2Z30tAeiFTsO+TtIN1f0bJD1eTzsAeqXtabzttZK+L+l429sl/VTSSkm/tr1Y0u8l/bCXTU52H3/8cVfrf/TRRx2ve9NNNxXrDz/8cLHebo51DI62YY+IRS1KF9fcC4Ae4uuyQBKEHUiCsANJEHYgCcIOJMElrpPA1KlTW9aeeOKJ4roXXnhhsX7ZZZcV608//XSxjv5jymYgOcIOJEHYgSQIO5AEYQeSIOxAEoQdSIJx9knulFNOKdY
3bdpUrO/evbtYf+6554r14eHhlrX77ruvuG4/fzcnE8bZgeQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJxtmTW7hwYbH+4IMPFutHH310x9tevnx5sf7QQw8V6yMjIx1vezJjnB1IjrADSRB2IAnCDiRB2IEkCDuQBGEHkmCcHUVnnnlmsX7PPfcU6xdf3Plkv6tXry7WV6xYUay/9957HW/7cNbxOLvtB2zvtL1lzLLbbL9ne3P1c3mdzQKo30RO438h6dJxlv9LRMyrfn5Tb1sA6tY27BGxQdKuPvQCoIe6+YBuqe3XqtP841o9yfYS28O2W/8zMgA912nYfybpFEnzJI1IurvVEyNiKCLOjoizO9wWgBp0FPaI2BER+yPigKSfS5pfb1sA6tZR2G3PHPNwoaQtrZ4LYDC0HWe3vVbS9yUdL2mHpJ9Wj+dJCknvSvpxRLS9uJhx9sln2rRpxfqVV17ZstbuWnl73OHir6xfv75YX7BgQbE+WbUaZz9iAisuGmfx/V13BKCv+LoskARhB5Ig7EAShB1IgrADSXCJKxrzxRdfFOtHHFEeLNq3b1+xfskll7SsPf/888V1D2f8K2kgOcIOJEHYgSQIO5AEYQeSIOxAEoQdSKLtVW/I7ayzzirWr7vuumL9nHPOaVlrN47eztatW4v1DRs2dPX6kw1HdiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgnH2SW7u3LnF+tKlS4v1a665plg/8cQTD7mnidq/f3+xPjJS/u/lBw4cqLOdwx5HdiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgnH2w0C7sexFi8abaHdUu3H02bNnd9JSLYaHh4v1FStWFOvr1q2rs51Jr+2R3fZJtp+zvc32G7ZvqZZPt/2M7beq2+N63y6ATk3kNH6fpL+PiD+X9FeSbrZ9uqRbJT0bEadKerZ6DGBAtQ17RIxExKbq/h5J2yTNknSVpDXV09ZIurpHPQKowSG9Z7c9W9L3JG2UNCMiRqTRPwi2T2ixzhJJS7rsE0CXJhx229+W9Iikn0TEx/a4c8d9Q0QMSRqqXoOJHYGGTGjozfa3NBr0X0bEo9XiHbZnVvWZknb2pkUAdWh7ZPfoIfx+Sdsi4p4xpXWSbpC0srp9vCcdTgIzZswo1k8//fRi/d577y3WTzvttEPuqS4bN24s1u+8886WtccfL//KcIlqvSZyGn++pL+V9LrtzdWy5RoN+a9tL5b0e0k/7EmHAGrRNuwR8Z+SWr1Bv7jedgD0Cl+XBZIg7EAShB1IgrADSRB2IAkucZ2g6dOnt6ytXr26uO68efOK9Tlz5nTSUi1efPHFYv3uu+8u1p966qli/bPPPjvkntAbHNmBJAg7kARhB5Ig7EAShB1IgrADSRB2IIk04+znnntusb5s2bJiff78+S1rs2bN6qinunz66acta6tWrSque8cddxTre/fu7agnDB6O7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQRJpx9oULF3ZV78bWrVuL9SeffLJY37dvX7FeuuZ89+7dxXWRB0d2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUjCEVF+gn2SpIcknSjpgKShiPhX27dJuknS/1ZPXR4Rv2nzWuWNAehaRIw76/JEwj5T0syI2GT7aEkvS7pa0t9I+iQi7ppoE4Qd6L1WYZ/I/Owjkkaq+3tsb5PU7L9mAXDIDuk9u+3Zkr4naWO1aKnt12w/YPu4FusssT1se7i7VgF0o+1p/FdPtL8t6QVJKyLiUdszJH0gKST9k0ZP9W9s8xqcxgM91vF7dkmy/S1JT0p6KiLuGac+W9KTEXFmm9ch7ECPtQp729N425Z0v6RtY4NefXB30EJJW7ptEkDvTOTT+Ask/Yek1zU69CZJyyUtkjRPo6fx70r6cfVhXum1OLIDPdbVaXxdCDvQex2fxgOYHAg7kARhB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBGE
HkiDsQBKEHUiCsANJ9HvK5g8k/c+Yx8dXywbRoPY2qH1J9NapOnv7s1aFvl7P/o2N28MRcXZjDRQMam+D2pdEb53qV2+cxgNJEHYgiabDPtTw9ksGtbdB7Uuit071pbdG37MD6J+mj+wA+oSwA0k0Enbbl9p+0/bbtm9toodWbL9r+3Xbm5uen66aQ2+n7S1jlk23/Yztt6rbcefYa6i322y/V+27zbYvb6i3k2w/Z3ub7Tds31Itb3TfFfrqy37r+3t221Mk/U7SAknbJb0kaVFEbO1rIy3YflfS2RHR+BcwbP+1pE8kPXRwai3b/yxpV0SsrP5QHhcR/zAgvd2mQ5zGu0e9tZpm/O/U4L6rc/rzTjRxZJ8v6e2IeCcivpT0K0lXNdDHwIuIDZJ2fW3xVZLWVPfXaPSXpe9a9DYQImIkIjZV9/dIOjjNeKP7rtBXXzQR9lmS/jDm8XYN1nzvIelp2y/bXtJ0M+OYcXCarer2hIb7+bq203j309emGR+YfdfJ9OfdaiLs401NM0jjf+dHxF9KukzSzdXpKibmZ5JO0egcgCOS7m6ymWqa8Uck/SQiPm6yl7HG6asv+62JsG+XdNKYx9+R9H4DfYwrIt6vbndKekyjbzsGyY6DM+hWtzsb7ucrEbEjIvZHxAFJP1eD+66aZvwRSb+MiEerxY3vu/H66td+ayLsL0k61fZ3bR8p6UeS1jXQxzfYnlp9cCLbUyX9QIM3FfU6STdU92+Q9HiDvfyRQZnGu9U042p43zU+/XlE9P1H0uUa/UT+vyX9YxM9tOhrjqRXq583mu5N0lqNntb9n0bPiBZL+lNJz0p6q7qdPkC9/ZtGp/Z+TaPBmtlQbxdo9K3ha5I2Vz+XN73vCn31Zb/xdVkgCb5BByRB2IEkCDuQBGEHkiDsQBKEHUiCsANJ/D+f1mbt6t55/AAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.imshow(img);" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Matrix and tensor" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.98828125" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "img[20][15]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "class Matrix:\n", "    # Wraps a nested sequence so that one element can be read as `m[row, col]`.\n", "    def __init__(self, xs):\n", "        # `xs` is stored as given; no copy is made.\n", "        self.xs = xs\n", "\n", "    def __getitem__(self, idxs):\n", "        # `idxs` is the index tuple from `m[r, c]`; only its first two items are used.\n", "        row = self.xs[idxs[0]]\n", "        return row[idxs[1]]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.98828125" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m = Matrix(img)\n", "m[20,15]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import torch\n", "from torch import tensor" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([1, 2, 3])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tensor([1,2,3])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([50000, 784])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x_train,y_train,x_valid,y_valid = map(tensor, (x_train,y_train,x_valid,y_valid))\n", "x_train.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'torch.FloatTensor'" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x_train.type()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[Tensor](https://pytorch.org/docs/stable/tensors.html)" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([50000, 28, 28])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "imgs = x_train.reshape((-1,28,28))\n", "imgs.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAPsAAAD4CAYAAAAq5pAIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAN80lEQVR4nO3df6hcdXrH8c+ncf3DrBpTMYasNhuRWBWbLRqLSl2RrD9QNOqWDVgsBrN/GHChhEr6xyolEuqP0qAsuYu6sWyzLqgYZVkVo6ZFCF5j1JjU1YrdjV6SSozG+KtJnv5xT+Su3vnOzcyZOZP7vF9wmZnzzJnzcLife87Md879OiIEYPL7k6YbANAfhB1IgrADSRB2IAnCDiRxRD83ZpuP/oEeiwiPt7yrI7vtS22/aftt27d281oAesudjrPbniLpd5IWSNou6SVJiyJia2EdjuxAj/XiyD5f0tsR8U5EfCnpV5Ku6uL1APRQN2GfJekPYx5vr5b9EdtLbA/bHu5iWwC61M0HdOOdKnzjND0ihiQNSZzGA03q5si+XdJJYx5/R9L73bUDoFe6CftLkk61/V3bR0r6kaR19bQFoG4dn8ZHxD7bSyU9JWmKpAci4o3aOgNQq46H3jraGO/ZgZ7ryZdqABw+CDuQBGEHkiDsQBKEHUiCsANJEHYgCcIOJEHYgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUii4ymbcXiYMmVKsX7sscf2dPtLly5tWTvqqKOK686dO7dYv/nmm4v1u+66q2Vt0aJFxXU///zzYn3lypXF+u23316sN6GrsNt+V9IeSfsl7YuIs+toCkD96jiyXxQRH9TwOgB6iPfsQBLdhj0kPW37ZdtLxnuC7SW2h20Pd7ktAF3o9jT+/Ih43/YJkp6x/V8RsWHsEyJiSNKQJNmOLrcHoENdHdkj4v3qdqekxyTNr6MpAPXrOOy2p9o++uB9ST+QtKWuxgDUq5vT+BmSHrN98HX+PSJ+W0tXk8zJJ59crB955JHF+nnnnVesX3DBBS1r06ZNK6577bXXFutN2r59e7G+atWqYn3hwoUta3v27Cmu++qrrxbrL7zwQrE+iDoOe0S8I+kvauwFQA8x9AYkQdiBJAg7kARhB5Ig7EASjujfl9om6zfo5s2bV6yvX7++WO/1ZaaD6sCBA8X6jTfeWKx/8sknHW97ZGSkWP/www+L9TfffLPjbfdaRHi85RzZgSQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJxtlrMH369GJ948aNxfqcOXPqbKdW7XrfvXt3sX7RRRe1rH355ZfFdbN+/6BbjLMDyRF2IAnCDiRB2IEkCDuQBGEHkiDsQBJM2VyDXbt2FevLli0r1q+44opi/ZVXXinW2/1L5ZLNmzcX6wsWLCjW9+7dW6yfccYZLWu33HJLcV3UiyM7kARhB5Ig7EAShB1IgrADSRB2IAnCDiTB9ewD4JhjjinW200vvHr16pa1xYsXF9e9/vrri/W1a9cW6xg8HV/PbvsB2zttbxmzbLrtZ2y
/Vd0eV2ezAOo3kdP4X0i69GvLbpX0bEScKunZ6jGAAdY27BGxQdLXvw96laQ11f01kq6uty0Adev0u/EzImJEkiJixPYJrZ5oe4mkJR1uB0BNen4hTEQMSRqS+IAOaFKnQ287bM+UpOp2Z30tAeiFTsO+TtIN1f0bJD1eTzsAeqXtabzttZK+L+l429sl/VTSSkm/tr1Y0u8l/bCXTU52H3/8cVfrf/TRRx2ve9NNNxXrDz/8cLHebo51DI62YY+IRS1KF9fcC4Ae4uuyQBKEHUiCsANJEHYgCcIOJMElrpPA1KlTW9aeeOKJ4roXXnhhsX7ZZZcV608//XSxjv5jymYgOcIOJEHYgSQIO5AEYQeSIOxAEoQdSIJx9knulFNOKdY3bdpUrO/evbtYf+6554r14eHhlrX77ruvuG4/fzcnE8bZgeQIO5AEYQeSIOxAEoQdSIKwA0kQdiAJxtmTW7hwYbH+4IMPFutHH310x9tevnx5sf7QQw8V6yMjIx1vezJjnB1IjrADSRB2IAnCDiRB2IEkCDuQBGEHkmCcHUVnnnlmsX7PPfcU6xdf3Plkv6tXry7WV6xYUay/9957HW/7cNbxOLvtB2zvtL1lzLLbbL9ne3P1c3mdzQKo30RO438h6dJxlv9LRMyrfn5Tb1sA6tY27BGxQdKuPvQCoIe6+YBuqe3XqtP841o9yfYS28O2W/8zMgA912nYfybpFEnzJI1IurvVEyNiKCLOjoizO9wWgBp0FPaI2BER+yPigKSfS5pfb1sA6tZR2G3PHPNwoaQtrZ4LYDC0HWe3vVbS9yUdL2mHpJ9Wj+dJCknvSvpxRLS9uJhx9sln2rRpxfqVV17ZstbuWnl73OHir6xfv75YX7BgQbE+WbUaZz9iAisuGmfx/V13BKCv+LoskARhB5Ig7EAShB1IgrADSXCJKxrzxRdfFOtHHFEeLNq3b1+xfskll7SsPf/888V1D2f8K2kgOcIOJEHYgSQIO5AEYQeSIOxAEoQdSKLtVW/I7ayzzirWr7vuumL9nHPOaVlrN47eztatW4v1DRs2dPX6kw1HdiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgnH2SW7u3LnF+tKlS4v1a665plg/8cQTD7mnidq/f3+xPjJS/u/lBw4cqLOdwx5HdiAJwg4kQdiBJAg7kARhB5Ig7EAShB1IgnH2w0C7sexFi8abaHdUu3H02bNnd9JSLYaHh4v1FStWFOvr1q2rs51Jr+2R3fZJtp+zvc32G7ZvqZZPt/2M7beq2+N63y6ATk3kNH6fpL+PiD+X9FeSbrZ9uqRbJT0bEadKerZ6DGBAtQ17RIxExKbq/h5J2yTNknSVpDXV09ZIurpHPQKowSG9Z7c9W9L3JG2UNCMiRqTRPwi2T2ixzhJJS7rsE0CXJhx229+W9Iikn0TEx/a4c8d9Q0QMSRqqXoOJHYGGTGjozfa3NBr0X0bEo9XiHbZnVvWZknb2pkUAdWh7ZPfoIfx+Sdsi4p4xpXWSbpC0srp9vCcdTgIzZswo1k8//fRi/d577y3WTzvttEPuqS4bN24s1u+8886WtccfL//KcIlqvSZyGn++pL+V9LrtzdWy5RoN+a9tL5b0e0k/7EmHAGrRNuwR8Z+SWr1Bv7jedgD0Cl+XBZIg7EAShB1IgrADSRB2IAkucZ2g6dOnt6ytXr26uO68efOK9Tlz5nTSUi1efPHFYv3uu+8u1p966qli/bPPPjvkntAbHNmBJAg7kARhB5Ig7EAShB1IgrADSRB2IIk04+znnntusb5s2bJiff78+S1rs2bN6qinunz66acta6tWrSque8cddxTre/fu7agnDB6O7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQRJpx9oULF3ZV78bWrVuL9SeffLJY37dvX7FeuuZ89+7dxXWRB0d2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUjCEVF+gn2SpIcknSjpgKShiPh
X27dJuknS/1ZPXR4Rv2nzWuWNAehaRIw76/JEwj5T0syI2GT7aEkvS7pa0t9I+iQi7ppoE4Qd6L1WYZ/I/Owjkkaq+3tsb5PU7L9mAXDIDuk9u+3Zkr4naWO1aKnt12w/YPu4FusssT1se7i7VgF0o+1p/FdPtL8t6QVJKyLiUdszJH0gKST9k0ZP9W9s8xqcxgM91vF7dkmy/S1JT0p6KiLuGac+W9KTEXFmm9ch7ECPtQp729N425Z0v6RtY4NefXB30EJJW7ptEkDvTOTT+Ask/Yek1zU69CZJyyUtkjRPo6fx70r6cfVhXum1OLIDPdbVaXxdCDvQex2fxgOYHAg7kARhB5Ig7EAShB1IgrADSRB2IAnCDiRB2IEkCDuQBGEHkiDsQBKEHUiCsANJ9HvK5g8k/c+Yx8dXywbRoPY2qH1J9NapOnv7s1aFvl7P/o2N28MRcXZjDRQMam+D2pdEb53qV2+cxgNJEHYgiabDPtTw9ksGtbdB7Uuit071pbdG37MD6J+mj+wA+oSwA0k0Enbbl9p+0/bbtm9toodWbL9r+3Xbm5uen66aQ2+n7S1jlk23/Yztt6rbcefYa6i322y/V+27zbYvb6i3k2w/Z3ub7Tds31Itb3TfFfrqy37r+3t221Mk/U7SAknbJb0kaVFEbO1rIy3YflfS2RHR+BcwbP+1pE8kPXRwai3b/yxpV0SsrP5QHhcR/zAgvd2mQ5zGu0e9tZpm/O/U4L6rc/rzTjRxZJ8v6e2IeCcivpT0K0lXNdDHwIuIDZJ2fW3xVZLWVPfXaPSXpe9a9DYQImIkIjZV9/dIOjjNeKP7rtBXXzQR9lmS/jDm8XYN1nzvIelp2y/bXtJ0M+OYcXCarer2hIb7+bq203j309emGR+YfdfJ9OfdaiLs401NM0jjf+dHxF9KukzSzdXpKibmZ5JO0egcgCOS7m6ymWqa8Uck/SQiPm6yl7HG6asv+62JsG+XdNKYx9+R9H4DfYwrIt6vbndKekyjbzsGyY6DM+hWtzsb7ucrEbEjIvZHxAFJP1eD+66aZvwRSb+MiEerxY3vu/H66td+ayLsL0k61fZ3bR8p6UeS1jXQxzfYnlp9cCLbUyX9QIM3FfU6STdU92+Q9HiDvfyRQZnGu9U042p43zU+/XlE9P1H0uUa/UT+vyX9YxM9tOhrjqRXq583mu5N0lqNntb9n0bPiBZL+lNJz0p6q7qdPkC9/ZtGp/Z+TaPBmtlQbxdo9K3ha5I2Vz+XN73vCn31Zb/xdVkgCb5BByRB2IEkCDuQBGEHkiDsQBKEHUiCsANJ/D+f1mbt6t55/AAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.imshow(imgs[0]);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(0.9883)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "imgs[0,20,15]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([5, 0, 4, ..., 8, 4, 8]), torch.Size([50000]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "n,c = x_train.shape\n", "y_train, y_train.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor(0), tensor(9))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "min(y_train),max(y_train)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor(0), tensor(9))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_train.min(), y_train.max()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Random numbers" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The generator below is based on the Wichmann-Hill algorithm, which Python's `random` module used by default before version 2.3." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "rnd_state = None\n", "def seed(a):\n", "    # Split `a` into three remainders (mod 30268, 30306, 30322, in that order) and\n", "    # store each one plus 1 as the generator state, so every component is at least 1.\n", "    global rnd_state\n", "    state = []\n", "    for modulus in (30268, 30306, 30322):\n", "        a, remainder = divmod(a, modulus)\n", "        state.append(int(remainder) + 1)\n", "    rnd_state = tuple(state)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(4976, 20238, 499)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "seed(457428938475)\n", "rnd_state" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def rand():\n", "    # Advance the three-part state by one step and return a float in [0, 1).\n", "    # `seed` must have been called first; otherwise `rnd_state` is still None.\n", "    global rnd_state\n", "    x, y, z = rnd_state\n", "    x, y, z = (171 * x) % 30269, (172 * y) % 30307, (170 * z) % 30323\n", "    rnd_state = x, y, z\n", "    return (x/30269 + y/30307 + z/30323) % 1.0" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(0.7645251082582081, 0.7920889799553945, 0.06912886811267205)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rand(),rand(),rand()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "In parent: 0.9559050644103264\n", "In child: 0.9559050644103264\n" ] } ], "source": [ "if os.fork(): print(f'In parent: {rand()}')\n", "else:\n", " print(f'In child: {rand()}')\n", " os._exit(os.EX_OK)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "In parent: tensor([0.3242])\n", "In child: tensor([0.3242])\n" ] } ], "source": [ "if os.fork(): print(f'In parent: {torch.rand(1)}')\n", "else:\n", " print(f'In child: {torch.rand(1)}')\n", " os._exit(os.EX_OK)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.plot([rand() for _ in range(50)]);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAPHUlEQVR4nO3cf6zdd13H8efLlg0GIp29W2rb2WIq0BEJ4zonKEFrsjGMnQlLisIasqRRJ05jIh1/uD9Mk5EYg0QHaQZSIlnTjMXVH6BLEdHANu/Y2NbVuivV7rq6XkABMRm0vP3jfE2O3b3rueecey63n+cjac45n/P99vv55DbP++333vNNVSFJasP3rfQEJEmTY/QlqSFGX5IaYvQlqSFGX5IasnalJ3A+69evry1btqz0NCRpVXn44Ye/UlVT545/z0d/y5YtzMzMrPQ0JGlVSfJvC417eUeSGmL0JakhRl+SGmL0JakhRl+SGmL0JakhRl+SGmL0JakhRl+SGvI9/4lcLc2WvX+5Isf91zvetiLHhTbX3JqV+hrDhfd19kxfkhpi9CWpIV7e0Vis5H+/JQ3uvGf6ST6a5HSSJ/rGLk1yf5Knusd1fe/dlmQ2yfEk1/aNvyHJ4917H0yS8S9HkvRCBrm88zHgunPG9gJHqmobcKR7TZLtwC7gym6fO5Os6fb5ELAH2Nb9OffvlCQts/NGv6o+B3ztnOGdwIHu+QHghr7xg1X1XFWdAGaBq5NsAF5eVV+oqgI+3rePJGlChr2mf3lVnQKoqlNJLuvGNwIP9G031419p3t+7viCkuyh978CrrjiiiGnKGnc/NnN6jfuH+QudJ2+XmB8QVW1H9gPMD09veh2UquMr4Y17K9sPttdsqF7PN2NzwGb+7bbBDzTjW9aYFySNEHDRv8wsLt7vhu4r298V5KLk2yl9wPbh7pLQd9Mck33Wzs39e0jSZqQ817eSXI38BZgfZI54HbgDuBQkpuBk8CNAFV1NMkh4EngDHBLVZ3t/qpfpfebQC8BPtX9kSRN0HmjX1XvWOStHYtsvw/Yt8D4DPDaJc1OklbYhXZvJz+Ruwz8IVsb/DprNfLeO5LUkAv6TN8zMUn6/zzTl6SGGH1JaojRl6SGGH1JaojRl6SGGH1JaojRl6SGGH1JaojRl6SGGH1JaojRl6SGGH1JaojRl6SGGH1JaojRl6SGGH1JaojRl6SGGH1JaojRl6SGGH1JaojRl6SGGH1JaojRl6SGGH1JaojRl6SGGH1JaojRl6SGGH1JashI0U/yW0mOJnkiyd1JXpzk0iT3J3mqe1zXt/1tSWaTHE9y7ejTlyQtxdDRT7IR+A1guqpeC6wBdgF7gSNVtQ040r0myfbu/SuB64A7k6wZbfqSpKUY9fLOWuAlSdYClwDPADuBA937B4Abuuc7gYNV9VxVnQBmgatHPL4kaQmGjn5V/Tvw+8BJ4BTw9ar6G+DyqjrVbXMKuKzbZSPwdN9fMdeNPU+SPUlmkszMz88PO0VJ0jlGubyzjt7Z+1bgh4CXJnnnC+2ywFgttGFV7a+q6aqanpqaGnaKkqRzjHJ55+eAE1U1X1XfAe4F3gg8m2QDQPd4utt+Dtjct/8mepeDJEkTMkr0TwLXJLkkSYAdwDHgMLC722Y3cF/3/DCwK8nFSbYC24CHRji+JGmJ1g67Y1U9mOQe4IvAGeARYD/wMuBQkpvpfWO4sdv+aJJDwJPd9rdU1dkR5y9JWoKhow9Q
VbcDt58z/By9s/6Ftt8H7BvlmJKk4fmJXElqiNGXpIYYfUlqiNGXpIYYfUlqiNGXpIYYfUlqiNGXpIYYfUlqiNGXpIYYfUlqiNGXpIYYfUlqiNGXpIYYfUlqiNGXpIYYfUlqiNGXpIYYfUlqiNGXpIYYfUlqiNGXpIYYfUlqiNGXpIYYfUlqiNGXpIYYfUlqiNGXpIYYfUlqiNGXpIYYfUlqyEjRT/KKJPck+ackx5L8ZJJLk9yf5KnucV3f9rclmU1yPMm1o09fkrQUo57p/yHw6ap6NfA64BiwFzhSVduAI91rkmwHdgFXAtcBdyZZM+LxJUlLMHT0k7wceDPwEYCq+nZV/RewEzjQbXYAuKF7vhM4WFXPVdUJYBa4etjjS5KWbpQz/VcC88CfJHkkyV1JXgpcXlWnALrHy7rtNwJP9+0/1409T5I9SWaSzMzPz48wRUlSv1Givxa4CvhQVb0e+BbdpZxFZIGxWmjDqtpfVdNVNT01NTXCFCVJ/UaJ/hwwV1UPdq/vofdN4NkkGwC6x9N922/u238T8MwIx5ckLdHQ0a+q/wCeTvKqbmgH8CRwGNjdje0G7uueHwZ2Jbk4yVZgG/DQsMeXJC3d2hH3fw/wiSQXAV8G3k3vG8mhJDcDJ4EbAarqaJJD9L4xnAFuqaqzIx5fkrQEI0W/qh4Fphd4a8ci2+8D9o1yTEnS8PxEriQ1xOhLUkOMviQ1xOhLUkOMviQ1xOhLUkOMviQ1xOhLUkOMviQ1xOhLUkOMviQ1xOhLUkOMviQ1xOhLUkOMviQ1xOhLUkOMviQ1xOhLUkOMviQ1xOhLUkOMviQ1xOhLUkOMviQ1xOhLUkOMviQ1xOhLUkOMviQ1xOhLUkOMviQ1xOhLUkOMviQ1ZOToJ1mT5JEkf9G9vjTJ/Ume6h7X9W17W5LZJMeTXDvqsSVJSzOOM/1bgWN9r/cCR6pqG3Cke02S7cAu4ErgOuDOJGvGcHxJ0oBGin6STcDbgLv6hncCB7rnB4Ab+sYPVtVzVXUCmAWuHuX4kqSlGfVM/wPA7wDf7Ru7vKpOAXSPl3XjG4Gn+7ab68aeJ8meJDNJZubn50ecoiTp/wwd/SQ/D5yuqocH3WWBsVpow6raX1XTVTU9NTU17BQlSedYO8K+bwJ+Icn1wIuBlyf5U+DZJBuq6lSSDcDpbvs5YHPf/puAZ0Y4viRpiYY+06+q26pqU1VtofcD2s9U1TuBw8DubrPdwH3d88PAriQXJ9kKbAMeGnrmkqQlG+VMfzF3AIeS3AycBG4EqKqjSQ4BTwJngFuq6uwyHF+StIixRL+qPgt8tnv+VWDHItvtA/aN45iSpKXzE7mS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1JCho59kc5K/TXIsydEkt3bjlya5P8lT3eO6vn1uSzKb5HiSa8exAEnS4EY50z8D/HZVvQa4BrglyXZgL3CkqrYBR7rXdO/tAq4ErgPuTLJmlMlLkpZm6OhX1amq+mL3/JvAMWAjsBM40G12ALihe74TOFhVz1XVCWAWuHrY40uSlm4s1/STbAFeDzwIXF5Vp6D3jQG4rNtsI/B0325z3dhCf9+eJDNJZubn58cxRUkSY4h+kpcBnwR+s6q+8UKbLjBWC21YVfurarqqpqempkadoiSpM1L0k7yIXvA/UVX3dsPPJtnQvb8BON2NzwGb+3bfBDwzyvElSUszym/vBPgIcKyq/qDvrcPA7u75buC+vvFdSS5OshXYBjw07PElSUu3doR93wS8C3g8yaPd2PuAO4BDSW4GTgI3AlTV0SSHgCfp/ebPLVV1doTjS5KWaOjoV9U/sPB1eoAdi+yz
D9g37DElSaPxE7mS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1BCjL0kNMfqS1JCJRz/JdUmOJ5lNsnfSx5eklk00+knWAH8MvBXYDrwjyfZJzkGSWjbpM/2rgdmq+nJVfRs4COyc8BwkqVlrJ3y8jcDTfa/ngJ84d6Mke4A93cv/TnJ8yOOtB74y5L6rlWtuQ2trbm295P0jr/mHFxqcdPSzwFg9b6BqP7B/5IMlM1U1Perfs5q45ja0tubW1gvLt+ZJX96ZAzb3vd4EPDPhOUhSsyYd/X8EtiXZmuQiYBdweMJzkKRmTfTyTlWdSfLrwF8Da4CPVtXRZTzkyJeIViHX3IbW1tzaemGZ1pyq511SlyRdoPxEriQ1xOhLUkMuiOif79YO6flg9/5jSa5aiXmOywDr/eVunY8l+XyS163EPMdp0Nt3JPnxJGeTvH2S81sOg6w5yVuSPJrkaJK/m/Qcx22Af9s/kOTPk3ypW/O7V2Ke45Lko0lOJ3likffH366qWtV/6P1A+F+AVwIXAV8Ctp+zzfXAp+h9TuAa4MGVnvcyr/eNwLru+VtX83oHXXPfdp8B/gp4+0rPewJf51cATwJXdK8vW+l5T2DN7wPe3z2fAr4GXLTScx9hzW8GrgKeWOT9sbfrQjjTH+TWDjuBj1fPA8ArkmyY9ETH5LzrrarPV9V/di8foPd5iNVs0Nt3vAf4JHB6kpNbJoOs+ZeAe6vqJEBVrfZ1D7LmAr4/SYCX0Yv+mclOc3yq6nP01rCYsbfrQoj+Qrd22DjENqvFUtdyM70zhdXsvGtOshH4ReDDE5zXchrk6/yjwLokn03ycJKbJja75THImv8IeA29D3U+DtxaVd+dzPRWxNjbNenbMCyHQW7tMNDtH1aJgdeS5GfoRf+nlnVGy2+QNX8AeG9Vne2dBK56g6x5LfAGYAfwEuALSR6oqn9e7sktk0HWfC3wKPCzwI8A9yf5+6r6xjLPbaWMvV0XQvQHubXDhXT7h4HWkuTHgLuAt1bVVyc0t+UyyJqngYNd8NcD1yc5U1V/NpEZjt+g/66/UlXfAr6V5HPA64DVGv1B1vxu4I7qXfCeTXICeDXw0GSmOHFjb9eFcHlnkFs7HAZu6n4Sfg3w9ao6NemJjsl515vkCuBe4F2r+Kyv33nXXFVbq2pLVW0B7gF+bRUHHwb7d30f8NNJ1ia5hN4da49NeJ7jNMiaT9L7nw1JLgdeBXx5orOcrLG3a9Wf6dcit3ZI8ivd+x+m99sc1wOzwP/QO1tYlQZc7+8CPwjc2Z35nqlVfIfCAdd8QRlkzVV1LMmngceA7wJ3VdWCv/q3Ggz4df494GNJHqd36eO9VbVqb7mc5G7gLcD6JHPA7cCLYPna5W0YJKkhF8LlHUnSgIy+JDXE6EtSQ4y+JDXE6EtSQ4y+JDXE6EtSQ/4XLf0Ao/3fCcsAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.hist([rand() for _ in range(10000)]);" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2.67 ms ± 15.9 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" ] } ], "source": [ "%timeit -n 10 list(chunks([rand() for _ in range(7840)], 10))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "37.8 µs ± 12.9 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" ] } ], "source": [ "%timeit -n 10 torch.randn(784,10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Matrix multiplication" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "torch.manual_seed(1)\n", "weights = torch.randn(784,10)\n", "bias = torch.zeros(10)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "m1 = x_valid[:5]\n", "m2 = weights" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(torch.Size([5, 784]), torch.Size([784, 10]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m1.shape,m2.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((5, 784), (784, 10))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ar,ac = m1.shape # n_rows * n_cols\n", "br,bc = m2.shape\n", "(ar,ac),(br,bc)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([5, 10])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t1 = torch.zeros(ar, bc)\n", "t1.shape" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [ "for i in range(ar): # 5\n", " for j in range(bc): # 10\n", " for k in range(ac): # 784\n", " t1[i,j] += m1[i,k] * m2[k,j]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[-10.9417, -0.6844, -7.0038, -4.0066, -2.0857, -3.3588, 3.9127,\n", " -3.4375, -11.4696, -2.1153],\n", " [ 14.5430, 5.9977, 2.8914, -4.0777, 6.5914, -14.7383, -9.2787,\n", " 2.1577, -15.2772, -2.6758],\n", " [ 2.2204, -3.2171, -4.7988, -6.0453, 14.1661, -8.9824, -4.7922,\n", " -5.4446, -20.6758, 13.5657],\n", " [ -6.7097, 8.8998, -7.4611, -7.8966, 2.6994, -4.7260, -11.0278,\n", " -12.9776, -6.4443, 3.6376],\n", " [ -2.4444, -6.4034, -2.3984, -9.0371, 11.1772, -5.7724, -8.9214,\n", " -3.7862, -8.9827, 5.2797]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([5, 10])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t1.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[-10.94, -0.68, -7.00, -4.01, -2.09, -3.36, 3.91, -3.44, -11.47, -2.12],\n", " [ 14.54, 6.00, 2.89, -4.08, 6.59, -14.74, -9.28, 2.16, -15.28, -2.68],\n", " [ 2.22, -3.22, -4.80, -6.05, 14.17, -8.98, -4.79, -5.44, -20.68, 13.57],\n", " [ -6.71, 8.90, -7.46, -7.90, 2.70, -4.73, -11.03, -12.98, -6.44, 3.64],\n", " [ -2.44, -6.40, -2.40, -9.04, 11.18, -5.77, -8.92, -3.79, -8.98, 5.28]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.set_printoptions(precision=2, linewidth=140, sci_mode=False)\n", "t1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "np.set_printoptions(precision=2, 
linewidth=140)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def matmul(a,b):\n", " (ar,ac),(br,bc) = a.shape,b.shape\n", " c = torch.zeros(ar, bc)\n", " for i in range(ar):\n", " for j in range(bc):\n", " for k in range(ac): c[i,j] += a[i,k] * b[k,j]\n", " return c" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 421 ms, sys: 131 µs, total: 421 ms\n", "Wall time: 421 ms\n" ] } ], "source": [ "%time _=matmul(m1, m2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "39200" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ar*bc*ac" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Numba" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from numba import njit" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@njit\n", "def dot(a,b):\n", " res = 0.\n", " for i in range(len(a)): res+=a[i]*b[i]\n", " return res" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from numpy import array" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 184 ms, sys: 12 ms, total: 196 ms\n", "Wall time: 196 ms\n" ] }, { "data": { "text/plain": [ "20.0" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%time dot(array([1.,2,3]),array([2.,3,4]))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 15 µs, sys: 0 ns, total: 15 µs\n", "Wall time: 17.6 µs\n" ] }, { "data": { "text/plain": [ "20.0" ] }, "execution_count": null, 
"metadata": {}, "output_type": "execute_result" } ], "source": [ "%time dot(array([1.,2,3]),array([2.,3,4]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now only two of our loops are running in Python, not three:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def matmul(a,b):\n", " (ar,ac),(br,bc) = a.shape,b.shape\n", " c = torch.zeros(ar, bc)\n", " for i in range(ar):\n", " for j in range(bc): c[i,j] = dot(a[i,:], b[:,j])\n", " return c" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "m1a,m2a = m1.numpy(),m2.numpy()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from fastcore.test import *" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_close(t1,matmul(m1a, m2a))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "236 µs ± 3.51 µs per loop (mean ± std. dev. 
of 7 runs, 50 loops each)\n" ] } ], "source": [ "%timeit -n 50 matmul(m1a,m2a)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Elementwise ops" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[TryAPL](https://tryapl.org/)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([10., 6., -4.]), tensor([2., 8., 7.]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a = tensor([10., 6, -4])\n", "b = tensor([2., 8, 7])\n", "a,b" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([12., 14., 3.])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a + b" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(0.67)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(a < b).float().mean()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[1., 2., 3.],\n", " [4., 5., 6.],\n", " [7., 8., 9.]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m = tensor([[1., 2, 3], [4,5,6], [7,8,9]]); m" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Frobenius norm:\n", "\n", "$$\\| A \\|_F = \\left( \\sum_{i,j=1}^n | a_{ij} |^2 \\right)^{1/2}$$\n", "\n", "*Hint*: you don't normally need to write equations in LaTeX yourself, instead, you can click 'edit' in Wikipedia and copy the LaTeX from there (which is what I did for the above equation). Or on arxiv.org, click \"Download: Other formats\" in the top right, then \"Download source\"; rename the downloaded file to end in `.tgz` if it doesn't already, and you should find the source there, including the equations to copy and paste. 
This is the source LaTeX that I pasted to render the equation above:\n", "\n", "```latex\n", "$$\\| A \\|_F = \\left( \\sum_{i,j=1}^n | a_{ij} |^2 \\right)^{1/2}$$\n", "```" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(285.)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sf = (m*m).sum()\n", "sf" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor(16.88)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sf.sqrt()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([7., 8., 9.]), tensor([3., 6., 9.]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m[2,:],m[:,2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([7., 8., 9.])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m[2]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def matmul(a,b):\n", " (ar,ac),(br,bc) = a.shape,b.shape\n", " c = torch.zeros(ar, bc)\n", " for i in range(ar):\n", " for j in range(bc): c[i,j] = (a[i,:] * b[:,j]).sum()\n", " return c" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_close(t1,matmul(m1, m2))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "598 µs ± 4.2 µs per loop (mean ± std. dev. 
of 7 runs, 50 loops each)\n" ] } ], "source": [ "%timeit -n 50 _=matmul(m1, m2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def matmul(a,b):\n", " (ar,ac),(br,bc) = a.shape,b.shape\n", " c = torch.zeros(ar, bc)\n", " for i in range(ar):\n", " for j in range(bc): c[i,j] = torch.dot(a[i,:], b[:,j])\n", " return c" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_close(t1,matmul(m1, m2))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "481 µs ± 4.9 µs per loop (mean ± std. dev. of 7 runs, 50 loops each)\n" ] } ], "source": [ "%timeit -n 50 _=matmul(m1, m2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Broadcasting" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The term **broadcasting** describes how arrays with different shapes are treated during arithmetic operations.\n", "\n", "From the [Numpy Documentation](https://docs.scipy.org/doc/numpy-1.10.0/user/basics.broadcasting.html):\n", "\n", " The term broadcasting describes how numpy treats arrays with \n", " different shapes during arithmetic operations. Subject to certain \n", " constraints, the smaller array is “broadcast” across the larger \n", " array so that they have compatible shapes. Broadcasting provides a \n", " means of vectorizing array operations so that looping occurs in C\n", " instead of Python. 
It does this without making needless copies of \n", " data and usually leads to efficient algorithm implementations.\n", " \n", "In addition to the efficiency of broadcasting, it allows developers to write less code, which typically leads to fewer errors.\n", "\n", "*This section was adapted from [Chapter 4](http://nbviewer.jupyter.org/github/fastai/numerical-linear-algebra/blob/master/nbs/4.%20Compressed%20Sensing%20of%20CT%20Scans%20with%20Robust%20Regression.ipynb#4.-Compressed-Sensing-of-CT-Scans-with-Robust-Regression) of the fast.ai [Computational Linear Algebra](https://github.com/fastai/numerical-linear-algebra) course.*" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Broadcasting with a scalar" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([10., 6., -4.])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([ True, True, False])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a > 0" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "How are we able to do `a > 0`? 
0 is being **broadcast** to have the same dimensions as a.\n", "\n", "For instance you can normalize our dataset by subtracting the mean (a scalar) from the entire data set (a matrix) and dividing by the standard deviation (another scalar), using broadcasting.\n", "\n", "Other examples of broadcasting with a scalar:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([11., 7., -3.])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a + 1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[1., 2., 3.],\n", " [4., 5., 6.],\n", " [7., 8., 9.]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[ 2., 4., 6.],\n", " [ 8., 10., 12.],\n", " [14., 16., 18.]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "2*m" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Broadcasting a vector to a matrix" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Although broadcasting a scalar is an idea that dates back to APL, the more powerful idea of broadcasting across higher rank tensors [comes from](https://mail.python.org/pipermail/matrix-sig/1995-November/000143.html) a little known language called [Yorick](https://software.llnl.gov/yorick-doc/manual/yorick_50.html).\n", "\n", "We can also broadcast a vector to a matrix:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([10., 20., 30.])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c = tensor([10.,20,30]); c" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { 
"data": { "text/plain": [ "tensor([[1., 2., 3.],\n", " [4., 5., 6.],\n", " [7., 8., 9.]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(torch.Size([3, 3]), torch.Size([3]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m.shape,c.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[11., 22., 33.],\n", " [14., 25., 36.],\n", " [17., 28., 39.]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m + c" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[11., 22., 33.],\n", " [14., 25., 36.],\n", " [17., 28., 39.]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c + m" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "t = c.expand_as(m)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[10., 20., 30.],\n", " [10., 20., 30.],\n", " [10., 20., 30.]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[11., 22., 33.],\n", " [14., 25., 36.],\n", " [17., 28., 39.]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m + t" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We don't really copy the rows, but it looks as if we did. In fact, the rows are given a *stride* of 0." 
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ " 10.0\n", " 20.0\n", " 30.0\n", "[torch.storage._TypedStorage(dtype=torch.float32, device=cpu) of size 3]" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t.storage()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((0, 1), torch.Size([3, 3]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "t.stride(), t.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "You can index with the special value [None] or use `unsqueeze()` to convert a 1-dimensional array into a 2-dimensional array (although one of those dimensions has value 1)." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[10., 20., 30.]]), tensor([[10., 20., 30.]]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c.unsqueeze(0), c[None, :]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(torch.Size([3]), torch.Size([1, 3]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c.shape, c.unsqueeze(0).shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(tensor([[10.],\n", " [20.],\n", " [30.]]),\n", " tensor([[10.],\n", " [20.],\n", " [30.]]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c.unsqueeze(1), c[:, None]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(torch.Size([3]), torch.Size([3, 1]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c.shape, c.unsqueeze(1).shape" ] }, 
{ "cell_type": "markdown", "metadata": {}, "source": [ "You can always skip trailing ':'s. And '...' means '*all preceding dimensions*'" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(torch.Size([1, 3]), torch.Size([3, 1]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c[None].shape,c[...,None].shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[10., 10., 10.],\n", " [20., 20., 20.],\n", " [30., 30., 30.]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c[:,None].expand_as(m)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[11., 12., 13.],\n", " [24., 25., 26.],\n", " [37., 38., 39.]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m + c[:,None]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[11., 22., 33.],\n", " [14., 25., 36.],\n", " [17., 28., 39.]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m + c[None,:]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Broadcasting Rules" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[10., 20., 30.]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c[None,:]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([1, 3])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c[None,:].shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ 
"tensor([[10.],\n", " [20.],\n", " [30.]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c[:,None]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([3, 1])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c[:,None].shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[100., 200., 300.],\n", " [200., 400., 600.],\n", " [300., 600., 900.]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c[None,:] * c[:,None]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[False, True, True],\n", " [False, False, True],\n", " [False, False, False]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c[None] > c[:,None]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[ 1., 4., 9.],\n", " [16., 25., 36.],\n", " [49., 64., 81.]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m*m" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "When operating on two arrays/tensors, Numpy/PyTorch compares their shapes element-wise. It starts with the **trailing dimensions**, and works its way forward. Two dimensions are **compatible** when\n", "\n", "- they are equal, or\n", "- one of them is 1, in which case that dimension is broadcasted to make it the same size\n", "\n", "Arrays do not need to have the same number of dimensions. For example, if you have a `256*256*3` array of RGB values, and you want to scale each color in the image by a different value, you can multiply the image by a one-dimensional array with 3 values. 
Lining up the sizes of the trailing axes of these arrays according to the broadcast rules, shows that they are compatible:\n", "\n", " Image (3d array): 256 x 256 x 3\n", " Scale (1d array): 3\n", " Result (3d array): 256 x 256 x 3\n", "\n", "The [numpy documentation](https://docs.scipy.org/doc/numpy-1.13.0/user/basics.broadcasting.html#general-broadcasting-rules) includes several examples of what dimensions can and can not be broadcast together." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Matmul with broadcasting" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(torch.Size([784]), torch.Size([784, 10]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "digit = m1[0]\n", "digit.shape,m2.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([784, 1])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "digit[:,None].shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([784, 10])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "digit[:,None].expand_as(m2).shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([784, 10])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(digit[:,None]*m2).shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def matmul(a,b):\n", " (ar,ac),(br,bc) = a.shape,b.shape\n", " c = torch.zeros(ar, bc)\n", " for i in range(ar):\n", "# c[i,j] = (a[i,:] * b[:,j]).sum() # previous version\n", " c[i] = (a[i,:,None] * b).sum(dim=0) # broadcast version\n", " return c" ] }, { "cell_type": "code", "execution_count": 
null, "metadata": {}, "outputs": [], "source": [ "test_close(t1,matmul(m1, m2))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "70.1 µs ± 1.97 µs per loop (mean ± std. dev. of 7 runs, 50 loops each)\n" ] } ], "source": [ "%timeit -n 50 _=matmul(m1, m2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Our time has gone from ~500ms to <0.1ms, an over 5000x improvement! We can run on the whole dataset now." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[ 0.96, -2.96, -2.11, ..., -15.09, -17.69, 0.60],\n", " [ 6.89, -0.34, 0.79, ..., -17.13, -25.36, 16.23],\n", " [-10.18, 7.38, 4.13, ..., -6.73, -6.79, -1.58],\n", " ...,\n", " [ 7.40, 7.64, -3.50, ..., -1.02, -16.22, 2.07],\n", " [ 3.25, 9.52, -9.37, ..., 2.98, -19.58, -1.96],\n", " [ 15.70, 4.12, -5.62, ..., 8.08, -12.21, 0.42]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tr = matmul(x_train, weights)\n", "tr" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([50000, 10])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tr.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "CPU times: user 6.59 s, sys: 200 ms, total: 6.79 s\n", "Wall time: 663 ms\n" ] } ], "source": [ "%time _=matmul(x_train, weights)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Einstein summation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "[Einstein summation](https://ajcr.net/Basic-guide-to-einsum/) ([`einsum`](https://numpy.org/doc/stable/reference/generated/numpy.einsum.html)) is a compact representation for combining products and sums in a general way. 
The key rules are:\n", "\n", "- Repeating letters between input arrays means that values along those axes will be multiplied together.\n", "- Omitting a letter from the output means that values along that axis will be summed." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(torch.Size([5, 784]), torch.Size([784, 10]))" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "m1.shape,m2.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "torch.Size([5, 784, 10])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# c[i,j] += a[i,k] * b[k,j]\n", "# c[i,j] = (a[i,:] * b[:,j]).sum()\n", "mr = torch.einsum('ik,kj->ikj', m1, m2)\n", "mr.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[-10.94, -0.68, -7.00, -4.01, -2.09, -3.36, 3.91, -3.44, -11.47, -2.12],\n", " [ 14.54, 6.00, 2.89, -4.08, 6.59, -14.74, -9.28, 2.16, -15.28, -2.68],\n", " [ 2.22, -3.22, -4.80, -6.05, 14.17, -8.98, -4.79, -5.44, -20.68, 13.57],\n", " [ -6.71, 8.90, -7.46, -7.90, 2.70, -4.73, -11.03, -12.98, -6.44, 3.64],\n", " [ -2.44, -6.40, -2.40, -9.04, 11.18, -5.77, -8.92, -3.79, -8.98, 5.28]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mr.sum(1)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[-10.94, -0.68, -7.00, -4.01, -2.09, -3.36, 3.91, -3.44, -11.47, -2.12],\n", " [ 14.54, 6.00, 2.89, -4.08, 6.59, -14.74, -9.28, 2.16, -15.28, -2.68],\n", " [ 2.22, -3.22, -4.80, -6.05, 14.17, -8.98, -4.79, -5.44, -20.68, 13.57],\n", " [ -6.71, 8.90, -7.46, -7.90, 2.70, -4.73, -11.03, -12.98, -6.44, 3.64],\n", " [ -2.44, -6.40, -2.40, -9.04, 11.18, -5.77, -8.92, -3.79, -8.98, 5.28]])" ] }, 
"execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "torch.einsum('ik,kj->ij', m1, m2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def matmul(a,b): return torch.einsum('ik,kj->ij', a, b)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_close(tr, matmul(x_train, weights), eps=1e-3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "15.1 ms ± 176 µs per loop (mean ± std. dev. of 7 runs, 5 loops each)\n" ] } ], "source": [ "%timeit -n 5 _=matmul(x_train, weights)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## pytorch op" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can use pytorch's function or operator directly for matrix multiplication." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "test_close(tr, x_train@weights, eps=1e-3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "15.2 ms ± 96.2 µs per loop (mean ± std. dev. 
of 7 runs, 5 loops each)\n" ] } ], "source": [ "%timeit -n 5 _=torch.matmul(x_train, weights)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CUDA" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def matmul(grid, a,b,c):\n", " i,j = grid\n", " if i < c.shape[0] and j < c.shape[1]:\n", " tmp = 0.\n", " for k in range(a.shape[1]): tmp += a[i, k] * b[k, j]\n", " c[i,j] = tmp" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[-10.94, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00],\n", " [ 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00],\n", " [ 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00],\n", " [ 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00],\n", " [ 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res = torch.zeros(ar, bc)\n", "matmul((0,0), m1, m2, res)\n", "res" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def launch_kernel(kernel, grid_x, grid_y, *args, **kwargs):\n", " for i in range(grid_x):\n", " for j in range(grid_y): kernel((i,j), *args, **kwargs)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "tensor([[-10.94, -0.68, -7.00, -4.01, -2.09, -3.36, 3.91, -3.44, -11.47, -2.12],\n", " [ 14.54, 6.00, 2.89, -4.08, 6.59, -14.74, -9.28, 2.16, -15.28, -2.68],\n", " [ 2.22, -3.22, -4.80, -6.05, 14.17, -8.98, -4.79, -5.44, -20.68, 13.57],\n", " [ -6.71, 8.90, -7.46, -7.90, 2.70, -4.73, -11.03, -12.98, -6.44, 3.64],\n", " [ -2.44, -6.40, -2.40, -9.04, 11.18, -5.77, -8.92, -3.79, -8.98, 5.28]])" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "res = torch.zeros(ar, bc)\n", "launch_kernel(matmul, ar, bc, m1, m2, res)\n", 
"res" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from numba import cuda" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def matmul(grid, a,b,c):\n", " i,j = grid\n", " if i < c.shape[0] and j < c.shape[1]:\n", " tmp = 0.\n", " for k in range(a.shape[1]): tmp += a[i, k] * b[k, j]\n", " c[i,j] = tmp" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "@cuda.jit\n", "def matmul(a,b,c):\n", " i, j = cuda.grid(2)\n", " if i < c.shape[0] and j < c.shape[1]:\n", " tmp = 0.\n", " for k in range(a.shape[1]): tmp += a[i, k] * b[k, j]\n", " c[i,j] = tmp" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "r = np.zeros(tr.shape)\n", "m1g,m2g,rg = map(cuda.to_device, (x_train,weights,r))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(50000, 10)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "r.shape" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(3125, 1)" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "TPB = 16\n", "rr,rc = r.shape\n", "blockspergrid = (math.ceil(rr / TPB), math.ceil(rc / TPB))\n", "blockspergrid" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "matmul[blockspergrid, (TPB,TPB)](m1g,m2g,rg)\n", "r = rg.copy_to_host()\n", "test_close(tr, r, eps=1e-3)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3.61 ms ± 708 µs per loop (mean ± std. dev. 
of 7 runs, 10 loops each)\n" ] } ], "source": [ "%%timeit -n 10\n", "matmul[blockspergrid, (TPB,TPB)](m1g,m2g,rg)\n", "r = rg.copy_to_host()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "m1c,m2c = x_train.cuda(),weights.cuda()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "r=(m1c@m2c).cpu()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "458 µs ± 93.1 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" ] } ], "source": [ "%timeit -n 10 r=(m1c@m2c).cpu()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Our broadcasting version was >500ms, and our CUDA version is around 0.5ms, which is another 1000x improvement compared to broadcasting. So our total speedup is around 5 million times!" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 2 }