{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# k-means Clustering from scratch" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (3.1.3)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (0.10.0)\n", "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (2.8.1)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (1.1.0)\n", "Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (1.18.1)\n", "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (2.4.6)\n", "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from cycler>=0.10->matplotlib) (1.14.0)\n", "Requirement already satisfied: setuptools in /usr/local/lib/python3.6/dist-packages (from kiwisolver>=1.0.1->matplotlib) (45.2.0)\n" ] } ], "source": [ "!pip3 install matplotlib" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import csv\n", "from random import sample\n", "from statistics import mean\n", "from math import sqrt, inf\n", "from pathlib import Path\n", "from collections import defaultdict\n", "from typing import List, Dict, Tuple\n", "from matplotlib import pyplot as plt" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Ensure that we have a `data` directory we use to store downloaded data\n", "!mkdir -p data\n", "data_dir: Path = Path('data')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "File 
‘data/iris.data’ already there; not retrieving.\n", "\n" ] } ], "source": [
 "# Downloading the Iris data set\n",
 "!wget -nc -P data https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
 "6.9,3.1,5.1,2.3,Iris-virginica\n",
 "5.8,2.7,5.1,1.9,Iris-virginica\n",
 "6.8,3.2,5.9,2.3,Iris-virginica\n",
 "6.7,3.3,5.7,2.5,Iris-virginica\n",
 "6.7,3.0,5.2,2.3,Iris-virginica\n",
 "6.3,2.5,5.0,1.9,Iris-virginica\n",
 "6.5,3.0,5.2,2.0,Iris-virginica\n",
 "6.2,3.4,5.4,2.3,Iris-virginica\n",
 "5.9,3.0,5.1,1.8,Iris-virginica\n",
 "\n"
] } ], "source": [
 "# The structure of the Iris data set is as follows:\n",
 "# Sepal Length, Sepal Width, Petal Length, Petal Width, Class\n",
 "!tail data/iris.data"
] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [
 "# Create the Python path pointing to the `iris.data` file\n",
 "data_path: Path = data_dir / 'iris.data'"
] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [
 "# The list in which we store the \"petal length\" and \"sepal width\" as vectors (a vector is a list of floats)\n",
 "data_points: List[List[float]] = []\n",
 "\n",
 "# Indexes according to the data set description\n",
 "petal_length_idx: int = 2\n",
 "sepal_width_idx: int = 1\n",
 "\n",
 "# Read the `iris.data` file and parse it line-by-line\n",
 "# (the `csv` module asks for files to be opened with `newline=''` so that it can handle line endings itself)\n",
 "with open(data_path, newline='') as csv_file:\n",
 "    reader = csv.reader(csv_file, delimiter=',')\n",
 "    for row in reader:\n",
 "        # Check if the given row is a valid iris data point (this also skips the blank lines at the end of the file)\n",
 "        if len(row) == 5:\n",
 "            # The class label is parsed for completeness; the clustering below only uses the two features\n",
 "            label: str = row[-1]\n",
 "            x1: float = float(row[petal_length_idx])\n",
 "            x2: float = float(row[sepal_width_idx])\n",
 "            data_points.append([x1, x2])"
] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "150" ] }, "execution_count": 8, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "len(data_points)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[[1.4, 3.5], [1.4, 3.0], [1.3, 3.2], [1.5, 3.1], [1.4, 3.6]]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_points[:5]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEHCAYAAACjh0HiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nO3dfbQddX3v8fcnh1RPgZJSIsUDITYit14DBE4FGq+iXqpWFkYeigq9Uruktip4vdXVKFe9aIuaSnVpfYigF4SLVZ4udlktV0QBAT2JSJQIikUxtRJJgaCphuR7/9hzkpN99sPsPTN7Zs98Xmtl5ZzZ8/CbvWf298zM9/v7KSIwM7PmWlB2A8zMrFwOBGZmDedAYGbWcA4EZmYN50BgZtZwDgRmZg23V9EbkDQBzACbIuKkttfOBtYAm5JJH4qIi3ut74ADDoilS5cW0FIzs/pat27dzyJicafXCg8EwHnARuA3urz+DxHxurQrW7p0KTMzM7k0zMysKST9sNtrhd4aknQw8GKg51/5ZmZWnqKfEbwfeDOws8c8p0q6S9JVkg7pNIOkcyTNSJrZvHlzIQ01M2uqwgKBpJOAByNiXY/ZPgcsjYgjgBuASzvNFBFrI2I6IqYXL+54i8vMzIZU5BXBSuBkSfcDnwaeJ+nyuTNExEMR8cvk14uBYwpsj5mZdVBYIIiI1RFxcEQsBV4G3BgRZ82dR9JBc349mdZDZTMzG6FRZA3tQdIFwExEXA+cK+lk4HFgC3D2qNtjZtZ0GrduqKenp6PK6aPXfXMTa754D//68DaevGiSN73gcFatmCq7WWbWcJLWRcR0p9dGfkVQZ9d9cxOrr9nAtu07ANj08DZWX7MBwMHAzCrLXUzkaM0X79kVBGZt276DNV+8p6QWmZn150CQo399eNtA083MqsCBIEdPXjQ50HQzsypwIMjRm15wOJMLJ/aYNrlwgje94PCSWmRm1p8fFudo9oGws4bMbJw4EORs1Yopf/Gb2VjxrSEzs4ZzIDAzazgHAjOzhnMgMDNrOAcCM7OGcyAwM2s4BwIzs4ZzIDAzazgHAjOzhnNl8ZA8AI2Z1YUDwRA8AI2Z1YlvDQ3BA9CYWZ04EAzBA9CYWZ04EAzBA9CYWZ04EAzBA9CYWZ34YfEQPACNmdWJA8GQPACNmdWFA0HOXF9gZuPGgSBHri8ws3Hkh8U5cn2BmY0jB4Icub7AzMaRA0GOXF9gZuPIgSBHri8ws3Hkh8U5cn2BmY0jB4Kcub7AzMZN4YFA0gQwA2yKiJPaXnsCcBlwDPAQcEZE3F90m3pxHYCZNc0onhGcB2zs8tqfAv8eEU8F/g54zwja09VsHcCmh7cR7K4DuO6bm8pslplZoQoNBJIOBl4MXNxllpcAlyY/XwU8X5KKbFMvrgMwsyYq+org/cCbgZ1dXp8CHgCIiMeBR4Dfap9J0jmSZiTNbN68uai2ug7AzBqpsEAg6STgwYhYl3VdEbE2IqYjYnrx4sU5tK4z1wGYWRMVeUWwEjhZ0v3Ap4HnSbq8b
Z5NwCEAkvYC9qP10LgUrgMwsyYqLBBExOqIODgilgIvA26MiLPaZrseeGXy82nJPFFUm/pZtWKKC09ZztSiSQRMLZrkwlOWO2vIzGpt5HUEki4AZiLieuAS4FOSvg9soRUwSuU6ADNrmpEEgoi4Cbgp+fltc6b/B3D6KNqQRRG1Ba5XMLOqcGVxH0WMMeBxC8ysStzpXB9F1Ba4XsHMqsSBoI8iagtcr2BmVeJA0EcRtQWuVzCzKnEg6KOI2gLXK5hZlfhhcR9FjDHgcQvMrEpUYv3WUKanp2NmZqbsZpiZjRVJ6yJiutNrviLIWaf6AJj/13+naZ2uCFxvYFZdWc7PKp3bviLIUXt9AMDCBQLB9h273+eFE4KA7Tt3T5tcODGvO4tO6+s0n5mNXpbzs4xzu9cVgR8W56hTfcD2nbFHEIBWUJgbBKBzHYHrDcyqK8v5WbVz24EgR1nrANqXd72BWXVlOT+rdm47EOQoax1A+/KuNzCrriznZ9XObQeCHHWqD1i4QK1nAnOnTaj17GCOTnUErjcwq64s52fVzm1nDeWoW31A2mntD4lcb2BWXVnOz6qd284aMjNrANcRjNCZH7+NW+/bsuv3lcv254pXH19ii8zMevMzghy1BwGAW+/bwpkfv62kFpmZ9edAkKP2INBvuplZFTgQmJk1nAOBmVnDORDkaOWy/QeabmZWBQ4EObri1cfP+9J31pCZVZ3TR3PmL30zGze+IjAza7jGXBHkPQhElQaVMLPh+DxuaUQgaB8EYtPD21h9zQaAoT70vNdnZqPn83i3RtwaynsQiKoNKmFmg/N5vFsjAkHeg0BUbVAJMxucz+PdGhEI8h4EomqDSpjZ4Hwe79aIQJD3IBBVG1TCzAbn83i3RjwsznsQiKoNKmFmg/N5vFuqgWkkTQGHMidwRMRXC2xXVx6YxsxscJkGppH0HuAM4G5g9hF7AD0DgaQnJvM8IdnOVRHx9rZ5zgbWAJuSSR+KiIv7tWnUzr9uA1fe8QA7IpiQePmxhwDMm/auVcs75iWD/+ows+rqe0Ug6R7giIj45UArlgTsHRGPSVoI3AKcFxG3z5nnbGA6Il6Xdr2jviI4/7oNXH77j1LNu3LZ/qz/0SN7pKQtXCAQbN+x+32eXDjBhacsdzAws5HpdUWQ5mHxD4CFg240Wh5Lfl2Y/BuvAZJp/dWf1q33bZmXl7x9Z+wRBKC5ucpmVk1dbw1J+iCtL+5fAHdK+hKw66ogIs7tt3JJE8A64KnA30fEHR1mO1XSs4F7gf8eEfO+eSWdA5wDsGTJkn6bzdWOFM9QhtHEXGUzq6Zezwhm77+sA65vey3Vt2NE7ACOkrQIuFbSMyLi23Nm+RxwZUT8UtKfAZcCz+uwnrXAWmjdGkqz7bxMSIUEgybmKptZNXW9NRQRl0bEpcCi2Z/nTPvNQTYSEQ8DXwZe2Db9oTnPHi4Gjhms+cWbfTCcxspl+8/LS164QCyc0B7TmpqrbGbVlOYZwSs7TDu730KSFidXAkiaBE4Evts2z0Fzfj0Z2JiiPSP1rlXLOeu4JUyo9WU+IXHWcUs6Trvi1cdz4SnLmVo0iYCpRZOsOf1I1px25B7T/KDYzKqka9aQpJcDrwCeBdw856V9gZ0R8fyeK5aOoHWrZ4JWwPlMRFwg6QJgJiKul3QhrQDwOLAF+POI+G7XleI6AjOzYQxbR/A14CfAAcD75kzfCtzVb6MRcRewosP0t835eTWwut+6qqhTbcG7Vi3nzI/fxq33bdk138pl+/OUxfvMm3f60P1T1Ra4v3TrZFyPi7TtHtf9G1epKourpApXBN1qCw7c99f46dZfpVrHAsHOOW99p9qC9v7Su81nzTKux0Xado/r/lXdUHUEkrZKerTbv+KaW33dagvSBgHYMwhA59oC95dunYzrcZG23eO6f+Os662hiNgXQNI7ad0i+hQg4EzgoG7LNcGoagvcX7p1Mq7HRdp2j+v+jbM0WUMnR
8SHI2JrRDwaER8BXlJ0w6psNlsob+21Be4v3ToZ1+MibbvHdf/GWZpA8HNJZ0qakLRA0pnAz4tuWJV1qy04cN9fS72OBW2xpFNtgftLt07G9bhI2+5x3b9xliYQvAL4I+Cnyb/Tk2mN1a224I63nsjKZfvvMe/KZft3nPeiPzqqb23BqhVT8+oS/MDMxvW4SNvucd2/ceasITOzBhiqjkDSmyPivXM6n9tDmk7n6ixrnrPzpG0U6nacdavfSaNu70WeehWUzXb34D+/27TnOW96eBurr9kAkOrAyrq8WRp1O87a63d2ROz6vV8wqNt7kbdezwgekKT2DufmdDzXWFnznJ0nbaNQt+OsW/1OmjFD6vZe5K3XFcHFwO9IWkeru4lbgdsiYutIWlZhWfOcnSdto1C346xb/U6aup66vRd569UN9TRwMPDXtAakORf4vqRvSfrwiNpXSVnznJ0nbaNQt+OsW/1Omrqeur0XeeuZPhoRv4iIm4APAH8H/D2wN23jCjRN1jxn50nbKNTtOOtWv5NmzJC6vRd565U19Arg94GjaF0RfAO4A3hWRPzbaJpXTbMPl4bNQMi6vFkadTvOZh8ID5M1VLf3Im+9xiPYCtwDfBT4akTcO8qGdeM6AjOzwQ07HsEi4EhaVwXvkHQ4rc7nbqP10PjG3FtaAZ1yjWd+uGXo8QS6rdN/iVgWWfr1h2x/GZc5pkCZ+11nqSuLJR1Iq3uJNwBPiYiJPosUosgrgk79oLePGzBrYoHYMeeFbv2lu291y1uWfv0XLhAItu/of+zmve2sx32m/Z4QBGxPcc7W1bDjERwh6TWSLpP0fVrPCJ4FfBA4tpimlqtTrnGnIADsEQSge06y85ctb1n69d++M/YIAt2WLWLbWY/7TPu9I/YIAnm0p0563Rr638AtwD8B50fE/CG5aiZrTnGn5Z2/bHnL2q//IOvMe9tZjvsy97vuetURHB0R50bElU0IApA9p7jT8s5ftrxl7dd/kHXmve0sx32Z+113abqhboxOucbt4wbMmmh7oVtOsvOXLW9Z+vVfuECt++V9li1i21mP+0z7PaHW85Ec21MnvW4NNU63XOMsWUPOX7a8pT2mus2XZtmitp3luC9zv+vO4xGYmTXAsOMRfI4O4xDMioiTc2hb7Zx40U1878HdI3ke9qS9ee1zD/NfIja0LPn4ReT8590e8F/vZetVWfycXgtGxFcKaVEfVb4iaA8C3TQtf9mGlyUfv4ic/7zb0ym/P2utg3U2VB1BRHyl17/imju+0gQBcP6ypZclH7+InP+829Mpvz9rrYMNru/DYkmHARcCTweeODs9In6nwHbVnvOXLY0s+fhF5PwX0Z60fM4UJ0366CeBjwCPA88FLgMuL7JRTeD8ZUsjSz5+ETn/RbQnLZ8zxUkTCCYj4ku0nif8MCLeAby42GaNp8OetHeq+Zy/bGllyccvIuc/7/Z0yu/PWutgg0sTCH4paQHwPUmvk/RSYJ+C2zWWbnjjCfOCwWFP2pv3n3EUU4smETC1aNIPvSy1VSumuPCU5UMdP2mXHWQbebdnzWlHsub0I/ecdvqRrDntSJ8zI9S3jkDS7wEbaXVL/U5gP+C9EXF78c2br8pZQ2ZmVTXseAQARMQ3kpUsAM5NO3i9pCcCXwWekGznqoh4e9s8T6D1zOEY4CHgjIi4P83685Alx/qzMz/i1vu27Jpn5bL9ueLVx2fajtVT2jEuuo20lfcYGYP01Z923kH2p935120Yetluy2d5L4o4N6v+HZDmimCa1gPjfZNJjwCvioh1fZYTsHdEPCZpIa2eTM+beyUh6S+AIyLiNZJeBrw0Is7otd68rgiy5FiLzpV2nYKBxyNotkHGuDjruCXzvgDzHiNjkL76Tz1miqvXbeo7nkH7dnvtT7vzr9vA5bfP79MyzbK9ll8A7Jzz+6jGTOikKt8BQ9URzPEJ4C8iYmlELAVeSysw9BQtjyW/Lkz+tR8tL
wEuTX6+Cnh+EkAKlyXHulvonHuFMOh2rJ4GGePiyjseyLR8mjEyBumr/8o7Hkg1nkGnIACd9yftPGmW7TXfzrbfRzVmQifj8B2QJhDsiIibZ3+JiFtopZL2JWlC0p3Ag8ANEXFH2yxTwAPJeh+ndbXxWx3Wc46kGUkzmzdvTrPpvoro2zzLdqyeBvmcd3S4Os/7+MvankGkWb7bPGm3PUgbRzFmwiDrq9J3QJpA8BVJH5N0gqTnSPowcJOkoyUd3WvBiNgREUcBBwPPlPSMYRoZEWsjYjoiphcvXjzMKuYpom/zLNuxehrkc57ocDGc9/GXtT2DSLN8t3nSbnuQNo5izIRB1lel74A0geBI4GnA24F3AL8LrADeB/xtmo1ExMPAl4EXtr20CTgEQNJetDKSHkqzzqyy5Fh3O/RWLtt/6O1YPQ0yxsXLjz0k0/JpxsgYpK/+lx97SKrxDNq3O6vT/qSdJ82yveZr/2Ib1ZgJnYzDd0CarKHnDrNiSYuB7RHxsKRJ4ETgPW2zXQ+8ErgNOA24MUbUL3bWvs3TZg15PIJmG2SMi04PR/MeI2PQvvo7rTPL/rSbnWfYrKFuy2d5L/I+N8fhOyBN1tCBwN8AT46IF0l6OnB8RFzSZ7kjaD0InqAVoD8TERdIugCYiYjrkxTTT9G6wtgCvCwiftBrva4jMDMbXKY6AlqD2H8SeGvy+73APwA9A0FE3EXrC759+tvm/PwfwOkp2mBmZgVJEwgOiIjPSFoNreweSTv6LVR3VS8QsXTK+hy7bTdtcVXdB3ip2vlVtfbkLc2toZuAU2mlfx4t6TjgPRHRc+CaolTh1lBVCkQsm7I+x27bPXrJfh1rUdqLq+o+wEvVzq+qtWdYWQvK3kjroe4ySbfS6hLi9Tm2b+yMQ4GI9VfW59htu52CAMwvmqr7AC9VO7+q1p4ipMkaWp8MW3k4rczJeyJie+Etq7BxKBCx/sr6HAddf3vRVN0HeKna+VW19hSh6xWBpN+T9Nuwq+r3GOCvgfdJmp8w3yDjUCBi/ZX1OQ66/vaiqboP8FK186tq7SlCr1tDHwN+BSDp2cC7ad0WegRYW3zTqmscCkSsv7I+x27b7VSQCPOLpuo+wEvVzq+qtacIvW4NTUTE7E3LM4C1EXE1cHXSf1BjjUOBiPVX1ufYa7tpsoYGKQorY/+yqtr5VbX2FKFr1pCkbwNHJemi3wXOiYivzr4WEUP1G5RVFbKGzMzGzbAFZVfS6nDuZ8A24OZkZU+ldXvIzNoMMujLKLbTaVqWQW0g+0AyaYxiG1BufUCVahN61hEkNQMHAf8cET9Ppj0N2Cci1o+miXvyFYFVVcf8/gJy+bPUEbQP2LJrettgN93amHUgmTRGsQ0otz6gjG0PXUcQEbdHxLWzQSCZdm9ZQcCsyjrm9xeQy5+ljqBTEID5g910a2PWgWTSGMU2oNz6gKrVJqQpKDOzFAbJK8+Sgz6q/PVO28k6kEwao9gGlFsfULXaBAcCs5wMkleeJQd9VPnrnbaTdSCZNEaxDSi3PqBqtQkOBGY56ZjfX0Auf5Y6gm4nfPvYMt3amHUgmTRGsQ0otz6garUJaXofNbMUBh30ZRTb6TQtS9ZQ1oFk0hjFNqDc+oCq1Sb07X20apw1ZGY2uKwD05hZB3mPHTAu/e2f+fHbUg3TWqU8eevNVwRmQ0ib6z6q2oIsBslpbw8Cs9qDQV368K+TrOMRmFmbtLnuo6otyGKQnPZuYya0T69anrz15kBgNoS0ue6jqi3Iooic9qrlyVtvDgRmQ0ib6z6q2oIsishpr1qevPXmQGA2hLS57qOqLchikJz2bmMmtE+vWp689eZAYDaEd61azlnHLdl1BTAhdewUbdWKKS48ZTlTiyYRMLVokjWnH8ma047cY1qZD1E7tbFbe6549fHzvvQ7ZQ0Nsk4rn7OGzMwawHUE1nijymkvYjtNzMdv4j53M4r3woHAaq89p33Tw9tYfc0Gg
FxPqCK2M6q2V0kT97mbUb0XfkZgtTeqnPYittPEfPwm7nM3o3ovHAis9kaV0+58/Hw0cZ+7GdV74UBgtTeqnHbn4+ejifvczajeCwcCq71R5bQXsZ0m5uM3cZ+7GdV74YfFVnuj6vu9iO1Urd/6UWjiPnczqvfCdQRmZg1QSh2BpEOAy4ADgQDWRsQH2uY5Afi/wL8kk66JiAuKapNV07jmjJc5zkDe71m39aUdc6FM43r8VElhVwSSDgIOioj1kvYF1gGrIuLuOfOcAPxlRJyUdr2+IqiXce23vsxxBvJ+z7qt7+gl+3XsdrpTVxplGdfjpwyljEcQET+JiPXJz1uBjYA/GdvDuOaMlznOQN7vWbf1dRt7oNtYDGUY1+OnakaSNSRpKbACuKPDy8dL+pakf5L0n7ssf46kGUkzmzdvLrClNmrjmjNe5jgDeb9ngy7XbSyGMozr8VM1hQcCSfsAVwNviIhH215eDxwaEUcCHwSu67SOiFgbEdMRMb148eJiG2wjNa4542WOM5D3ezboct3GYijDuB4/VVNoIJC0kFYQuCIirml/PSIejYjHkp8/DyyUdECRbbJqGdec8TLHGcj7Peu2vm5jD3Qbi6EM43r8VE2RWUMCLgE2RsRFXeb5beCnERGSnkkrMD1UVJusesY1Z7xbuztNq3q9Qq/1VT1raFyPn6opMmvoWcDNwAZgZzL5LcASgIj4qKTXAX8OPA5sA94YEV/rtV5nDZmZDa6UOoKIuAXoeTMxIj4EfKioNlj9jar/f/BfnVZf7mLCxtao+v9/01Xfgmilh+a1HbMqcadzNrZG1f//9h2xKwjktR2zKnEgsLE1yv7/896OWZU4ENjYGmX//3lvx6xKHAhsbI2q//+FE2r1I5TjdsyqxA+LbWyNsv//vLdjViUej8DMrAFKqSOoE/d3Xl1V+2xGVdfg48/y5EDQRxG56paPqn02o6pr8PFnefPD4j7c33l1Ve2zGVVdg48/y5sDQR/u77y6qvbZjLKuwcef5cmBoA/3d15dVftsRlnX4OPP8uRA0If7O6+uqn02o6pr8PFnefPD4j7c33l1Ve2zGWVdg48/y5PrCMzMGqBXHYFvDZmZNZxvDZnlaFTFX3XbjpXLgcAsJ6Mq/qrbdqx8vjVklpNRFX/VbTtWPgcCs5yMqvirbtux8jkQmOVkVMVfdduOlc+BwCwnoyr+qtt2rHx+WGyWk1EVf9VtO1Y+F5SZmTWAC8rMzKwrBwIzs4ZzIDAzazgHAjOzhnMgMDNrOAcCM7OGcyAwM2s4BwIzs4YrrLJY0iHAZcCBQABrI+IDbfMI+ADwh8AvgLMjYn1RbbLmcr/6Zt0V2cXE48D/iIj1kvYF1km6ISLunjPPi4DDkn/HAh9J/jfLjfvVN+utsFtDEfGT2b/uI2IrsBFoP+teAlwWLbcDiyQdVFSbrJncr75ZbyN5RiBpKbACuKPtpSnggTm//5j5wQJJ50iakTSzefPmopppNeV+9c16KzwQSNoHuBp4Q0Q8Osw6ImJtRExHxPTixYvzbaDVnvvVN+ut0EAgaSGtIHBFRFzTYZZNwCFzfj84mWaWG/erb9ZbYYEgyQi6BNgYERd1me164L+p5TjgkYj4SVFtsmZatWKKC09ZztSiSQRMLZrkwlOW+0GxWaLIrKGVwB8DGyTdmUx7C7AEICI+CnyeVuro92mlj/5Jge2xBlu1Yspf/GZdFBYIIuIWQH3mCeC1RbXBzMz6c2WxmVnDORCYmTWcA4GZWcM5EJiZNZwDgZlZw6mVuDM+JG0GfphhFQcAP8upOWWr075AvfanTvsC9dqfpu7LoRHRsWuGsQsEWUmaiYjpstuRhzrtC9Rrf+q0L1Cv/fG+zOdbQ2ZmDedAYGbWcE0MBGvLbkCO6rQvUK/9qdO+QL32x/vSpnHPCMzMbE9NvCIwM7M5HAjMzBquMYFA0ickPSjp22W3JStJh0j6sqS7JX1H0nllt2lYkp4o6
euSvpXsy/8qu01ZSZqQ9E1J/1h2W7KSdL+kDZLulDRTdnuykrRI0lWSvitpo6Tjy27TMCQdnnwms/8elfSGodfXlGcEkp4NPAZcFhHPKLs9WUg6CDgoItZL2hdYB6yKiLtLbtrAkgGM9o6Ix5IR7W4BzouI20tu2tAkvRGYBn4jIk4quz1ZSLofmI6IWhRgSboUuDkiLpb0a8CvR8TDZbcrC0kTtEZ2PDYihiq2bcwVQUR8FdhSdjvyEBE/iYj1yc9bgY3AWI66Ei2PJb8uTP6N7V8nkg4GXgxcXHZbbE+S9gOeTWvkRCLiV+MeBBLPB+4bNghAgwJBXUlaCqwA7ii3JcNLbqXcCTwI3BARY7svwPuBNwM7y25ITgL4Z0nrJJ1TdmMyegqwGfhkcuvuYkl7l92oHLwMuDLLChwIxpikfYCrgTdExKNlt2dYEbEjIo4CDgaeKWksb91JOgl4MCLWld2WHD0rIo4GXgS8NrnFOq72Ao4GPhIRK4CfA39VbpOySW5vnQx8Nst6HAjGVHI//Wrgioi4puz25CG5TP8y8MKy2zKklcDJyX31TwPPk3R5uU3KJiI2Jf8/CFwLPLPcFmXyY+DHc644r6IVGMbZi4D1EfHTLCtxIBhDyQPWS4CNEXFR2e3JQtJiSYuSnyeBE4Hvltuq4UTE6og4OCKW0rpcvzEiziq5WUOTtHeSjEByC+UPgLHNuouIfwMekHR4Mun5wNglWLR5ORlvC0GBg9dXjaQrgROAAyT9GHh7RFxSbquGthL4Y2BDcm8d4C0R8fkS2zSsg4BLk8yHBcBnImLs0y5r4kDg2tbfHewF/J+I+EK5Tcrs9cAVyS2VHwB/UnJ7hpYE5xOBP8u8rqakj5qZWWe+NWRm1nAOBGZmDedAYGbWcA4EZmYN50BgZtZwDgRWO5J2JD0yflvSZyX9ep/535JyvfdLOiDt9LxIWiXp6XN+v0lSLQZft2pwILA62hYRRyW9zP4KeE2f+VMFghKtAp7edy6zITkQWN3dDDwVQNJZydgHd0r6WNLZ3buByWTaFcl81yWdrH1n2I7WkqrcTyTb+6aklyTTz5Z0jaQvSPqepPfOWeZPJd2bLPNxSR+S9Pu0+pJZk7RxWTL76cl890r6LxneH7PmVBZb80jai1ZfLF+Q9LvAGcDKiNgu6cPAmRHxV5Jel3R6N+tVEbEl6fLiG5KujoiHBtz8W2l1MfGqpAuNr0v6f8lrR9HqMfaXwD2SPgjsAP4nrb5vtgI3At+KiK9Juh74x4i4KtkvgL0i4pmS/hB4O/BfB2yf2S4OBFZHk3O63riZVr9M5wDH0PpiB5ik1e11J+dKemny8yHAYcCggeAPaHVA95fJ708EliQ/fykiHgGQdDdwKHAA8JWI2JJM/yzwtB7rn+1ocB2wdMC2me3BgcDqaFvbX/izHfVdGhGrey0o6QRaf10fHxG/kHQTrS/xQQk4NSLuaVv/sbSuBGbtYLjzcHYdwy5vtoufEVhTfAk4TdKTACTtL+nQ5LXtSbfeAPsB/54Egf8EHDfk9r4IvD4JQEgkNfsAAADASURBVEha0Wf+bwDPkfSbyS2tU+e8thXYd8h2mPXlQGCNkIznfD6t0bbuAm6g1fMpwFrgruRh8ReAvSRtBN4NpB07+S5JP07+XQS8k9awm3dJ+k7ye6/2bQL+Bvg6cCtwP/BI8vKngTclD52XdV6D2fDc+6hZRUjaJyIeS64IrgU+ERHXlt0uqz9fEZhVxzuSh9zfBv4FuK7k9lhD+IrAzKzhfEVgZtZwDgRmZg3nQGBm1nAOBGZmDedAYGbWcP8fAThEasSNtkIAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
 "# Plot the `data_points`\n",
 "plt.scatter([item[0] for item in data_points], [item[1] for item in data_points])\n",
 "plt.xlabel('Petal Length')\n",
 "plt.ylabel('Sepal Width');"
] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [
 "# Function to compute the Euclidean distance\n",
 "# See: https://en.wikipedia.org/wiki/Euclidean_distance\n",
 "def distance(a: List[float], b: List[float]) -> float:\n",
 "    \"\"\"Return the Euclidean distance between the vectors `a` and `b`, which must have the same length.\"\"\"\n",
 "    assert len(a) == len(b)\n",
 "    return sqrt(sum((a_i - b_i) ** 2 for a_i, b_i in zip(a, b)))\n",
 "\n",
 "assert distance([1, 2], [1, 2]) == 0\n",
 "assert distance([1, 2, 3, 4], [5, 6, 7, 8]) == 8"
] },
{ "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [
 "# Function which computes the element-wise average of a list of vectors (a vector is a list of floats)\n",
 "def vector_mean(xs: List[List[float]]) -> List[float]:\n",
 "    \"\"\"Return the element-wise mean of the vectors in `xs`.\n",
 "\n",
 "    All vectors must have the same number of dimensions.\n",
 "    Raises a `ValueError` if `xs` is empty, as there is nothing to average then.\n",
 "    \"\"\"\n",
 "    if not xs:\n",
 "        raise ValueError('vector_mean requires at least one vector')\n",
 "    # Check that all vectors have the same number of dimensions\n",
 "    for prev, curr in zip(xs, xs[1:]):\n",
 "        assert len(prev) == len(curr)\n",
 "    # `zip(*xs)` regroups the values by dimension so that every dimension can be averaged on its own\n",
 "    return [mean(dim_values) for dim_values in zip(*xs)]\n",
 "\n",
 "assert vector_mean([[1], [2], [3]]) == [2]\n",
 "assert vector_mean([[1, 2], [3, 4], [5, 6]]) == [3, 4]\n",
 "assert vector_mean([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) == [4, 5, 6]"
] },
{ "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [
 "class KMeans:\n",
 "    \"\"\"k-means clustering which starts from `k` randomly sampled data points as centroids.\n",
 "\n",
 "    Call `train` with the data points, then read the result from `centroids` and `clusters`.\n",
 "    Both are keyed by the centroid index (0 to k - 1) and are listed in the same order.\n",
 "    \"\"\"\n",
 "\n",
 "    def __init__(self, k: int) -> None:\n",
 "        self._k: int = k\n",
 "        self._centroids: Dict[int, List[float]] = defaultdict(list)\n",
 "        self._clusters: Dict[int, List[List[float]]] = defaultdict(list)\n",
 "\n",
 "    def train(self, data_points: List[List[float]]) -> None:\n",
 "        \"\"\"Group `data_points` into `k` clusters.\n",
 "\n",
 "        Items are assigned to their nearest centroid and the centroids are moved to the mean\n",
 "        of their items, over and over again, until a round leaves every centroid unchanged.\n",
 "        \"\"\"\n",
 "        # Pick `k` random samples from the `data_points` and use them as the initial centroids\n",
 "        for i, centroid in enumerate(sample(data_points, self._k)):\n",
 "            self._centroids[i] = centroid\n",
 "        # Start the training process\n",
 "        while True:\n",
 "            # Starting a new round, removing all previous `cluster` associations (if any)\n",
 "            self._clusters.clear()\n",
 "            # Give every centroid its (possibly empty) cluster up front so that the entries\n",
 "            # in `clusters` are always listed in the same order as the ones in `centroids`\n",
 "            for centroid_idx in self._centroids:\n",
 "                self._clusters[centroid_idx] = []\n",
 "            # Append every `item` to the `cluster` with the nearest `centroid`\n",
 "            # (if there's a tie, `min` picks the centroid which comes first, i.e. the one with the lowest index)\n",
 "            for item in data_points:\n",
 "                closest_centroid_idx: int = min(self._centroids, key=lambda idx: distance(item, self._centroids[idx]))\n",
 "                self._clusters[closest_centroid_idx].append(item)\n",
 "            # The `vector_mean` of all items in the `cluster` should be the `cluster`s new centroid\n",
 "            centroids_to_update: List[Tuple[int, List[float]]] = []\n",
 "            for old_centroid_idx, old_centroid in self._centroids.items():\n",
 "                items: List[List[float]] = self._clusters[old_centroid_idx]\n",
 "                # A centroid can end up without items (e.g. when the same point was sampled twice as a centroid).\n",
 "                # There's nothing to average in that case, so such a centroid stays where it is\n",
 "                if not items:\n",
 "                    continue\n",
 "                new_centroid: List[float] = vector_mean(items)\n",
 "                if new_centroid != old_centroid:\n",
 "                    centroids_to_update.append((old_centroid_idx, new_centroid))\n",
 "            # If nothing changed, we're done\n",
 "            if not centroids_to_update:\n",
 "                break\n",
 "            # Update the centroids which changed\n",
 "            for idx, centroid in centroids_to_update:\n",
 "                self._centroids[idx] = centroid\n",
 "\n",
 "    @property\n",
 "    def centroids(self) -> Dict[int, List[float]]:\n",
 "        \"\"\"The current centroids, keyed by their index.\"\"\"\n",
 "        return self._centroids\n",
 "\n",
 "    @property\n",
 "    def clusters(self) -> Dict[int, List[List[float]]]:\n",
 "        \"\"\"The items assigned to each centroid in the last training round, keyed by centroid index.\"\"\"\n",
 "        return self._clusters"
] },
{ "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
 "The clusters centroids are: [[1.4941176470588236, 3.4], [4.925252525252525, 2.875757575757576]]\n",
 "The number of elements in each cluster are: [51, 99]\n"
] } ], "source": [
 "# Create a new KMeans instance and train it\n",
 "km: KMeans = KMeans(2)\n",
 "km.train(data_points)\n",
 "\n",
 "print(f'The clusters centroids are: {list(km.centroids.values())}')\n",
 "print(f'The number of elements in each cluster are: {[len(items) for items in km.clusters.values()]}')"
] },
{ "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYIAAAEHCAYAAACjh0HiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOzdd5hcZfXA8e+502dLeggJJaFKDSX03kR6UYqAFKWJ/AAVUEBBUIqoFEGaIE2aNIEIQhQiTZAE6SCChJqQns3uzk675/fHnezu1J2dndnZcj7Pkyc7d+6975nZnfvOvfe87xFVxRhjzPDl1DsAY4wx9WUdgTHGDHPWERhjzDBnHYExxgxz1hEYY8wwZx2BMcYMc/5aNyAiPmAW8Lmq7pvz3LHAr4DPM4uuVdWbS+1v7NixOnny5BpEaowxQ9fs2bMXquq4Qs/VvCMATgfeBZqLPH+fqp5a7s4mT57MrFmzqhKYMcYMFyLycbHnanppSERWAfYBSn7LN8YYUz+1vkdwFXA24JZY5+si8oaIPCAiqxZaQUROFJFZIjJrwYIFNQnUGGOGq5p1BCKyLzBfVWeXWO0xYLKqbgzMAG4vtJKq3qSq01R12rhxBS9xGWOMqVAtzwi2A/YXkTnAvcCuIvLH7iuo6iJVjWce3gxsXsN4jDHGFFCzjkBVz1HVVVR1MnA48LSqHtV9HRFZudvD/fFuKhtjjOlH/ZE1lEVELgJmqeqjwGkisj+QAhYDx/Z3PNWWSqZY9MUSRoxrJhwN1TscY4zpkQy2aainTZumAzV99OFrHuf28+8jnUzjqrLXd3blu1cci8/vq3doxphhTkRmq+q0Qs/1+xnBUDXzvhe45Zy7ibfHO5f99Q9P4w/4OPk3x9YvMGOM6YFNMVElf/zFg1mdAEC8PcH0G2eQTCTrFJUxxvTMOoIqWfT54oLL3bTS3hLr52iMMaZ81hFUyTrT1ii4vGFElKbRjf0cjTHGlM86gio5/rKjCEVDiHQtC0WDnPSbo3Ece5uNMQOXHaGqZO3N1uCq53/OlntvxqgJI1lv63W44MGz2P3IHesdmjHGlGTpo8YYMwyUSh+1MwJjjBnmrCMwxphhzjoCY4wZ5qwjMMaYYc46AmOMGeasIzDGmGHOOgJjjBnmrCMwxphhzqahrtAbz77DnRfez2fvf8Gam07hmJ8dytqbFZ5vyBhjBjLrCCrwz8dmcfE3ryTengBg0ReLee3pN7l8xvmsv826dY7OGGN6xy4N9ZKqct3pt3Z2At4yr/bAjWfeUcfIjDGmMtYR9FIynmT+pwsLPvfha3P6NxhjjKkC6wh6yR/0E4oECz43cvyIfo7GGGP6zjqCXnIchwNP24tQNLszCEVDHH7OQXWKyhhjKmc3iytwzIWHEW+LM/2mv+HzOagqh/3oQPY5Yfd6h2aMMb1m9Qj6INbWweK5Sxi3yhiC4cKXi4wxZiAoVY/Azgj6INIQZtJaK3c+XvLlUv58zRO89cJ7rPaVSRx8xj6suu6kOkZojDE9s46gSubNmc8p035ER1sHyXiKt55/jxl3PsvF089h6s4b1Ds8Y4wpym4WV8kt59xF29I2kvEUAG7aJd4e54oTb2CwXX4zxgwv1hFUyewZb+C6+Qf8+Z8sZPni1jpEZIwx5bGOoEoamiMFl4uQl2pqjDEDiXUEVXLg/+WPLQgE/Wx7wJaEIqE6RWWMMT2zjqBKDjxtb3b95vYEQgEaRkQIRYKsv806fP+mk+odmjHGlGTjCKps4eeL+OitT5kweZyljhpjBgwbR9ALcz/6kr/d+SytS9vYet/N2WSXDRGRsrcfO2kMYyeNqWGExhhTXTXvCETEB8wCPlfVfXOeCwF3AJsDi4DDVHVOrWMq5pn7XuDX374ON5UmlUzz+O//xuZfncr59/8Qx7GraMaYoak/jm6nA+8Wee47wBJVXQu4EvhlP8RTUKw1xm++cz2JWIJUMg1
AR1uc2U+9zouPvFKvsIwxpuZq2hGIyCrAPsDNRVY5ALg98/MDwG7Sm+swVfT6zHfw+fPfjo62OE/f/VwdIjLGmP5R6zOCq4CzAbfI85OATwFUNQUsA/IusIvIiSIyS0RmLViwoCaB+gK+os8FQjYOwBgzdNWsIxCRfYH5qjq7r/tS1ZtUdZqqThs3blwVoss3decNCt4UDjeE2PO4XWrSpjHGDAS1PCPYDthfROYA9wK7isgfc9b5HFgVQET8wAi8m8b9LhgKcOGfzybSGCbSGCYYCRIMB9j35K+y2W4b1SMkY4zpF/0yjkBEdgbOLJA19D1gI1U9WUQOBw5W1UNL7avW4wjal8d48ZFXaG+JsflXN86aZtoYYwarATWOQEQuAmap6qPALcCdIvIBsBg4vL/jyRVtirD7UTtmLUsmkrzy19dYPHcpG2y7DlM2Wr3P7cz/dCGznnydcDTI1vtNI9pUeK4iY4ypNRtZ3IPP3v+CH+x0PvH2BOl0GhS22ndzzr37dHy+4jeYS7n7kgf54y8exOdzEEdQV7nw4bPZbPeNqxy9McZ4Sp0R2CipHvzs679m6fwW2pfHiLcniMcSvPyXV/nrLU9XtL/3/vVf7r7kIZIdSTra4sSWd9DRFudnB/+KWFtHlaM3xpieWUdQwtz/fcm8/32ZV1gm3h5n+o0zKtrnU7fPJNGRzFsuIsx68vWK9mmMMX1hHUEJyUQKcQqPb0vG8w/m5Uh0JNECBWy0D/s0xpi+sI6ghFXWWZnGkQ15y4ORILsesX1F+9zpkG0IN4TzlqeSKaZ9dWpF+zTGmL6wjqAEx3E4567TCTeECIQCAIQbw6y67kQOOn2fivY5bc9N2Hq/zQk3eMVqfH6HUCTIKVcdR/OYpqrFbowx5bKsoTIsmruEp257hvmfLmSTnTdku4O2xB+oPPNWVXntmbd44c//ItIYZvdv7cTq661SxYiNMSbbgBpHMBiNWXkU3zzn4LLWXTxvCXPe/owJk8cxcc0JALQta+P92f9j5Lhmpmy0OiLCBtuui+NzCEdDrLruxKL7U1X+++r/iC3vYN0t1yIctbKXxgwUml4IqffBNwnx9258kWoakm8AaQhMRSRQmyDLYB1BlbiuyzWn3sKTtz5DMBwgGU+y4fZfYcMd1uPeSx/GH/STTrlMWmsC+538VW46+87OMQSNoxq4ePo5eQPVPnv/C87d+2KWzG/BcQQ37XLqNd9hz2Nt7iNj6knVRVt+DrH7QUKgCTS4GTLyd4jT2PP2iVfRJacA8cwSH4y8GgltV9O4i7FLQ1Xy8G//wi3n3kO8Pd65zBdwUBfcdNfkq47PQV2X3Ld9xLhm7v3sxs5LTq7rctSUU1j42aKsdUPRIFc99wvW2nRKTV+PMaY4t+0uWH45EOu2NAih3XBGXV1yW3Vb0QU7gLZlPyERZOzfEF9tJta0AWX94KGrH8/qBADSSTerEwCvUyjU9ybjSWbPeKPz8VvPv0fr0ra8dZMdSR69/smqxW2MqUD7rWR3AgAJiP8dddtLb9vxFAUPAupCx/RqRdgr1hFUSevStp5XKkFdZfni1s7Hyxe3FpwW23WVpV8u61Nbxpg+cpcXf05zO4jc55cChcYMxVF3cV+iqph1BFWy2e4b4xQafFZmvbVUMs3GO63f+XiD7dYlGU/lrRduCLHN/gXP7owx/SW0HQUPn77x4IwuvW1wa6DAPGUSRYL1uUdgHUGVHH/pkURHRAkEvWv8js8hGAkyavwIghEvG0DEu8Y/YY3xhLpl/4QbQhz4f3sxftWxnctGjhvBEece1DneALxtJ641gd2O3KGfXpUxphBp/AFIE7CieqEPiCDNPy94Jp+1bWB9CH8N6D7jcASCW0Jwq9oE3AO7WVxFC79YzMNXP87bL77HqutO4hs/2Jexk0bz2A0zeGn6bMZOGsVBp+3D2puvwVO3zeSZe58n2hRh35P2YMu9Nyv4BzR7xus8et2TLF/cyo6HbMP
Xvr2rpZAaMwBoej7afgckZoF/ChI9DgmsU9626kL8SbT9fiCNRA6E8H549blqo9TNYusIqqx1WSv//ttbrLPFmqy0Wm3u/htjTG/ZgLJ+cuImP+SjNz7pfNw0upG75lxHpNGKzhhjBi67R1Al5+59cVYnAF7mz9FrnlqniIwxpjzWEVTJK0++VnD50gUtLFtcItXMGGPqzDqCailxq+Xz97/ovziMMaaXrCOoEp+/+Fu51mY2HYQxZuCyjqBKvnXBoQWXb7LrhgSDwYLPGWPMQGAdQZUced7X+c6lR+DPDCgTR9jjmJ341d8uqHNkxhhTmo0jMMaYYWDYzz76xYfzOHevi/la6HD2azqKq797E7HWHiaG6sGTtz3DEaudzJ6Bwzh23dN44c//qlK0xpj+oBrHbbkE98tNceetj7v4aDT1Qb3Dqoshf0bQsng5x657Gq1L2lDXe62BUIB1t1iTK5/9eUUxTL9pBjf84PasaadDkSDn3ft9ttnPJoQzZjBwF58AiZfoKg4jIA3I2CcQ30r1DK0mhvUZwV//8AyJ9kRnJwDe3P8f/Psj3p/9Ya/3p6rc9tN782oPxGMJ/nDu3X2O1xhTe5r6KKcTAFCv0lj78PscD/mO4INX/0c8lshbLiJ8/PZnvd5foiORVTeguy8+nNfr/Rlj6iD1ARSsEZyA5Fv9Hk69DfmOYK1NpxCK5KdvqiqrrTep1/sLhoM0jmoo+NyEKUPvdNKYIcm/Bmh+vQ8IQmD9AsuHtiHfEXzt27sSjASzpngOhPxM2Wh11pm2Zq/3JyIc87ND86aCDkWDfOeSI/ocrzGm9sS/JgSnATlTuksAiR5Zl5jqach3BM1jmvjtixezya4bdhaL2e2oHbnsyZ/0WECimP2+uycnXXEMo1ceBQITpoznrFtPZdsDtqhy9MaYWpFR10HkGyARwIHAFsjoexHfhHqH1u+GfNZQd6pa8cG/P/dpjOlfw+Fz3Od6BCIyCVi9+/qq+mx1wus/lf6ibzr7Th67/kkSHUnGrTqGH978Xea8/Sl3XPAn2lvaaR7bxMlXHMsaG63G7Rf8if+88gET1liJb/30G6yyzsrcceH9vDrjDUaMb+bQMw9g58O2HfJ/dMYMJsP989jjGYGI/BI4DHgHSGcWq6ru38N2YeBZvItwfuABVb0gZ51jgV8Bn2cWXauqN5fab3+PLP7pAZfx0mOzy1rXH/STTqZZ8Z4GwwEcn0OiI4mbdgGvPvGhZx3At84/pGYxG2NMrr6eERwIrKuq8R7XzBYHdlXVVhEJAM+LyBOq+lLOevep6oCs3tKyeHnZnQBAKpGdhZDoSOat09EW597L/szBZ+xDQ3O0zzEaY0xflXOz+H9AoYTbktSzIuE+kPk3qG5IvD7z7Zrs1x/08fHbn9Zk38YY01tFzwhE5Bq8A3c78JqI/J1uw/BU9bSedi4iPmA2sBbwO1V9ucBqXxeRHYH3ge+rat4RUkROBE4EWG211XpqtmpWW2+Vmuw3lUgxdtLomuzbGGN6q9QZwSy8g/ijwM+BFzOPZ2ee65GqplV1E2AVYEsR2TBnlceAyaq6MTADuL3Ifm5S1WmqOm3cuHHlNF0Vq6+3CmMmjip7fceX/XYGQv68gjWBkJ+NdliP8av13+swxphSinYEqnq7qt4OjFzxc7dl5R8dvX0tBZ4BvpazfFG3ew83A5v3Lvzau/7Vyxm36piuBQJ7fnsX1tpkctZ6m+y6Id+57EgijWHCDSGC4QB7HrsLZ99+KiPGNhFuCBEI+dnia5vy0/t/2L8vwhhjSigna+hVVd0sZ9m/VXXTHrYbByRVdamIRICngF+q6vRu66ysqnMzPx8E/EhVty6133rVI/jy4/l88eGXbLDtugTD3pQVS+YvZc5bn7L2ZlNoHNkIQCKeZOFnixi10ggijREA0uk08z9eSOOoBppGNfZ77MYYU1HWkIh8EzgCmCIij3Z7qglYXEa7KwO3Z+4TOMCfVHW
6iFwEzFLVR4HTRGR/IJXZ57HlvKB6WGn18ay0+vjOx/9++k2u+/5tLPhkIausO5HTrjuB2PIYlx31WxbPW0q0Kczxlx/FVntvzlUn38hbz79H44gGjjr/G+x25A78/Y/P8cy9zxNpirDPiXuwxZ6bFGz3tWfe4tHrnqR1aRs7fH1r9jx2586OyAxPqgmIPYp2TAeJItHDkdCO9Q6rLBp/GW2/C3QZhPZEogfjZZrnrJf6BG27HVL/geDGSPToYTnit78UPSMQkdWBKcClwI+7PbUceEO14IxNNTcQKpQ98YenueL46ws+92udCcCZsrO3QMjLlRo5vpmOtjgdbd5VsXBDiINO35tv/yJ7rqJ7LnuYu37xYOeU16FoiNW+MpGrXriYYKjXiVxmCFBNoYuPhtTboJniShKByLdwms+sb3A9cFtvgtbfASuKQkXAPwUZcx8iXXP+aOJ1dMkxoAm874gBkLC3nn+tOkQ+NFRUj0BVP1bVmaq6jar+o9u/V+vVCQwU155acsxbtgL97NL5LZ2dAHhjCx64YjoLPlvUuWzZwhbuvPD+rLoH8fY4n/7nC5655/mK4jZDQPzvkHqnqxMA7+f229H03PrF1QN1l0Drb+nqBPB+Tn0Escey1225ALQdrxMASIK2oi2X9VO0w0/RjkBElotIS7F//RnkQNLW0l5woNivdSa/1plMZSFTWdj5uFx+vy9r3MJbz79HIJR/5a6jLc4LD1tZzOFKO57OHCRziC9TaGWASrwKUuiSZgzteKrzkWoCUu8VWE8hUSj73FRD0XsEqtoEICI/B+YCd+Jd6DgS7/r/sBQI1+aSjDhCU7c6B42jGih02c5xhBHjm2sSgxkEnFF4H9vck3IHZGQdAiqT00zh8aQO+LqPqfHjjT0tMJGBFK4DYvqunJHF+6vqdaq6XFVbVPV64IBaBzZQBYMBVllnYt7yM2VnzpSdeZ2xvM7Yzsfl8gf9bLbHxp2PN9z+K0Sbo+TOhRUIB9jv5K9WGr4Z5CR6CIW/v/khtF1/h1O+wOYgTXjfJbsLItGue2MiDkQOJK9OAGGIWr2PWimnI2gTkSNFxCcijogcCbTVOrCB7IpnL6R5THYa6NhVRhNpjmQtc3wOX9l67axl/qCfoy88lHBDiGhzhGhThDETR3H5jPMJBLvONnw+H7986qeMW3UskcYwDc1RQpEgp1x1HOts3vuCOmZoEP+aMOJi7waxNHrfkp1xyOjbkIKXXgYGEQcZfSs4K4NEvdgJQ9O5SGDj7HWbz4XgVkAo03kEIbwb0vjdeoQ+LJQzjmAycDWwHd653QvAGao6p8axFTQQsoZWePXvb/DuP99n0902Yv1t1gXgX4/PZub9/2SD7dZln+P3AODzD+cy874XmThlJXY6bFscx6GjPc47L/6HUDTEeluvjeMU7pNd1+U/r3xIe0s762+zTufYBDO8qcYy190jENjE+yY9CKgqJN8AbYXAVMQpPq5GUx9D+mPwr4X48s/CTe+UyhoaVoVpqqmtpZ2n73qOj9/9nLU3m8LOh21LKJJ7OluYqvLmc+/y4qOvEGkIs9uROxS83GRMX2nqUzT2CGgLEtoJgoO3FobrtsLy30BiFvgmQ/PZOP5Vy95ek++hHX8BTSORvZDARrULdgCqqCMQkbNV9fJuk89lKWfSuVoYCB3B5x/M5bRtzyMRS9DRFifcGKZpVAPXvnwpoyeUnn1DVbn82Gt5/qGXibfHcXw+fAEfp17zbfb69m799ArMcODGHodlP8YrI5IEohDaFhl57aA5g1jBTX0MC/ci7yb5iGtxIj3fM3Nbb8yMYUhkloQgegRO84+qHeqAVdE4AuDdzP8rJp/L/TdsXXHCDSxf1No5FqCjtYPFc5dy41l39rjt7Blv8PxDL9PRFkcV0qk0iViCa0+9hZbFy2sduhkm1G2HZecAHXidAEA7xF+A+Iw6RlahJd8jP1MKWNbzvF2a+gRar8V7L9zMvxi034U
m36lunINUqcI0n4qIZCaZMxnJRJK3nn8vL7UznUrzz0df6XH7f/zphazBZCv4Aj5mP/UGuxw+gDM/zOCR/Jc3tiDvXD6Gxh5FwnvWI6rKpf9b5Ik4burT0peI4jOLPJFAO/6GBNbvY3CDX6mO4GZgDRGZjTcF9QvAP1V1WH9tdRyn6DVWn8/X4/b+oB9xBHWzP6GC4A/0vL0x5Skx3mUAZxcVV2Culk49jO0RP/lpq+CNvbCpWqD0FBPT8OoIXIw3uuM04AMReV1Eruun+AYcn9/Hlntvis+ffdAOhPzsdtQOPW6/x9GFJ41zXZdpXys88ZwxvRbcgoIfb4kgkW/0ezh9Figy2bE04fh7mIwutAeFOxEfEt67r5ENCSXvGKlqu6rOxEsfvRL4HdBATl2B4eb7N53MhCnjiTSFCYYDRBrDTNl4db5zSc8DXtbfeh0OO/sAguEAoUiQSGOYUDTE+Q+cSaQhfxZGYyohEkRGXe+NM5AoEAZCEDkcgtvWO7zeG3VjZkxBdz4YdWuPm4pvHIy4BG9cQgSIeD83/Rjxr16DYAefUllDRwDbApvgnRG8AryMd3loXr9FmGMgZA2BV2Pg1Rlv8Pl/5zFl49XYeMf1e5WWN2/OfF554t+EG8Jse8A0GkbY8HlTfeq2eRPV6XIIbj+oD3yu60Lsfki8AP41oeEkHKf8L0/qLoaOvwNpCO2K+Mb3uM1QUlE9AuBG4D/ADcCzqvp+LYIbaGJtHbz02Gxiy2NsuvtGrDxlJdpbY9x7ycN8+ckCtj94K3Y4eGtEBJ/fS/3MvUyU6+N3P+PNZ99lxLhmttpnM4KhABMmj2e/7w6yG3ZmwND0XIg/533DDe1SYmCWAD68j7p3AUA1CfFnwV3gDUYLfKV3bac+hMQr3rxHoV2KjmhWd4l3o1ZdCO2E+Mb2qp28VyICga+AuOCbVPL6vqbnea9Rwt5B32nEuyfgB3Uob1KF4aPUGYEPmIp3VrAtsC7e5HP/xDsreLq/guyulmcEbz3/LufucykAbtpFXZdt9p/Gsw+8lHVzd/TKo2gYEWHh54txUy7iCFM2Wo1fzjg/6/KO67pccfz1zLzvRcC7v+AP+fn13y9gykaD95uZqS+39YZMOqQPbzIqRUZeh+TMNaSJ2eiSEwAFTXv/hw+AxNPe1NWa9lYM7YyMvBLvI1+cqqIt50JsOiBeVhIBZPQdeZ2JN4bhR5kY1esMmn+CEz2sotesGkeXnAiJ17zXIT5wRiGj784rWOPVPbgG78DvAC5EjoH22zLL+h7PYFSVkcUishJwCHAGMEVV65LiUquOIJlIcuiEE2hdWt40SiKSlUIaCAXY96Q9OOWq4zqXPX33c1x50o156aITpoznjg+uHbQjPE39aPJNdNGReDnx3UgUGfci4kS99TSBzt/OqwSWveKKPXVbFoGmH+M0fLN027Hp6LLzyK4pADgTkXHPdP49a3ohumAX8mcQDSFj/4L4Vyv9Igtwl/8W2n6fs08fBLfAGX1HV4zF3p+CKo9nMKpoQJmIbCwiJ4vIHSLyAd49gu2Ba4CtahNq/bw+8x3vGmSZcjvQZDzJjDv/kbVs+o0zCo4ZWDp/GXPe+qSyQM2wpu0P0TU6tjuBxHNdDxOv4I0oztsD+Rk0MYjdU0bb95LXCQDo0uwaAvGnKJyumUY7nuixnYJiD5DfsaQhMQt1W7vF+DCF359C0mjH45XFM8SUukdwG/A88ATwE1Ud0keuZDy/2ExvpZPZH7xEkX2K45CID+sib6ZiCbyRsTmUTGnHFY/LPRj2Zv1i60hO20kKxogLWqDOQFmKfT6F7BHHRd6fgvoSz9BSahzBZqp6mqreM9Q7AYCpO2+QdyDvDZ/fYev9Ns9attsR2xOK5t9ICwT9rLXJ5IrbMsOXhPfCS3/MlcyuRxDcsuseQPYeCiwLQWTfnhsP71ek7QAENui2u12KtBNEwrv33E4hoT0p+L3VvwbidBXkkfDXisRYSB/
iGWLs1nlGtCnCGTedRCgS7BzhG24IsfoG+UPXA6EAjSMbOg/y4YYQI1cayUm/PiZrvX1O3IM1Np5MuDGc2c5PKBrinLtO7zHTyJiCgttBePdMPvyKjKAQNJ2HOF2VvsRpgBG/wBs/kDmAShQCUzPbrviCEvUKyEe/3WPTEj0MAutlxiWAN6I3jIz8DSJdB2nxrwaNJ2XaXpGhE4boYRVP5yBNp4NvQre2wyCNyIjLs1cMbgfhPfLfn+DOOfFEMvFsgLFpqPN89t+5zLhjJm1L29h6vy3YbPeN+N8bH3Pzj//Igs8WM22PjTnu4m+SSqSZcec/+OSdz1h78zXZ5ZvbFRwQlk6l+edjs3j1b28wZuVR7HHMzoxftW9pdGZ4U/Xq92rHDHCiSOQAxL9W4XVTH6Oxh8BtQcK7egdKd6G3LD0XCW4J4a8iZU61oJqC+DNo/AVwxiLRryO+wpVrNfk2GpsOpJHw3kiwbyPnVePQ8TiaeA18k5HogYiTP9tvsfdHk++gsceqFs9gY/UI+tGzD7zIX//wDJvuthGH/HB/ANqWtfGfWf9j1PhmSxs1vabucki+Cc4Y8K/Tq2wzVYXU2+C2QmDjzqyiguumPssUglkzLyWzcDyjwb9uVeLR1AeQng+B9Tsv9ZQbjylPpfUIHqP4LE+o6v7VCa93BmpH0NHRwUEjjyOVyL4JfMCpX+OJm/9OIBQgnUozcc0JXPz4uYydOLrInozp4rbeAq1XZSaKS4FvNWTU78s6MGpqDrrkeHAXAo53z6D5fJzo17PX0w506RneFNUS9G78hvdCRlySdcmnJvE0fd8bl5D6b2awVwKix0D6g5x4voaMuDQvHlO+SjuCnUrtVFX/Uer5WhmoHcHhk05g0dylPa7n+BzWnLo61826vMd1zfCm8efRJd8jO2XTB/51cMY+Unpbdb1cfnce2d/nwsiYu5HAhp1L3GUXQOwhstMzw9B4Ik7jqVWMZ1dw5+bE4+Bdy+9+Y9tHfjZQfjymdyoaR6Cq/yj1r3bhDk7ldALgjVj+5N3P+ey/c2sckRnstO028vP205D6CE39r/TGyVdBW8g/qU+g7Xd3taFugU4AoAPa/ljleJYViMclf2hvNhwAACAASURBVLxDmvwiNPnxmOrpMWtIRNYWkQdE5B0R+d+Kf/0R3FDlC/hoWTSsyzqYcriLCy8XP7i5I4Zzt11G4RROF9ILuz1OUTRHX3NG2ZeMp4cvQkXj6YXceEzVlJM+eitwPd5fzC7AHYB1zTnEKf+P3HWVNafaTWPTg/CuQKjAE2kvjbOU4KaZgV25Ilm58yJB8K9TYD2BYM5VhJLx9JAWGty094PceorHVE05HUFEVf+Odz/hY1X9GbBPbcMafE769dEFlwfCAYIRLzVPBELRIKdcdSyhSKEPlDFdJHo0+MbRdfAVvHmBzkOk9PTL4oyGxlPIHlwVBv9qEMnO85DmizJ59yvGtvhBGpDm88qM59wy4/lefjzOxMz+VhyKQiAjved6iMdUTzm34OMi4gD/FZFTgc+BYnPeDltfP2NfRk8YyRUn3EBHWxxfwMdhZ+3PoWcdwGM3zOCl6bMZO2kUB5++D+tvs269wzWDgDjNMOYRtP0eiD8Dznik4RgkuFlZ2zuN30UDG6Ptd3mXbsJ7IdFv5B20JbiJ107brd6cQYGpSMNxeeMDahUPqQ+9ttOfQ3A7pOEocJflxHMs4ptY3htneq3HcQQisgXwLjAS+DkwArhcVV+qfXj5qp01tGT+MtR1GT0hf2BKd+3LY7QuaWXMpNGdtYkXfL6I9176LxvtuB4jx40ouq3ruiz8bBENI6JWgGaYUrfVu3nrrNQ53bPrLoXUh+BfF6doPYHMDV13HkgT4jRltnW9fHynCcc/uWvd9CLA9apy9TIe1Q5wF4EzrrPGgGoS3Pkgo7rNbFp+POVyU1+C+wX4N8BxeldT2XVTkHoTnJVw/BMzMSq4X4KECg46607dZd603M5
KNZsRuDfx1EqlhWkAUNVXMjtxgNPKLV4v3teOZ/HO+/zAA6p6Qc46Ibx7DpsDi4DDVHVOOfvvq8/e/4JLjriaOW9/AgiT1l6Zc+8+nSkbZk9J29Ee58oTb+C5B1/G8QmhSIjjf3UkfzjnHpZ+2XXDbtLaE7jlnavyCtg/99DL/PaU3xNbHsN1Xbbebxpn3nIK0aZy50Mxg5lqB7rsJ9DxV7y58MNo448hdpd38Mpwg9vByFtwnOyrtW5sBiy/wBuAhYuGdvGulS//JSsya1xphOZLoe0GLx8fUN/qyMgrkED22acXz0+h44mueJrOhdT70P7HTH0DQRtOBiLQdjWQBnXRyNchuBUs/3l2PIFp0JoTz6jbcYIb9fj+uO5SWHgwuJ9llghu+DCckReV9f66y34FsZtZkY3kOuOh+UJYfimkv/RiDEz13oucsQ7qLkaXngmJl733whkNIy5FQtUt5amJ19BlZ/YYTz2Vc0YwDe+G8YqCocuAb6vq7B62E6BBVVvFG7/+PHB69zMJETkF2FhVTxaRw4GDVLVkpYhqnBHEY3GOnHwKLQuXd04nLQINIxu4a871WQfpiw79DS9Pn02io+fZSdfbem1+++IlnY/fffm/nLXbhcTbu1LzAiE/m+yyIZc8btc7hwN3yekQf5rs9EyvmEye0P44o37d+VCTb6CLjiJ7bn0/+amVRfYrzci4mVnVy9ylZ2TKNXaPZ0X1su43c4N4qZ2pnGVpstM9i8Xjh/Fv4Dilv2u683fKjC3I0fgjnMbvlN429nCm+E1PfOCbhIx9Cu/7bKbIzqKDvA4wK/4IMvYRpIKzmkI0/SW6cE/Q9px4JiJjZ3TG0x8qGkfQzR+AU1R1sqpOBr6H1zGUpJ4VE4UHMv9y//oPAG7P/PwAsJv0Q7WWF/78ColYIqumgCqkEqnOamIASxcs46XHyusEAN596b9Zj//0q0dIxLLzs5PxFK/PfJv5ny7EDG3qLvbqBefl6Bf58hX/S/ZarTcX2LbU9OU5+9UkdEzvFs8S6PhbkX3mZvQkCrSVID/nv1g8KWi/vchzHjc1p3AnAN7ZTU+WX9nzOgCkvUteiZe7hfcupD8iP/4k2n5nmfvtmcbuB81tI+2l4naPp87K6QjSqtpZ8UJVn6f0X2MnEfGJyGvAfGCGqua+8knAp5n9pvDONsYU2M+JIjJLRGYtWLCgnKZLmv/JwoIH9462OPM/6dr/4rlLCQQrH9I+98MvKXTC5Q/6WfjZoor3awaJ9MKSdXULbJDz8BNKzPJShhia/qLb/hb0Mp4+Svc0yOyD4s+VM2agp7EL2TuEdLdOJ/0FXVlJ3aUg9XEv9tuD1CcUruOQE0+dldMR/ENEbhSRnUVkJxG5DpgpIpuJSMl0AVVNq+omwCrAliKyYan1S+znJlWdpqrTxo0rfROsHF/Zci2C4fwPRKQxzFe2XLvz8cS1JvSqalnuWIKNd1q/c0rr7lKJFKuvv0ovIjaDkn81yi+SQiaFs5vglpSX2Fdsf1EkMDUnnn6cZDK4S+nnQyXGBTiFZzTN4l+j/FjUhUC3exaBDYqMswhn3vfqkOAWFKyPkBtPnZXTEUwF1gEuAH4GrAdsCvwG+HXxzbqo6lLgGeBrOU99DqwKIN5sUiPwbhrX1NSdN2CNjVfvzO8HCIYDTFp7ZbbYq2tq2nA0xFHnH0Io2pXz7ziC4yv8th18RvbwikPO3J9wYxinWwcRbghxyFn7W/bQMCAShsbTyD4QOBQ9uDeelb19w7dBGsj+mIYpPEI3QPZgryD4VofQzjnx/F+BeMJ01SdYIZRZ1r2tcGbbMuJxxuNEShd9cZyRxTuLET8vuS0AzRcXeSKI9350izG0IxLo+pInvpUz4ym6vxd+cJq8ugvVEtkXfGMLxLNDVjz1VrNpqEVkHJBU1aUiEgGeAn6pqtO7rfM9YKNuN4sPVtVDS+23Wumj8VicP/3qUZ667RlcV9n9qB05/McHEmnM773
/cf8/ufeyh1k8bykb7bAex/78cB644lGeuPlp3LSLL+Djmz8+kGMuPDxv23lz5nPb+ffx77+/yYhxTRx65gHsduQOVrh+GNGOv6KtN4C7AAJbIE2nox3PQNu13iUQGQFNZ+NEv5G/beoztPW3kHgBnFFIw/GofyosOwNS/wEcCO4II34JsTuh/SHAhcj+SMNJXoGakvFMQ5rOgPR8tPXqzLTP6yKNp4MEvWXJN72brY3fA9+amXheBGdkJp5NYNnp2fGMvArHKT3IbAW35VJovweIg7MSjLgYJ7RDedvGZ3k3jN3PAT9EDoKG70P79dDxJEgIIod74x1yZi5Vdb06zO13eL+H0K5I46k9pt72lrpL0dbf9RhPrfWpHoGIrARcAkxU1b1EZH1gG1W9pYftNsa7EezD+wrxJ1W9SEQuAmap6qOZFNM78c4wFgOHq2rJC4sDdfZRY4wZyPo0jgCviP2twIp8x/eB+4CSHYGqvoF3gM9dfn63nzuAQ8qIYcB58rZnuP38+1j4xWImrjmBE355FNsdWL1ri6b2NP4s2nKplz3ijIGGU5DoEf1ytua2PwStV3uDjHyrQOPZ3hz8rb/DmwTOgdDXYMQVeWMLNPUJ2nKR961cAhDeHxpOhtYrvW+dqFc3uOls79tu7AHQDghugTSfX7Sa2UBS6P1xIl+tYzwPe3UYBkg81VbOGcErqrqFiPxbVTfNLHstcxO43w2EM4LpN83ghh/cRry9KxsgFAly3r3fZ5v9bGKswUDj/0SXnER2jn4EGk/FaTyhpm277fdByyVkT+lcJB8/uBPO6N93PlR3Gbrgq5kpnVfciF5xLT9FV+aRj655/Veki4o3Z8/YvyK+8dV7QVVW+P0Je4Ow6lBs3m3/E7RcPGDiqVRfxxG0icgYMukGIrI1XprnsKSq3PaTe7M6AYB4LMEt595Vp6hMb2nrFWR3AgAxaLveq8tbq3ZVvW+WefP6F2kz8Q9ctytObX/Qmw4hKxspgXew755+mu62vHNr0Ox6BAXtvLP3rw689+dq8t+fDnT5b+oUT6HfVwe6vKxcmUGhnEtDPwAeBdYUkReAcUD+Xa1hItGRYPmS1oLPzf3wy36OxlQsNafwck16c+f78oazVEkC3CW92yT9KTiZDJPU2+R3YL1sP/lWH7avtWTxugfpT/s3FMCLp0giY/qzwssHoXLmGno1U7ZyXbzzzP+oFkzAHRaC4SCNoxpoWZg/5dKEKSvVISJTEf9kSL6ev1z84BSfQLDvgt40y1rkYFeIb9Wun/3rAzOovDMIejn0haw4C/jHP7Ifz5xZYVuVCIAzqvDB11ePsTcBbw6igvFM6v9waqTopSER2UJEJkDnqN/NgYuB34jIsK28LiIc87NDs8YWgFdn4DuXHFGnqExvSeMZeDnw3UWg4eSapvWJCDSeTv4goyJtBrfPSsOU6Ne9FMSsj24A7z5B98GLvsyynLoXEkSiA/fvVESgodD7E0aafjDs46mVUvcIbiQzNlpEdgQuw5spdBlwU+1DG7j2++6efPfKYxi98igQmDBlPGfdeirbHrBFvUMzZZLQdsjIq8E32VvgjIGmHyANJ9a8bafhm9B8HjiZG7a+STDi19DwPbo6BIHg7jDy5uy4nZHImAcguA3exzfkDYwa+wSE9sTrFPzeQLKxf4HINzIjlh1vDMPoexFfkTPXmTO9fzvt5P1b8bifOQ2HF3h/LkPC9cnSKR7PnnWJpxaKZg2JyOuqOjXz8++ABZnqZMM+a6g7VbXBYYNcPX+Hhdp2XTcvZbTcbbtm081fXvZrrMslocIG2udroMXTG5WOI/CJiD9zWWg3oPtXpf4dEjeADdY/CtOlWr9Db6Tq/RC7A9wVI1W/B8nZaNuN3qRvwS2RxtMQ/2pF2+6pE1B3GdrqjZxVCUP0mxA5BNpvg5g3sljDB0D029DxOMRuB7cNzcSjbX/wag8Q9+b0GXExIgG09VpI/Q8eWg9pOq3gRBZ
u4t/eSN70J0DAa7fpJ2V1XL3htt3pZQ9pKyojoflHOJGDqtqG9/t6oPP9WfH7khKJAtX7W0l4v4fYg4AL4f2RhhM6i//0t1JnBOcBewMLgdWAzVRVRWQt4HZV3a7/wuwy0M4IjFnBXfYTiD1GV6qhH+8+RIqum7uOl8s/5s+If9VCuylJtQNduC+k59E1q2XYu/yjsW7thECi3kCyrHhy6w6sEOy2XIAwMvrWrDKUbuIdWHwQeRPXBabhjOkhJbUX3NZroPWa/CeafobTUL37G+6y8yH2CFnvjzMaGfu4V5azRlQVXXx0Jlmh2+/LvyYy5sHOinHVVtE4AlW9GPgh3sji7bWrx3CA/6t2kMYMZpqem3NQAa8DaCU7w8cFbUfbrqusodh078wi62DeAbokp514ZlluPIU6AXKWKxBDl1+avUrLeRScvTQ5Czf1Rf7ySrVeX2T55VVrQtPzMmdPOe+P2+Kd1dVSclamOl3O7ys9B+Iza9t2ESXP51T1JVV9WLVrcnBVfV9VX619aMYMIsm3ejHXfxoSr1TUjCZeJn9wU40k38t+nPqw+LpVOoC5bitFB9dlVfnqo+RbIIVqI3dA4p/Va6dg22+AFuiQtR1N/ru2bRfRf3XSjBnKfBPoVe2BSnPQfauSP2V0jTg5WeKlxlf416lSo6WukVfxcOWbAJpbbQ28MpKrFVheRc6ETApwrghSp7EJ1hEYUw3+DTMH6dw8Cof8A3cEaTipomYkeijkXUNeMcFvLofCVbgKyY07ArkxNp5eJKgROKWKzPSC4zgQKLKv0F5VaQMA/wbgX5381x1AGo6qXjuFhHfHG9+Rc+NZ/BDet7ZtF2EdgTFVICLIqNsgOA3vwB/25tYfeR2E98gsi3i1B5ovQELbVtaObwIy6hZwJtFZPCawMYy6Hfzr0TmIzLcmjLoVgltkxzPit/nVv0IHQ/Rb3joS9W48N56ARL+ZtZoT/QZEjiPrAOaMhTGPVPRaihp1m3eg7i6wFYyo3lxD3u8r//2RUdchval8VlHbIWTMPeD/Cl2/rzWQ0XciTlNN2y4aU60K09SKZQ2ZgU7dxeC2e8VcMumG6i735hjyTazKyGVVBfcLIJyV7qjpLwFFfBNKxuOmvvBSQAObdI5cVrcd3IXgWwkpeOnC47oJSL4GvpVxKsh8KpebXgSp/0JgfZxaZvEUeH/6i6bnA27W76tW+lqPwBiTw3VdaP0VxB4GFML7QNOPcZwg4ozOur6uyXfQ9tsh9TmEtoPoEeAuQ9tuh9R7EJyKRI/u1cFARAreZyg0ajg3HgDHPxH8E7tiTH2SFQ9F4nETH8HSU8CdAwRxo8fiNH8/bz1VhfhT3myppJHIQRDeq1epkY5vTA0n/+tS6P3pLwNlOnA7IzCmAu6C3TODqrpxxsHY57IGV7mxJ2HZWXjpmS5efn8DEMsUT08BAZAwMuZPiH/NfnsNK2jidXTJMZlMluLxuIn3YXGBa9j+zXHG3pO1yF32Y4g9QWeGk0QguB0y8nc2CLNO+lqPwBjTjRubnt8JgFcDONZVk0I1BS0/wcsXX5FRFPdmHtUYXWmSSW8EbUtO3n4/0ZbzM6mZufFckr3i0u8W3kFqNm56Xtf+ku9C7HGy0lw15lVUS9qXuIHIOgJjeqvjseLPxR7v+jk9B6/sZDkUEi/3IajKqCYyRecLxfOv7EVuifn32+7s+jnxItlFclbssh2NP1dJmKbGrCMwprdkVPHnfN2ek0boTbUzaag8por58WYsLSAvnhLX97vXCpARRfYZQpwS752pG+sIjOmtxhIzrDSe1vmj+CZ4qZ15ORm+AsvC3k3kfibiQORA8uoWFIqnaI67eFNrd663Z16K/Ir1CO9TcaymdqwjMKaXHP8kaPoJeUe7xjNwAl/JWiQjf5sZdRsBacKrH3BMpp5AKLMsCOHdkcYi1+BrTJrPheBWPcbjjLwcnAKjbkdkz5skThMy6vfemYE0dv6TUdcOmCw
Zk82yhoypkOu2Q+wB7/JP9DAcp/ilHU3+B9z5ENjAS1cENPUxpD8G/1qIb2LRbfuLpj7x7mv0EI+beAfa7/DKfUZOwPEVvmSkmvTGG2gagpshBef2Mf2lVNaQdQRmWFC3FY09CqkPkcAGENkbkdxSlVVoJ/UpGnsEtAUJ7QTBbfuULqmahvizaOIFcMYikQP7ZfBRvXm/r8cg9UHm97UXIrnlIocHTS9CY3+G9FwktIVXN6HsCQ67WEdghjVNfYwuOrRrbn6JgjR7c7/7xlWtHTf2OCz7MV7GTBKIQmhbZOS13rX43satCXTxsZB8B2jHK3zvQ0Zeh4TqUg6kX2jqE3TRId1qKUTBaUbGPDDsLi1p4t/okuMyE+TFvb9d3+rI6Ht6XcTGxhGYYU2X/QR0GZ157epNpaDLL6teG247LDsHb8zAipTRdoi/APEZle2z/X5vumRWTL+cAI2hS7/vjVEYorQl5/dFO7gLqvr7GgxUFV36/cwYj3hmYTuk/oe231bVtqwjMEOad536FfKniE5B/O/VayjxcoFZQQFi3iWpSnQ8QnbxkhWSkHqnsn0OcKqpTK2G3N9Xurq/r8Eg/bE3P1WeOFT6N1WEdQRmiBOK/5lXsSRgqWu2Fd8kLbZPZehOEyYUyT1l6L7mIiRA0RoXFdwjKMU6AjOkifghtAv5B5EghA+oXkPBLSl4AJMIEvlGRbuU6GHeHD15T4zMTDk99Ij4ILQrheoEEN6/HiHVjfgmZYrk5NYtiEDk0Kq2ZR2BGfKk+SJv5Ks00FnU3b8O0vSD6rUhQWTUDZkb0VG8ovUhiHwTgpXVHiC8L4S+2rUvaQAZ4c2ZP4QnbpMRF3lFfvJ+Xz+sd2j9TkZd682MuuK9IOJlouXUiuhzO5Y1ZIYDVRcSL0BqDgTWhcAWNTmYqtsK8adBW73ZNv2r932fyfe9eX+c0RDetSZprwNN1u/Lvw4EtxzSnV8pqgmvJnR6PgQ39dJpK2D1CMyA9vG7n/Hms+8yYlwzW+2zGcFQda9/QmYqhdAO3r8qUXeJ9wFVF0I7ZwrErCgPWax8ZO9JYB0IZNcE1vRciD/nXSYI7YI4jX1qwzvwvpQZ4LY2BDb3agp0PORNIudfC6LH4zj9MyisN78vTX3o3WB2RnnvxRAbuCYShPBXa9pGzToCEVkVuANYCe/u1k2qenXOOjsDjwAfZRY9pKoX1SomM7C4rssVx1/PzPteBMDx+wgE/fz66QuYslHfv0nXkhv7S2bMgA9EoeVnaPRbELsX0Ezet6LRo3Gaz6pu2603QOu1mbbFa2/k75DQ9hXtT93F6KKjwJ3rxS0OyBTQT0CXd63Yeg3u6D/hBDeqyuvoK1VFW86D2GOAZLK2AjD6DiRnqg9TWs0uDYnIysDKqvqqiDQBs4EDVfWdbuvsDJypqmVXbLZLQ0PH03c/x5Un3UhHWzxr+UqTx3HnhwO3gImmF6ILdqEzt7ukCDLqBiS0TXXaTr6JLjqSvLRSiSLjXuz1ICMAd8n/ZVIzyxibIM04Kw2Mz5/GpqPLziOr7gGAMxEZ98yA/fupl7oMKFPVuar6aubn5cC7QH5tPTNs/eWmGXmdAMCyBS189GaBwi8DRfwpiqc45oqhsfur1rS2P4RX7SyXQKL3c/17U1iU2QkAaAtual7P6/UDbb+PvE4AQJd6JTdN2fola0hEJgObAoUqb2wjIq+LyBMiUvAuiIicKCKzRGTWggULahip6U+JjsJFW8QRkvFyC7rUga4oO1nu+uWcOZSrSNtKJq7ecgvvr6Rqvp6+KBaHVPheDF817whEpBF4EDhDVVtynn4VWF1VpwLXAH8utA9VvUlVp6nqtHHjqjc3jKmvXY/cgVAk/8aeP+BnrU2n1CGiMoV2oewzAokikbKvfPa8u/BeQKHJ15JQwfxDIgEIbE75ZzghnCpkQlVFeH+81Npcfqgws2a4qmlHIN4UeQ8Cd6nqQ7nPq2qLqrZmfn4cCIjI2Fr
GZAaOfU7cgzU3mUyk0fswB0J+QtEQ59x1Oj5/FUf9Vpn4V4fGk/AOQg7eQTQCgR3xcr0zORgS9eb5D1Ux4yO4HYR3zww0E7zspDA0ndc5vXVvyYiLM1XFVnQwUWA0BUdej/hlRW3UgkQPhcD6mXEbAEEgjIz8jTeQ0JStljeLBbgdWKyqZxRZZwLwpaqqiGwJPIB3hlA0KLtZPLSkU2lemj6b2X97gzETRrLHMTszftXB8V1Ak2+jselAGgnvjQQ38WY6jT0M7jIkvKs3lqCCmUdLtqtefWPt+Bs4ESRyAOJfq2/7dJd702en3kcCG0J4X5QULL8cEq+Cb3Vo/hGOf3J1XkSVePc4nkHjz4NvHBI5GPGtXO+wBqS6TEMtItsDzwFv0nUR8lxgNQBVvUFETgW+i3enKgb8QFVfLLVf6whMd6oKqbfBbYXAxhVlzeTt010OyTfBGeONaBVBNQ6J10DCENio6gd3Y2qtLgPKVPV5erjwqKrXAtfWKgYztGnqI3TJ8eAuAhzQNNr8U5xoZXP7ALitN0Pr1d5EcZoC/6po5AhovdxrA9cr5zjq95arboYM+1pjBiVVF118DKQ/8+Zo11YgBi0Xocm3K9tn/DlovQaIZwZSxSD1ASy/sKsNbQf3S3TxMd4U18YMAdYRmMEpOTtzsM69tJlA2++uaJfadhv5eelugTa8dog/X1E7xgw01hGYwcldRuErjy64Cyvc5+JerKyZKlrGDH7WEZjBKbhZkUFDESS0W2X7DO+Gl/5ZBk1nahAYM/hZR2AGJXFGQ+MpZA+uCoN/dYhUVnBGokeDbxxdnYF4+3RWzWknAtGjEN/EitoxZqCxURdm0HIaT0EDU9H2u8BdCuG9kOghiJT5rT6HOM0w5hHvHkN8JjjjkYZjIbAB2v4wdDwG0oBED4fQztV8KcbUlRWmKVP78hitS1oZM2k0Pt/AHfU6HKm7DDQGzkoDYsbJasej6oI7D6QJcZqqEKEZjqwwTR90tMe58sQbeO7Bl3F8QigS4pSrj2O3I6pX4MRURt3F6NIfetW7cLwKXiMuq9qUzwMhHjc2A5Zf4A2Yw0VDuyAjLu1zIRpjurN7BD24/Nhref6hl0nGk8TbE7QsWs6VJ97A6zMry1U31aGq6OLjIPEykATi4M5Fl5yMpj4eEvFo8g1Y9sNMFlQHXsrqM+jS06oYuTHWEZS0dMEyXnpsdt50yfH2BPdcmjeHnulPqbch9TH58+gn0fY76xNPurrxaOvN5E+1nIDEK2j684r2aUwh1hGUsHjuUgLBwlfP5s2Z38/RmCzpL7ySinlSkKpDUZv0FxT+OKUyHVYl+/yEgoPZJAjpLyvbpzEFWEdQwsS1JuC6+UU7HJ/DRtuvV4eITKfABkXGEYS9qZ/7W2DDEvFsXdk+g1tS8DaeJrxi8sZUiXUEJYSjIY46/xBC0a50RMcRwg0hjjjv63WMzIhvEkT2Jzu/3w9OExI9pA7xTKx6PNLwbZAGsj+mEWg4zkt1NaZKLGuoB4eddQATJo/n3sseZvG8pWy0w3oc+/PDWXmNleod2rAnzb9A/RtC+x2gbRDaFWn8v7odJAvHc2rF8YhvAox5GG39LSReAGcU0nA8hCsbMGdMMTaOwBhjhoFS4wjs0pAxVaLagdvyC9wvN8Wdtx7u4m+hqQ+q307qE9zFx+POWx/3y6m4y36Kum3Vbyf+LO6CvXDnfQV3/na4bX9ksH1xNOWxjsCYKtEl34P2+7zLQqQh8S900WFounoZZuouQxcdAonngZQ3gjn2MLrkuKoepDX+ErrkVEh/iDej6wJY/iu07fdVa8MMHNYRGFMFmvoQEq+QnfevoPGK6yMUbKf9Qe/gT/dstgQk/wPJN6rXTusVeIPYuotB2w2o5o6VMIOddQTGVEPqQ5BCuRcJSL5VxXbeJv8ADYh4MVStnY8KL9ekN8GfGVKsIzCmGvxreDWO8wS9MQ9Va2d9IFzkuTWr2M7kwsvFD86
I6rVjBgTrCIypAvGvBcHNyStsI0EkekT12ol+HSRE9kc3AL61IbBx9dppMaAW7wAACLdJREFU/D75HU4EGk5CJFC1dszAYB2BMVUio66HyCEgEcCBwBbI6PsQX/XGnIgzEhnzAAS38dogBJEDkNG3VnUKbglti4y8GnxTvAXOGGj6AdJwUtXaMAOHjSMwpgZUtea1Efqjjf5sx9SWjSMwpp/1x4Gzvw7O1gkMfdYRGGPMMGcdgTHGDHPWERhjzDBnHYExxgxz1hEYY8wwZx2BMcYMc9YRGGPMMGcVysywoKlP0LbbIfUeBKci0aO9CmDGmNqdEYjIqiLyjIi8IyJvi8jpBdYREfmtiHwgIm+IyGa1iscMX5p4HV20H8TugeQr0HY7unAfb+poY0xNLw2lgB+q6vrA1sD3RGT9nHX2AtbO/DsRuL6G8ZhhSlvOz8zhv2J20CRoK9pyaT3DMmbAqFlHoKpzVfXVzM/LgXeBSTmrHQDcoZ6XgJEisnKtYjLDj2oCUv8p9Awk/tXv8RgzEPXLzWIRmQxsCryc89Qk4NNujz8jv7NARE4UkVkiMmvBggW1CtMMST6gyLTJEu3XSIwZqGreEYhII/AgcIaqtlSyD1W9SVWnqeq0cePGVTdAM6SJ+CByAHl1AghD9Mh6hGTMgFPTjkC8ChYPAnep6kMFVvkcWLXb41Uyy4ypGmk+D4JbASGQJu//8O5I43frHZoxA0LN0kfFm7v2FuBdVb2iyGqPAqeKyL3AVsAyVZ1bq5jM8CQSQUbfjKY+gfQc8K+N+OxWlDEr1HIcwXbAt4A3ReS1zLJzgdUAVPUG4HFgb+ADoB04robxmGFO/KuBf7V6h2HMgFOzjkBVnwdKVrRQrzza92oVgzHGmJ7ZFBPGGDPMWUdgjDHDnHUExhgzzFlHYIwxw5x1BMYYM8yJl7gzeIjIAuDjPuxiLLCwSuHU21B6LTC0Xs9Qei0wtF7PcH0tq6tqwakZBl1H0FciMktVp9U7jmoYSq8FhtbrGUqvBYbW67HXks8uDRljzDBnHYExxgxzw7EjuKneAVTRUHotMLRez1B6LTC0Xo+9lhzD7h6BMcaYbMPxjMAYY0w31hEYY8wwN2w6AhH5g4jMF5G36h1LX4nIqiLyjIi8IyJvi8jp9Y6pUiISFpF/icjrmddyYb1j6isR8YnIv0Vker1j6SsRmSMib4rIayIyq97x9JWI/H979x9r9RzHcfz54jK3H4qFNVEWovmj0koiTWqiJYthspEtNqIZVmHZbNbYmo0xVJZJTT9nZpESYdRK0m9Dmxoy0Q+aWl7++H5uu7XW7X7PtW/nfN+P7axzvud7Pud1a933+X6+3/P+tJc0V9ImSRsl9Ss6Ux6SuqV/k4bbbknjco9XlnMEkgYAe4E3bV9WdJ5KSOoIdLS9WlJbYBUwwvaGgqM1W1rAqLXtvWlFu8+Ah21/WXC03CQ9AvQGTrc9rOg8lZC0Fehtuya+gCVpBrDc9lRJpwKtbP9ZdK5KSDqZbGXHvrZzfdm2NEcEtj8FdhadoyXY/tn26nR/D7AROLfYVPk4szc9PCXdqvbTiaROwI3A1KKzhMNJagcMIFs5Edv7q70IJIOA7/MWAShRIahVkroAPYGvik2SX5pKWQPsABbbrtqfBXgBeBz4t+ggLcTAh5JWSRpTdJgKXQD8BryRpu6mSmpddKgWcDswq5IBohBUMUltgHnAONu7i86Tl+2DtnsAnYA+kqpy6k7SMGCH7VVFZ2lBV9nuBQwFHkhTrNWqDugFvGK7J/AXML7YSJVJ01vDgTmVjBOFoEql+fR5wEzb84vO0xLSYfrHwPVFZ8mpPzA8zavPBq6V9FaxkSpje3v6cwewAOhTbKKKbAO2NTrinEtWGKrZUGC17V8rGSQKQRVKJ1inARttTyk6TyUknSWpfbpfDwwGNhWbKh/bE2x3st2F7HB9qe1RBcfKTVLrdDECaQplCFC1V93Z/gX4SVK3tGk
QUHUXWBzhDiqcFoL/cfH6E42kWcBAoIOkbcAk29OKTZVbf+Au4Ns0tw4w0fb7BWbKqyMwI135cBLwju2qv+yyRpwDLMg+d1AHvG17UbGRKjYWmJmmVH4A7ik4T26pOA8G7qt4rLJcPhpCCOHoYmoohBBKLgpBCCGUXBSCEEIouSgEIYRQclEIQgih5KIQhJoj6WDqyLhO0hxJrZrYf+JxjrtVUofj3d5SJI2Q1L3R42WSamLx9XBiiEIQatE+2z1Sl9n9wP1N7H9chaBAI4DuTe4VQk5RCEKtWw5cCCBpVFr7YI2kV1Ozu8lAfdo2M+23MDVZW5+30Vr6Vu709H5fS7opbb9b0nxJiyR9J+m5Rq+5V9KW9JrXJb0k6UqyXjLPp4xd0+63pv22SLq6gr+fEMrzzeJQPpLqyHqxLJJ0KXAb0N/2AUkvA3faHi/pwdT0rsFo2ztTy4uVkubZ/r2Zb/8EWYuJ0amFxgpJH6XnepB1jP0H2CzpReAg8BRZ75s9wFLgG9tfSHoXeM/23PRzAdTZ7iPpBmAScF0z84VwSBSCUIvqG7XeWE7Wl2kMcDnZL3aAerK210fzkKSb0/3zgIuA5haCIWQN6B5Nj08Dzk/3l9jeBSBpA9AZ6AB8Yntn2j4HuPgY4zc0GlwFdGlmthAOE4Ug1KJ9R3zCb2jUN8P2hGO9UNJAsk/X/Wz/LWkZ2S/x5hIw0vbmI8bvS3Yk0OAg+f4fNoyR9/UhHBLnCEJZLAFukXQ2gKQzJXVOzx1Ibb0B2gF/pCJwCXBFzvf7ABibChCSejax/0rgGklnpCmtkY2e2wO0zZkjhCZFIQilkNZzfpJsta21wGKyzqcArwFr08niRUCdpI3AZOB4105eK2lbuk0BniFbdnOtpPXp8bHybQeeBVYAnwNbgV3p6dnAY+mkc9ejjxBCftF9NIQThKQ2tvemI4IFwHTbC4rOFWpfHBGEcOJ4Op3kXgf8CCwsOE8oiTgiCCGEkosjghBCKLkoBCGEUHJRCEIIoeSiEIQQQslFIQghhJL7D3acu5LIA2dRAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
    "# Visualize the clustering: data points colored by cluster plus the `centroids`\n",
    "# Flatten `km.clusters` into (cluster index, data point) pairs, cluster by cluster\n",
    "labeled_points: List[Tuple[int, List[float]]] = [\n",
    "    (cluster_idx, point)\n",
    "    for cluster_idx, members in km.clusters.items()\n",
    "    for point in members\n",
    "]\n",
    "# Split the pairs into the x / y coordinates and the color code of each point\n",
    "xs: List[float] = [point[0] for _, point in labeled_points]\n",
    "ys: List[float] = [point[1] for _, point in labeled_points]\n",
    "cs: List[int] = [cluster_idx for cluster_idx, _ in labeled_points]\n",
    "\n",
    "# One figure with a single set of axes which holds all the data points\n",
    "fig, ax = plt.subplots()\n",
    "ax.scatter(xs, ys, c=cs)\n",
    "\n",
    "# Mark every centroid with a red `+`\n",
    "for centroid in km.centroids.values():\n",
    "    ax.scatter(centroid[0], centroid[1], c='red', marker='+')\n",
    "\n",
    "# Label both axes (the trailing `;` hides the return value in the cell output)\n",
    "ax.set(xlabel='Petal Length', ylabel='Sepal Width');"
  ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [
    "# Error measure: the squared difference between two values\n",
    "# It is used further down to score how well a clustering fits the data\n",
    "def squared_error(a: float, b: float) -> float:\n",
    "    \"\"\"Return the square of the difference between `a` and `b`.\"\"\"\n",
    "    difference: float = a - b\n",
    "    return difference ** 2\n",
    "\n",
    "# Equal values have no error and the sign of the difference does not matter\n",
    "assert squared_error(2, 2) == 0\n",
    "assert squared_error(1, 2) == 1\n",
    "assert squared_error(1, 10) == 81"
  ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYUAAAEGCAYAAACKB4k+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAfJklEQVR4nO3dfXAcd53n8fd3ZvQsyxpL8qOkkWOnCCF7tjUiFx6WTeJjLwQ2yVGQJXcsKS5XOSBwcNwVG/hna/fqrpaqrWWP2iVcIECyx1NIoBI4ikouJMDekhDZTpwnAo5jyZYdW7YlWZZkSTPzvT+mNR7JsiXZmukZzedVNTXdv26Nvnbi/kz/ft2/NndHREQEIBJ2ASIiUjoUCiIikqNQEBGRHIWCiIjkKBRERCQnFnYBl6K1tdW7urrCLkNEpKzs2rXruLu3zbetrEOhq6uL3t7esMsQESkrZtZ3vm3qPhIRkRyFgoiI5BQ0FMzsgJm9YGbPmVlv0LbGzB43s98H7/Gg3czsy2a2z8z2mll3IWsTEZFzFeNM4Tp33+7uPcH63cAT7n458ESwDvAe4PLgdSdwTxFqExGRPGF0H90M3B8s3w/cktf+gGc9DTSb2YYQ6hMRqViFDgUHHjOzXWZ2Z9C2zt2PBMtvAOuC5U3AwbyfPRS0zWJmd5pZr5n1Dg4OFqpuEZGKVOhLUt/p7gNmthZ43Mx+m7/R3d3MljRNq7vfC9wL0NPToyleRUSWUUHPFNx9IHg/BvwIuBo4OtMtFLwfC3YfADryfrw9aFt2u/pO8sWf/RZNGy4iMlvBQsHMGsxs1cwy8MfAi8CjwO3BbrcDjwTLjwIfCa5CugYYyetmWlYvDpzinqde4/DImUJ8vIhI2Spk99E64EdmNvN7vuPuPzOzZ4EHzewOoA+4Ndj/p8CNwD5gHPhooQpLJuIA7OobYlNzXaF+jYhI2SlYKLj7fmDbPO0ngJ3ztDtwV6HqyXfF+lXUVUXZ3TfETds2FuNXioiUhYq8ozkWjbC9o5ldfUNhlyIiUlIqMhQAuhPNvHzkFONTqbBLEREpGRUbCslEnHTG2XtoJOxSRERKRsWGwo6Os4PNIiKSVbGhEG+oZktbA7sVCiIiORUbCpDtQtrVP6Sb2EREAhUdCt2dcYbHp9l/fCzsUkRESkJFh8LMTWzqQhIRyaroUNjS1khTbYzd/QoFERGo8FCIRIzuRFxXIImIBCo6FACSnXF+d/Q0IxPTYZciIhK6ig+F7mBcYY+6kEREFArbOpqJGOzuHw67FBGR0FV8KDTWxLhifZOuQBIRQaEAZC9N3dM/RDqjm9hEpLIpFMiGwthUmlffGA27FBGRUCkUyHsSmwabRaTCKRSA9ngdrY01GlcQkYqnUADMjGSiWXc2i0jFUygEkok4fSfGGRydDLsUEZHQKBQCucnxdLYgIhVMoRB4y8bVVEcjGlcQkYqmUAjUVkV5y6YmTY4nIhVNoZAn2Rln78AIU6lM2KWIiIRCoZAnmYgzlcrw0uGRsEsREQmFQiHPzIyp6kISkUqlUMizrqmW9nidrkASkYqlUJgjGTyJzV2T44lI5VEozNHdGefoqUkGhifCLkVEpOgUCnOcvYlND90RkcqjUJjjivWrqKuK6iY2EalICoU5YtEI2zuadQWSiFSkgoeCmUXNbI+Z/SRY32xmz5jZPjP7vplVB+01wfq+YHtXoWs7n2QizstHTjE+lQqrBBGRUBTjTOHTwCt5618EvuTuW4Eh4I6g/Q5gKGj/UrBfKLoTzaQzzvMHdRObiFSWgoaCmbUD7wW+HqwbcD3wULDL/cAtwfLNwTrB9p3B/kW3o0MzpopIZSr0mcLfAZ8DZiYTagGG3X2mX+YQsClY3gQcBAi2jwT7z2Jmd5pZr5n1Dg4OFqToeEM1W9oaNNgsIhWnYKFgZu8Djrn7ruX8XHe/19173L2nra1tOT96lmQizq5+3cQmIpW
lkGcK7wBuMrMDwPfIdhv9T6DZzGLBPu3AQLA8AHQABNtXAycKWN8FJRNxhsen2X98LKwSRESKrmCh4O6fd/d2d+8CPgT83N3/HfAk8IFgt9uBR4LlR4N1gu0/9xC/pic1OZ6IVKAw7lP4c+CzZraP7JjBfUH7fUBL0P5Z4O4Qasu5rLWRptqYxhVEpKLEFt7l0rn7U8BTwfJ+4Op59jkDfLAY9SxGJGJ0J+K6AklEKoruaL6AZGec3x09zcjEdNiliIgUhULhAmbGFfbobEFEKoRC4QK2dTQTMTSuICIVQ6FwAQ01Ma5Y38QunSmISIVQKCwgmYjzXP8w6YxuYhORlU+hsIBkIs7YVJpX3xgNuxQRkYJTKCwgdxObupBEpAIoFBbQHq+jbVWNBptFpCIoFBZgZiQ745ruQkQqgkJhEboTzfSfHGdwdDLsUkRECkqhsAgz4wqa8kJEVjqFwiK8ZeNqqqMRjSuIyIqnUFiE2qooV21q0riCiKx4CoVFSibi7B0YYTKVDrsUEZGCUSgsUndnnKlUhpcOnwq7FBGRglEoLFL3zGCzupBEZAVTKCzSuqZa2uN1ugJJRFY0hcISJBPZm9hCfHS0iEhBKRSWIJmIc/TUJAPDE2GXIiJSEAqFJejuDCbH07iCiKxQCoUluGL9KuqqouzpHw67FBGRglAoLEEsGmF7R7POFERkxVIoLFEyEeflI6cYn0qFXYqIyLJTKCxRMhEnnXGePzgSdikiIstOobBEOzqbAc2YKiIrk0JhiZrrq9nS1qBxBRFZkRQKFyGZiLO7XzexicjKo1C4CMlEnOHxafYfHwu7FBGRZaVQuAgzT2JTF5KIrDQKhYtwWWsjq+uqNGOqiKw4CoWLEIkYOzp1E5uIrDwKhYuU7Izz+2OnGZmYDrsUEZFlo1C4SDPjCnt0v4KIrCAFCwUzqzWz35jZ82b2kpn9ZdC+2cyeMbN9ZvZ9M6sO2muC9X3B9q5C1bYctnU0EzE9iU1EVpZCnilMAte7+zZgO3CDmV0DfBH4krtvBYaAO4L97wCGgvYvBfuVrIaaGG/e0MQunSmIyApSsFDwrNPBalXwcuB64KGg/X7glmD55mCdYPtOM7NC1bcckok4z/UPk0pnwi5FRGRZFHRMwcyiZvYccAx4HHgNGHb3mSlGDwGbguVNwEGAYPsI0DLPZ95pZr1m1js4OFjI8hfU3RlnbCrNq0dHQ61DRGS5FDQU3D3t7tuBduBq4Ipl+Mx73b3H3Xva2touucZLMTPYvFsP3RGRFaIoVx+5+zDwJPA2oNnMYsGmdmAgWB4AOgCC7auBE8Wo72K1x+toW1WjwWYRWTEKefVRm5k1B8t1wLuBV8iGwweC3W4HHgmWHw3WCbb/3Et8xjkzI9kZ101sIrJiFPJMYQPwpJntBZ4FHnf3nwB/DnzWzPaRHTO4L9j/PqAlaP8scHcBa1s2yUSc/pPjHBs9E3YpIiKXLLbwLhfH3fcCO+Zp3092fGFu+xngg4Wqp1C6E8FDd/qGueGq9SFXIyJyaXRH8yV6y8bVVEcjurNZRFYEhcIlqq2KctWmJo0riMiKoFBYBslEnL0DI0ym0mGXIiJySRYMheAGtL8pRjHlKpmIM5XK8NLhU2GXIiJySRYMBXdPA+8sQi1lq7szuIlNXUgiUuYWe/XRHjN7FPgBkHswsbv/sCBVlZm1TbW0x+vY1TfEf/jDsKsREbl4iw2FWrJ3F1+f1+aAQiGQTMR5ev8J3J0Sn8dPROS8FhUK7v7RQhdS7pKJOI88d5iB4Qna4/VhlyMiclEWdfWRmbWb2Y/M7FjwetjM2gtdXDmZGVfQpakiUs4We0nqN8nOTbQxeP04aJPAFetXUV8d1WCziJS1xYZCm7t/091TwetbQLjzVpeYWDTCtvZmPYlNRMraYkPhhJl9OLhnIWpmH6bEp7UOQzIR55Ujo4xPpRbeWUSkBC02FP49cCvwBnCE7NTWGnyeI5mIk844zx8
cCbsUEZGLsuDVR2YWBd7v7jcVoZ6ytqMzmDG1f4i3bTnnSaIiIiVvsXc031aEWspec301W9c26gokESlbi7157f+Z2d8D32f2Hc27C1JVGevubOaxl4+SyTiRiG5iE5HysthQ2B68/1VemzP7DmchO67wYO8h9h8fY+vaxrDLERFZksWMKUSAe9z9wSLUU/aSiWByvP4hhYKIlJ3FjClkgM8VoZYV4bLWRlbXVekmNhEpS4u9JPX/mtl/NbMOM1sz8ypoZWUqEjG6O5s12CwiZWmxYwp/GrzfldfmwGXLW87KkEzEefLVQUbGp1ldXxV2OSIii7bYWVI3F7qQlST30J2DQ1z3prUhVyMisngX7D4ys8/lLX9wzrb/Uaiiyt22jmYiBnvUhSQiZWahMYUP5S1/fs62G5a5lhWjoSbGmzc0aXI8ESk7C4WCnWd5vnXJk0zEea5/mFQ6E3YpIiKLtlAo+HmW51uXPMlEnLGpNK8eHQ27FBGRRVtooHmbmZ0ie1ZQFywTrNcWtLIylxts7hviLRtXh1yNiMjiXPBMwd2j7t7k7qvcPRYsz6zrWssLaI/X0baqRvcriEhZWezNa7JEZkayM87u/uGwSxERWTSFQgElE3H6T45zbPRM2KWIiCyKQqGAumcmx+vT2YKIlAeFQgFdtamJ6miE3bpfQUTKRMFCIZg870kze9nMXjKzTwfta8zscTP7ffAeD9rNzL5sZvvMbK+ZdReqtmKpiUW5alOTBptFpGwU8kwhBfwXd78SuAa4y8yuBO4GnnD3y4EngnWA9wCXB687gXsKWFvRJBNxXhgYYTKVDrsUEZEFFSwU3P3IzOM63X0UeAXYBNwM3B/sdj9wS7B8M/CAZz0NNJvZhkLVVyzJRJypVIaXDp9aeGcRkZAVZUzBzLqAHcAzwDp3PxJsegNYFyxvAg7m/dihoG3uZ91pZr1m1js4OFiwmpdL/k1sIiKlruChYGaNwMPAZ9x91tdld3eWOF2Gu9/r7j3u3tPW1raMlRbG2qZaOtbUaVxBRMpCQUPBzKrIBsK33f2HQfPRmW6h4P1Y0D4AdOT9eHvQVva6O+Ps6hsim4EiIqWrkFcfGXAf8Iq7/23epkeB24Pl24FH8to/ElyFdA0wktfNVNaSiTjHRicZGJ4IuxQRkQta7OM4L8Y7gD8DXjCz54K2LwB/DTxoZncAfcCtwbafAjcC+4Bx4KMFrK2oZsYVdvUN0R6vD7kaEZHzK1gouPs/cf5nLuycZ39n9jOgV4wr1q+ivjrK7r4hbt5+zti5iEjJ0B3NRRCLRtje0awnsYlIyVMoFEkyEeeVI6OMTabCLkVE5LwUCkXS3RknnXGeP6TJ8USkdCkUimRHZzMAe/R8BREpYQqFImmur2br2kbdxCYiJU2hUETZJ7ENkcnoJjYRKU0KhSJKJuIMj0+z//hY2KWIiMxLoVBE3YnsuIImxxORUqVQKKLLWhtZXVelJ7GJSMlSKBRRJGJ0dzZrsFlESpZCociSiTi/P3aakfHpsEsRETmHQqHIuhPBQ3cO6mxBREqPQqHItrU3E42YBptFpCQpFIqsoSbGFetXaVxBREqSQiEEyUSc5w8Ok0pnwi5FRGQWhUIIkok4Y1NpXj06GnYpIiKzKBRCMPMkNo0riEipUSiEoD1ex9pVNRpXEJGSo1AIgZnR3RnXk9hEpOQoFEKSTMQ5eHKCY6Nnwi5FRCRHoRCS3E1sfXrojoiUDoVCSK7a1ER1NKLJ8USkpCgUQlITi/IH7as12CwiJUWhEKLuzmZeODTCZCoddikiIoBCIVTJRJypdIYXB06FXYqICKBQCNXMTWx7NK4gIiVCoRCitU21dKyp07iCiJQMhULIkp1xevuGcPewSxERUSiELZmIMzg6yaGhibBLERFRKIRtx8zkeBpXEJESoFAI2RXrV1FfHdWMqSJSEhQKIYtFI2zvaNbkeCJSEhQKJSCZiPPKkVHGJlNhlyIiFa5goWB
m3zCzY2b2Yl7bGjN73Mx+H7zHg3Yzsy+b2T4z22tm3YWqqxR1J+KkM87zhzQ5noiEq5BnCt8CbpjTdjfwhLtfDjwRrAO8B7g8eN0J3FPAukpOd4eexCYipaFgoeDuvwROzmm+Gbg/WL4fuCWv/QHPehpoNrMNhaqt1Kyur2Lr2kbdxCYioSv2mMI6dz8SLL8BrAuWNwEH8/Y7FLSdw8zuNLNeM+sdHBwsXKVFluyMs+fgMJmMbmITkfCENtDs2Vt4l3wEdPd73b3H3Xva2toKUFk4kok4w+PT7D8+FnYpIlLBih0KR2e6hYL3Y0H7ANCRt1970FYxzj6JTV1IIhKeYofCo8DtwfLtwCN57R8JrkK6BhjJ62aqCJe1NtBcX6VxBREJVaxQH2xm3wWuBVrN7BDwF8BfAw+a2R1AH3BrsPtPgRuBfcA48NFC1VWqIhFjh25iE5GQFSwU3P2282zaOc++DtxVqFrKRU/XGp589VX+6scv87E/uoy1TbVhlyQiFaZgoSBL95G3Jdg/OMb9vz7At5/p47arO/n4tVtYp3AQkSKxcp7Hv6enx3t7e8MuY9n1nRjj73++jx/uGSAaMf7t1Z187I+2sH61wkFELp2Z7XL3nnm3KRRKV/+Jcf7hyX08vPsQkYhx21s7+Pi1WxUOInJJFApl7uDJbDg8tOsQETP+9K0dfPzaLWxsrgu7NBEpQwqFFeLgyXG+8tQ+ftCbDYdb39rOJ67dqnAQkSVRKKwwh4bG+cpTr/GD3uzMILf2dPCJ67aySeEgIougUFihBoYn+MqT+3gwCIcP9nTwiWu30B6vD7kyESllCoUVbmB4gnue2seDzx7CcT6QzHYrdaxROIjIuRQKFeLw8AT3PPUa33/2IBnPhsNd1ykcRGQ2hUKFOTIywVefeo3vPnuQTMZ5f/cmPnnd5XS2KBxERKFQsd4YOcNXf/Ea3/lNP+mM8/4dm/jk9VtJtDSEXZqIhEihUOGOngrC4Zl+Uhnn3+zYxCev20pXq8JBpBIpFASAY6fO8NVf7Ofbz/SRyjg3b9/Ip66/nM0KB5GKolCQWY6NnuF/BeEwlcpwy/Zst9JlbY1hlyYiRaBQkHkNjk5y7y9f4x+fzobDTds28qmdl7NF4SCyoikU5IIGRyf52q/284+/7mMyleZPtmW7lbauVTiIrEQKBVmU46cn+dov9/PAr/s4k0rzJ/9iI/9p51a2rl0VdmkisowUCrIkJ05P8rVfvc4Dvz7AxHSa9/7BBj5+7RbevL6JSMTCLk9ELpFCQS7KybEpvvar/TzwzwcYm0pTHYuQWFNPV2sDm1sb6GppoKu1ns2tDaxvqsVMgSFSDhQKckmGxqb42Utv8PrxMV4/PsaB42P0nRxnKpXJ7VNbFaGrJQiL1gY2t2Tfu1rraWusUWCIlJALhYKe0SwLijdUc9vVnbPa0hnn8PAEB05kQ+L14+McODHGq2+M8vjLR0llzn7ZaKiOBgFxNiw2t9bT1dLAmobqsguMM9NpTo5NcXJsihNjU5wcm+TE6alc2/HTU4xNptjYXMfm1no2tzbmzqjqq/VPTkqbzhRk2aXSGQaGJ3JnFQdOjGeXT4xxaGiCdF5grKqN5XVFnQ2Lza0NNNdXF6Xeiak0J8Ymzx7kT5894J84ndcevE5Ppub9nFjEWNNQzZqGahpqYgwMTfDGqTOz9lnXVENXSwOXtTXk/pybWxvobKmnJhYtxh9XRGcKUlyxaIRES0N2jqU3zd42lcpwaCh7VvH68fEgNMbY3T/Ej/ceJv87SnN9VfYA2tow50yjnlW1VfP+bndnfCp9zrf4mYN69hv97AP9+FR63s+qihotDTWsaaimpbGaREs9axqqaW2syR38W3LvNTTVxc456xmfSnHg+Myf9+zrsZeOcmJsKrdfxAjOLM4GRVdr9s++qbmOWDRycf8xRJZIZwpSMiZTaQ6eHM+FxetB19SB42McHpn9jbu1sZqulgbWr65l9Ew
qr+tmksm8sY581bEILcEBfk1DTe6Anj3QZ9tyB/rGalbVnHuQX04jE9NB19vZ14ETY7w+OMZo3tlIVdToiNefHa/Je61vqtUVYbJkGmiWsjcxlabvZN74RRAaR0+doam2KjjQz3xzr8k7+Ge/xa9prKahOloW4xfuzomxKQ4cH2P/8bFZwXHgxBhnps+GXk0sMqv77bK84GhtLL/xGikOhYLICpHJOEdHz/D6YDYUXx8cy3VN9Z8cZzp99t9zY01sztlFPYmWBtbUV9NUV8Wq2hhV6paqSBpTEFkhIhFjw+o6Nqyu4+1bW2dtS6UzHB4+w/7jp8+eXZwY57mDQ/yfvYfJzPP9r746SlNtFU11MVbVVtFUG6OprirXln2fbz27f3VMoXIp0hlnMpVmcjrDZCqTXU5lgvX02bY526dSGd6+pZUrNzYte00KBZEVIhaN0NlSn33C3pwB/pnxmv6T44xMTHNqIsWpiWlOnQmWz2SXj5+eYv/xsWBbataVYvOprYrMCoqlBEpTXWxZrrjKZJyMO2l3MhlIu5POOJnMTJvntWW3Z87Tnp75rLyfn0pl5j84B8tT6QyT0zMH8PkP4rMP8mf3Ty3w93sh/+2WqxQKInJxamJRtq5dtaR5rGau5JoJjtEzc0IkCI78cDkZjIXMtC900KuJRbJdWTXZQ9HZA3b2W/S5B/VzD/5hMoPaWJSaqgg1sQg1sWj2verscmNNNvwutE9NLEL1edprquZfrqsqzCXMCgURmZeZ0VATo6EmxobVS/95d2diOs3omXnOSuYGypkUBkQjRtSMSMSIWHY9Yjbr/ewyuX3P/ky2/dx9z+4zs/3cz2WefY3q6PkP1LGIrbjBfIWCiBSEmVFfHaO+Osa6ptqwy5FF0iiRiIjklFQomNkNZvaqme0zs7vDrkdEpNKUTCiYWRT4B+A9wJXAbWZ2ZbhViYhUlpIJBeBqYJ+773f3KeB7wM0h1yQiUlFKKRQ2AQfz1g8FbSIiUiSlFAqLYmZ3mlmvmfUODg6GXY6IyIpSSqEwAHTkrbcHbbO4+73u3uPuPW1tbUUrTkSkEpRSKDwLXG5mm82sGvgQ8GjINYmIVJSSmiXVzG4E/g6IAt9w9/++wP6DQN9F/rpW4PhF/mwhqa6lUV1LV6q1qa6luZS6Eu4+b1dLSYVCMZlZ7/mmjg2T6loa1bV0pVqb6lqaQtVVSt1HIiISMoWCiIjkVHIo3Bt2AeehupZGdS1dqdamupamIHVV7JiCiIicq5LPFEREZA6FgoiI5FRcKJjZN8zsmJm9GHYt+cysw8yeNLOXzewlM/t02DUBmFmtmf3GzJ4P6vrLsGvKZ2ZRM9tjZj8Ju5YZZnbAzF4ws+fMrDfsemaYWbOZPWRmvzWzV8zsbSVQ05uCv6eZ1ykz+0zYdQGY2X8O/p9/0cy+a2Yl8aQgM/t0UNNLhfi7qrgxBTN7F3AaeMDdrwq7nhlmtgHY4O67zWwVsAu4xd1fDrkuAxrc/bSZVQH/BHza3Z8Os64ZZvZZoAdocvf3hV0PZEMB6HH3krrhyczuB37l7l8PZg2od/fhsOuaEUyfPwD8S3e/2JtSl6uWTWT/X7/S3SfM7EHgp+7+rZDruorsDNJXA1PAz4CPufu+5fodFXem4O6/BE6GXcdc7n7E3XcHy6PAK5TALLGedTpYrQpeJfFNwszagfcCXw+7llJnZquBdwH3Abj7VCkFQmAn8FrYgZAnBtSZWQyoBw6HXA/Am4Fn3H3c3VPAL4D3L+cvqLhQKAdm1gXsAJ4Jt5KsoIvmOeAY8Li7l0RdZKdE+RyQCbuQORx4zMx2mdmdYRcT2AwMAt8Mutu+bmYNYRc1x4eA74ZdBIC7DwB/A/QDR4ARd38s3KoAeBH4QzNrMbN64EZmTyR6yRQKJcbMGoGHgc+4+6mw6wFw97S7byc7c+3VwSlsqMzsfcAxd98Vdi3zeKe7d5N9iuBdQZdl2GJAN3CPu+8AxoCSeeR
t0J11E/CDsGsBMLM42Yd8bQY2Ag1m9uFwqwJ3fwX4IvAY2a6j54D0cv4OhUIJCfrsHwa+7e4/DLueuYLuhieBG8KuBXgHcFPQf/894Hoz+9/hlpQVfMvE3Y8BPyLb/xu2Q8ChvLO8h8iGRKl4D7Db3Y+GXUjgXwGvu/ugu08DPwTeHnJNALj7fe6edPd3AUPA75bz8xUKJSIY0L0PeMXd/zbsemaYWZuZNQfLdcC7gd+GWxW4++fdvd3du8h2O/zc3UP/JmdmDcGFAgTdM39M9pQ/VO7+BnDQzN4UNO0EQr2IYY7bKJGuo0A/cI2Z1Qf/NneSHecLnZmtDd47yY4nfGc5Pz+2nB9WDszsu8C1QKuZHQL+wt3vC7cqIPvN98+AF4L+e4AvuPtPQ6wJYANwf3BlSAR40N1L5vLPErQO+FH2OEIM+I67/yzcknI+BXw76KrZD3w05HqAXHi+G/iPYdcyw92fMbOHgN1ACthD6Ux38bCZtQDTwF3LfcFAxV2SKiIi56fuIxERyVEoiIhIjkJBRERyFAoiIpKjUBARkRyFgsgyMrPTecs3mtnvzCwRZk0iS1Fx9ymIFIOZ7QS+DPzrEprgTWRBCgWRZRbMdfQ14EZ3fy3sekSWQjeviSwjM5sGRoFr3X1v2PWILJXGFESW1zTwz8AdYRcicjEUCiLLKwPcSnaK8S+EXYzIUmlMQWSZufu4mb0X+JWZHS2RCRdFFkWhIFIA7n7SzG4Afmlmg+7+aNg1iSyGBppFRCRHYwoiIpKjUBARkRyFgoiI5CgUREQkR6EgIiI5CgUREclRKIiISM7/BzdSFx7LBl6XAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [
    "# Create an \"Elbow chart\" to find the \"best\" `k`\n",
    "# See: https://en.wikipedia.org/wiki/Elbow_method_(clustering)\n",
    "# NOTE(review): `KMeans` presumably picks its initial centroids at random, so the\n",
    "# curve can differ between runs - confirm and seed if reproducibility matters\n",
    "\n",
    "# Lists to record the `k` values and the computed `error` sums\n",
    "# which are used for plotting later on\n",
    "ks: List[int] = []\n",
    "error_sums: List[float] = []\n",
    "\n",
    "# Create clusterings for the range of `k` values (`k` = 1, 2, ..., 9)\n",
    "for k in range(1, 10):\n",
    "    # Create and train a new KMeans instance for the current `k`\n",
    "    # It gets its own name so that the `km` model used by the cluster plot is not overwritten\n",
    "    elbow_km: KMeans = KMeans(k)\n",
    "    elbow_km.train(data_points)\n",
    "    # List to keep track of the individual squared errors of this clustering\n",
    "    errors: List[float] = []\n",
    "    # Iterate over all `clusters` and extract their `centroid_idx`s and `items`\n",
    "    # (`centroid_idx` is the key shared by `clusters` and `centroids`)\n",
    "    centroid_idx: int\n",
    "    items: List[List[float]]\n",
    "    for centroid_idx, items in elbow_km.clusters.items():\n",
    "        # Lookup `centroid` coordinates based on its index\n",
    "        centroid: List[float] = elbow_km.centroids[centroid_idx]\n",
    "        # Iterate over each `item` in the cluster\n",
    "        item: List[float]\n",
    "        for item in items:\n",
    "            # Calculate how far the current `cluster`s `item` is from the `centroid`\n",
    "            dist: float = distance(centroid, item)\n",
    "            # The closer the `item` in question, the better (less error)\n",
    "            # (the closest one can be is `0`)\n",
    "            error: float = squared_error(dist, 0)\n",
    "            # Record the `error` value\n",
    "            errors.append(error)\n",
    "    # Append the current `k` and the sum of all `errors`\n",
    "    ks.append(k)\n",
    "    error_sums.append(sum(errors))\n",
    "\n",
    "# Plot the `k` and error values to see which `k` is \"best\"\n",
    "# (the trailing `;` hides the return value in the cell output)\n",
    "plt.plot(ks, error_sums)\n",
    "plt.xlabel('K')\n",
    "plt.ylabel('Error');"
  ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", 
"pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 4 }