{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Euclidean vs. Cosine Distance\n", "https://cmry.github.io/notes/euclidean-v-cosine" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np\n", "\n", "X = np.array([[6.6, 6.2, 1],\n", " [9.7, 9.9, 2],\n", " [8.0, 8.3, 2],\n", " [6.3, 5.4, 1],\n", " [1.3, 2.7, 0],\n", " [2.3, 3.1, 0],\n", " [6.6, 6.0, 1],\n", " [6.5, 6.4, 1],\n", " [6.3, 5.8, 1],\n", " [9.5, 9.9, 2],\n", " [8.9, 8.9, 2],\n", " [8.7, 9.5, 2],\n", " [2.5, 3.8, 0],\n", " [2.0, 3.1, 0],\n", " [1.3, 1.3, 0]])" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
weightlengthlabel
06.66.21.0
19.79.92.0
28.08.32.0
36.35.41.0
41.32.70.0
52.33.10.0
66.66.01.0
76.56.41.0
86.35.81.0
99.59.92.0
108.98.92.0
118.79.52.0
122.53.80.0
132.03.10.0
141.31.30.0
\n", "
" ], "text/plain": [ " weight length label\n", "0 6.6 6.2 1.0\n", "1 9.7 9.9 2.0\n", "2 8.0 8.3 2.0\n", "3 6.3 5.4 1.0\n", "4 1.3 2.7 0.0\n", "5 2.3 3.1 0.0\n", "6 6.6 6.0 1.0\n", "7 6.5 6.4 1.0\n", "8 6.3 5.8 1.0\n", "9 9.5 9.9 2.0\n", "10 8.9 8.9 2.0\n", "11 8.7 9.5 2.0\n", "12 2.5 3.8 0.0\n", "13 2.0 3.1 0.0\n", "14 1.3 1.3 0.0" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "\n", "df = pd.DataFrame(X, columns=['weight', 'length', 'label'])\n", "df" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAGHlJREFUeJzt3X+QXWWd5/H3Nz/ahJAAQhdEo+lU\nTfiZbExoECaDCiQroxSK4yBZtWRopJZalHFcZrF0iFpbLmVRKpbW1KLBKGKIMM7IzrgWBAcYMIb8\nIIgmmdWSEKIg16iRZRPS0N/949xAJzbdt7vv7XO7z/tV1Zx7z719zje3yP3kOc95nicyE0lSdU0q\nuwBJUrkMAkmqOINAkirOIJCkijMIJKniDAJJqjiDQJIqziCQpIozCCSp4qaUXUAjjjvuuOzq6iq7\nDEkaVzZv3vybzOwc6n3jIgi6urrYtGlT2WVI0rgSEU808j4vDUlSxRkEklRxBoEkVdy46CMYSG9v\nL7t372b//v1ll1KqadOmMWfOHKZOnVp2KZLGqZYFQUTcAlwIPJOZC+r7Xg2sBbqAncAlmfm7kRx/\n9+7dzJw5k66uLiKiOUWPM5nJnj172L17N/PmzSu7HEnjVCsvDa0GLjhs33XAvZk5H7i3/nxE9u/f\nz7HHHlvZEACICI499tjKt4okjU7LgiAzHwB+e9judwBfrz/+OvDO0ZyjyiFwkJ+BpNEa687i4zPz\nqfrjp4HjX+mNEXFlRGyKiE21Wm1sqpOksVKrwcaNxXY4r7VAaXcNZbFY8isumJyZN2dmd2Z2d3YO\nOTBOksaPNWtg7lxYvrzYrlnT2GstMtZB8OuImA1Q3z4zxueXpHLVatDTA/v2wd69xbanp9g/2Gst\nNNZBcBfwgfrjDwDfHcuTN7O1df311/OFL3zhpecf//jHuemmm7j22mtZsGABCxcuZO3atQDcd999\nXHjhhS+99+qrr2b16tVAMX3GypUrWbJkCQsXLmTHjh31WmssX76c0047jSuuuIK5c+fym9/8ZvSF\nSyrXzp3Q0XHovqlTi/2DvdZCLQuCiFgDrAdOiojdEdED3AAsj4ifAcvqz8dEs1tbl19+Od/4xjcA\n6Ovr4/bbb2fOnDls3bqVRx99lHXr1nHttdfy1FNPDXEkOO6449iyZQtXXXUVN954IwCf+tSnOO+8\n8/jpT3/Ku9/
9bnbt2jW6giW1h64uOHDg0H29vcX+wV5roVbeNbQiM2dn5tTMnJOZqzJzT2aen5nz\nM3NZZh5+V1FLtKK11dXVxbHHHssjjzzC3XffzeLFi3nwwQdZsWIFkydP5vjjj+fNb34zGzduHPJY\n73rXuwA4/fTT2VlP/gcffJBLL70UgAsuuIBjjjlm5MVKah+dnbBqFUyfDrNmFdtVq4r9g73WQuN2\nZPFwHGxt7dv38r6Dra3RfL5XXHEFq1ev5umnn+byyy/nnnvuGfB9U6ZMoa+v76Xnh9/3/6pXvQqA\nyZMn88ILL4y8IEnjw4oVsGxZ8SXU1XXoF9Fgr7VIJeYaalVr6+KLL+b73/8+Gzdu5K1vfSvnnHMO\na9eu5cUXX6RWq/HAAw9w5plnMnfuXLZt28bzzz/P73//e+69994hj7106VK+/e1vA3D33Xfzu9+N\naAC2pHbV2QlnnDHwF/1gr7VAJVoEB1tbPT1FS6C3tzmtrY6ODs4991yOPvpoJk+ezMUXX8z69etZ\ntGgREcFnP/tZTjjhBAAuueQSFixYwLx581i8ePGQx165ciUrVqzg1ltv5eyzz+aEE05g5syZoytY\nkgYQxe387a27uzsPX5hm+/btnHLKKcM6Tq3W3NZWX18fS5Ys4Y477mD+/PmjP2A/zz//PJMnT2bK\nlCmsX7+eq666iq1btw743pF8FpL6afaXQ5uIiM2Z2T3U+yrRIjjoYF9MM2zbto0LL7yQiy++uOkh\nALBr1y4uueQS+vr66Ojo4Ctf+UrTzyGJ4hbCnp6iI/HAgeJywYoVZVc1pioVBM106qmn8otf/KJl\nx58/fz6PPPJIy44viUNvKTx4N0lPT9FZO4FaBkOpRGexJA2opAFc7cYgkFRdJQ3gajcGgaTqKmkA\nV7uxj0BStZUwgKvd2CJosbvuuosbbhh4SqUjjzxyjKuRNKAxHsDVbmwRtNhFF13ERRddVHYZkvSK\nqtUi2F+DPRuLbRPs3LmTk08+mcsuu4wTTzyR9773vaxbt46lS5cyf/58Hn74YVavXs3VV18NwOOP\nP87ZZ5/NwoUL+cQnPtGUGiRptKoTBDvXwHfnwg+WF9udzVn15+c//zkf/ehH2bFjBzt27OBb3/oW\nDz74IDfeeCOf+cxnDnnvNddcw1VXXcVjjz3G7Nmzm3J+SRqtagTB/hps6IEX90Hv3mK7oacpLYN5\n8+axcOFCJk2axGmnncb5559PRLBw4cKXppQ+6KGHHmJFfcTi+9///lGfW5KaoRpB8NxOmHTYoJFJ\nU4v9o3RwCmmASZMmvfR80qRJA04pHRGjPqckNVM1gmBGF/QdNmikr7fYP4aWLl3K7bffDsBtt902\npueWNELNXOO2TVUjCKZ1whtXweTpMHVWsX3jqmL/GLrpppv48pe/zMKFC/nlL385pueWNALNXuO2\nTVVqGmr214rLQTO6xjwEWslpqKUWqNWKL//+SxtOnw5PPDFuxhs4DfVApnVOqACQ1EKtWuO2DVXj\n0pAkDVeFJqQzCCRpIBWakK5al4YkaTgqMiGdQSBJg2nmGrdtyktDklRxBkEL9Z9w7pXs3LmTBQsW\nALB161a+973vjUVpkvQSg6CNGASSylCtIGjyUPF3vvOdnH766Zx22mncfPPNAHzta1/jxBNP5Mwz\nz+Shhx566b2XXXYZd95550vPD1+U5sCBA1x//fWsXbuWN7zhDaxdu7YpNUrSUKrTWbxmDfT0FANE\nDhwobgOrzwQ6UrfccguvfvWr2bdvH2eccQZvf/vbWblyJZs3b+aoo47i3HPPZfHixQ0dq6Ojg09/\n+tNs2rSJL33pS6OqS5KGoxotglqtCIF9+2Dv3mLb0zPqlsEXv/hFFi1axFlnncWTTz7Jrbfeylve\n8hY6Ozvp6OjgPe95T5P+AJLUOtUIgoNDxfs7OFR8hO677z7WrVvH+vXrefTRR
1m8eDEnn3zyK75/\nypQp9PX1AdDX18eBw0csShpYBWb/LFs1gqAFQ8X37t3LMcccwxFHHMGOHTv40Y9+xL59+7j//vvZ\ns2cPvb293HHHHf1K6GLz5s1AsaB9b2/vHx1z5syZPPvssyOuSZpwKjL7Z9mqEQQtGCp+wQUX8MIL\nL3DKKadw3XXXcdZZZzF79mw++clPcvbZZ7N06dJDZgT94Ac/yP3338+iRYtYv349M2bM+KNjnnvu\nuWzbts3OYgladklXf6xa01DXahNyqLjTUGtC2rixaAns3fvyvlmzYN06OOOM8uoaR5yGeiAVGCou\nTRgVmv2zbNW4NCRp/KnQ7J9lK6VFEBEfAa4AEngM+KvM3D/c42Rm5ReDHw+X9qQRq8jsn2Ub8xZB\nRLwW+DDQnZkLgMnApcM9zrRp09izZ0+lvwgzkz179jBt2rSyS5Fap7Oz6BMwBFqmrD6CKcD0iOgF\njgB+NdwDzJkzh927d1Or+B0E06ZNY86cOWWXIWkcG/MgyMxfRsSNwC5gH3B3Zt493ONMnTqVefPm\nNb0+SaqaMi4NHQO8A5gHvAaYERHvG+B9V0bEpojYVPV/9UtSK5Vx19Ay4PHMrGVmL/Ad4E8Pf1Nm\n3pyZ3ZnZ3em1QUlqmTKCYBdwVkQcEcUtP+cD20uoQ5JECUGQmRuAO4EtFLeOTgJuHus6JEmFUu4a\nysyVwMoyzi1JOpQjiyWp4gwCSao4g0CSKs4gkKSKMwgkqeIMAkmqOINAkirOIJCkijMIJKniDAJJ\nqjiDQJIqziCQpIozCCSp4gwCSao4g0CSKs4gkKSKMwgkqeIMAkmqOINAkirOIJCkijMIJKniDAJJ\nqjiDQJIqziCQpIozCCSp4gwCSao4g0CSKs4gkKSKMwikiWJ/DfZsLLbSMEwpuwBJTbBzDWzogUkd\n0HcA3rgKulaUXZXGCVsE0ni3v1aEwIv7oHdvsd3QY8tADTMIpPHuuZ1FS6C/SVOL/VIDDAJpvJvR\nVVwO6q+vt9gvNcAgkMa7aZ1Fn8Dk6TB1VrFd8vmiReDlITXAzmJpIuhaAScsK778f7sFtnzEjmM1\nrOEgiIjJwPH9fyczd7WiKEkjMK2z2K57c9Fh/OK+4vmGniIkDr4uHaahIIiIDwErgV8DffXdCfyH\nFtUlaSQOdhwfDAF4uePYINAraLRFcA1wUmbuacZJI+Jo4KvAAopAuTwz1zfj2FKl2XGsEWi0s/hJ\nYG8Tz3sT8P3MPBlYBGxv4rGl6jrYcTxpGkyeUWzfuMrWgAY1aIsgIv6m/vAXwH0R8S/A8wdfz8zP\nDfeEEXEU8CbgsvoxDgAHBvsdScMUAQH1/0iDGqpFMLP+swu4B+jot+/IEZ5zHlADvhYRj0TEVyNi\nxgiPJam//qOMX3jOUcZqyKAtgsz8FEBE/GVm3tH/tYj4y1GccwnwoczcEBE3AdcBf3fY8a8ErgR4\n/etfP8JTSRVjZ7FGoNE+go81uK8Ru4Hdmbmh/vxOimA4RGbenJndmdnd2en/wFJDs4vaWawRGKqP\n4M+BtwGvjYgv9ntpFvDCSE6YmU9HxJMRcVJm/jtwPrBtJMeSKqPR2UUPdhZv6ClaAn29dhZrSEPd\nPvorYBNwEbC53/5ngY+M4rwfAm6LiA6Kjui/GsWxpImt/3X/RgaJ9R9lPKPLENCQhuojeBR4NCK+\nlZm9zTppZm4Fupt1PGlCG8l1/2mdBoAa1uiAsi0RkYft20vRWvjvzRpoJmkAXvdXizXaWfy/gX8B\n3lv/+V8UIfA0sLollUkqDDS7qNf91USNtgiWZWb/O3sei4gtmbkkIt7XisIk9eN1f7VQo0EwOSLO\nzMyHASLiDGBy/bUR3T0kaZi87q8WaTQIrgBuiYgjKcas/wG4oj4i+H+0qjhJUus1FASZuRFYWJ8n\niMzsPwHdt1tRmKTD7K95aUgt0eh6BK8C/
gLoAqZEFBNZZeanW1aZpJc1OqBMGoFG7xr6LvAOiv6A\n5/r9SGq1/gPKevc6kZyartE+gjmZeUFLK5E0MCeSU4s12iL4YUQsbGklkgbmgDK1WKNB8GfA5oj4\n94j4cUQ8FhE/bmVhkuocUKYWa/TS0J+3tApJg3NAmVqooRZBZj4BvA44r/74/zX6u5KaZFonHHuG\nIaCma+jLPCJWAv+NlxejmQp8s1VFSZLGTqP/qr+YYk2C5wAy81cU6xZLksa5RoPgQGYmkAAuNi9J\nE0ejQfDtiPifwNER8UFgHfCV1pUlSRorjc41dGNELKeYbO4k4PrMvKellUmSxkSjt49S/+L3y1+S\nJphBgyAinqXeL3D4S0Bm5qyWVCVJGjNDLV7vnUGSNME5KEySKs4gkKSKMwgkqeIMAkmqOINAkirO\nIJCkijMIJKniDAJJqjiDQJIqziCQpIozCCSp4gwCSao4g0CSKs4gkKSKMwgkqeJKC4KImBwRj0TE\nP5dVgySp3BbBNcD2Es8vSaKkIIiIOcDbga+WcX5J0svKahF8AfhboK+k80uS6sY8CCLiQuCZzNw8\nxPuujIhNEbGpVquNUXWSVD1ltAiWAhdFxE7gduC8iPjm4W/KzJszszszuzs7O8e6RkmqjDEPgsz8\nWGbOycwu4FLgB5n5vrGuox3VarBxY7GVpLHiOII2sWYNzJ0Ly5cX2zVryq5IUlVEZpZdw5C6u7tz\n06ZNZZfRMrVa8eW/b9/L+6ZPhyeeAK+KSRqpiNicmd1Dvc8WQRvYuRM6Og7dN3VqsV+SWs0gaANd\nXXDgwKH7enuL/ZLUagZBG+jshFWristBs2YV21WrvCwkaWxMKbsAFVasgGXListBXV2GgKSxYxC0\nkc5OA0DS2PPSkCRVnEEgSRVnEEhSxRkEklRxBoEkVZxBIEkVZxBIUsUZBJJUcQZBEzS6joDrDUhq\nRwbBKDW6joDrDUhqV65HMAqNriPgegOSyuB6BGOg0XUEXG9AUjszCEah0XUEXG9AUjszCEah0XUE\nXG9AUjuzj6AJarXG1hFo9H2S1AyN9hG4HkETNLqOgOsNSGpHXhqSpIqb0EHgAC5JGtqEDQIHcElS\nYyZkENRq0NNTDODau7fY9vTYMpCkgUzIIHAAlyQ1bkIGgQO4JKlxEzIIHMAlSY2bsOMIVqyAZcsc\nwCVJQ5mwQQAO4JKkRkzIS0OSpMYZBJJUcQaBJFWcQSBJFWcQSFLFGQSSVHEGgSRV3JgHQUS8LiL+\nNSK2RcRPI+Kasa5BkvSyMgaUvQB8NDO3RMRMYHNE3JOZ20qoRZIqb8xbBJn5VGZuqT9+FtgOvHas\n65AkFUrtI4iILmAxsKHMOiSpykoLgog4EvgH4K8z8w8DvH5lRGyKiE01V5SRpJYpJQgiYipFCNyW\nmd8Z6D2ZeXNmdmdmd6czx0lSy5Rx11AAq4Dtmfm5sT6/JOlQZbQIlgLvB86LiK31n7eVUIckiRJu\nH83MB4EY6/NKkgbmyGJJqjiDQJIqziCQpIozCCSp4gwCSao4g0CSKs4gkKSKMwgkqeImdBDUarBx\nY7GVJA1swgbBmjUwdy4sX15s16wpuyJJak8TMghqNejpgX37YO/eYtvTY8tAkgYyIYNg507o6Dh0\n39SpxX5J0qEmZBB0dcGBA4fu6+0t9kuSDjUhg6CzE1atgunTYdasYrtqVbFfknSoMZ+GeqysWAHL\nlhWXg7q6DAFJeiUTNgig+PI3ACRpcBPy0pAkqXEGgSRVnEEgSRVnEEhSxRkEklRxkZll1zCkiKgB\nT5RdxxCOA35TdhFtzM9ncH4+g/PzGdwrfT5zM3PIeyfHRRCMBxGxKTO7y66jXfn5DM7PZ3B+PoMb\n7efjpSFJqjiDQJIqziBonpvLLqDN+fkMzs9ncH4+gxvV52MfgSRVnC0CSao4g2AUIuJ1EfGvEbEt\nIn4aE
deUXVM7iojJEfFIRPxz2bW0m4g4OiLujIgdEbE9Is4uu6Z2EhEfqf/d+klErImIaWXXVLaI\nuCUinomIn/Tb9+qIuCciflbfHjOcYxoEo/MC8NHMPBU4C/gvEXFqyTW1o2uA7WUX0aZuAr6fmScD\ni/BzeklEvBb4MNCdmQuAycCl5VbVFlYDFxy27zrg3sycD9xbf94wg2AUMvOpzNxSf/wsxV/i15Zb\nVXuJiDnA24Gvll1Lu4mIo4A3AasAMvNAZv6+3KrazhRgekRMAY4AflVyPaXLzAeA3x62+x3A1+uP\nvw68czjHNAiaJCK6gMXAhnIraTtfAP4W6Cu7kDY0D6gBX6tfOvtqRMwou6h2kZm/BG4EdgFPAXsz\n8+5yq2pbx2fmU/XHTwPHD+eXDYImiIgjgX8A/joz/1B2Pe0iIi4EnsnMzWXX0qamAEuAv8/MxcBz\nDLNJP5HVr3O/gyIwXwPMiIj3lVtV+8viVtBh3Q5qEIxSREylCIHbMvM7ZdfTZpYCF0XETuB24LyI\n+Ga5JbWV3cDuzDzYiryTIhhUWAY8npm1zOwFvgP8ack1tatfR8RsgPr2meH8skEwChERFNd3t2fm\n58qup91k5scyc05mdlF08v0gM/0XXV1mPg08GREn1XedD2wrsaR2sws4KyKOqP9dOx8701/JXcAH\n6o8/AHx3OL9sEIzOUuD9FP/S3Vr/eVvZRWlc+RBwW0T8GHgD8JmS62kb9ZbSncAW4DGK76vKjzCO\niDXAeuCkiNgdET3ADcDyiPgZRUvqhmEd05HFklRttggkqeIMAkmqOINAkirOIJCkijMIJKniDAJp\nBOrTQQw6wWBErI6Idw+wvysi/lPrqpOGxyCQRiAzr8jMkQ7+6gIMArUNg0CVFhHXRsSH648/HxE/\nqD8+LyJui4j/GBHrI2JLRNxRn1eKiLgvIrrrj3si4v9ExMMR8ZWI+FK/U7wpIn4YEb/o1zq4ATin\nPgDxI2P4x5UGZBCo6v4NOKf+uBs4sj5/1DnAj4FPAMsycwmwCfib/r8cEa8B/o5iPYqlwMmHHX82\n8GfAhbw82vM64N8y8w2Z+fmm/4mkYZpSdgFSyTYDp0fELOB5iukMuimC4C7gVOChYqobOiiG9vd3\nJnB/Zv4WICLuAE7s9/o/ZWYfsC0ihjU1sDRWDAJVWmb2RsTjwGXADylaAecCfwI8DtyTmStGcYrn\n+z2OURxHahkvDUnF5aH/CjxQf/yfgUeAHwFLI+JPACJiRkSceNjvbgTeHBHH1FfR+osGzvcsMLNZ\nxUujZRBIxZf/bGB9Zv4a2E9xDb9G0VJYU58ddD2H9QHUV9H6DPAw8BCwE9g7xPl+DLwYEY/aWax2\n4Oyj0ihFxJGZ+X/rLYJ/BG7JzH8suy6pUbYIpNH7ZERsBX5C0a/wTyXXIw2LLQJJqjhbBJJUcQaB\nJFWcQSBJFWcQSFLFGQSSVHEGgSRV3P8HQKbJSbkdjwQAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%matplotlib inline\n", "\n", "ax = df[df['label'] == 0].plot.scatter(x='weight', y='length', c='blue', label='young')\n", "ax = df[df['label'] == 1].plot.scatter(x='weight', y='length', c='orange', label='mid', ax=ax)\n", "ax = df[df['label'] == 2].plot.scatter(x='weight', y='length', c='red', label='adult', ax=ax)\n", "ax" ] }, { "cell_type": "code", 
"execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEKCAYAAAAfGVI8AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAHXtJREFUeJzt3X1wVfW97/H3lwQMDdAgRqXEknQM\noCRSILS1HLDyUB+gRC1lilLqNdhe51o91ovaVqGc3noZh7bqoGcuFQ9qER966AG9LRWhVEAuJiDy\nJFUraQwgpiAPRgIhfO8faycmIZCQnb1XyPq8Zpy992+vvdY3e8b94bcevsvcHRERia5OYRcgIiLh\nUhCIiEScgkBEJOIUBCIiEacgEBGJOAWBiEjEKQhERCJOQSAiEnEKAhGRiEsNu4CWOO+88zw7Ozvs\nMkREziobNmz4p7tnNrfcWREE2dnZlJSUhF2GiMhZxcz+0ZLltGtIRCTiFAQiIhGnIBARibiEHSMw\nsyeB8cBH7p4XGzsXeB7IBkqBSe7+cWvWX11dTXl5OVVVVW1TsACQlpZGVlYWnTt3DrsUEUmSRB4s\nXgDMBZ6uN3YfsMLdZ5vZfbHX97Zm5eXl5XTv3p3s7GzMLO5iBdydffv2UV5eTk5OTtjliEiSJGzX\nkLu/BuxvNFwIPBV7/hRwXWvXX1VVRa9evRQCbcjM6NWrl2ZZIhGT7GMEF7j7ntjzD4EL4lmZQqDt\n6TsVaQcqKqC4OHhMgtAOFntwj8xT3ifTzH5gZiVmVlKRpC9DRCR0ixZB374wdmzwuGhRwjeZ7CDY\na2a9AWKPH51qQXef5+4F7l6QmdnshXHtyqFDh8jKyuL222+vG3N3Ro0axaFDh0KsDI4dO8bIkSM5\nfvx4qHWISBMqKqCoCI4cgYMHg8eiooTPDJIdBEuB78eefx9YkuTtJ8UDDzzAyJEjG4z98Y9/ZNCg\nQfTo0SOkqgJdunRh9OjRPP/886HWISJNKC2FLl0ajnXuHIwnUMKCwMwWAeuA/mZWbmZFwGxgrJm9\nC4yJvU6attztVlxczGWXXUZVVRWVlZUMHDiQrVu3smHDBvbu3cs3v/nNBssvXLiQwsJCAGbMmMHD\nDz9c997PfvYzHnnkEdyd6dOnk5eXR35+ft2P9apVqxg/fnzd8rfffjsLFiwAgvYbM2fOZMiQIeTn\n57Njx47Y31rB2LFjGThwINOmTaNv377885//BOC6665j4cKF8X8JItK2srPh2LGGY9XVwXgCJfKs\nocnu3tvdO7t7lrvPd/d97j7a3XPdfYy7Nz6rKGHaerfbsGHDmDBhAvfffz/33HMPU6ZM4dJLL+Xu\nu+9mzpw5Jy2/du1ahg4dCsAtt9zC008HZ9WeOHGC5557jilTprB48WI2bdrEW2+9xauvvsr06dPZ\ns2fPSetq7LzzzmPjxo3cdtttddueNWsWo0aNYtu2bUycOJGysrK65fPy8iguLo7vCxCRtpeZCfPn\nQ9eu0KNH8Dh/fjCeQGdF07l41d/tduRIMFZUBGPGxPf9zpgxg2HDhpGWlsajjz7K448/zrXXXktW\nVtZJy+7fv5/u3bsDwb/ie/XqxZtvvsnevXsZPHgwvXr1Ys2aNUyePJmUlBQuuOACrrjiCoqLi5vd\nnXTDDTcAMHToUBYvXgzAmjVr+MMf/gDA1VdfTc+ePeuWT0lJoUuXLhw+fLiuJhFpJyZPDn6cSkuD\nmUASjpFGIghqd7vVhgB8
ttstnu943759fPLJJ1RXV1NVVcW6detYvXo1jz/+OJ988gnHjh2jW7du\nzJ49m9TUVE6cOEGnTsEkbNq0aSxYsIAPP/yQW2655bTbqf1srcbn+Z9zzjlA8APf0oPAR48eJS0t\n7Uz+XBFJlszMpARArUj0GkrUbrcf/vCH/OIXv+Cmm27i3nvvZeHChZSVlVFaWsqcOXOYOnUqs2cH\nh0H69+/P+++/X/fZ66+/nmXLllFcXMxVV10FwIgRI3j++eepqamhoqKC1157ja985Sv07duX7du3\nc/ToUQ4cOMCKFSuarW348OG88MILALzyyit8/PFnnTz27dvHeeedpzYSIgJEZEZQu9utqCiYCVRX\nx7/b7emnn6Zz587ceOON1NTU8PWvf52VK1cyatSoJpcfN24cq1at4uKLLwaCs3euvPJKMjIySElJ\nAYJwWLduHYMGDcLMeOihh7jwwgsBmDRpEnl5eeTk5DB48OBm65s5cyaTJ0/mmWee4fLLL+fCCy+s\n2w30l7/8hXHjxrX+jxeRjsXd2/1/Q4cO9ca2b99+0lhzPvrI/Y03gsdk2717t48ZM6budU1NjQ8a\nNMjfeeedhGyvqqrKq6ur3d399ddf90GDBtW9d/311/vf/va3U362Nd+tiLQ/QIm34Dc2EjOCWkne\n7dZA7969ufXWWzl06BDl5eWMHz+e66+/ntzc3IRsr6ysjEmTJnHixAm6dOnCb3/7WyC4oOy6666j\nX79+CdmuiJx9LAiN9q2goMAb36ry7bff5pJLLgmpoo5N361Ix2BmG9y9oLnlInGwWERETk1BICIS\ncQoCEZGIUxCIiEScgqCNPfXUU+Tm5pKbm8tTTz1VN+6taEO9dOnSugvSGuvWrRsQNJe7+uqr4yta\nRCItUqePJtr+/fuZNWsWJSUlmBlDhw5lwoQJ9OzZs1VtqCdMmMCECRNOu0xmZia9e/dm7dq1DB8+\nPN4/QUQiKFozgqoK2FccPMapqTbUjz32GGPHjuXcc8+lZ8+ejB07lmXLlgEN21CXlpYyYMAAbr75\nZvr168dNN93Eq6++yvDhw8nNzeWNN94AYMGCBXU3t9m5cyeXX345+fn53H///Q1qUVtpEYlHdIKg\ndBEs6QsrxwaPpfH1oW6qDXXXrl256KKL6pbJyspi165dQMM21ADvvfced999Nzt27GDHjh08++yz\nrFmzhjlz5vDggw+etL0777yT2267jS1bttC7d+8G7xUUFLB69eq4/h4Ria5oBEFVBawvgpojUH0w\neFxfFPfMYMaMGSxfvpySkhLuueee0y5bvw01QE5ODvn5+XTq1ImBAwcyevRozIz8/HxKm7gb0dq1\na5k8eTIA3/ve9xq8d/7557N79+64/hYRia5oBEFlKXRqdPu3Tp2D8TjUtqE+fPgwVVVV9OnThw8+\n+KDu/fLycvr06QOc3Eq6tnU0QKdOneped+rU6ZStpM2syfGqqiq6du0a198iItEVjSBIz4YTjfpQ\nn6gOxuPQuA31VVddVdfy+eOPP+aVV16pazHduA31mRo+fDjPPfccwEnHA9555x3y8vJa/4eISKRF\nIwjSMuGr8yGlK3TuETx+dX4w3kr121Dfd999FBcXs2nTJh544AGGDRvGsGHDmDFjBueeey7wWRvq\n1nrkkUd47LHHyM/PrzvuUEttpUUkHtFqOldVEewOSs+OKwRaY8+ePUydOpXly5e3+bpHjhzJkiVL\nGtyOMh5qOifSMbS06Vy0riNIy0x6ANSq34b6TK4laE5FRQU//vGP2ywERCR6ohUEIZs0aVKbrzMz\nM5PrrruuzdcrItERjWMEIiJySgoCEZGIUxCIiEScgkBEJOIUBG3s6quvJiMjg/Hjx5/03sSJE8/o\norJVq1Y1uZ7GaltSl5aW8uyzz9aNb9myhZtvvrnF2xORaFIQtLHp06fzzDPPnDS+bds2am
pq+NKX\nvpSwbTcOgvz8fMrLyykrK0vYNkXk7BetIKiogOLi4DFOTbWh3rp1K6NHj27QXK5W/TbUALfddhsF\nBQUMHDiQmTNn1o0vW7aMAQMGMGTIEBYvXlw3/vOf/5w5c+bUvc7LyzupOd19993H6tWr+fKXv8xv\nfvMbAL71rW/VtaYQEWlKdIJg0SLo2xfGjg0eF7V9G+rT9ftp3Ib6l7/8JSUlJWzevJm//vWvbN68\nmaqqKm699VZeeuklNmzYwIcffnhGNc2ePZsRI0awadMm7rrrLkAtqkWkedEIgooKKCqCI0fg4MHg\nsago7pnBmbSh3rNnD5mZn13V/MILLzBkyBAGDx7Mtm3b2L59Ozt27CAnJ4fc3FzMjClTpsRVH6hF\ntYg0LxpBUFoKXRq1oe7cORiPQ+M21KfTtWvXumV27tzJnDlzWLFiBZs3b2bcuHHNfr5xG+vmlq+/\nnFpUi8jpRCMIsrPhWKM21NXVwXgcGrehPp1LLrmE9957D4BDhw6Rnp7O5z//efbu3cuf/vQnAAYM\nGEBpaSl///vfAVhUb/dVdnY2GzduBGDjxo3s3LnzpG10796dw4cPNxhTi2oRaU40giAzE+bPh65d\noUeP4HH+/GC8lZpqQ71y5UpGjBjBd77zHVasWEFWVhZ//vOfgYZtqAcNGsTgwYMZMGAAN954Y91N\n59PS0pg3bx7jxo1jyJAhnH/++XXb+/a3v83+/fsZOHAgc+fOpV+/fifVdNlll5GSksKgQYPqDhar\nRbWINCdabagrKoLdQdnZcYVAaxw5coQrr7yStWvXkpKSkpRtHj16lCuuuII1a9aQmtry/oJqQy3S\nMagNdVMyM5MeALW6du3KrFmz2LVrF1/84heTss2ysjJmz559RiEgItGjX4gkqr1tZbLk5uaSm5ub\n1G2KyNknlGMEZnaXmW0zs61mtsjM0sKoQ0REQggCM+sD3AEUuHsekAJ8N9l1iIhIIKyzhlKBrmaW\nCnwO0BVPIiIhSXoQuPsuYA5QBuwBDrr7K42XM7MfmFmJmZVUtEFvIBERaVoYu4Z6AoVADvAFIN3M\nTuql4O7z3L3A3QsyQzrTpzVO14Ya4I477qhrG13r4Ycf5umnnwZg2rRp5OXl0b9/f1566SUAXn75\nZWbMmJHYwkUkssLYNTQG2OnuFe5eDSwGvh5CHQlxqjbUACUlJXz88ccNxo4fP86TTz7JjTfeCMAN\nN9zA1q1bWbp0aV3juHHjxvHSSy/x6aefJrZ4EYmkMIKgDPiamX3OzAwYDbydjA1XVlaya9cuKisr\n417XmbahrqmpYfr06Tz00EMNxleuXMmQIUPqzvW/9tprgeBisLS04GQqM+Mb3/gGL7/8ctx1i4g0\nlvTrCNx9vZn9HtgIHAfeBOYlertbtmxh6dKlpKSkUFNTQ2FhYVw9eOq3oT5y5Eizbajnzp3LhAkT\n6N27d4Pxxu2pAQ4ePMiUKVN48MEH68Zq20lPmjSp1TWLiDQllAvK3H0mMLPZBdtIZWUlS5cu5fjx\n4xw/fhyAJUuWkJOTQ3p6eqvXO2PGDIYNG0ZaWhqPPvroKZfbvXs3L774Yl2vofr27NlzUjuHWbNm\nMXHiRCZMmFA3pnbSIpIokWg6d+DAgZP6+6SkpHDgwIG41tvSNtRvvvkm7733HhdffDHZ2dl8+umn\nXHzxxUDD9tS1Nm/ezDXXXNNgTO2kRSRRItFiIiMjg5qamgZjNTU1ZGRkxLXe2jbUO3fu5N5772Xu\n3LlNLjdu3LgGdxvr1q1bXUvq+u2pa/30pz+tC4paaictIokSiRlBeno6hYWFpKamcs4555Camkph\nYWFcu4XOtA31qVxzzTW89tprDcaeffZZ9uzZ02BM7aRFJFEiMSOA4GbvOTk5HDhwgIyMjLhCAGDq\n1KlMnToVCHYzrV+/HoBRo0Y1+9lPPvmk7nnfvn3p1a
sX7777bl2DuCeeeKLB8nv37uXIkSPk5+fH\nVbOISFMiMSOolZ6eTp8+feIOgbY2e/bsk2YA9ZWVlfGrX/0qiRWJSJREZkbQnvXv35/+/fuf8v1h\nw4YlsRoRiZqzekZwNtxd7Wyj71Qkes7aIEhLS2Pfvn364WpD7s6+ffvqrmgWkWg4a3cNZWVlUV5e\njjqTtq20tDSysrLCLkNEkuisDYLOnTuTk5MTdhkiIme9s3bXkIiItA0FgYhIxCkIREQiTkEgIhJx\nCgIRkYhTEIiIRJyCQEQk4hQEIiIRpyAQEYk4BYGISMQpCEREIk5BICIScQoCEZGIUxCIiEScgkBE\nJOIUBCIiEacgEBGJOAWBiEjEKQhERCJOQSAiEnEKAhGRiFMQiIhEnIJApKOqqoB9xcGjyGmkhl2A\niCRA6SJYXwSdusCJY/DV+ZA9OeyqpJ3SjECko6mqCEKg5ghUHwwe1xdpZiCn1OIZgZmlABfU/4y7\nlyWiKBGJQ2VpMBOoOfLZWKfOwXhaZlhVSTvWoiAwsx8BM4G9wInYsAOXtWajZpYBPAHkxdZzi7uv\na826RKSR9Oxgd1B9J6qDcZEmtHRGcCfQ3933tdF2HwGWuftEM+sCfK6N1isiaZnBMYH1RcFM4ER1\n8FqzATmFlgbBB8DBttigmX0eGAncDODux4Bjp/uMiJyh7Mlw4Zhgd1B6tkJATuu0QWBmP449fR9Y\nZWb/Fzha+767/7oV28wBKoD/MLNBwAbgTnevbMW6RORU0jIVANIizZ011D32XxmwHOhSb6xbK7eZ\nCgwB/t3dBwOVwH2NFzKzH5hZiZmVVFTobAcRkUQ57YzA3WcBmNl33P3F+u+Z2Xdauc1yoNzd18de\n/54mgsDd5wHzAAoKCryV2xIRkWa09DqCn7RwrFnu/iHwgZn1jw2NBra3Zl0iIhK/5o4RXANcC/Qx\ns0frvdUDOB7Hdn8ELIydMfQ+8N/iWJeIiMShubOGdgMlwASCg7q1DgN3tXaj7r4JKGjt50VEpO00\nd4zgLeAtM3vW3auTVJOIiCRRS68j2GhmjQ/YHiSYLfyvNrzQTEREkqylQfAnoAZ4Nvb6uwRXA38I\nLAC+1eaViYhIUrQ0CMa4+5B6r7eY2UZ3H2JmUxJRmIiIJEdLTx9NMbOv1L4ws2FASuxlPGcPiYhI\nyFo6I5gGPGlm3QADDgHTzCwd+N+JKk5ERBKvRUHg7sVAfqxhHO5evwHdC4koTEREkqOl9yM4B/g2\nkA2kmhkA7v5vCatMRESSoqW7hpYQnC66gXrdR0VE5OzX0iDIcverE1qJiIiEoqVnDb1uZvkJrURE\nRELR0hnBvwA3m9lOgl1DBri7t+qexSIi0n60NAiuSWgVIiISmhbtGnL3fwAXAaNizz9t6WdFRKR9\na9GPuZnNBO7ls5vRdAZ+l6iiREQkeVr6r/rrCe5JUAng7rsJ7lssIiJnuZYGwTF3d8ABYq0lRESk\nA2hpELxgZv8HyDCzW4FXgd8mriwREUmWlvYammNmYwmazfUHZrj78oRWJiIiSdHS00eJ/fDrx19E\npIM5bRCY2WFixwUav0VwQVmPhFQlIiJJ09zN63VmkIhIB6eLwkREIk5BICIScQoCEZGIUxCIiESc\ngkBEJOIUBCIiEacgEBGJOAWBiEjEKQhERCJOQSAiEnEKAhGRiFMQiIhEnIJARCTiFAQiIhEXWhCY\nWYqZvWlmL4dVg4iIhDsjuBN4O8Tti4gIIQWBmWUB44Anwti+iIh8JqwZwcPAPcCJkLYvIiIxSQ8C\nMxsPfOTuG5pZ7gdmVmJmJRUVFUmqTkQkesKYEQwHJphZKfAcMMrMftd4IXef5+4F7l6QmZmZ7BpF\nRCIj6UHg7j9x9yx3zwa+C6x09ynJrkNERAK6jkBEJOJSw9y4u68CVoVZg4hI1GlGICIScQoCEZGI\nUxCIiEScgkBEJO
IUBCIiEacgEBGJOAWBiEjEKQhERCJOQSAiEnEKAhGRiFMQiIhEnIJARCTiFAQi\nIhGnIBARiTgFgYhIxCkIREQiTkEgIhJxCgIRkYhTEIiIRJyCQEQk4hQEIiIRF4kgqKiA4uLgUURE\nGurwQbBoEfTtC2PHBo+LFoVdkYhI+9Khg6CiAoqK4MgROHgweCwq0sxARKS+Dh0EpaXQpUvDsc6d\ng3EREQl06CDIzoZjxxqOVVcH4yIiEujQQZCZCfPnQ9eu0KNH8Dh/fjAuIiKB1LALSLTJk2HMmGB3\nUHa2QkBEpLEOHwQQ/PgrAEREmtahdw2JiEjzFAQiIhGnIBARiTgFgYhIxCkIREQiTkEgIhJxCgIR\nkYhTEIiIRFzSg8DMLjKzv5jZdjPbZmZ3JrsGERH5TBhXFh8H7nb3jWbWHdhgZsvdfXsItYiIRF7S\nZwTuvsfdN8aeHwbeBvokuw4REQmEeozAzLKBwcD6Jt77gZmVmFlJhe4kIyKSMKEFgZl1A/4T+Fd3\nP9T4fXef5+4F7l6QqY5xIiIJE0oQmFlnghBY6O6Lw6hBREQCYZw1ZMB84G13/3Wyty8iIg2FMSMY\nDnwPGGVmm2L/XRtCHSIiQginj7r7GsCSvV0REWmariwWEYk4BYGISMQpCEREIk5BICIScQoCEZGI\nUxCIiEScgkBEJOIiEQSVlZXs2rWLysrKsEsREWl3wrgfQVJt2bKFpUuXkpKSQk1NDYWFheTl5YVd\nlohIu9GhZwSVlZUsXbqU48ePc/ToUY4fP86SJUs0MxARqadDB8GBAwdISUlpMJaSksKBAwdCqkhE\npP3p0EGQkZFBTU1Ng7GamhoyMjJCqkhEpP3p0EGQnp5OYWEhqampnHPOOaSmplJYWEh6enrYpYmI\ntBsd/mBxXl4eOTk5HDhwgIyMDIWAiEgjHT4IIJgZKABERJrWoXcNiYhI8xQEIiIRpyAQEYk4BYGI\nSMQpCEREIk5BICIScQoCEZGIM3cPu4ZmmVkF8I+w62iB84B/hl1EO6bv5/T0/ZyavpvTO9X309fd\nM5v78FkRBGcLMytx94Kw62iv9P2cnr6fU9N3c3rxfj/aNSQiEnEKAhGRiFMQtK15YRfQzun7OT19\nP6em7+b04vp+dIxARCTiNCMQEYk4BUEbMLOLzOwvZrbdzLaZ2Z1h19TemFmKmb1pZi+HXUt7Y2YZ\nZvZ7M9thZm+b2eVh19SemNldsf+vtprZIjNLC7umMJnZk2b2kZltrTd2rpktN7N3Y489z2SdCoK2\ncRy4290vBb4G/A8zuzTkmtqbO4G3wy6inXoEWObuA4BB6HuqY2Z9gDuAAnfPA1KA74ZbVegWAFc3\nGrsPWOHuucCK2OsWUxC0AXff4+4bY88PE/yP3CfcqtoPM8sCxgFPhF1Le2NmnwdGAvMB3P2Yux8I\nt6p2JxXoamapwOeA3SHXEyp3fw3Y32i4EHgq9vwp4LozWaeCoI2ZWTYwGFgfbiXtysPAPcCJsAtp\nh3KACuA/YrvOnjAz3U4vxt13AXOAMmAPcNDdXwm3qnbpAnffE3v+IXDBmXxYQdCGzKwb8J/Av7r7\nobDraQ/MbDzwkbtvCLuWdioVGAL8u7sPBio5w2l9Rxbb111IEJhfANLNbEq4VbVvHpwKekangyoI\n2oiZdSYIgYXuvjjsetqR4cAEMysFngNGmdnvwi2pXSkHyt29dgb5e4JgkMAYYKe7V7h7NbAY+HrI\nNbVHe82sN0Ds8aMz+bCCoA2YmRHs433b3X8ddj3tibv/xN2z3D2b4CDfSnfXv+hi3P1D4AMz6x8b\nGg1sD7Gk9qYM+JqZfS72/9lodDC9KUuB78eefx9YciYfVhC0jeHA9wj+tbsp9t+1YRclZ40fAQvN\nbDPwZeDBkOtpN2Izpd8DG4EtBL9Zkb7K2MwWAeuA/mZWbmZFwGxgrJm9SzCLmn1G
69SVxSIi0aYZ\ngYhIxCkIREQiTkEgIhJxCgIRkYhTEIiIRJyCQKQVYq0gTttY0MwWmNnEJsazzezGxFUncmYUBCKt\n4O7T3L21F35lAwoCaTcUBBJpZjbdzO6IPf+Nma2MPR9lZgvN7Jtmts7MNprZi7F+UpjZKjMriD0v\nMrN3zOwNM/utmc2tt4mRZva6mb1fb3YwGxgRu/DwriT+uSJNUhBI1K0GRsSeFwDdYn2jRgCbgfuB\nMe4+BCgBflz/w2b2BeABgvtQDAcGNFp/b+BfgPF8drXnfcBqd/+yu/+mzf8ikTOUGnYBIiHbAAw1\nsx7AUYJWBgUEQbAUuBRYG7S5oQvBpf31fQX4q7vvBzCzF4F+9d7/L3c/AWw3szNqDSySLAoCiTR3\nrzazncDNwOsEs4ArgYuBncByd58cxyaO1ntucaxHJGG0a0gk2D30P4HXYs//O/Am8P+A4WZ2MYCZ\npZtZv0afLQauMLOesTtofbsF2zsMdG+r4kXipSAQCX78ewPr3H0vUEWwD7+CYKawKNYZdB2NjgHE\n7qD1IPAGsBYoBQ42s73NQI2ZvaWDxdIeqPuoSJzMrJu7fxKbEfwBeNLd/xB2XSItpRmBSPx+bmab\ngK0ExxX+K+R6RM6IZgQiIhGnGYGISMQpCEREIk5BICIScQoCEZGIUxCIiEScgkBEJOL+P3vS2BJ7\neG9SAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df2 = pd.DataFrame([df.iloc[0], df.iloc[1], df.iloc[4]], columns=['weight', 'length', 'label'])\n", "df3 = pd.DataFrame([df.iloc[14]], columns=['weight', 'length', 'label'])\n", "\n", "ax = df2[df2['label'] == 0].plot.scatter(x='weight', y='length', c='blue', label='x4(young)')\n", "ax = df2[df2['label'] == 1].plot.scatter(x='weight', y='length', c='orange', label='x0(mid)', ax=ax)\n", "ax = df2[df2['label'] == 2].plot.scatter(x='weight', y='length', c='red', label='x1(adult)', ax=ax)\n", "ax = df3.plot.scatter(x='weight', y='length', c='gray', label='x14(?)', ax=ax)\n", "ax" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def euclidean_distance(x, y): \n", " return np.sqrt(np.sum((x - y) ** 2))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$\\sqrt{\\sum^n_{i=1} (x_i - y_i)^2}$" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " x0: [6.6 6.2] \n", " x1: [9.7 9.9] \n", " x4: [1.3 2.7] \n", "x14: [1.3 1.3]\n" ] } ], "source": [ "x0 = X[0][:-1]\n", "x1 = X[1][:-1]\n", "x4 = X[4][:-1]\n", "x14 
= X[14][:-1]\n", "print(\" x0:\", x0, \"\\n x1:\", x1, \"\\n x4:\", x4, \"\\nx14:\", x14)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " x14 and x0: 7.218032973047436 \n", " x14 and x1: 12.021647141718974 \n", " x14 and x4: 1.4000000000000001\n" ] } ], "source": [ "print(\" x14 and x0:\", euclidean_distance(x14, x0), \"\\n\",\n", " \"x14 and x1:\", euclidean_distance(x14, x1), \"\\n\",\n", " \"x14 and x4:\", euclidean_distance(x14, x4))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def cosine_similarity(x, y):\n", "    \"\"\"Return the cosine of the angle between 1-D vectors ``x`` and ``y``.\n", "\n", "    The angle is undefined when either vector has zero length; 0.0 is\n", "    returned in that case instead of dividing by zero and yielding NaN.\n", "    \"\"\"\n", "    # Product of the two Euclidean lengths (same arithmetic as before the guard).\n", "    norm_product = np.sqrt(np.dot(x, x)) * np.sqrt(np.dot(y, y))\n", "    if norm_product == 0:\n", "        return 0.0\n", "    return np.dot(x, y) / norm_product" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$\\frac{x \\bullet y}{ \\sqrt{x \\bullet x} \\sqrt{y \\bullet y}}$" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " x14 and x0: 0.9995120760870786 \n", " x14 and x1: 0.9999479424242859 \n", " x14 and x4: 0.9438583563660174\n" ] } ], "source": [ "print(\" x14 and x0:\", cosine_similarity(x14, x0), \"\\n\",\n", " \"x14 and x1:\", cosine_similarity(x14, x1), \"\\n\",\n", " \"x14 and x4:\", cosine_similarity(x14, x4))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "While cosine looks at the angle between vectors (thus not taking into regard their weight or magnitude), euclidean distance is similar to using a ruler to actually measure the distance." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Cosine Similarity in Action" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import wikipedia\n", "\n", "# Reference articles: two machine-learning topics and two sports topics.\n", "q1 = wikipedia.page('Machine Learning')\n", "q2 = wikipedia.page('Artificial Intelligence')\n", "q3 = wikipedia.page('Soccer')\n", "q4 = wikipedia.page('Tennis')" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from sklearn.feature_extraction.text import CountVectorizer\n", "\n", "cv = CountVectorizer()\n", "X = np.array(cv.fit_transform([q1.content, q2.content, q3.content, q4.content]).todense())" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ML \t 4048 \n", "AI \t 13742 \n", "soccer \t 6470 \n", "tennis \t 9736\n" ] } ], "source": [ "print(\"ML \\t\", len(q1.content.split()), \"\\n\"\n", " \"AI \\t\", len(q2.content.split()), \"\\n\"\n", " \"soccer \\t\", len(q3.content.split()), \"\\n\"\n", " \"tennis \\t\", len(q4.content.split()))" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Machine learning is a field of computer science that often uses statistical techniques to give compu'" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q1.content[:100]" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Machine',\n", " 'learning',\n", " 'is',\n", " 'a',\n", " 'field',\n", " 'of',\n", " 'computer',\n", " 'science',\n", " 'that',\n", " 'often']" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q1.content.split()[:10]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 
0, 0, 0, 0],\n", " dtype=int64)" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X[0][:20]" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(5484,)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X[0].shape" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ML - AI \t 846.53411035823 \n", "ML - soccer \t 479.75827246645787 \n", "ML - tennis \t 789.7069076562519\n" ] } ], "source": [ "print(\"ML - AI \\t\", euclidean_distance(X[0], X[1]), \"\\n\"\n", " \"ML - soccer \\t\", euclidean_distance(X[0], X[2]), \"\\n\"\n", " \"ML - tennis \\t\", euclidean_distance(X[0], X[3]))" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ML - AI \t 0.8887965704386804 \n", "ML - soccer \t 0.7839297821715802 \n", "ML - tennis \t 0.7935675914311315\n" ] } ], "source": [ "print(\"ML - AI \\t\", cosine_similarity(X[0], X[1]), \"\\n\"\n", " \"ML - soccer \\t\", cosine_similarity(X[0], X[2]), \"\\n\"\n", " \"ML - tennis \\t\", cosine_similarity(X[0], X[3]))" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def l1_normalize(v):\n", "    \"\"\"Scale ``v`` so that the sum of its absolute values is 1.\n", "\n", "    An all-zero (or empty) input cannot be rescaled; it is returned as a\n", "    float array of zeros instead of dividing by zero.\n", "    \"\"\"\n", "    v = np.asarray(v)\n", "    # The L1 norm sums magnitudes, so negative components must not cancel.\n", "    norm = np.sum(np.abs(v))\n", "    if norm == 0:\n", "        return np.zeros_like(v, dtype=float)\n", "    return v / norm\n", "\n", "def l2_normalize(v):\n", "    \"\"\"Scale ``v`` to unit Euclidean length.\n", "\n", "    An all-zero (or empty) input cannot be rescaled; it is returned as a\n", "    float array of zeros instead of dividing by zero.\n", "    \"\"\"\n", "    v = np.asarray(v)\n", "    norm = np.sqrt(np.sum(np.square(v)))\n", "    if norm == 0:\n", "        return np.zeros_like(v, dtype=float)\n", "    return v / norm" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ML - AI \t 0.9556356337470292 \n", "ML - soccer \t 0.9291904899197152 \n", "ML - tennis \t 0.9314819689984162\n" ] } ], "source": [ "print(\"ML - AI \\t\", 1 - euclidean_distance(l1_normalize(X[0]), l1_normalize(X[1])), \"\\n\"\n", " \"ML - soccer \\t\", 1 - euclidean_distance(l1_normalize(X[0]), 
l1_normalize(X[2])), \"\\n\"\n", " \"ML - tennis \\t\", 1 - euclidean_distance(l1_normalize(X[0]), l1_normalize(X[3])))" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ML - AI \t 0.5283996828641448 \n", "ML - soccer \t 0.3426261066509869 \n", "ML - tennis \t 0.3574544240773757\n" ] } ], "source": [ "print(\"ML - AI \\t\", 1 - euclidean_distance(l2_normalize(X[0]), l2_normalize(X[1])), \"\\n\"\n", " \"ML - soccer \\t\", 1 - euclidean_distance(l2_normalize(X[0]), l2_normalize(X[2])), \"\\n\"\n", " \"ML - tennis \\t\", 1 - euclidean_distance(l2_normalize(X[0]), l2_normalize(X[3])))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Categorize a Tweet" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": true }, "outputs": [], "source": [ "ml_tweet = \"New research release: overcoming many of Reinforcement Learning's limitations with Evolution Strategies.\"\n", "x = np.array(cv.transform([ml_tweet]).todense())[0]" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tweet - ML \t 373.09114167988497 \n", "tweet - AI \t 1160.7269274036853 \n", "tweet - soccer \t 712.600168397398 \n", "tweet - tennis \t 1052.5796881946753\n" ] } ], "source": [ "print(\"tweet - ML \\t\", euclidean_distance(x, X[0]), \"\\n\"\n", " \"tweet - AI \\t\", euclidean_distance(x, X[1]), \"\\n\"\n", " \"tweet - soccer \\t\", euclidean_distance(x, X[2]), \"\\n\"\n", " \"tweet - tennis \\t\", euclidean_distance(x, X[3]))" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tweet - ML \t 0.2613347291026786 \n", "tweet - AI \t 0.19333084671126158 \n", "tweet - soccer \t 0.1197543563241326 \n", "tweet - tennis \t 0.11622680287651725\n" ] } ], "source": [ "print(\"tweet - ML \\t\", cosine_similarity(x, X[0]), 
\"\\n\"\n", " \"tweet - AI \\t\", cosine_similarity(x, X[1]), \"\\n\"\n", " \"tweet - soccer \\t\", cosine_similarity(x, X[2]), \"\\n\"\n", " \"tweet - tennis \\t\", cosine_similarity(x, X[3]))" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tweet - ML \t -0.2154548703241279 \n", "tweet - AI \t -0.2701725499228351 \n", "tweet - soccer \t -0.32683506410998 \n", "tweet - tennis \t -0.3294910282687\n" ] } ], "source": [ "print(\"tweet - ML \\t\", 1 - euclidean_distance(l2_normalize(x), l2_normalize(X[0])), \"\\n\"\n", " \"tweet - AI \\t\", 1 - euclidean_distance(l2_normalize(x), l2_normalize(X[1])), \"\\n\"\n", " \"tweet - soccer \\t\", 1 - euclidean_distance(l2_normalize(x), l2_normalize(X[2])), \"\\n\"\n", " \"tweet - tennis \\t\", 1 - euclidean_distance(l2_normalize(x), l2_normalize(X[3])))" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": true }, "outputs": [], "source": [ "so_tweet = \"#LegendsDownUnder The Reds are out for the warm up at the @nibStadium. 
Not long now until kick-off in Perth.\"\n", "x2 = np.array(cv.transform([so_tweet]).todense())[0]" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tweet - ML \t 371.8669116767449 \n", "tweet - AI \t 1159.1397672412072 \n", "tweet - soccer \t 710.1035135809426 \n", "tweet - tennis \t 1050.1485609188826\n" ] } ], "source": [ "print(\"tweet - ML \\t\", euclidean_distance(x2, X[0]), \"\\n\"\n", " \"tweet - AI \\t\", euclidean_distance(x2, X[1]), \"\\n\"\n", " \"tweet - soccer \\t\", euclidean_distance(x2, X[2]), \"\\n\"\n", " \"tweet - tennis \\t\", euclidean_distance(x2, X[3]))" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tweet - ML \t 0.4396242958582417 \n", "tweet - AI \t 0.46942065152331963 \n", "tweet - soccer \t 0.6136116162795926 \n", "tweet - tennis \t 0.5971160690477066\n" ] } ], "source": [ "print(\"tweet - ML \\t\", cosine_similarity(x2, X[0]), \"\\n\"\n", " \"tweet - AI \\t\", cosine_similarity(x2, X[1]), \"\\n\"\n", " \"tweet - soccer \\t\", cosine_similarity(x2, X[2]), \"\\n\"\n", " \"tweet - tennis \\t\", cosine_similarity(x2, X[3]))" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "tweet - ML \t -0.0586554719470902 \n", "tweet - AI \t -0.030125573390623384 \n", "tweet - soccer \t 0.12092277504145588 \n", "tweet - tennis \t 0.10235426703816686\n" ] } ], "source": [ "print(\"tweet - ML \\t\", 1 - euclidean_distance(l2_normalize(x2), l2_normalize(X[0])), \"\\n\"\n", " \"tweet - AI \\t\", 1 - euclidean_distance(l2_normalize(x2), l2_normalize(X[1])), \"\\n\"\n", " \"tweet - soccer \\t\", 1 - euclidean_distance(l2_normalize(x2), l2_normalize(X[2])), \"\\n\"\n", " \"tweet - tennis \\t\", 1 - euclidean_distance(l2_normalize(x2), l2_normalize(X[3])))" ] } ], "metadata": { "kernelspec": { 
"display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }