{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## K 최근접 이웃 알고리즘 (K-Nearest Neighbor)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0.37454012, 0.95071431],\n", " [0.73199394, 0.59865848],\n", " [0.15601864, 0.15599452],\n", " [0.05808361, 0.86617615],\n", " [0.60111501, 0.70807258],\n", " [0.02058449, 0.96990985],\n", " [0.83244264, 0.21233911],\n", " [0.18182497, 0.18340451],\n", " [0.30424224, 0.52475643],\n", " [0.43194502, 0.29122914]])" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Uniform distribution을 이용해서 10 * 2 배열을 임의로 생성한다.\n", "random_generator = np.random.RandomState(42)\n", "dataset = random_generator.rand(10, 2)\n", "dataset" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAD7ZJREFUeJzt3VGInNd5h/Hn9VputsSJSrSBaqVYLsgiwimoLMbFF3ZwWsm+kEQwwQLTphgb0jq9sFmQSEmDc5HgpQQKbhPRhrSB2HGC2IhUYS9ih5QQBa/ZxKpkFraKE+9uwRvH65tsYll9e7Ejdbya1Xwrzew3c+b5gWC+M8czrw8zf2bPOTMnMhNJUlluqLsASVLnGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAt1Y1xNv27Ytd+3aVdfTS1Jfeumll36VmSPt+tUW7rt27WJ6erqup5ekvhQRv6jSz2kZSSpQ23CPiK9GxOsR8V/r3B8R8Y8RMRcRL0fEn3S+TEnSRlT55P414MBV7r8P2N349yjwz9dfliTperQN98z8IfDrq3Q5BPx7rjoNbI2IP+xUgZKkjevEnPso8FrT9Xyj7QoR8WhETEfE9NLSUgeeWpLUSifCPVq0tTwBJDOPZ+ZYZo6NjLTdySNJukadCPd5YGfT9Q5gsQOPK0m6Rp0I95PAXzR2zdwJvJWZ/9OBx5UkXaO2X2KKiGeAe4BtETEP/D2wBSAzvwycAu4H5oDfAH/VrWLXmpxZYGJqlsXlFbZvHWZ8/x4O72s53S9JA6VtuGfmkTb3J/A3HauoosmZBY6dOMPKhYsALCyvcOzEGQADXtLA69tvqE5MzV4O9ktWLlxkYmq2pookqXf0bbgvLq9sqF2SBknfhvv2rcMbapekQdK34T6+fw/DW4be1Ta8ZYjx/Xtqqkh1mJxZ4K4vPs+tR/+Du774PJMzC3WXJPWE2n7y93pdWjR1t8zgclFdWl/fhjusvoF9Ew+uqy2q+7rQoOvbaRnJRXVpfYa7+paL6tL6DHf1LRfVpfX19Zy7BpuL6tL6DHf1NRfVpdaclpGkAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQbuh8MmZxb8FUFJxRuocPfMTUmDYqCmZa525qYklWSgwt0zNyUNioEKd8/clDQoBircPXNT0qAYqAVVz9yUNCgGKtzBMzclDYaBmpaRpEFhuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCVQr3iDgQEbMRMRcRR1vc/6GIeCEiZiLi5Yi4v/OlSpKqahvuETEEPA3cB+wFjkTE3jXd/g54LjP3AQ8C/9TpQiVJ1VX55H4HMJeZ5zPzbeBZ4NCaPgm8r3H7/cBi50qUJG1UlXAfBV5rup5vtDX7HPBQRMwDp4BPt3qgiHg0IqYjYnppaekaypUkVVEl3KNFW665PgJ8LTN3APcDX4+IKx47M49n5lhmjo2MjGy8WklSJVXCfR7Y2XS9gyunXR4GngPIzB8D7wG2daJASdLGVQn3F4HdEXFrRNzE6oLpyTV9fgncCxARH2Y13J13kaSatA33zHwHeAyYAl5hdVfM2Yh4MiIONro9ATwSET8DngE+mZlrp24kSZuk0u+5Z+YpVhdKm9s+23T7HHBXZ0uTJF0rv6EqSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUoEq/CilpMEzOLDAxNcvi8grbtw4zvn8Ph/etPVVT/cBwlwSsBvuxE2dYuXARgIXlFY6dOANgwPchp2UkATAxNXs52C9ZuXCRianZmirS9TDcJQGwuLyyoXb1NsNdEgDbtw5vqF29zXCXBMD4/j0Mbxl6V9vwliHG9++pqSJdDxdUJQH/v2jqbpkyGO6SLju8b9QwL4TTMpJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklSgSuEeEQciYjYi5iLi6Dp9PhER5yLibER8o7NlSpI2ou0xexExBDwN/BkwD7wYEScz81xTn93AMeCuzHwzIj7YrYIlaT2TMwueAdtQ5ZP7HcBcZp7PzLeBZ4FDa/o8AjydmW8CZObrnS1Tkq5ucmaBYyfOsLC8QgILyyscO3GGyZmFukurRZVwHwVea7qeb7Q1uw24LSJ+FBGnI+JAqweKiEcjYjoippeWlq6tYklqYWJqlpULF9/VtnLhIhNTszVVVK8q4R4t2nLN9Y3AbuAe4AjwLxGx9Yr/KPN4Zo5l5tjIyMhGa5WkdS0ur2yovXRVwn0e2Nl0vQNYbNHnO5l5ITN/DsyyGvaStCm2bx3eUHvpqoT7i8DuiLg1Im4CHgROrukzCXwUICK2sTpNc76ThUrS1Yzv38PwlqF3tQ1vGWJ8/56aKqpX290ymflORDwGTAFDwFcz82xEPAlMZ+bJxn1/HhHngIvAeGa+0c3CJanZpV0x7pZZFZlrp883x9jYWE5PT9fy3JLUryLipcwca9fPb6hKUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KB2v78gHStPDhBqo/hrq64dHDCpd/XvnRwAmDAS5vAaRl1hQcnSPUy3NUVHpwg1ctwV1d4cIJUL8NdXeHBCVK9XFBVV3hwglQvw11dc3jfqGEu1cRpGUkqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCVQr3iDgQEbMRMRcRR6/S74GIyIgY61yJkqSNahvuETEEPA3cB+wFjkTE3hb9bgb+FvhJp4uUJG1MlU/udwBzmXk+M98GngUOtej3eeAp4LcdrE+SdA2qhPso8FrT9Xyj7bKI2AfszMzvXu2BIuLRiJiOiOmlpaUNFytJqqZKuEeLtrx8Z8QNwJeAJ9o9UGYez8yxzBwbGRmpXqUkaUOqhPs8sLPpegew2HR9M3A78IOIeBW4Ezjpoqok1adKuL8I7I6IWyPiJuBB4OSlOzPzrczclpm7MnMXcBo4mJnTXalYktRW23DPzHeAx4Ap4BXgucw8GxFPRsTBbhcoSdq4G6t0ysxTwKk1bZ9dp+8911+WJOl6+A1VSSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKlClk5ikQTI5s8DE1CyLyyts3zrM+P49HN43WndZ0oYY7lKTyZkFjp04w8qFiwAsLK9w7MQZAANefcVpGanJxNTs5WC/ZOXCRSamZmuqSLo2hrvUZHF5ZUPtUq8y3KUm27cOb6hd6lWGu9RkfP8ehrcMvatteMsQ4/v31FSRdG1cUJWaXFo0dbeM+p3hLq1xeN+oYa6+57SMJBXIcJekAhnuklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAL58wOS1CV1nupluEtSF9R9qpfTMpLUBXWf6lUp3CPiQETMRsRcRBxtcf/jEXEuIl6OiO9HxC2dL1WS+kfdp3q1DfeIGAKeBu4D9gJHImLvmm4zwFhm/jHwbeCpThcqSf2k7lO9qnxyvwOYy8zzmfk28CxwqLlDZr6Qmb9pXJ4GdnS2TEnqL3Wf6lUl3EeB15qu5xtt63kY+F6rOyLi0YiYjojppaWl6lVKUp85vG+UL3z8I4xuHSaA0a3DfOHjH+mp3TLRoi1bdox4CBgD7m51f2YeB44DjI2NtXwMtVbnlipJ16bOU72qhPs8sLPpegewuLZTRHwM+Axwd2b+rjPlCerfUiWp/1SZlnkR2B0Rt0bETcCDwMnmDhGxD/gKcDAzX+98mYOt7i1VkvpP23DPzHeAx4Ap4BXgucw8GxFPRsTBRrcJ4L3AtyLipxFxcp2H0zWoe0uVpP5T6RuqmXkKOLWm7bNNtz/W4brUZPvWYRZaBPlmbamS1H/8hmofqHtLlaT+42/L9IFLi6bulpFUleFes6pbHOvcUiWp/xjuNXKLo6Rucc69Rm5xlNQthnuN3OIoqVsM9xrV/atxkspluNfILY6SusUF1Rq5xVFStxjuNXOLo6RucFpGkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVKDIzHqeOGIJ+EXF7tuAX3WxnBI4RtU4Tu05Ru3VOUa3ZOZIu061hftGRMR0Zo7VXUcvc4yqcZzac4za64cxclpGkgpkuEtSgfol3I/XXUAfcIyqcZzac4za6/kx6os5d0nSxvTLJ3dJ0gb0VLhHxIGImI2IuYg42uL+34uIbzbu/0lE7Nr8KutVYYwej4hzEfFyRHw/Im6po846tRujpn4PRERGRE/veuiWKuMUEZ9ovJ7ORsQ3NrvGulV4v30oIl6IiJnGe+7+OupsKTN74h8wBPw38EfATcDPgL1r+vw18OXG7QeBb9Zddw+O0UeB32/c/pRjdOUYNfrdDPwQOA2M1V13L44TsBuYAf6gcf3BuuvuwTE6DnyqcXsv8GrddV/610uf3O8A5jLzfGa+DTwLHFrT5xDwb43b3wbujYjYxBrr1naMMvOFzPxN4/I0sGOTa6xbldcRwOeBp4DfbmZxPaTKOD0CPJ2ZbwJk5uubXGPdqoxRAu9r3H4/sLiJ9V1VL4X7KPBa0/V8o61ln8x8B3gL+MCmVNcbqoxRs4eB73W1ot7TdowiYh+wMzO/u5mF9Zgqr6XbgNsi4kcRcToiDmxadb2hyhh9DngoIuaBU8CnN6e09nrpDNVWn8DXbuWp0qdklf//I+IhYAy4u6sV9Z6rjlFE3AB8CfjkZhXUo6q8lm5kdWrmHlb/AvzPiLg9M5e7XFuvqDJGR4CvZeY/RMSfAl9vjNH/dr+8q+ulT+7zwM6m6x1c+SfO5T4RcSOrfwb9elOq6w1VxoiI+BjwGeBgZv5uk2rrFe3G6GbgduAHEfEqcCdwcgAXVau+376TmRcy8+fALKthPyiqjNHDwHMAmflj4D2s/u5M7Xop3F8EdkfErRFxE6sLpifX9DkJ/GXj9gPA89lYyRgQbceoMeXwFVaDfdDmSKHNGGXmW5m5LTN3ZeYuVtclDmbmdD3l1qbK+22S1QV6ImIbq9M05ze1ynpVGaNfAvcCRMSHWQ33pU2tch09E+6NOfTHgCngFeC5zDwbEU9GxMFGt38FPhARc8DjwLrb3EpUcYwmgPcC34qIn0bE2hdj0SqO0cCrOE5TwBsRcQ54ARjPzDfqqXjzVRyjJ4BHIuJnwDPAJ3vlA6ffUJWkAvXMJ3dJUucY7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFej/APFg4ebEUZbEAAAAAElFTkSuQmCC\n", "text/plain": [ "" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Plot (아주 간단하게 plot)\n", "x = dataset[:, 0]\n", "y = dataset[:, 1]\n", "plt.scatter(x, y)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "두 점 $v_1 = (x_1, y_1), v_2 = (x_2, y_2)$가 주어졌을 때, 두 점 사이의 거리 $\\text{dist}(v_1, v_2)=\\sqrt{(x_1-x_2)^2+(y_1-y_2)^2}$이다.\n", "\n", "dataset은 $\\{v_1, v_2, ..., v_10\\}$이고, 가까운 데이터와의 거리를 재기 위해서,\n", " - $v_1$에 대하여, $\\text{dist}(v_1, v_2), \\text{dist}(v_1, v_3), ..., \\text{dist}(v_1, v_{10})$\n", " - $v_2$에 대하여, $\\text{dist}(v_2, v_1), \\text{dist}(v_2, v_3), ..., \\text{dist}(v_2, v_{10})$\n", " - $v_{10}$에 대하여, $\\text{dist}(v_{10}, v_1), \\text{dist}(v_{10}, v_2), ..., \\text{dist}(v_{10}, v_1)$\n", " \n", "를 각각 구해야 한다.\n", "\n", "이를 계산하기 위해서 broadcasting 기능을 활용한다." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Step by step" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.1. Vector간의 difference" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[[0.37454012, 0.95071431]],\n", "\n", " [[0.73199394, 0.59865848]],\n", "\n", " [[0.15601864, 0.15599452]],\n", "\n", " [[0.05808361, 0.86617615]],\n", "\n", " [[0.60111501, 0.70807258]],\n", "\n", " [[0.02058449, 0.96990985]],\n", "\n", " [[0.83244264, 0.21233911]],\n", "\n", " [[0.18182497, 0.18340451]],\n", "\n", " [[0.30424224, 0.52475643]],\n", "\n", " [[0.43194502, 0.29122914]]])" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# dataset[: np.newaxis, :] 도 사용가능\n", "row = dataset.reshape(10, 1, 2)\n", "row" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[[0.37454012, 0.95071431],\n", " [0.73199394, 0.59865848],\n", " [0.15601864, 0.15599452],\n", " [0.05808361, 0.86617615],\n", " [0.60111501, 0.70807258],\n", " [0.02058449, 0.96990985],\n", " [0.83244264, 0.21233911],\n", " [0.18182497, 0.18340451],\n", " [0.30424224, 0.52475643],\n", " [0.43194502, 0.29122914]]])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# dataset[np.newaxis, :, :] 도 사용가능\n", "other_rows = dataset.reshape(1, 10, 2)\n", "other_rows" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Shape1 (10, 1, 2)\n", "Shape2 (1, 10, 2)\n" ] } ], "source": [ "print('Shape1', row.shape)\n", "print('Shape2', other_rows.shape)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[[ 0. , 0. ],\n", " [-0.35745382, 0.35205582],\n", " [ 0.21852148, 0.79471979],\n", " [ 0.31645651, 0.08453816],\n", " [-0.22657489, 0.24264173],\n", " [ 0.35395562, -0.01919555],\n", " [-0.45790252, 0.7383752 ],\n", " [ 0.19271515, 0.7673098 ],\n", " [ 0.07029788, 0.42595787],\n", " [-0.0574049 , 0.65948517]],\n", "\n", " [[ 0.35745382, -0.35205582],\n", " [ 0. , 0. ],\n", " [ 0.5759753 , 0.44266396],\n", " [ 0.67391033, -0.26751766],\n", " [ 0.13087893, -0.10941409],\n", " [ 0.71140945, -0.37125137],\n", " [-0.1004487 , 0.38631937],\n", " [ 0.55016897, 0.41525397],\n", " [ 0.4277517 , 0.07390205],\n", " [ 0.30004892, 0.30742934]],\n", "\n", " [[-0.21852148, -0.79471979],\n", " [-0.5759753 , -0.44266396],\n", " [ 0. , 0. ],\n", " [ 0.09793503, -0.71018163],\n", " [-0.44509637, -0.55207806],\n", " [ 0.13543415, -0.81391533],\n", " [-0.676424 , -0.05634459],\n", " [-0.02580633, -0.02740999],\n", " [-0.1482236 , -0.36876191],\n", " [-0.27592638, -0.13523462]],\n", "\n", " [[-0.31645651, -0.08453816],\n", " [-0.67391033, 0.26751766],\n", " [-0.09793503, 0.71018163],\n", " [ 0. , 0. ],\n", " [-0.5430314 , 0.15810357],\n", " [ 0.03749912, -0.10373371],\n", " [-0.77435903, 0.65383704],\n", " [-0.12374136, 0.68277164],\n", " [-0.24615863, 0.34141971],\n", " [-0.37386141, 0.57494701]],\n", "\n", " [[ 0.22657489, -0.24264173],\n", " [-0.13087893, 0.10941409],\n", " [ 0.44509637, 0.55207806],\n", " [ 0.5430314 , -0.15810357],\n", " [ 0. , 0. ],\n", " [ 0.58053052, -0.26183727],\n", " [-0.23132763, 0.49573347],\n", " [ 0.41929004, 0.52466807],\n", " [ 0.29687277, 0.18331615],\n", " [ 0.16916999, 0.41684344]],\n", "\n", " [[-0.35395562, 0.01919555],\n", " [-0.71140945, 0.37125137],\n", " [-0.13543415, 0.81391533],\n", " [-0.03749912, 0.10373371],\n", " [-0.58053052, 0.26183727],\n", " [ 0. , 0. ],\n", " [-0.81185815, 0.75757074],\n", " [-0.16124047, 0.78650534],\n", " [-0.28365775, 0.44515342],\n", " [-0.41136052, 0.67868071]],\n", "\n", " [[ 0.45790252, -0.7383752 ],\n", " [ 0.1004487 , -0.38631937],\n", " [ 0.676424 , 0.05634459],\n", " [ 0.77435903, -0.65383704],\n", " [ 0.23132763, -0.49573347],\n", " [ 0.81185815, -0.75757074],\n", " [ 0. , 0. ],\n", " [ 0.65061767, 0.0289346 ],\n", " [ 0.5282004 , -0.31241732],\n", " [ 0.40049762, -0.07889003]],\n", "\n", " [[-0.19271515, -0.7673098 ],\n", " [-0.55016897, -0.41525397],\n", " [ 0.02580633, 0.02740999],\n", " [ 0.12374136, -0.68277164],\n", " [-0.41929004, -0.52466807],\n", " [ 0.16124047, -0.78650534],\n", " [-0.65061767, -0.0289346 ],\n", " [ 0. , 0. ],\n", " [-0.12241728, -0.34135192],\n", " [-0.25012005, -0.10782463]],\n", "\n", " [[-0.07029788, -0.42595787],\n", " [-0.4277517 , -0.07390205],\n", " [ 0.1482236 , 0.36876191],\n", " [ 0.24615863, -0.34141971],\n", " [-0.29687277, -0.18331615],\n", " [ 0.28365775, -0.44515342],\n", " [-0.5282004 , 0.31241732],\n", " [ 0.12241728, 0.34135192],\n", " [ 0. , 0. ],\n", " [-0.12770278, 0.23352729]],\n", "\n", " [[ 0.0574049 , -0.65948517],\n", " [-0.30004892, -0.30742934],\n", " [ 0.27592638, 0.13523462],\n", " [ 0.37386141, -0.57494701],\n", " [-0.16916999, -0.41684344],\n", " [ 0.41136052, -0.67868071],\n", " [-0.40049762, 0.07889003],\n", " [ 0.25012005, 0.10782463],\n", " [ 0.12770278, -0.23352729],\n", " [ 0. , 0. ]]])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "differences = row - other_rows\n", "differences" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(10, 10, 2)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "differences.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.2. Differences의 제곱" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "sq_differences = differences ** 2" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[[0.00000000e+00, 0.00000000e+00],\n", " [1.27773236e-01, 1.23943302e-01],\n", " [4.77516365e-02, 6.31579538e-01],\n", " [1.00144721e-01, 7.14670060e-03],\n", " [5.13361821e-02, 5.88750085e-02],\n", " [1.25284584e-01, 3.68468977e-04],\n", " [2.09674720e-01, 5.45197930e-01],\n", " [3.71391297e-02, 5.88764324e-01],\n", " [4.94179135e-03, 1.81440111e-01],\n", " [3.29532252e-03, 4.34920684e-01]],\n", "\n", " [[1.27773236e-01, 1.23943302e-01],\n", " [0.00000000e+00, 0.00000000e+00],\n", " [3.31747548e-01, 1.95951385e-01],\n", " [4.54155132e-01, 7.15656993e-02],\n", " [1.71292943e-02, 1.19714439e-02],\n", " [5.06103402e-01, 1.37827578e-01],\n", " [1.00899411e-02, 1.49242658e-01],\n", " [3.02685901e-01, 1.72435863e-01],\n", " [1.82971516e-01, 5.46151337e-03],\n", " [9.00293563e-02, 9.45128016e-02]],\n", "\n", " [[4.77516365e-02, 6.31579538e-01],\n", " [3.31747548e-01, 1.95951385e-01],\n", " [0.00000000e+00, 0.00000000e+00],\n", " [9.59126976e-03, 5.04357941e-01],\n", " [1.98110780e-01, 3.04790182e-01],\n", " [1.83424079e-02, 6.62458167e-01],\n", " [4.57549428e-01, 3.17471286e-03],\n", " [6.65966501e-04, 7.51307525e-04],\n", " [2.19702363e-02, 1.35985347e-01],\n", " [7.61353662e-02, 1.82884024e-02]],\n", "\n", " [[1.00144721e-01, 7.14670060e-03],\n", " [4.54155132e-01, 7.15656993e-02],\n", " [9.59126976e-03, 5.04357941e-01],\n", " [0.00000000e+00, 0.00000000e+00],\n", " [2.94883101e-01, 2.49967382e-02],\n", " [1.40618384e-03, 1.07606818e-02],\n", " [5.99631905e-01, 4.27502868e-01],\n", " [1.53119229e-02, 4.66177107e-01],\n", " [6.05940715e-02, 1.16567421e-01],\n", " [1.39772351e-01, 3.30564059e-01]],\n", "\n", " [[5.13361821e-02, 5.88750085e-02],\n", " [1.71292943e-02, 1.19714439e-02],\n", " [1.98110780e-01, 3.04790182e-01],\n", " [2.94883101e-01, 2.49967382e-02],\n", " [0.00000000e+00, 0.00000000e+00],\n", " [3.37015682e-01, 6.85587582e-02],\n", " [5.35124720e-02, 2.45751670e-01],\n", " [1.75804141e-01, 2.75276582e-01],\n", " [8.81334408e-02, 3.36048094e-02],\n", " [2.86184866e-02, 1.73758451e-01]],\n", "\n", " [[1.25284584e-01, 3.68468977e-04],\n", " [5.06103402e-01, 1.37827578e-01],\n", " [1.83424079e-02, 6.62458167e-01],\n", " [1.40618384e-03, 1.07606818e-02],\n", " [3.37015682e-01, 6.85587582e-02],\n", " [0.00000000e+00, 0.00000000e+00],\n", " [6.59113650e-01, 5.73913428e-01],\n", " [2.59984901e-02, 6.18590653e-01],\n", " [8.04617184e-02, 1.98161568e-01],\n", " [1.69217481e-01, 4.60607509e-01]],\n", "\n", " [[2.09674720e-01, 5.45197930e-01],\n", " [1.00899411e-02, 1.49242658e-01],\n", " [4.57549428e-01, 3.17471286e-03],\n", " [5.99631905e-01, 4.27502868e-01],\n", " [5.35124720e-02, 2.45751670e-01],\n", " [6.59113650e-01, 5.73913428e-01],\n", " [0.00000000e+00, 0.00000000e+00],\n", " [4.23303357e-01, 8.37211125e-04],\n", " [2.78995660e-01, 9.76045824e-02],\n", " [1.60398345e-01, 6.22363676e-03]],\n", "\n", " [[3.71391297e-02, 5.88764324e-01],\n", " [3.02685901e-01, 1.72435863e-01],\n", " [6.65966501e-04, 7.51307525e-04],\n", " [1.53119229e-02, 4.66177107e-01],\n", " [1.75804141e-01, 2.75276582e-01],\n", " [2.59984901e-02, 6.18590653e-01],\n", " [4.23303357e-01, 8.37211125e-04],\n", " [0.00000000e+00, 0.00000000e+00],\n", " [1.49859894e-02, 1.16521135e-01],\n", " [6.25600401e-02, 1.16261509e-02]],\n", "\n", " [[4.94179135e-03, 1.81440111e-01],\n", " [1.82971516e-01, 5.46151337e-03],\n", " [2.19702363e-02, 1.35985347e-01],\n", " [6.05940715e-02, 1.16567421e-01],\n", " [8.81334408e-02, 3.36048094e-02],\n", " [8.04617184e-02, 1.98161568e-01],\n", " [2.78995660e-01, 9.76045824e-02],\n", " [1.49859894e-02, 1.16521135e-01],\n", " [0.00000000e+00, 0.00000000e+00],\n", " [1.63079989e-02, 5.45349958e-02]],\n", "\n", " [[3.29532252e-03, 4.34920684e-01],\n", " [9.00293563e-02, 9.45128016e-02],\n", " [7.61353662e-02, 1.82884024e-02],\n", " [1.39772351e-01, 3.30564059e-01],\n", " [2.86184866e-02, 1.73758451e-01],\n", " [1.69217481e-01, 4.60607509e-01],\n", " [1.60398345e-01, 6.22363676e-03],\n", " [6.25600401e-02, 1.16261509e-02],\n", " [1.63079989e-02, 5.45349958e-02],\n", " [0.00000000e+00, 0.00000000e+00]]])" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sq_differences" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.3. 제곱간의 합 " ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0. , 0.25171654, 0.67933117, 0.10729142, 0.11021119,\n", " 0.12565305, 0.75487265, 0.62590345, 0.1863819 , 0.43821601],\n", " [0.25171654, 0. , 0.52769893, 0.52572083, 0.02910074,\n", " 0.64393098, 0.1593326 , 0.47512176, 0.18843303, 0.18454216],\n", " [0.67933117, 0.52769893, 0. , 0.51394921, 0.50290096,\n", " 0.68080058, 0.46072414, 0.00141727, 0.15795558, 0.09442377],\n", " [0.10729142, 0.52572083, 0.51394921, 0. , 0.31987984,\n", " 0.01216687, 1.02713477, 0.48148903, 0.17716149, 0.47033641],\n", " [0.11021119, 0.02910074, 0.50290096, 0.31987984, 0. ,\n", " 0.40557444, 0.29926414, 0.45108072, 0.12173825, 0.20237694],\n", " [0.12565305, 0.64393098, 0.68080058, 0.01216687, 0.40557444,\n", " 0. , 1.23302708, 0.64458914, 0.27862329, 0.62982499],\n", " [0.75487265, 0.1593326 , 0.46072414, 1.02713477, 0.29926414,\n", " 1.23302708, 0. , 0.42414057, 0.37660024, 0.16662198],\n", " [0.62590345, 0.47512176, 0.00141727, 0.48148903, 0.45108072,\n", " 0.64458914, 0.42414057, 0. , 0.13150712, 0.07418619],\n", " [0.1863819 , 0.18843303, 0.15795558, 0.17716149, 0.12173825,\n", " 0.27862329, 0.37660024, 0.13150712, 0. , 0.07084299],\n", " [0.43821601, 0.18454216, 0.09442377, 0.47033641, 0.20237694,\n", " 0.62982499, 0.16662198, 0.07418619, 0.07084299, 0. ]])" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sq_differences = sq_differences.sum(-1)\n", "sq_differences" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.4. Root" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0. , 0.5017136 , 0.82421549, 0.32755369, 0.33198071,\n", " 0.35447574, 0.86883407, 0.7911406 , 0.4317197 , 0.66197886],\n", " [0.5017136 , 0. , 0.72642889, 0.72506609, 0.17058938,\n", " 0.8024531 , 0.39916488, 0.68929077, 0.43408873, 0.4295837 ],\n", " [0.82421549, 0.72642889, 0. , 0.71690251, 0.7091551 ,\n", " 0.8251064 , 0.67876663, 0.0376467 , 0.39743626, 0.30728451],\n", " [0.32755369, 0.72506609, 0.71690251, 0. , 0.56557921,\n", " 0.11030352, 1.01347658, 0.69389411, 0.42090556, 0.68581077],\n", " [0.33198071, 0.17058938, 0.7091551 , 0.56557921, 0. ,\n", " 0.63684727, 0.5470504 , 0.67162543, 0.34891009, 0.44986324],\n", " [0.35447574, 0.8024531 , 0.8251064 , 0.11030352, 0.63684727,\n", " 0. , 1.11041752, 0.80286309, 0.52784779, 0.79361514],\n", " [0.86883407, 0.39916488, 0.67876663, 1.01347658, 0.5470504 ,\n", " 1.11041752, 0. , 0.65126075, 0.61367764, 0.40819356],\n", " [0.7911406 , 0.68929077, 0.0376467 , 0.69389411, 0.67162543,\n", " 0.80286309, 0.65126075, 0. , 0.36263911, 0.27237142],\n", " [0.4317197 , 0.43408873, 0.39743626, 0.42090556, 0.34891009,\n", " 0.52784779, 0.61367764, 0.36263911, 0. , 0.26616347],\n", " [0.66197886, 0.4295837 , 0.30728451, 0.68581077, 0.44986324,\n", " 0.79361514, 0.40819356, 0.27237142, 0.26616347, 0. ]])" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "distances = np.sqrt(sq_differences)\n", "distances" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0, 3, 4, 5, 8, 1, 9, 7, 2, 6],\n", " [1, 4, 6, 9, 8, 0, 7, 3, 2, 5],\n", " [2, 7, 9, 8, 6, 4, 3, 1, 0, 5],\n", " [3, 5, 0, 8, 4, 9, 7, 2, 1, 6],\n", " [4, 1, 0, 8, 9, 6, 3, 5, 7, 2],\n", " [5, 3, 0, 8, 4, 9, 1, 7, 2, 6],\n", " [6, 1, 9, 4, 8, 7, 2, 0, 3, 5],\n", " [7, 2, 9, 8, 6, 4, 1, 3, 0, 5],\n", " [8, 9, 4, 7, 2, 3, 0, 1, 5, 6],\n", " [9, 8, 7, 2, 6, 1, 4, 0, 3, 5]])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nearest = np.argsort(distances, axis=1)\n", "nearest" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "첫번째 결과인 `[0, 3, 4, 5, 8, 1, 9, 7, 2, 6]`을 해석해보면, 첫 번째로 작은 값을 가진 entry의 index는 0, 두 번째로 작은 값을 가진 entry의 index는 3, 그 다음으로는 index가 4 이다.\n", "\n", "0번 column은 row index에 해당하는 값이고(자기 자신과의 거리는 0이므로), 첫 번째 column이 가장 가까운 점이다.\n", "\n", "$k$ 번째라면 각 행을 partition으로 나눠 가장 작은 $k+1$개의 거리가 먼저 오고, 그 보다 큰 거리의 요소를 배열의 나머지 위치에 채우면 된다." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.5. $k$개 이웃 선정" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[3, 4],\n", " [4, 6],\n", " [7, 9],\n", " [5, 0],\n", " [1, 0],\n", " [3, 0],\n", " [1, 9],\n", " [2, 9],\n", " [9, 4],\n", " [8, 7]])" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# k = 2라고 하자.\n", "k = 2\n", "knn = nearest[:, 1:k+1]\n", "knn" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.6. Plot" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "- $v_1$과 가장 가까운 데이터는 $v_4$, $v_5$" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAE3JJREFUeJzt3X+Mldd95/H31xdmyMw4tc1QabFxJpuACrZsZTWQRP2jaUNagpD9x5ImrlCKFQWplbtaJ2rleldp6lRNlTRC2pV3u2h3TROroY7/aBElcvrDVauqDoyVggzImLhTw1IpgEm6MxNmmPG3f8w1HY0v3Mtw5z53zrxf0kj3eZ7juV+O7v34zHnOvScyE0lSWW6rugBJUvsZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCrajqiQcHB3NoaKiqp5ekJenll1++mJlrmrWrLNyHhoYYGRmp6uklaUmKiH9qpZ3TMpJUoKbhHhH/NyJ+EBGvXOd6RMR/i4gzEXE8Iv5D+8uUJN2MVkbu+4FtN7j+cWB9/WcP8D9vvSxJ0q1oGu6Z+TfAmzdo8jDw9Zz1EnBHRPy7dhUoSbp57Zhzvxs4O+f4XP2cJKki7Qj3aHCu4Q4gEbEnIkYiYuTChQtteGpJUiPtCPdzwLo5x/cA5xs1zMx9mTmcmcNr1jRdpilJWqB2hPtB4NP1VTMfAn6Umf/cht8rSVqgph9iiohvAh8BBiPiHPBbwEqAzPwD4DCwHTgDTACPLlax841NTnPo2HlGL40ztLqfHQ+uZaC3ss9lSVLXiKo2yB4eHs5b+YTq0dE32f3METJhYmqGvp4aEbD/0S1sHrqrjZVKUveIiJczc7hZuyX5CdWxyWl2P3OE8ckZJqZmgNmAH5+cqZ+frrhCSarWkgz3Q8fOc70/ODLh0PGG93MladlYkuE+emn82oh9vompGUYvTnS4IknqLkvy7uPQ6n76emoNA76vp8bQYF8FVakKY/9ymZN/vp+Zi9+nNvg+Nn1sNwPvvrPqsqTKLcmR+44H1xKNPjoFRMCOB9Z2tiBV4tR3X4Cv/RT3H/8yH/7nb3D/8S/D135q9ry0zC3JcB/oXcH+R7fQ31ujr6cGzI7Y+3tr9fNL8g8S3YSxf7nMusOfZiCu0BeTAPTFJANxhXWHP834//9hxRVK1VqyKbh56C6OPLmVQ8fPM3pxgqHBPnY8sNZgXyZO/vl+7m/8LRfcRvLKd/az5T/+5w5XJXWPJZ2E/b0r+OTme6suQxWYufj9ayP2+fpikpmL3+9wRVJ3WZLTMlJt8H1MZG/DaxPZS23wfR2uSOouhruWpE0f281bDb+QFN4iuO/nd3e2IKnLGO5akgbefSdnt3+dsVx1bQQ/kb2M5SrObv86/bffUXGFUrWW9Jy7lreNH/wFxje9yivf+bd17vf9/G42GuyS4a6lrf/2O1wVIzXgtIwkFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBVoWX5CdeziZU7u3cfM6deobVjPpsf3MDDo1mySyrHswv3Utw6zbtdO7s+k7+oVJlau4q3f/21OPfs8Gz+xveryJKktltW0zNjFy6zbtZOBqR/Td/UKAH1XrzAw9WPW7drJ+CW3ZpNUhmUV7if37uO2vM7WbJmc2LuvwxVJ0uJYVuE+c/q1ayP2+fquXmHm9JkOVyRJi2NZhXttw3omVq5qeG1i5SpqG97f4YokaXEsq3Df9Pge3orrbM0WwX2P7+lwRZK0OJZVuA8M3snZZ59nrOdd10bwEytXMdbzLs4++zz9q93BR1IZlt1SyI2f2M74z53nlb37mDl9htqG93Pf43vYaLBLKsiyC3eA/tV3sOV3fqPqMiRp0SyraRlJWi4Md0kqkOEuSQUy3CWpQIa7JBXIcJekArUU7hGxLSJejYgzEfFEg+v3RsSLEfG9iDgeEX53riRVqGm4R0QNeBr4OLAJeCQiNs1r9l+B5zLzA8CngP/R7kIlSa1rZeS+BTiTma9n5hRwAHh4XpsE3l1//BPA+faVKEm6Wa18QvVu4Oyc43PAB+e1+SLwnYj4NaAf2NqW6iRJC9LKyL3R1yjO3/HiEWB/Zt4DbAe+ERHv+N0RsSciRiJi5MKFCzdfrSSpJa2E+zlg3Zzje3jntMtngOcAMvPvgVXA4PxflJn7MnM4M4fXrFmzsIolSU21Eu5HgfUR8d6I6GH2hunBeW3eAD4KEBEbmQ13h+aSVJGm4Z6Z08BjwAvAKWZXxZyIiKci4qF6s88Dn42IY8A3gd2Z19msVJK06Fr6yt/MPAwcnnfuC3MenwR+ur2lSZIWyk+oSlKBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalALX1xmKTyjV28zMm9+5g5/Rq1DevZ9PgeBgbvrLosLZDhLolT3zrMul07uT+TvqtXmFi5ird+/7c59ezzbPzE9qrL0wI4LSMtc2MXL7Nu104Gpn5M39UrAPRdvcLA1I9Zt2sn45d+WHGFWgjDXVrmTu7dx23X2VvntkxO7N3X4YrUDoa7tMzNnH7t2oh9vr6rV5g5fabDFakdDHdpmattWM/EylUNr02sXEVtw/s7XJHawXCXlrlNj+/hrYiG196K4L7H93S4IrWD4S4tcwODd3L22ecZ63nXtRH8xMpVjPW8i7PPPk//6jsqrlAL4VJISWz8xHbGf+48r+zdx8zpM9Q2vJ/7Ht/DRoN9yTLcJQHQv/oOtvzOb1RdhtrEaRlJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoFaCveI2BYRr0bEmYh44jptfjEiTkbEiYj4o/aWKUk3NjY5zYEjb/B73z7FgSNvMDY5XXVJlWq6E1NE1ICngY8B54CjEXEwM0/OabMe+E3gpzPzckT85GIVLEnzHR19k93PHCETJqZm6Oup8aU/O8n+R7eweeiuqsurRCsj9y3Amcx8PTOngAPAw/PafBZ4OjMvA2TmD9pbpiQ1NjY5ze5njjA+OcPE1AwwG/DjkzP188tzBN9KuN8NnJ1zfK5+bq4NwIaI+LuIeCkitjX6RRGxJyJGImLkwoULC6tYkuY4dOw8mY2vZcKh4+c7W1CXaCXco8G5+V25AlgPfAR4BPjfEfGObdMzc19mDmfm8Jo1a262Vkl6h9FL49dG7PNNTM0wenGiwxV1h1bC/Rywbs7xPcD8/xWeA/40M69m5j8CrzIb9pK0qIZW99PXU2t4ra+nxtBgX4cr6g6thPtRYH1EvDcieoBPAQfntfkT4GcBImKQ2Wma19tZqCQ1suPBtUSj+QUgAnY8sLazBXWJpuGemdPAY8ALwCngucw8ERFPRcRD9WYvAJci4iTwIvDrmXlpsYqWpLcN9K5g/6Nb6O+tXRvB9/XU6O+t1c83XRRYpMjr3YlYZMPDwzkyMlLJc0sqz/jkNIeOn2f04gRDg33seGBtkcEeES9n5nCzduX9yyUtS/29K/jk5nurLqNr+PUDklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHXuWjRjk9McOnae0UvjDK3uZ8eDaxko8EMlUjfynaZF4eYJUrWcllHbuXmCVD3DXW3n5glS9Qx3tZ2bJ0jVM9zVdm6eIFXPcFfbuXmCVD3DXW3n5glS9XyXaVFsHrqLI09uXRabJ0jdyHeaFo2bJ0jVcVpGkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAhnuklQgw12SCmS4S1KBDHdJKlBL4R4R2yLi1Yg4ExFP3KDdzojIiBhuX4mSpJvVNNwjogY8DXwc2AQ8EhGbGrS7HfhPwHfbXaQk6ea0MnLfApzJzNczcwo4ADzcoN2XgK8AV9pYnyRpAVoJ97uBs3OOz9XPXRMRHwDWZeahNtYmSVqgVsI9GpzLaxcjbgP2Ap9v+osi9kTESESMXLhwofUqJUk3pZVwPwesm3N8D3B+zvHtwP3AX0fEKPAh4GCjm6qZuS8zhzNzeM2aNQuvWpJ0Q62E+1FgfUS8NyJ6gE8BB9++mJk/yszBzBzKzCHgJeChzBxZlIolSU01DffMnAYeA14ATgHPZeaJiHgqIh5a7AIlSTdvRSuNMvMwcHjeuS9cp+1Hbr0sSdKt8BOqklQgw12SCmS4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFchwl6QCGe6SVKCWdmKSlpOxyWkOHTvP6KVxhlb3s+PBtQz0+lbR0uIrVprj6Oib7H7mCJkwMTVDX0+NL/3ZSfY/uoXNQ3dVXZ7UMqdlpLqxyWl2P3OE8ckZJqZmgNmAH5+cqZ+frrhCqXWGu1R36Nh5Mhtfy4RDx893tiDpFhjuUt3opfFrI/b5JqZmGL040eGKpIUz3KW6odX99PXUGl7r66kxNNjX4YqkhTPcpbodD64lovG1CNjxwNrOFiTdAsNdqhvoXcH+R7fQ31u7NoLv66nR31urn3dxmZYOX63SHJuH7uLIk1s5dPw8oxcnGBrsY8cDaw12LTm+YqV5+ntX8MnN91ZdhnRLnJaRpAIZ7pJUIMNdkgpkuEtSgQx3SSqQ4S5JBTLcJalAhrskFcgPMUnSIqlyVy/DXZIWQdW7ejktI0lt1g27erUU7hGxLSJejYgzEfFEg+ufi4iTEXE8Iv4yIt7T/lIlaWnohl29moZ7RNSAp4GPA5uARyJi07xm3wOGM/MB4HngK+0uVJKWim7Y1auVkfsW4Exmvp6ZU8AB4OG5DTLzxcx8u9qXgHvaW6YkLR3dsKtXK+F+N3B2zvG5+rnr+Qzw7VspSpKWsm7Y1auVcG9UYsPZpIjYBQwDX73O9T0RMRIRIxcuXGi9SjE2Oc2BI2/we98+xYEjbzDWgRsykhamG3b1irzerP/bDSI+DHwxM3+hfvybAJn55XnttgL/HfiZzPxBsyceHh7OkZGRhda9rDRaUhVBx5ZUSVqY8cnptu/qFREvZ+Zw03YthPsK4DTwUeD/AUeBX8rME3PafIDZG6nbMvO1Vgo03FszNjnNB3/3LxiffOfNmf7eGkee3OoWcNIy0mq4N52Wycxp4DHgBeAU8FxmnoiIpyLioXqzrwIDwLci4h8i4uAt1K45umFJlaSlp6UhX2YeBg7PO/eFOY+3trku1XXDkipJS4+fUO1y3bCkStLSY7h3gRuthOmGJVWSlh7vxFWs2ZcLvb2k6nqrZbyZKqmRpqtlFourZW5uJcxiLKmStPS0ulrGdKhQKythPrn5XgD6e1dceyxJzTjnXiFXwkhaLIZ7hVwJI2mxGO4VciWMpMViuFeoG75cSFKZTI+KbR66iyNPbnUljKS2MkG6gCthJLWb0zKSVCDDXZIKZLhLUoEMd0kqkOEuSQUy3CWpQIa7JBXIcJekAlX2fe4RcQH4p5v4TwaBi4tUTinso+bso+bso+aq7KP3ZOaaZo0qC/ebFREjrXxB/XJmHzVnHzVnHzW3FPrIaRlJKpDhLkkFWkrhvq/qApYA+6g5+6g5+6i5ru+jJTPnLklq3VIauUuSWtRV4R4R2yLi1Yg4ExFPNLjeGxF/XL/+3YgY6nyV1Wqhjz4XEScj4nhE/GVEvKeKOqvWrJ/mtNsZERkRXb3yYTG00kcR8Yv119OJiPijTtdYtRbeb/dGxIsR8b36e257FXU2lJld8QPUgO8D/x7oAY4Bm+a1+VXgD+qPPwX8cdV1d2Ef/SzQV3/8K8utj1rtp3q724G/AV4Chquuu9v6CFgPfA+4s378k1XX3YV9tA/4lfrjTcBo1XW//dNNI/ctwJnMfD0zp4ADwMPz2jwM/GH98fPARyOut8V0kZr2UWa+mJkT9cOXgHs6XGM3aOW1BPAl4CvAlU4W1yVa6aPPAk9n5mWAzPxBh2usWit9lMC7649/AjjfwfpuqJvC/W7g7Jzjc/VzDdtk5jTwI2B1R6rrDq300VyfAb69qBV1p6b9FBEfANZl5qFOFtZFWnktbQA2RMTfRcRLEbGtY9V1h1b66IvArog4BxwGfq0zpTXXTXuoNhqBz1/K00qbkrX874+IXcAw8DOLWlF3umE/RcRtwF5gd6cK6kKtvJZWMDs18xFm/wL824i4PzN/uMi1dYtW+ugRYH9mfi0iPgx8o95Hby1+eTfWTSP3c8C6Ocf38M4/ca61iYgVzP4Z9GZHqusOrfQREbEV+C/AQ5k52aHaukmzfroduB/464gYBT4EHFxmN1Vbfb/9aWZezcx/BF5lNuyXi1b66DPAcwCZ+ffAKma/d6Zy3RTuR4H1EfHeiOhh9obpwXltDgK/XH+8E/irrN/JWCaa9lF9uuF/MRvsy22O9G037KfM/FFmDmbmUGYOMXtv4qHMHKmm3Eq08n77E2Zv0BMRg8xO07ze0Sqr1UofvQF8FCAiNjIb7hc6WuV1dE241+fQHwNeAE4Bz2XmiYh4KiIeqjf7P8DqiDgDfA647hK3ErXYR18FBoBvRcQ/RMT8F2PxWuynZa3FPnoBuBQRJ4EXgV/PzEvVVNx5LfbR54HPRsQx4JvA7m4ZcPoJVUkqUNeM3CVJ7WO4S1KBDHdJKpDhLkkFMtwlqUCGuyQVyHCXpAIZ7pJUoH8FAocoKCsF8swAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.scatter(x, y, s=50)\n", "plt.scatter(dataset[0][0], dataset[0][1], s=50) # v1\n", "nearest = dataset[knn[0]] # v4, v5\n", "plt.scatter(nearest[:, 0], nearest[:, 1], c='r', s=50)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "$v_{10}$과 가장 가까운 데이터는 $v_8$, $v_7$" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAE9VJREFUeJzt3W9sXfd93/H3V1cmlUs6tS2qwGTLYZpIgGRDRgZKSdEHTRenUQTBflCniQcjk5BFQAd3mBO0cJMhS+2iS5MNAjZ464RtVlOjUR0/aAVZgdN1LjoUcSgaqYRIgh3FZS2VBSLKSjqSESlS3z7gtUrQl7pH0uX9c+77BQi455yfyY9/4v3o8Jxzz4nMRJJULmvaHUCS1HyWuySVkOUuSSVkuUtSCVnuklRClrsklZDlLkklZLlLUglZ7pJUQmvb9Y2HhoZyeHi4Xd9ekrrSq6++OpmZGxqNa1u5Dw8PMzY21q5vL0ldKSL+tsg4D8tIUgk1LPeI+N8R8aOI+P4K2yMi/ktEnI2IkxHxz5sfU5J0I4rsuR8Cdl1n+8eBzbU/+4H/fuuxJEm3omG5Z+ZfAm9dZ8jDwNdz0SvAHRHxz5oVUJJ045pxzP1u4NyS5fO1dZKkNmlGuUeddXWfABIR+yNiLCLGLly40IRvLUmqpxnlfh7YtGT5HmCi3sDMPJiZI5k5smFDw8s0JUk3qRnlfgT4dO2qmQ8BP8nMv2/C15Uk3aSGH2KKiG8AHwaGIuI88B+A2wAy8/eBY8Bu4CwwA+xbrbDLTc3Oc/TEBOMXpxleP8CeBzYy2N+2z2VJUseIdj0ge2RkJG/lE6rHx99i77OjZMLM3ALVvgoRcGjfTnYM39XEpJLUOSLi1cwcaTSuKz+hOjU7z95nR5meXWBmbgFYLPjp2YXa+vk2J5Sk9urKcj96YoKVfuHIhKMn657PlaSe0ZXlPn5x+toe+3IzcwuMT860OJEkdZauPPs4vH6Aal+lbsFX+yoMD1XbkErt4El1qb6ufBfseWAjT794uu62CNizfWOLE6kd6p1Uf/rF055Ul+jSwzKD/Ws5tG8nA/0Vqn0VYHGPfaC/Ulvflf9m6QZ4Ul26vq5twR3DdzH6hQc5enKC8ckZhoeq7Nm+0WLvEUVOqn9yx72tDSV1kK5uwoH+tb6Be5Qn1aXr68rDMtLbJ9Xr8aS6ZLmrS+15YCNR736keFJdAstdXcqT6tL1+Q5Q1/KkurQy3wXqap5Ul+rzsIwklZDlLkklZLlLUglZ7pJUQpa7JJWQ5S5JJWS5S1IJWe6SVEKWuySVUE9+QtVHs0kqu55rNB/NJqkX9NRhGR/NJqlX9FS5F3k0mySVQU+Vu49mk9QreqrcfTSbpF7RU+Xuo9kk9YqeKncfzSapV/Rcm/loNkm9oCcbzUezSSq7njosI0m9wnKXpBKy3CWphCx3SSohy12SSshyl6QSKlTuEbErIl6LiLMR8WSd7fdGxMsR8b2IOBkRu5sfVZJUVMNyj4gK8AzwcWAb8GhEbFs27N8Dz2fmB4BPAf+t2UElScUV2XPfCZzNzDcycw44DDy8bEwC7669/hnAe+dKUhsV+YTq3cC5JcvngQ8uG/Nl4NsR8evAAPBgU9JJkm5KkT33evdRXP7Ii0eBQ5l5D7Ab+MOIeMfXjoj9ETEWEWMXLly48bSSpEKKlPt5YNOS5Xt452GXzwDPA2Tmd4B1wNDyL5SZBzNzJDNHNmzYcHOJJUkNFSn348DmiHhvRPSxeML0yLIxbwIfAYiIrSyWu7vmktQmDcs9M+eBx4GXgDMsXhVzKiKeioiHasM+D3w2Ik4A3wD2Zq70tFJJ0mordMvfzDwGHFu27ktLXp8GfqG50SRJN8tPqEpSCVnuklRClrsklZDlLkklZLlLUglZ7pJUQpa7JJWQ5S5JJWS5S1IJWe6SVEKWuySVkOUuSSVU6MZhkspvanaeoycmGL84zfD6AfY8sJHBfiuiW/k3J4nj42+x99lRMmFmboFqX4WnXzzNoX072TF8V7vj6SZ4WEbqcVOz8+x9dpTp2QVm5haAxYKfnl2orZ9vc0LdDMtd6nFHT0yw0qN1MuHoyeVP1VQ3sNylHjd+cfraHvtyM3MLjE/OtDiRmsFyl3rc8PoBqn2VutuqfRWGh6otTqRmsNylHrfngY1E1N8WAXu2b2xtIDWF5S71uMH+tRzat5OB/sq1PfhqX4WB/kptvRfVdSP/1iSxY/guRr/wIEdPTjA+OcPwUJU92zda7F3MvzlJAAz0r+WTO+5tdww1iYdlJKmELHdJKiHLXZJKyHKXpBKy3CWphCx3SSohy12SSshyl6QSstwlqYQsd0kqIctdkkrIcpekErLcJamELHdJKiHLXZJKyHKXpBIqVO4RsSsiXouIsxHx5ApjfjUiTkfEqYj4o+bGlKTrm5qd5/Dom3zlW2c4PPomU7Pz7Y7UVg2fxBQRFeAZ4KPAeeB4RBzJzNNLxmwGfgv4hcy8FBE/u1qBJWm54+NvsffZUTJhZm6Bal+Fp188zaF9O9kxfFe747VFkT33ncDZzHwjM+eAw8DDy8Z8FngmMy8BZOaPmhtTkuqbmp1n77OjTM8uMDO3ACwW/PTsQm19b+7BFyn3u4FzS5bP19YttQXYEhF/FRGvRMSuel8oIvZHxFhEjF24cOHmEkvSEkdPTJBZf1smHD050dpAHaJIuUeddcunci2wGfgw8CjwPyPijnf8R5kHM3MkM0c2bNhwo1kl6R3GL05f22NfbmZugfHJmRYn6gxFyv08sGnJ8j3A8n8KzwN/mplXMvNvgNdYLHtJWlXD6weo9lXqbqv2VRgeqrY4UWcoUu7Hgc0R8d6I6AM+BRxZNuZPgF8CiIghFg/TvNHMoJJUz54HNhL1ji8AEbBn+8bWBuoQDcs9M+eBx4GXgDPA85l5KiKeioiHasNeAi5GxGngZeA3MvPiaoWWpLcN9q/l0L6dDPRXru3BV/sqDPRXausbXhRYSpErnYlYZSMjIzk2NtaW7y2pfKZn5zl6coLxyRmGh6rs2b6xlMUeEa9m5kijceX7P5fUkwb61/LJHfe2O0bH8PYDklRClrsklZDlLkklZLlLUglZ7pJUQpa7JJWQ5S5JJeR17lo1U5OXOH3gIAuv/4DKls1se2I/g0N3tjuW1BMsd62KM988xqbHHuH+TKpXLjNz2zqu/qff5sxzL7D1E7vbHU8qPQ/LqOmmJi+x6bFHGJz7KdUrlwGoXrnM4NxP2fTYI0xf/HGbE0rlZ7mr6U4fOMiaFe5ZtCaTUwcOtjiR1HssdzXdwus/uLbHvlz1ymUWXj/b4kRS77Hc1XSVLZuZuW1d3W0zt62jsuX9LU4k9R7LXU237Yn9XF3h6QlXI7jvif0tTiT1HstdTTc4dCfnnnuBqb53XduDn7ltHVN97+Lccy8wsP4dj9eV1GReCqlVsfUTu5n+FxN8/8BBFl4/S2XL+7nvif1stdillrDctWoG1t/Bzt/5zXbHkHqSh2UkqYQsd0kqIctdkkrIcpekErLcJamELHdJKiHLXZJKyHKXpBKy3CWphCx3SSohy12SSshyl6QSstwlqYQsd0kqIctdkkrIcpekErLcJamELHdJKiHLXZJKqFC5R8SuiHgtIs5GxJPXGfdIRGREjDQvoiTpRjUs94ioAM8AHwe2AY9GxLY6424H/i3w3WaHlCTdmCJ77juBs5n5RmbOAYeBh+uMexr4KnC5ifkkSTehSLnfDZxbsny+tu6aiPgAsCkzjzYxmyTpJhUp96izLq9tjFgDHAA+3/ALReyPiLGIGLtw4ULxlJKkG1Kk3M8Dm5Ys3wNMLFm+Hbgf+IuIGAc+BBypd1I1Mw9m5khmjmzYsOHmU0uSrqtIuR8HNkfEeyOiD/gUcOTtjZn5k8wcyszhzBwGXgEeysyxVUksSWqoYbln5jzwOPAScAZ4PjNPRcRTEfHQageUJN24tUUGZeYx4NiydV9aYeyHbz2WJOlW+AlVSSohy12SSshyl6QSstwlqYQsd0kqIctdkkrIcpekErLcJamELHdJKiHLXZJKyHKXpBKy3CWphCx3SSohy12SSshyl6QSstwlqYQsd0kqoUJPYpJ6ydQ/XOL0nx1iYfKHVIbex7aP7mXw3Xe2O5Z0Qyx3aYkz332JTcc+zf0k1ZhlZqKfqye/wpndX2frBz/W7nhSYR6WkWqm/uESm459msG4TDVmAajGLINxmU3HPs30//9xmxNKxVnuUs3pPzvEGrLutjUkp759qLWBpFtguUs1C5M/vLbHvlw1ZlmY/GGLE0k3z3KXaipD72Mm++tum8l+KkPva3Ei6eZZ7lLNto/u5SpRd9tVgvt+eW9rA0m3wHKXagbffSfndn+dqVx3bQ9+JvuZynWc2/11Bm6/o80JpeK8FFJaYusHP8b0ttf4/rf/6Tr3+355L1stdnUZy11aZuD2O9j5K/+u3TGkW+JhGUkqIctdkkrIcpekErLcJamELHdJKiHLXZJKyHKXpBKy3CWphPwQkyStkqnZeY6emGD84jTD6wfY88BGBvtbU7uWuyStguPjb7H32VEyYWZugWpfhadfPM2hfTvZMXzXqn9/D8tIUpNNzc6z99lRpmcXmJlbABYLfnp2obZ+ftUzFCr3iNgVEa9FxNmIeLLO9s9FxOmIOBkRfx4R72l+VEnqDkdPTJD1H+pFJhw9ObHqGRqWe0RUgGeAjwPbgEcjYtuyYd8DRjJzO/AC8NVmB5WkbjF+cfraHvtyM3MLjE/OrHqGInvuO4GzmflGZs4Bh4GHlw7IzJcz8+20rwD3NDemJHWP4fUDVPsqdbdV+yoMD1VXPUORcr8bOLdk+Xxt3Uo+A3zrVkJJUjfb88BGov5DvYiAPds3rnqGIuVeL2Ldo0kR8RgwAnxthe37I2IsIsYuXLhQPKWYmrzE6Bd/j+984l8z+sXfY2ryUrsjSVrBYP9aDu3byUB/5doefLWvwkB/pbZ+9S9UjFzpqP/bAyJ+HvhyZn6stvxbAJn5H5eNexD4r8AvZuaPGn3jkZGRHBsbu9ncPeXMN4+x6bFHWJNJ9cplZm5bx9UIzj33Als/sbvd8SStYHp2nqMnJxifnGF4qMqe7Rtvudgj4tXMHGk4rkC5rwVeBz4C/B1wHPiXmXlqyZgPsHgidVdm/qBIQMu9mKnJS3D33QzO/fSd2/reRUxMMLDeR8BJvaJouTc8LJOZ88DjwEvAGeD5zDwVEU9FxEO1YV8DBoFvRsRfR8SRW8iuJU4fOMiaFf4BXpPJqQMHW5xIUjco9PtBZh4Dji1b96Ulrx9sci7VLLz+A6pXLtfdVr1ymYXXz7Y4kaRu4CdUO1xly2ZmbltXd9vMbeuobHl/ixNJ6gaWeweYmp3n8OibfOVbZzg8+iZTSz6avO2J/Vxd4ZqqqxHc98T+VsWU1EW8cVibNbq50ODQnZx57oWVr5bxZKqkOhpeLbNavFpmcY/9g7/7f5iefefHlAf6K4x+4cFrl01NX/wxpw4cZOH1s1S2vJ/7ntjvVTJSDyp6tYx77m1U5OZCn9xxLwAD6+9g5+/8ZgvTSepmHnNvo064uZCkcrLc26gTbi4kqZws9zbqhJsLSSony72NOuHmQpLKyfZosx3DdzH6hQebfnMhSb3NBukAA/1rr10VI0nN4GEZSSohy12SSshyl6QSstwlqYQsd0kqIctdkkrIcpekErLcJamE2nY/94i4APztDfwnQ8DkKsUpC+eoMeeoMeeosXbO0Xsyc0OjQW0r9xsVEWNFblDfy5yjxpyjxpyjxrphjjwsI0klZLlLUgl1U7kfbHeALuAcNeYcNeYcNdbxc9Q1x9wlScV10567JKmgjir3iNgVEa9FxNmIeLLO9v6I+OPa9u9GxHDrU7ZXgTn6XEScjoiTEfHnEfGeduRst0bztGTcIxGREdHRVz6shiJzFBG/Wvt5OhURf9TqjO1W4P12b0S8HBHfq73ndrcjZ12Z2RF/gArwQ+DngD7gBLBt2Zh/A/x+7fWngD9ud+4OnKNfAqq117/Wa3NUdJ5q424H/hJ4BRhpd+5OmyNgM/A94M7a8s+2O3cHztFB4Ndqr7cB4+3O/fafTtpz3wmczcw3MnMOOAw8vGzMw8Af1F6/AHwkYqVHTJdSwznKzJczc6a2+ApwT4szdoIiP0sATwNfBS63MlyHKDJHnwWeycxLAJn5oxZnbLcic5TAu2uvfwaYaGG+6+qkcr8bOLdk+XxtXd0xmTkP/ARY35J0naHIHC31GeBbq5qoMzWcp4j4ALApM4+2MlgHKfKztAXYEhF/FRGvRMSulqXrDEXm6MvAYxFxHjgG/HprojXWSc9QrbcHvvxSniJjyqzw/39EPAaMAL+4qok603XnKSLWAAeAva0K1IGK/CytZfHQzIdZ/A3w/0XE/Zn541XO1imKzNGjwKHM/M8R8fPAH9bm6Orqx7u+TtpzPw9sWrJ8D+/8FefamIhYy+KvQW+1JF1nKDJHRMSDwBeBhzJztkXZOkmjeboduB/4i4gYBz4EHOmxk6pF329/mplXMvNvgNdYLPteUWSOPgM8D5CZ3wHWsXjfmbbrpHI/DmyOiPdGRB+LJ0yPLBtzBPhXtdePAP83a2cyekTDOaodbvgfLBZ7rx0jfdt15ykzf5KZQ5k5nJnDLJ6beCgzx9oTty2KvN/+hMUT9ETEEIuHad5oacr2KjJHbwIfAYiIrSyW+4WWplxBx5R77Rj648BLwBng+cw8FRFPRcRDtWH/C1gfEWeBzwErXuJWRgXn6GvAIPDNiPjriFj+w1h6BeeppxWco5eAixFxGngZ+I3MvNiexK1XcI4+D3w2Ik4A3wD2dsoOp59QlaQS6pg9d0lS81juklRClrsklZDlLkklZLlLUglZ7pJUQpa7JJWQ5S5JJfSPyPUqVWidyVEAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.scatter(x, y, s=50)\n", "plt.scatter(dataset[9][0], dataset[9][1], s=50) # v9\n", "nearest = dataset[knn[9]] # v8, v7\n", "plt.scatter(nearest[:, 0], nearest[:, 1], c='r', s=50)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. 함수로 정의" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "def knn(dataset, k):\n", " dist = np.sqrt(np.sum((dataset[:, np.newaxis, :] - dataset[np.newaxis, :, :]) ** 2, axis=-1))\n", " near = np.argpartition(dist, k+1, axis=1)\n", " \n", " return near[:, :k+1]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[3, 0, 4],\n", " [1, 4, 6],\n", " [2, 7, 9],\n", " [3, 5, 0],\n", " [1, 4, 0],\n", " [5, 3, 0],\n", " [1, 9, 6],\n", " [7, 2, 9],\n", " [8, 9, 4],\n", " [8, 7, 9]])" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "knn(dataset, 2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "다만, `argpartition` method는 정렬 후, 작은 순서로 $k+1$인 숫자를 맨 좌측 partition에 배치하되, 배치 한 후에는 그 partition 내부에서의 order는 보장하지 않는다." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }