{ "cells": [ { "cell_type": "markdown", "metadata": { "_cell_guid": "95a0f564-0bc9-4e29-8985-a78150f95ff7", "_uuid": "31ce862a4caf5a38fe457908c211845315106f13" }, "source": [ "# TARUN SUNKARANENI'S Hierarchical and Point-Clustering Notebook Pt. 3" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "_cell_guid": "b1076dfc-b9ad-4769-8c92-a6c4dae69d19", "_uuid": "8f2839f25d086af736a60e9eeb907d3b93b6e0e5" }, "outputs": [], "source": [ "import numpy as np # linear algebra\n", "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", "from scipy.cluster.hierarchy import dendrogram, linkage\n", "from scipy.spatial.distance import cdist\n", "from matplotlib import pyplot as plt\n", "from scipy.spatial import distance\n", "import math\n", "%matplotlib inline\n", "np.set_printoptions(precision=5, suppress=True) # suppress scientific float notation\n", "\n", "# Any results you write to the current directory are saved as output." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "_cell_guid": "79c7e3d0-c299-4dcb-8224-4455121ee9b0", "_uuid": "d629ff2d2480ee46fbb7e2d37f6b5fab8052498a" }, "outputs": [], "source": [ "c1 = pd.read_csv(\"../input/C1.csv\", names=['x0', 'x1'])" ] }, { "cell_type": "markdown", "metadata": { "_cell_guid": "351e3587-c6f7-4828-9633-49b1ae351e21", "_uuid": "7986cbee0e499844dd23711172eb956bd8a140a8" }, "source": [ "# Mean-Link Hierarchical\n", "### Mean-Link: measures the distance between the means (centroids) of two clusters\n", "First compute \n", "$a_1 = \\frac{1}{|S_1|} \\sum_{s \\in S_1} s$ and \n", "\n", "$a_2 = \\frac{1}{|S_2|} \\sum_{s \\in S_2} s$ then\n", "\n", "$\\displaystyle{\\textbf{d}(S_1, S_2) = \\|a_1 - a_2\\|_2}$" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "_cell_guid": "732be8e3-36e9-427f-aba1-6774c94fd632", "_uuid": "60dc1e4e062975f164ed9f77dc77b0a8cc95a486" }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAFl1JREFUeJzt3X+MHOV9x/H3J8aQa5PmAF+CfdixkVw3VE4xXVFSSw1JSQyowsaBxlRpIA2ySEJ/KK0Vu1RJFKnCqaVGSolCnIYCbcWPEGMc4fQKMYimDYRzDBjjHhykKXfnxheoSaNciDHf/rFzsJx37/ZuZnZndz4v6XS7M8/O8/Xsej43zzy7q4jAzMzK5w3tLsDMzNrDAWBmVlIOADOzknIAmJmVlAPAzKykHABmZiXlADAzKykHgJlZSTkAzMxK6oR2FzCdBQsWxNKlS9tdhplZx9i7d++PI6KvmbaFDoClS5cyODjY7jLMzDqGpB8229ZDQGZmJeUAMDMrKQeAmVlJOQDMzErKAWBmVlIOADOzkir0NNBus3PfKNsGhhg7MsGi3h42rVnBulX97S7LzErKAdAiO/eNsmXHfiaOHgNg9MgEW3bsB3AImFlbeAioRbYNDL168J80cfQY2waG2lSRmZWdA6BFxo5MzGq5mVneHAAtsqi3Z1bLzczy5gBokU1rVtAzf97rlvXMn8emNSvaVJGZlZ0vArfI5IVezwIys6JwALTQulX9PuCbWWF4CMjMrKQcAGZmJeUAMDMrqdQBIGmxpPslHZR0QNKf1mkjSV+UNCzpcUlnp+3XzMzSyeIi8MvAn0fE9yW9Gdgr6d6IeLKmzYXA8uTnt4AvJ7/NzKxNUp8BRMShiPh+cvv/gIPA1Kkua4FbouohoFfSwrR9m5nZ3GV6DUDSUmAV8PCUVf3AczX3Rzg+JMzMrIUyCwBJbwK+AfxZRPxk6uo6D4kG29koaVDS4Pj4eFblmZnZFJkEgKT5VA/+/xwRO+o0GQEW19w/HRirt62I2B4RlYio9PX1ZVGemZnVkcUsIAFfAw5GxN82aLYL+HAyG+hc4MWIOJS2bzMzm7ssZgGtBv4Q2C/p0WTZXwJLACLiBmA3cBEwDPwM+EgG/ZqZWQqpAyAivkP9Mf7aNgF8Im1fZmaWHb8T2MyspBwAZmYl5QAwMyspB4CZWUk5AMzMSsrfCJaznftG/TWQZlZIDoAc7dw3ypYd+5k4egyA0SMTbNmxH8AhYGZt5yGgHG0bGHr14D9p4ugxtg0MtakiM7PXOAByNHZkYlbLzcxayQGQo0W9PbNabmbWSg6AHG1as4Ke+fNet6xn/jw2rVnRporMzF7ji8A5mrzQ61lAZlZEDoCcrVvV7wO+mRWSh4DMzErKAWBmVlIOADOzknIAmJmVlAPAzKykMgkASTdKOizpiQbrz5P0oqRHk59PZ9GvmZnNXVbTQG8CrgdumabNv0XE72XUn5mZpZTJGUBEPAi8kMW2zMysNVp5DeBdkh6T9C1Jv96okaSNkgYlDY6Pj7ewPDOzcmlVAHwfeHtE/Abwd8DORg0jYntEVCKi0tfX16LyzMzKpyUBEBE/iYifJrd3A/MlLWhF32ZmVl9LAkDSaZKU3D4n6ff5VvRtZmb1ZTILSNKtwHnAAkkjwGeA+QARcQNwKfAxSS8DE8CGiIgs+jYzs7nJJAAi4vIZ1l9PdZqomZkVhN8JbGZWUg4AM7OScgCYmZWUA8DMrKQcAGZmJeUAMDMrKQeAmVlJOQDMzErKAWBmVlIOADOzknIAmJmVlAPAzKykHABmZiXlADAzKykHgJlZSTkAzMxKygFgZlZSmQSApBslHZb0RIP1kvRFScOSHpd0dhb9mpnZ3GV1BnATcME06y8Elic/G4EvZ9SvmZnNUSYBEBEPAi9M02QtcEtUPQT0SlqYRd9mZjY3rboG0A88V3N/JFl2HEkbJQ1KGhwfH29JcWZmZdS
qAFCdZVGvYURsj4hKRFT6+vpyLsvMrLxaFQAjwOKa+6cDYy3q28zM6mhVAOwCPpzMBjoXeDEiDrWobzMzq+OELDYi6VbgPGCBpBHgM8B8gIi4AdgNXAQMAz8DPpJFv2ZmNneZBEBEXD7D+gA+kUVfZmaWDb8T2MyspBwAZmYl5QAwMyspB4CZWUk5AMzMSsoBYGZWUg4AM7OScgCYmZWUA8DMrKQcAGZmJeUAMDMrKQeAmVlJOQDMzErKAWBmVlIOADOzknIAmJmVlAPAzKykMgkASRdIGpI0LGlznfVXShqX9Gjyc1UW/ZqZ2dyl/kpISfOALwHvA0aARyTtiognpzS9PSKuSdufmZllI4szgHOA4Yh4NiJ+AdwGrM1gu2ZmlqMsAqAfeK7m/kiybKoPSHpc0p2SFmfQr5mZpZBFAKjOsphy/5vA0oh4J3AfcHPDjUkbJQ1KGhwfH8+gPDMzqyeLABgBav+iPx0Yq20QEc9HxEvJ3a8Cv9loYxGxPSIqEVHp6+vLoDwzM6sn9UVg4BFguaRlwCiwAfiD2gaSFkbEoeTuxcDBDPptaOe+UbYNDDF2ZIJFvT1sWrOCdavqjUqZmZVX6gCIiJclXQMMAPOAGyPigKTPAYMRsQv4E0kXAy8DLwBXpu23kZ37RtmyYz8TR48BMHpkgi079gM4BMzMaihi6nB9cVQqlRgcHJzVY1Zv3cPokYnjlvf39vDvm9+bVWlmZoUkaW9EVJpp23XvBB6rc/CfbrmZWVl1XQAs6u2Z1XIzs7LqugDYtGYFPfPnvW5Zz/x5bFqzok0VmZkVUxazgApl8kKvZwGZmU2v6wIAqiGQ1wHfU0zNrFt0ZQDkxVNMzaybdN01gDxtGxh69eA/aeLoMbYNDLWpIjOzufMZwCx4iqmZTafThoh9BjALnmJqZo1MDhGPHpkgeG2IeOe+0XaX1pADYBY8xdTMGunEIWIPAc2Cp5iaWSOdOETsAJilPKeYmnWTooyHt6qORb09dT+HrMhDxB4CKpid+0ZZvXUPyzbfw+qtewo9fmjWSFHGw1tZRycOETsACqQo/2nM0irKeHgr61i3qp/r1q+kv7cHUf0E4uvWryz0iIGHgApkuhdrkV9EZlMVZTy81XV02hCxzwAKpCj/aczSKsqU6aLUUVQOgALxi9W6RVHGw4tSR1FlEgCSLpA0JGlY0uY660+SdHuy/mFJS7Pot9v4xWrdoijj4UWpo6hSfyWkpHnAU8D7gBGqXxJ/eUQ8WdPm48A7I+JqSRuASyLigzNtey5fCdnpijJ1zsw602y+EjKLi8DnAMMR8WzS+W3AWuDJmjZrgc8mt+8ErpekKPIXEreJP8razFoliyGgfuC5mvsjybK6bSLiZeBF4NQM+rYmeYqpmU2VRQCozrKpf9k306baUNooaVDS4Pj4eOrirKoo87LNrDiyCIARYHHN/dOBsUZtJJ0AvAV4od7GImJ7RFQiotLX15dBeQaeYmpmx8siAB4BlktaJulEYAOwa0qbXcAVye1LgT0e/28tTzE1s6lSB0Aypn8NMAAcBO6IiAOSPifp4qTZ14BTJQ0DnwSOmypq+fIUUzObKpOPgoiI3cDuKcs+XXP758BlWfRlc+OPsjazqfxZQCXSaZ9TYmb5cgBYJvweA7PO4wCw1CbfYzA5zXTyPQaAQ8CswPxhcJaa32Ng1pkcAJaa32Ng1pkcAJaa32Ng1pkcAJaa32Ng1pl8EdhS83sMzDqTA8Ay4Y+xNus8DgArNE8xNcuPrwFYoXmKqVl+HABWaJ5iapYfB4AVmqeYmuXHAWCF5immZvnxRWArNE8xNcuPA8AKz1NMzfLhALDS8hRTK7tU1wAknSLpXklPJ79PbtDumKRHk5+p3xds1haeYmpll/YMYDPw7YjYKmlzcv9TddpNRMRZKfsyy5SnmFqRtGM4Mu0soLXAzcntm4F1Kbdn1jKeYmpFMTkcOXpkguC14cid+0Zz7TdtALwtIg4
BJL/f2qDdGyUNSnpIkkPCCsFTTK0o2jUcOeMQkKT7gNPqrLp2Fv0siYgxSWcAeyTtj4hnGvS3EdgIsGTJkll0YTY7rZhi6llG1ox2DUfOGAARcX6jdZJ+JGlhRByStBA43GAbY8nvZyU9AKwC6gZARGwHtgNUKpWY8V9glkLeU0w9y8iasai3h9E6B/u8hyPTDgHtAq5Ibl8B3D21gaSTJZ2U3F4ArAaeTNmvWeF5lpE1q13DkWlnAW0F7pD0UeC/gcsAJFWAqyPiKuAdwFckvUI1cLZGhAPAup5nGXWfvIb02vWO91QBEBHPA79bZ/kgcFVy+z+AlWn6MetEeZ/W+/pCa+U9pJfncGQj/jA4s5zkeVrfrmmDZdaNQ3oOALOcrFvVz3XrV9Lf24OA/t4erlu/MpO/8rrxYFR03Tik588CMstRXqf1eR+MOnV4Kc+62zVTJ08+AzDrQHm+iznv4aWd+0ZZvXUPyzbfw+qtezLdbp51d+MbBx0AZh0oz4NRnsNLeR6k8x4Wy3NIr108BGTWgfKcNpjn8NJ0B+m0tbdijL4dM3Xy5AAw61B5HYzyHOvO8yDdjWP0efMQkJm9Tp7DS3leu+jGMfq8OQDM7HXyHOvO8yDdjWP0eVNEcT9vrVKpxODgYLvLMLMMdeoU004haW9EVJpp62sAZtZS3XYhtZN5CMjMrKQcAGZmJeUAMDMrKQeAmVlJOQDMzErKAWBmVlKpAkDSZZIOSHol+RrIRu0ukDQkaVjS5jR9mplZNtKeATwBrAcebNRA0jzgS8CFwJnA5ZLOTNmvmZmllPY7gQ8CSJqu2TnAcEQ8m7S9DVgL+IvhzczaqBXXAPqB52rujyTLzMysjWY8A5B0H3BanVXXRsTdTfRR7/Sg4QcQSdoIbARYsmRJE5s3M7O5mDEAIuL8lH2MAItr7p8OjE3T33ZgO1Q/DC5l32Zm1kArhoAeAZZLWibpRGADsKsF/ZqZ2TTSTgO9RNII8C7gHkkDyfJFknYDRMTLwDXAAHAQuCMiDqQr28zM0ko7C+gu4K46y8eAi2ru7wZ2p+nLzMyy5XcCm5mVlAPAzKykHABmZiXlADAzKykHgJlZSTkAzMxKygFgZlZSDgAzs5JyAJiZlZQDwMyspBwAZmYl5QAwMyupVB8GZ9Ypdu4bZdvAEGNHJljU28OmNStYt8pfTGfl5gCwrrdz3yhbduxn4ugxAEaPTLBlx34Ah4CVmoeArOttGxh69eA/aeLoMbYNDLWpIrNicABY1xs7MjGr5WZl4QCwrreot2dWy83KwgFgXW/TmhX0zJ/3umU98+exac2KNlVkVgxpvxP4MkkHJL0iqTJNu/+StF/So5IG0/RpNlvrVvVz3fqV9Pf2IKC/t4fr1q/0BWArvbSzgJ4A1gNfaaLteyLixyn7MwNmP61z3ap+H/DNpkj7pfAHASRlU41ZEzyt0ywbrboGEMC/StoraeN0DSVtlDQoaXB8fLxF5Vkn8bROs2zMeAYg6T7gtDqrro2Iu5vsZ3VEjEl6K3CvpP+MiAfrNYyI7cB2gEqlEk1u30rE0zrNsjFjAETE+Wk7iYix5PdhSXcB5wB1A8BsJot6exitc7D3tE6z2cl9CEjSL0t68+Rt4P1ULx6bzYmndZplI+000EskjQDvAu6RNJAsXyRpd9LsbcB3JD0GfA+4JyL+JU2/Vm6e1mmWDUUUd5i9UqnE4KDfNmBm1ixJeyOi4fuyavmdwGZmJeUAMDMrKQeAmVlJOQDMzErKAWBmVlIOADOzkir0NFBJ48APU2xiAVDETyB1Xc0rYk3gumajiDVB99b19ojoa6ZhoQMgLUmDzc6HbSXX1bwi1gSuazaKWBO4LvAQkJlZaTkAzMxKqtsDYHu7C2jAdTWviDWB65qNItYErqu7rwGYmVlj3X4GYGZmDXR8AEi6TNIBSa9IanjlXNIFkoYkDUvaXLN8maSHJT0t6XZJJ2ZU1ymS7k22e6+kk+u0eY+
kR2t+fi5pXbLuJkk/qFl3VitqStodq+l3V83ydu6rsyR9N3muH5f0wZp1me6rRq+VmvUnJf/+4WR/LK1ZtyVZPiRpTZo6ZlnTJyU9meybb0t6e826us9ni+q6UtJ4Tf9X1ay7InnOn5Z0RYvr+kJNTU9JOlKzLpf9JelGSYcl1f0+FFV9Man5cUln16zLZ19FREf/AO8AVgAPAJUGbeYBzwBnACcCjwFnJuvuADYkt28APpZRXX8DbE5ubwY+P0P7U4AXgF9K7t8EXJrxvmqqJuCnDZa3bV8BvwosT24vAg4BvVnvq+leKzVtPg7ckNzeANye3D4zaX8SsCzZzrwW1fSemtfOxyZrmu75bFFdVwLXN3i9P5v8Pjm5fXKr6prS/o+BG1uwv34HOBt4osH6i4BvAQLOBR7Oe191/BlARByMiJm+DfwcYDgino2IXwC3AWslCXgvcGfS7mZgXUalrU221+x2LwW+FRE/y6j/LGp6Vbv3VUQ8FRFPJ7fHgMNAU292maW6r5Vp6r0T+N1k/6wFbouIlyLiB8Bwsr3ca4qI+2teOw8Bp2fQb+q6prEGuDciXoiI/wXuBS5oU12XA7dm1HdDUf0e9BemabIWuCWqHgJ6JS0kx33V8QHQpH7guZr7I8myU4EjEfHylOVZeFtEHAJIfr91hvYbOP5F+NfJqeAXJJ3UwpreKGlQ0kOTQ1IUaF9JOofqX3bP1CzOal81eq3UbZPsjxep7p9mHptXTbU+SvUvyUn1ns8sNFvXB5Ln5k5Ji2f52DzrIhkqWwbsqVmc1/6aSaO6c9tXM34pfBFIug84rc6qayPi7mY2UWdZTLM8dV3NbiPZzkJgJTBQs3gL8D9UD3TbgU8Bn2tRTUsiYkzSGcAeSfuBn9Rp16599Y/AFRHxSrJ4TvuqURd1lk39d+byeppG09uV9CGgAry7ZvFxz2dEPFPv8TnU9U3g1oh4SdLVVM+c3tvkY/Osa9IG4M6IOFazLK/9NZNWv646IwAi4vyUmxgBFtfcPx0Yo/p5G72STkj+kptcnrouST+StDAiDiUHrcPTbOr3gbsi4mjNtg8lN1+S9A/AX7SqpmSIhYh4VtIDwCrgG7R5X0n6FeAe4K+SU+TJbc9pXzXQ6LVSr82IpBOAt1A9tW/msXnVhKTzqQbquyPipcnlDZ7PLA5oM9YVEc/X3P0q8Pmax5435bEPZFBTU3XV2AB8onZBjvtrJo3qzm1flWUI6BFguaqzWE6k+qTviuoVlvupjr8DXAE0c0bRjF3J9prZ7nFjkMmBcHLsfR1Qd+ZA1jVJOnlyCEXSAmA18GS791XyvN1FdYz061PWZbmv6r5Wpqn3UmBPsn92ARtUnSW0DFgOfC9FLU3XJGkV8BXg4og4XLO87vOZQU3N1rWw5u7FwMHk9gDw/qS+k4H38/oz4FzrSmpbQfWi6ndrluW5v2ayC/hwMhvoXODF5I+b/PZVHle7W/kDXEI1IV8CfgQMJMsXAbtr2l0EPEU1ya+tWX4G1f+kw8DXgZMyqutU4NvA08nvU5LlFeDva9otBUaBN0x5/B5gP9WD2T8Bb2pFTcBvJ/0+lvz+aBH2FfAh4CjwaM3PWXnsq3qvFapDShcnt9+Y/PuHk/1xRs1jr00eNwRcmOHrfKaa7kte/5P7ZtdMz2eL6roOOJD0fz/wazWP/aNkHw4DH2llXcn9zwJbpzwut/1F9Y+8Q8nreITqtZqrgauT9QK+lNS8n5pZjXntK78T2MyspMoyBGRmZlM4AMzMSsoBYGZWUg4AM7OScgCYmZWUA8DMrKQcAGZmJeUAMDMrqf8HSS6970LAP9sAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.scatter(c1['x0'],c1['x1'])" ] }, { "cell_type": 
"code", "execution_count": 8, "metadata": { "_cell_guid": "5897852e-5c53-468f-b8d5-c6b5d38be6ca", "_uuid": "d0a8d18d903cc3b807fb5246e598632294a98875" }, "outputs": [], "source": [
"def avg(cluster):\n",
"    \"\"\"Return the centroid (coordinate-wise mean) of a cluster.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    cluster : list of points, each point a sequence of numeric coordinates.\n",
"\n",
"    Returns\n",
"    -------\n",
"    numpy.ndarray with one mean per coordinate, or None for an empty cluster.\n",
"    \"\"\"\n",
"    # The original guard was `len(cluster) < 0`, which can never be true;\n",
"    # an empty cluster has no centroid, so report that explicitly.\n",
"    if len(cluster) == 0:\n",
"        return None\n",
"    return np.mean(np.asarray(cluster, dtype=float), axis=0)"
] }, { "cell_type": "code", "execution_count": 9, "metadata": { "_cell_guid": "ed08a12f-2e46-4a07-9db7-c393d0672d54", "_uuid": "b51a1eec719d2c1f67aa6f3bdc294410ecc5a1d5" }, "outputs": [], "source": [
"def mean_distance(clusters ,cluster_num):\n",
"    \"\"\"Merge the two clusters with the closest centroids until `cluster_num` remain.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    clusters : list of clusters, each a list of points; modified in place.\n",
"    cluster_num : number of clusters to stop at (must be >= 1).\n",
"\n",
"    Returns\n",
"    -------\n",
"    The same `clusters` list after merging. If it already holds `cluster_num`\n",
"    or fewer clusters it is returned unchanged.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError if `cluster_num` is smaller than 1.\n",
"\n",
"    One line is printed per merge: the two merged indices and their distance.\n",
"    \"\"\"\n",
"    if cluster_num < 1:\n",
"        raise ValueError('cluster_num must be at least 1')\n",
"    print('first cluster | ','second cluster | ', 'distance')\n",
"    # `>` instead of the original `is not`: identity comparison of ints is unreliable,\n",
"    # and the loop must also stop when fewer clusters than requested are available.\n",
"    while len(clusters) > cluster_num:\n",
"        # Centroids only change when a merge happens, so compute them once per pass.\n",
"        centroids = [avg(cluster) for cluster in clusters]\n",
"        closest_distance = math.inf\n",
"        clust_1 = clust_2 = None\n",
"        for cluster_id in range(len(clusters) - 1):\n",
"            for cluster2_id in range(cluster_id + 1, len(clusters)):\n",
"                pair_distance = distance.euclidean(centroids[cluster_id], centroids[cluster2_id])\n",
"                # Strict `<` keeps the first pair found when distances tie.\n",
"                if pair_distance < closest_distance:\n",
"                    closest_distance = pair_distance\n",
"                    clust_1 = cluster_id\n",
"                    clust_2 = cluster2_id\n",
"        print(clust_1,' | ',clust_2, ' | ',closest_distance)\n",
"        # clust_2 > clust_1, so removing it does not shift the merged cluster's index.\n",
"        clusters[clust_1].extend(clusters[clust_2])\n",
"        clusters.pop(clust_2)\n",
"    return(clusters)"
] }, { "cell_type": "code", "execution_count": 10, "metadata": { "_cell_guid": "fcf579fe-4e4a-4d69-a92e-04119037ffa4", "_uuid": "7ac6575fef403ea9354afa2956c82a954f78f1b1" }, "outputs": [], "source": [
"### Hierarchical clustering\n",
"def hierarchical(data, cluster_num, metric = 'mean'):\n",
"    \"\"\"Cluster the points of `data` agglomeratively down to `cluster_num` clusters.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    data : DataFrame with the coordinate columns 'x0' and 'x1'.\n",
"    cluster_num : number of clusters to produce.\n",
"    metric : cluster distance to use; only 'mean' (centroid distance) is implemented.\n",
"\n",
"    Returns\n",
"    -------\n",
"    List of clusters, each a list of [x0, x1] points.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError for an unsupported `metric`.\n",
"    \"\"\"\n",
"    # initialization of clusters at first (every point is a cluster)\n",
"    init_clusters = [[point] for point in data[['x0', 'x1']].values.tolist()]\n",
"    # `==`, not `is`: string identity depends on interning and warns on Python 3.8+.\n",
"    if metric == 'mean':\n",
"        return mean_distance(init_clusters, cluster_num)\n",
"    raise ValueError('unsupported metric: ' + repr(metric))"
] }, { "cell_type": "code", "execution_count": 11, "metadata": { "_cell_guid": "d178b9b7-890b-4ad2-a458-bce05c7a09a2", "_uuid": "9d39dffabdd46bb2e36fae611cf78231ef7cabea" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "first cluster | second cluster | distance\n", "2 | 3 | 0.15085042956518227\n", "15 | 16 | 0.15501250939640307\n", "16 | 17 | 0.1679299964569166\n", "13 | 14 | 0.17501291697817623\n", "4 | 5 | 0.19186599490269243\n", "10 | 11 | 0.19888079280176854\n", "5 | 6 | 0.20597708099371148\n", "7 | 8 | 0.2126411284874588\n", "11 | 12 | 0.27650721583963234\n", "3 | 4 | 0.3141928388744722\n", "4 | 5 | 0.31484413939764316\n", "7 | 8 | 0.32516101556436616\n", "5 | 6 | 0.41304544834321805\n", "0 | 1 | 0.47931424457263944\n", "5 | 6 | 0.5402407941042792\n", "3 | 4 | 0.7368074545203777\n", "2 | 4 | 0.7941786051376811\n" ] }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAFY5JREFUeJzt3X+MZWd93/H3J2PGqE0abLyAsWcZo7gpVK2AjlymKM2Q5YexKi8kUBYpXUOJVlDoD7WVYuqqiZAqQ/9oVAoK2RAHNm0xiROHjTB17TUjXGmgHiPzw3aNF2ei2a6LNzgxQaSeePj2j3uW3o7v/Nr7c+a8X9LonnvOc8/z3efePd97nuc596SqkCS1z4+MOwBJ0niYACSppUwAktRSJgBJaikTgCS1lAlAklrKBCBJLWUCkKSWMgFIUktdNO4AtnLZZZfV7OzsuMOQpD3j/vvv/+OqOrCTshOdAGZnZ1leXh53GJK0ZyT5o52WtQtIklrKBCBJLWUCkKSWMgFIUkuZACSppUwAktRSJoBRWlqCm2/uPErSmE30dQD7ytISHDoEa2swPQ2nTsH8/LijktRingGMyuJi5+C/vt55XFwcd0SSWs4EMCoLC51v/lNTnceFhXFHJKnl7AIalfn5TrfP4mLn4G/3j6QxMwGM0vy8B35JE8MuIElqKROAJLWUCUCSWsoEIEktZQKQpJbqOwEkmUnyhSQPJ3kwyT/tUSZJPpLkdJKvJXlVv/VKkvoziGmgzwD/oqq+kuTHgPuT3FVVD3WVeRNwdfP3t4FfbR4lSWPS9xlAVT1eVV9plv8MeBi4YkOxw8CJ6vgS8Lwkl/dbtyTpwg10DCDJLPBK4MsbNl0BrHY9P8Ozk8T5fRxLspxk+dy5c4MMT5LUZWAJIMmPAr8L/LOq+u7GzT1eUr32U1XHq2ququYOHDgwqPAkSRsMJAEkeQ6dg/9/rqrf61HkDDDT9fxK4Owg6pYkXZhBzAIK8BvAw1X17zcpdhI42swGejXwVFU93m/dkqQLN4hZQK8B/gHw9SQPNOv+FXAQoKo+DtwBXAecBr4PvGsA9UqS+tB3Aqiq/07vPv7uMgW8r9+6JEmD45XAktRSJgBJaikTgCS1lAlAklrKBCBJLWUCGLalJbj55s6jJE0Qbwo/TEtLcOgQrK3B9DScOuVN4SVNDM8AhmlxsXPwX1/vPC4ujjsiSfohE8AwLSx0vvlPTXUeFxbGHZEk/ZBdQMM0P9/p9llc7Bz87f6RNEFMAMM2P++BX9JEsgtIklrKBCBJLWUCkKSWMgFIUkuZACSppUwAktRSg7op/C1JnkjyjU22LyR5KskDzd+/GUS9kqQLN6jrAD4JfBQ4sUWZe6vq7w2oPklSnwZyBlBVXwSeHMS+JEmjMcoxgPkkX03y+SR/fYT1SpJ6GNVPQXwFeElVfS/JdcDvA1f3KpjkGHAM4ODBgyMKT5LaZyRnAFX13ar6XrN8B/CcJJdtUvZ4Vc1V1dyBAwdGEZ4ktdJIEkCSFyVJs3xNU+93RlG3JKm3gXQBJfk0sABcluQM8EvAcwCq6uPAW4H3JnkG+HPgSFXVIOqWJF2YgSSAqnrHNts/SmeaqCRpQnglsCS1lAlAklrKBCBJLWUCkKSWMgFIUkuZACSppUwAktRSJgBJaikTgCS1lAlAklrKBCBJLWUCkKSWMgFIUkuZACSppUwAktRSJgBJaikTgCS1lAlAklpqIAkgyS1JnkjyjU22J8lHkpxO8rUkrxpEvZKkCzeoM4BPAtdusf1NwNXN3zHgVwdUryTpAg0kAVTVF4EntyhyGDhRHV8Cnpfk8kHULUm6MKMaA7gCWO16fqZZ9yxJjiVZTrJ87ty5kQQnSW00qgSQHuuqV8GqOl5Vc1U1d+DAgSGHJUntNaoEcAaY6Xp+JXB2RHVLknoYVQI4CRxtZgO9Gniqqh4fUd2SpB4uGsROknwaWAAuS3IG+CX
gOQBV9XHgDuA64DTwfeBdg6hXknThBpIAquod22wv4H2DqEuSNBheCSxJLWUCkKSWMgFIUkuZACSppUwAktRSJgBJaikTgCS1lAlAklrKBCBJLWUCkKSWMgFIUkuZACSppUwAktRSJgBJaikTgCS1lAlAklrKBCBJLTWQBJDk2iSPJDmd5MYe29+Z5FySB5q/XxhEvZKkC9f3LSGTTAEfA14PnAHuS3Kyqh7aUPQzVfX+fuuTJA3GIM4ArgFOV9VjVbUG3AocHsB+JUlDNIgEcAWw2vX8TLNuo59L8rUktyWZGUC9kqQ+DCIBpMe62vD8D4DZqvqbwN3ApzbdWXIsyXKS5XPnzg0gPElSL4NIAGeA7m/0VwJnuwtU1Xeq6unm6a8Df2uznVXV8aqaq6q5AwcODCA8SVIvg0gA9wFXJ7kqyTRwBDjZXSDJ5V1PrwceHkC9kqQ+9D0LqKqeSfJ+4E5gCrilqh5M8kFguapOAv8kyfXAM8CTwDv7rXcrS6urLK6ssDA7y/yMww2S1EuqNnbXT465ublaXl7e1WuWVlc5dOIEa+vrTE9NceroUZOApNZIcn9Vze2k7L67EnhxZYW19XXWq1hbX2dxZWXcIUnSRNp3CWBhdpbpqSmmEqanpliYnR13SJI0kfoeA5g08zMznDp61DEASdrGvksA0EkCHvglaWv7rgtIkrQzJoBdWlpd5eZ772VpdXX7wpI0wfZlF9CwOMVU0n7iGcAuOMVU0n5iAtgFp5hK2onVpVXuvfleVpcmu6vYLqBdcIqppO2sLq1y4tAJ1tfWmZqe4uipo8zMT+axwgSwS04xlbSVlcUV1tfWqfVifW2dlcWViU0AdgFJ0gDNLswyNT1FpsLU9BSzC7PjDmlTngFIGqrVpVVWFleYXZgd6zfhUcUxMz/D0VNHJ+LfvB0TwITxp6y1n0xKf/io45iZn5noA/95JoAJ4nUG2m8mpT98UuKYNI4BTBCvM9B+Myn94ZMSx6TxDGCCnL/O4PwZgNcZaK+blP7wSYlj0gzkjmBJrgX+A51bQn6iqj60YfvFwAk6N4P/DvD2qlrZbr8Xckewvc4xAEn92M0dwfo+A0gyBXwMeD1wBrgvycmqeqir2LuBP6mqn0hyBPgw8PZ+696PvM5A0qgMYgzgGuB0VT1WVWvArcDhDWUOA59qlm8DDiXJAOqWJF2gQSSAK4DuH7w406zrWaaqngGeAp4/gLolSRdoEAmg1zf5jQMLOynTKZgcS7KcZPncuXN9B6f/x3sZSOo2iFlAZ4DuTusrgbOblDmT5CLgx4Ene+2sqo4Dx6EzCDyA+ITXGEh6tkGcAdwHXJ3kqiTTwBHg5IYyJ4EbmuW3AvfUIKYface8xkDSRn2fAVTVM0neD9xJZxroLVX1YJIPAstVdRL4DeC3kpym883/SL/1ane8xkDSRgO5DmBY2ngdwDB5jYG0/430OgDtHV5jIKmbvwUkSS1lAtBAOMVU2nvsAlLfnGIq7U2eAahvTjGV9iYTgPp2forpVOIUU2kPsQtIfZufmeHU0aNOMZX2GBOABsIpptLeYxeQJLWUCUCSWsoEoD3B6wykwXMMQBPP6wyk4fAMQBPP6wyk4TABaOJ5nYE0HHYBaeJ5nYE0HCYA7QleZyANnl1Aaj1nGKmtPANQqznDSG3W1xlAkkuT3JXk0ebxkk3KrSd5oPnbeMN4aWycYaQ267cL6EbgVFVdDZxqnvfy51X1iubv+j7rlAbGGUaaNKtLq9x7872sLg2/S7LfLqDDwEKz/ClgEfjFPvcpjYwzjDRJVpdWOXHoBOtr60xNT3H01FFm5of3mew3Abywqh4HqKrHk7xgk3LPTbIMPAN8qKp+f7MdJjkGHAM4ePBgn+FJ23OGkSbFyuIK62vr1HqxvrbOyuLKeBNAkruBF/XYdNMu6jlYVWeTvBS4J8nXq+pbvQpW1XHgOMDc3Fztog5pIq0urbKyuMLswuxQ/zNr75tdmGV
qeuqHZwCzC7NDrW/bBFBVr9tsW5JvJ7m8+fZ/OfDEJvs42zw+lmQReCXQMwFI+8moT+m1t83Mz3D01NGRfWHodxD4JHBDs3wD8NmNBZJckuTiZvky4DXAQ33WK+0JvU7ppa3MzM/wUx/4qZF8Ueg3AXwIeH2SR4HXN89JMpfkE02ZlwHLSb4KfIHOGIAJQK1w/pQ+UxnJKb20G6ma3G72ubm5Wl5eHncYUl+GOQbg+MJ4THK7J7m/quZ2UtYrgaUhm5mfGcpBwvGF8dhP7e5vAUl7lOML47Gf2t0EIO1RoxhfGOVVqYM0zLj307iOYwDSHjbs8YVhdnUMK/ZRdNE4BiBp7IY1vgDDvSp1mAfpUVxNO8x2HyW7gCT1NMyujmH2o++nLpph8wxAUk/DvCp1mD95MOqrafcyxwAkjcUk96PvZY4BSJp4+6UffS9zDECSWsoEIEktZQKQpJYyAUhSS5kAJKmlTACS1FImAElqKROAJLVUXwkgyduSPJjkB0k2vfIsybVJHklyOsmN/dQpSRqMfs8AvgH8LPDFzQokmQI+BrwJeDnwjiQv77NeSVKf+vopiKp6GCDJVsWuAU5X1WNN2VuBw4A3hpekMRrFGMAVQPdtec406yRJY7TtGUCSu4EX9dh0U1V9dgd19Do92PQnSJMcA44BHDx4cAe7lyRdiG0TQFW9rs86zgDdP/l3JXB2i/qOA8eh83PQfdYtSdrEKLqA7gOuTnJVkmngCHByBPVKkrbQ7zTQtyQ5A8wDn0tyZ7P+xUnuAKiqZ4D3A3cCDwO/XVUP9he2JKlf/c4Cuh24vcf6s8B1Xc/vAO7opy5J0mB5JbAktZQJQJJaygQgSS1lApCkljIBSFJLmQAkqaVMAJLUUiYASWopE4AktZQJQJJaygQgSS1lApCkljIBSFJLmQDUGkurS9x8780srS6NOxRpIvT1c9DSXrG0usShE4dYW19jemqaU0dPMT8zP+6wpLHyDECtsLiyyNr6Guu1ztr6Gosri+MOSRo7E4BaYWF2gempaaYyxfTUNAuzC+MOSRq7vrqAkrwN+GXgZcA1VbW8SbkV4M+AdeCZqprrp15pt+Zn5jl19BSLK4sszC7Y/SPR/xjAN4CfBX5tB2VfW1V/3Gd90gWbn5n3wC916feewA8DJBlMNJKkkRnVGEAB/y3J/UmOjahO7XNO65T6s+0ZQJK7gRf12HRTVX12h/W8pqrOJnkBcFeS/1lVX9ykvmPAMYCDBw/ucPdqG6d1Sv3bNgFU1ev6raSqzjaPTyS5HbgG6JkAquo4cBxgbm6u+q1b+1OvaZ0mAGl3ht4FlOQvJ/mx88vAG+gMHksXzGmdUv/6nQb6FuA/AgeAzyV5oKremOTFwCeq6jrghcDtzUDxRcB/qar/2mfcajmndUr9S9Xk9rLMzc3V8nLPSwskST0kuX+n11p5JbAktZQJQJJaygQgSS1lApCkljIBSFJLmQAkqaUmehpoknPAH13gyy8DJvHXR41rd4xrd4xrd/ZjXC+pqgM7KTjRCaAfSZYn8b4DxrU7xrU7xrU7bY/LLiBJaikTgCS11H5OAMfHHcAmjGt3jGt3jGt3Wh3Xvh0DkCRtbT+fAUiStrCnE0CStyV5MMkPkmw6Yp7k2iSPJDmd5Mau9Vcl+XKSR5N8Jsn0gOK6NMldzX7vSnJJjzKvTfJA19//SfLmZtsnk/xh17ZXjCquptx6V90nu9aPs71ekWSpeb+/luTtXdsG2l6bfV66tl/c/PtPN+0x27XtA836R5K8sZ84LiCuf57koaZ9TiV5Sde2nu/piOJ6Z5JzXfX/Qte2G5r3/dEkN4w4rl/piumbSf60a9tQ2ivJLUmeSNLznijp+EgT89eSvKpr2+Dbqqr27B/wMuAngUVgbpMyU8C3gJcC08BXgZc3234bONIsfxx474Di+nfAjc3yjcCHtyl/KfAk8Jea558E3jqE9tpRXMD3Nlk/tvYC/ipwdbP8YuB
x4HmDbq+tPi9dZf4R8PFm+QjwmWb55U35i4Grmv1MjTCu13Z9ht57Pq6t3tMRxfVO4KM9Xnsp8FjzeEmzfMmo4tpQ/h8Dt4ygvf4u8CrgG5tsvw74PBDg1cCXh9lWe/oMoKoerqpHtil2DXC6qh6rqjXgVuBwkgA/A9zWlPsU8OYBhXa42d9O9/tW4PNV9f0B1b+Z3cb1Q+Nur6r6ZlU92iyfBZ6gcyOiQev5edki3tuAQ037HAZuraqnq+oPgdPN/kYSV1V9oesz9CXgygHV3VdcW3gjcFdVPVlVfwLcBVw7prjeAXx6QHVvqjr3Qn9yiyKHgRPV8SXgeUkuZ0httacTwA5dAax2PT/TrHs+8KdV9cyG9YPwwqp6HKB5fME25Y/w7A/fv21OAX8lycUjjuu5SZaTfOl8txQT1F5JrqHzre5bXasH1V6bfV56lmna4yk67bOT1w4zrm7vpvNN8rxe7+ko4/q55v25LcnMLl87zLhousquAu7pWj2s9trOZnEPpa36uiXkKCS5G3hRj003VdVnd7KLHutqi/V9x7XTfTT7uRz4G8CdXas/APxvOge548AvAh8cYVwHq+pskpcC9yT5OvDdHuXG1V6/BdxQVT9oVl9we/Wqose6jf/OoXymtrHjfSf5eWAO+Omu1c96T6vqW71eP4S4/gD4dFU9neQ9dM6efmaHrx1mXOcdAW6rqvWudcNqr+2M9LM18Qmgql7X5y7OADNdz68EztL5nY3nJbmo+RZ3fn3fcSX5dpLLq+rx5oD1xBa7+vvA7VX1F137frxZfDrJbwL/cpRxNV0sVNVjSRaBVwK/y5jbK8lfAT4H/Ovm9Pj8vi+4vXrY7PPSq8yZJBcBP07ntH4nrx1mXCR5HZ2k+tNV9fT59Zu8p4M4oG0bV1V9p+vprwMf7nrtwobXLg4gph3F1eUI8L7uFUNsr+1sFvdQ2qoNXUD3AVenM4Nlms6bfbI6IytfoNP/DnADsJMzip042exvJ/t9Vt9jcxA83+/+ZqDnjIFhxJXkkvNdKEkuA14DPDTu9mreu9vp9I/+zoZtg2yvnp+XLeJ9K3BP0z4ngSPpzBK6Crga+B99xLKruJK8Evg14PqqeqJrfc/3dIRxXd719Hrg4Wb5TuANTXyXAG/g/z8THmpcTWw/SWdQdalr3TDbazsngaPNbKBXA081X3CG01bDGOke1R/wFjqZ8Wng28CdzfoXA3d0lbsO+CadDH5T1/qX0vkPehr4HeDiAcX1fOAU8GjzeGmzfg74RFe5WeB/AT+y4fX3AF+ncyD7T8CPjiou4O80dX+1eXz3JLQX8PPAXwAPdP29Yhjt1evzQqdL6fpm+bnNv/900x4v7XrtTc3rHgHeNODP+3Zx3d38PzjfPie3e09HFNfNwINN/V8A/lrXa/9h046ngXeNMq7m+S8DH9rwuqG1F50ve483n+UzdMZq3gO8p9ke4GNNzF+na3bjMNrKK4ElqaXa0AUkSerBBCBJLWUCkKSWMgFIUkuZACSppUwAktRSJgBJaikTgCS11P8FYHbV+CJZGPQAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "clusters = hierarchical(c1,4)\n", "colors = ['red', 'green', 'purple', 'teal']\n", "for cluster_index, cluster in enumerate(clusters):\n", " for point_index, point in enumerate(cluster):\n", " plt.plot([point[0]], [point[1]], marker='o', markersize=3, color=colors[cluster_index])" ] }, { "cell_type": "markdown", "metadata": { 
"_cell_guid": "37c15a51-988b-4e19-bff4-772e895ae373", "_uuid": "a45109da7f08f18942887a6342cccb7102c8562a" }, "source": [ "> # Validation " ] }, { "cell_type": "markdown", "metadata": { "_cell_guid": "aefde9ae-a790-47ea-ad9f-bc32bf0f35c9", "_uuid": "f85cdfc72ea66fb272725ed9fcae98cd13aa2ecb" }, "source": [ "Credit to [https://joernhees.de/blog/2015/08/26/scipy-hierarchical-clustering-and-dendrogram-tutorial/](https://joernhees.de/blog/2015/08/26/scipy-hierarchical-clustering-and-dendrogram-tutorial/) for this Validation portion" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "_cell_guid": "ff676b94-11b6-4cc2-a275-205a7940d37d", "_uuid": "8e48fe89504b6ef77441535a9b230673dac14794" }, "outputs": [], "source": [
"# DataFrame.as_matrix() was removed in pandas 1.0; .values works on old and new pandas alike\n",
"X = c1.values\n",
"# generate the linkage matrix\n",
"# 'average' is SciPy's average linkage (mean of all pairwise point distances between two clusters),\n",
"# not single link; it is close to, but not the same as, the centroid distance implemented above\n",
"mean_link = linkage(X, 'average')"
] }, { "cell_type": "markdown", "metadata": { "_cell_guid": "c0162828-06ec-4f46-ba05-28bb8969dbb9", "_uuid": "acf53a1520f601fdee9af85c9560c52d764f63ba" }, "source": [ " As you can see there's a lot of choice here and while python and scipy make it very easy to do the clustering, it's you who has to understand and make these choices.. This compares the actual pairwise distances of all your samples to those implied by the hierarchical clustering. 
\n", " > The closer the value is to `1`, the better the clustering preserves the original distances, which in our case is reasonably close:" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "_cell_guid": "942dbb47-8294-4c31-823c-2178063886ee", "_uuid": "6802058ae18b8002ce53f49d3c1410cf69453fac" }, "outputs": [], "source": [ "from scipy.cluster.hierarchy import cophenet\n", "from scipy.spatial.distance import pdist" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "_cell_guid": "894af22d-3b90-46ca-9cea-fa9a66d4bd9c", "_uuid": "c69eed9cdc59a4aa5aea86c63baec10c9aa3587f" }, "outputs": [ { "data": { "text/plain": [ "0.86708962296996617" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c, coph_dists = cophenet(mean_link, pdist(X))\n", "c" ] }, { "cell_type": "markdown", "metadata": { "_cell_guid": "fda7583d-1474-47c5-b2ed-b93e6f1b5e1d", "_uuid": "cb8092526b39fcd25d49c25a3446fcbfbcda79e3" }, "source": [ "No matter what method and metric you pick, the linkage() function will use that method and metric to calculate the distances of the clusters (starting with your n individual samples, aka data points, as singleton clusters) and in each iteration will merge the two clusters which have the smallest distance according to the selected method and metric. It will return an array of length `n - 1` giving you information about the `n - 1` cluster merges which it needs to pairwise merge n clusters. `mean_link[i]` will tell us which clusters were merged in the i-th iteration, let's take a look at the first two points that were merged:" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "_cell_guid": "5d04913a-12fb-47a0-8952-a908b6d013dd", "_uuid": "a284d47c714e72d37e8c55dd0088dc066e797175" }, "outputs": [ { "data": { "text/plain": [ "array([ 2. , 3. , 0.15085, 2. 
])" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mean_link[0]" ] }, { "cell_type": "markdown", "metadata": { "_cell_guid": "08cdbd35-4914-4917-94b2-dcd33fafdf21", "_uuid": "b98b22bb6ec6f5f0d4c419cc6b21acb67a523be9" }, "source": [ "In its first iteration the linkage algorithm decided to merge the two clusters with indices `2` and `3`, as they only had a distance of `0.15085`. This created a cluster with a total of `2` samples. \n", "> We can see that each row of the resulting array has the format `[idx1, idx2, dist, sample_count].`\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "_cell_guid": "e2716e17-9819-4507-9974-61c8df7d8b59", "_uuid": "ded6e0655cb223daf3002b108878cb38598d6254" }, "outputs": [ { "data": { "text/plain": [ "array([ 16. , 17. , 0.15501, 2. ])" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mean_link[1]" ] }, { "cell_type": "markdown", "metadata": { "_cell_guid": "27543935-ec8e-4820-acbf-3f638af6a7ae", "_uuid": "334e640a4e9470a106efed5ae0e7990c5885bbe9" }, "source": [ "In the second iteration the algorithm decided to merge the clusters (original samples here as well) with indices `16` and `17`, which had a distance of `0.15501`. This again formed another cluster with a total of `2` samples.\n", "\n", "The indices of the clusters until now correspond to our samples. Remember that we had a total of 21 samples, so indices `0` to `20`. Let's have a look at the first `20` iterations:" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "_cell_guid": "ff96fc25-6429-4540-bc95-a47246d66e58", "_uuid": "e9e7269195fd0c7f050d77362f2925b4f380f2f3" }, "outputs": [ { "data": { "text/plain": [ "array([[ 2. , 3. , 0.15085, 2. ],\n", " [ 16. , 17. , 0.15501, 2. ],\n", " [ 18. , 19. , 0.16793, 2. ],\n", " [ 14. , 15. , 0.17501, 2. ],\n", " [ 5. , 6. , 0.19187, 2. ],\n", " [ 12. , 13. , 0.19888, 2. ],\n", " [ 7. , 8. 
, 0.20598, 2. ],\n", " [ 10. , 11. , 0.21264, 2. ],\n", " [ 20. , 23. , 0.27691, 3. ],\n", " [ 9. , 27. , 0.31485, 3. ],\n", " [ 4. , 25. , 0.31538, 3. ],\n", " [ 22. , 24. , 0.32588, 4. ],\n", " [ 26. , 28. , 0.41308, 4. ],\n", " [ 0. , 1. , 0.47931, 2. ],\n", " [ 29. , 32. , 0.54894, 7. ],\n", " [ 30. , 33. , 0.73684, 7. ],\n", " [ 31. , 35. , 0.8642 , 10. ],\n", " [ 36. , 37. , 1.26468, 17. ],\n", " [ 21. , 38. , 1.39423, 19. ],\n", " [ 34. , 39. , 2.58 , 21. ]])" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mean_link[:20]" ] }, { "cell_type": "markdown", "metadata": { "_cell_guid": "0e71367a-c7a0-49e5-866b-35ff2dc0b84f", "_uuid": "1e33e53f1098a7add3aea9053b955e8ac2175862" }, "source": [ "We can observe the monotonic increase of the distance. This is also similar to the results we had with our run (small differences appear once multi-point clusters are merged, because `linkage(X, 'average')` averages all pairwise point distances whereas our implementation measures the distance between cluster means). Note that our algorithm with `4` clusters ends at a distance of about `0.79`, whereas the above information pertains to finishing with `1` cluster.\n", "\n", "Our Output:\n", "\n", "`first cluster | second cluster | distance\n", "2 | 3 | 0.15085042956518227\n", "15 | 16 | 0.15501250939640307\n", "16 | 17 | 0.1679299964569166\n", "13 | 14 | 0.17501291697817623\n", "4 | 5 | 0.19186599490269243\n", "10 | 11 | 0.19888079280176854\n", "5 | 6 | 0.20597708099371148\n", "7 | 8 | 0.2126411284874588\n", "11 | 12 | 0.27650721583963234\n", "3 | 4 | 0.3141928388744722\n", "4 | 5 | 0.31484413939764316\n", "7 | 8 | 0.32516101556436616\n", "5 | 6 | 0.41304544834321805\n", "0 | 1 | 0.47931424457263944\n", "5 | 6 | 0.5402407941042792\n", "3 | 4 | 0.7368074545203777\n", "2 | 4 | 0.7941786051376811`" ] }, { "cell_type": "markdown", "metadata": { "_cell_guid": "c23918b0-4359-45ea-a680-7a9caa57b9bd", "_uuid": "648166977529ccb31d4d301655f5f76c13bf55f8" }, "source": [ "## Dendrogram\n", "> A dendrogram is a visualization in the form of a tree showing the order and distances of merges during the hierarchical clustering." 
] }, { "cell_type": "code", "execution_count": 18, "metadata": { "_cell_guid": "551e1655-5fca-4ee5-8496-0bca0f88e4f9", "_uuid": "3f11903f03aa1dd062f415abf2818643984c2520" }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAABakAAAJdCAYAAAA4HvtLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAIABJREFUeJzs3XuU5HdZ5/HPAxMN2IGoGQMJCRHBGyt2IIuw3mZXRIggyKLCiBxAHVRY8Yi6gIjgLqxyVlYQNA6KgDIIcg1HOKLAiIggIdNcQrwEFBMGhlzIZcItgWf/qF9Dp+2Z6SRT8+2Zfr3O6dNdVb/61dM1BZm865tvVXcHAAAAAABGuMXoAQAAAAAA2LxEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQA2saq6oKq2bYA5zqiqrqotB7j9KVX1h/N8jHXc/+lV9ac3Z4bDpar2V9WdRs9xOEx/JncePQcAAOOI1AAAx6iq+requs+q6x5VVe9Yvtzdd+3u3Ud8uBupu5/V3T8178epqu1Vdd4UgT9eVW+qqu86jOe/WaF8WXcvdPdHDtdcy6YQf11VXTN9/XNVPb+qbn+4HwsAAJaJ1AAA3Gg3JbJW1S3nMcvhUlW/mOR3kjwryclJTk/ye0keNHKulW5u3F6nV3T3CUm+JskPJ7ldkveOCNWH8zVTM/79BwBgA/KXNACATWzlauuqukVVPamqPlxVl1fVK6vqa6bbllcA/2RV/XuSt07X/3lVfaKqrqqqt1fVXVec+8VV9ftV9caqujbJf62qW1XVb1fVR6f7vKOqbrVipB+vqn+vqsuq6ldXnOsGW21U1XdV1Tur6sqquriqHjVd/4NVtaeqrp6uf/o6n4fbJvmNJI/r7td097XdfV13v6G7f3mN47dV1SUHeS7vOa3Ivrqq9lXVc6bD3j59v3JarX3v6fjHVNWFVfWpqvrLqrrjivN2VT2uqv4lyb+suO7OK57nF1TVX0yrn99dVd+w4v73rap/mp7v36uqv6mqQ65Kn37/C5L8WJJLkzxxxTkfUFVL0/P/zqq626rn4Zeq6v3TY76iqo5fcfsvT6vU91bVY1Y9h2u9Zm5bVS+tqkun181Tl2NzVd1yej1dVlX/WlWPrxUr1atqd1U9s6r+Lsmnk9ypqh49PdfXVNVHquqxq/9cq+pXquqT05wPrqqza7aq/IqqesqhnjsAAG4ckRoAgGU/n+TBSb43ySlJPpXkBauO+d4k35LkB6bLb0pylyRfl+T8JC9bdfz2JM9MckKSdyT5v0nukeS/ZLZS91eSfHHF8d+V5JuSfF+Sp1XVt6wesqpOnx73d5NsTbKYZGm6+dokj0xyYpIfTPKzVfXgdfzu905yfJLXruPY9Xhukud2922SfEOSV07Xf8/0/cRpy46/n+Z7SpKHZPb7/G2Sl68634OTfEeSbz3A4z08yTOSfHWSizJ7zlNVJyV5VZInJ/naJP+U2XO/bt39hSSvT/Ld0znvnuRFSR47nfMPkpxbVV+54m4/muR+Sb4+yd2SPGq67/2S/FKS78/sdXOD7Wgmq18zv5vktknulNnr75FJHj0d+9NJ7p/Za+DumT1Pq/1Ekh3T+T6a5JNJHpDkNtN5/t/0Oy27XWavhVOTPC3JC5M8IrPX7Xdn9ro8JvYDBwDYKERqAIBj2+um1a5XVtWVmW1fcSCPTfKr3X1Jd38uydOTPLRuuMXE06dVxp9Jku5+UXdfs+L4b59
WJS97fXf/XXd/McnnkzwmyRO6+2Pd/YXufud032XP6O7PdPf7krwvybevMeePJ/nr7n75tNr38u5emubZ3d0f6O4vdvf7M4u937uO5+lrk1zW3dev49j1uC7JnavqpO7e393vOsixj03yf7r7wunxn5VkceVq6un2K5af9zW8prv/Ybr/yzKLtklydpILptXh1yd5XpJP3ITfZ29mbyokszD8B9397unP8CVJPpfkXiuOf1537+3uK5K8YcU8P5rkj7v7g919bWavmdVWvmauy2wl95On19m/JfntzMLz8vmeO71mP5XkN9c434u7+4Luvn56vfxFd3+4Z/4myZszBfjJdUme2d3XJfmzJCdNj3HNtLL8gszCOwAAh4lIDQBwbHtwd5+4/JXk5w5y7B2TvHZF0L4wyRcy25952cXLP0xbLfxmzbYHuTrJv003nbTW8dP1xyf58EFmWBlQP51kYY1jTjvQOarqO6rqbdPWEFcl+ZlV8xzI5UlOqsO35/NPJvnGJP9YVe+pqgcc5Ng7Jnnuiuf9iiSV2UreZRevec8vO9DzdsrK+3Z3J7nBNiXrdOo01/K8T1z15sdp02PdqHkyW9m82urXzFesOu6j+fJzs/p8az1PN7iuqu5fVe+atu64MrOQv/I1cvm0ejxJlt8U2Lfi9s9k7dclAAA3kUgNAMCyi5Pcf2XU7u7ju/tjK47pFT9vz+xDBe+T2XYMZ0zX1wGOvyzJZzPb/uLmznmgc+xKcm6S07r7tknOWTXPgfz9NNt6tgZJZtuK3Hr5Qs0+4G/r8uXu/pfufnhm26D8VpJXVdVX5YbPx7KLkzx21fN+q+5+54pj1rrfenw8yR1WzFkrL6/HtP/zAzPbhmR53meumvfW3b16i5IDzXPaisunr3HM6tfMdZmF8ZX3WX5N3uD3W3Xu/3C+aUuSV2e27czJ0xs3b8z6XiMAAMyJSA0AwLJzkjxzeZuJqtpaVQ86yPEnZLbNw+WZBdtnHezk0/YNL0rynKo6ZVqJfe9Vexmvx8uS3KeqfrSqtlTV11bV8nYSJyS5ors/W1X3zCykH1J3X5XZ/sMvmD4o79ZVddy06vbZa9zln5McX7MPajwuyVOTfOn3qKpHVNXW6Xe+crr6C5l9AOEXM9tfedk5SZ5c04dOTh8U+CPrfC4O5S+SfNv0O21J8rjM9lw+pOn3/5bMtky5XZLlD398YZKfmVatV1V91fQ8nLCO074yyaOq6lur6tZJfv1gB08rml+Z2evyhOm1+YtJlj9E85VJnlBVp1bViUn+5yEe/ysy+3O6NMn1VXX/JPddx9wAAMyRSA0AwLLnZrYK+c1VdU2Sd2X2YX0H8tLMtl74WJIPTccfyi8l+UCS92S2fcRv5Ub+nbS7/z2zLRqeOJ1jKV/eu/rnkvzGNP/T8uUPLFzPeZ+TWQB9amYR8+Ikj0/yujWOvWp6rD/M7Pe/NjfcRuN+SS6oqv2ZPa8P6+7PdvenM/tQwL+btsq4V3e/NrPn4c+mbVM+mNmHAd5s3X1Zkh9J8uzM3kz41iTnZfbmwoH82DT3lZm9Hi5Pco/u3jud87zM9qV+fmYfrnlRpg9GXMc8b0ryO0neOt3vreu42//I7Pn9SGYfpLgrszc7klkwf3OS9yfZk9mq6Osze0Ngrce/JrMPCH3lNPv26XcEAGCgmm1LBwAAHOumrTsuSfLj3f220fMcbtPK6HO6+46HPBgAgA3DSmoAADiGVdUPVNWJ07YqT8ls/+X1rHrf8KrqVlV19rTty6mZbR/y2tFzAQBw44jUAABwbLt3kg9n9iGED0zy4O7+zNiRDptK8ozMtu7Yk+TCzLZ5AQDgKGK7DwAAAAAAhrGSGgAAAACAYURqAAAAAACG2TJ6gBvrpJNO6jPOOGP0GAAAAAAAHMR73/vey7p766GOO+oi9RlnnJHzzjtv9BgAAAAAABxEVX10PcfZ7gMAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQ
GAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYZsvoAbhpdu5Mdu0aPQUAG9327cmOHaOnAAAAgAOzkvootWtXsrQ0egoANrKlJW9oAgAAsPFZSX0UW1xMdu8ePQUAG9W2baMnAAAAgEOzkhoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhplbpK6q06rqbVV1YVVdUFVPWOOYbVV1VVUtTV9Pm9c8AAAAAABsPFvmeO7rkzyxu8+vqhOSvLeq/qq7P7TquL/t7gfMcQ4AAAAAADaoua2k7u6Pd/f508/XJLkwyanzejwAAAAAAI4+R2RP6qo6I8mZSd69xs33rqr3VdWbququR2IeAAAAAAA2hnlu95EkqaqFJK9O8gvdffWqm89Pcsfu3l9VZyd5XZK7rHGOHUl2JMnpp58+54kBAAAAADhS5rqSuqqOyyxQv6y7X7P69u6+urv3Tz+/MclxVXXSGsft7O6zuvusrVu3znNkAAAAAACOoLlF6qqqJH+U5MLufs4BjrnddFyq6p7TPJfPayYAAAAAADaWeW738Z1JfiLJB6pqabruKUlOT5LuPifJQ5P8bFVdn+QzSR7W3T3HmQAAAAAA2EDmFqm7+x1J6hDHPD/J8+c1AwAAAAAAG9tc96QGAAAAAICDEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYJi5ReqqOq2q3lZVF1bVBVX1hDWOqap6XlVdVFXvr6q7z2seAAAAAAA2ni1zPPf1SZ7Y3edX1QlJ3ltVf9XdH1pxzP2T3GX6+o4kvz9
9BwAAAABgE5jbSuru/nh3nz/9fE2SC5OcuuqwByV5ac+8K8mJVXX7ec0EAAAAAMDGckT2pK6qM5KcmeTdq246NcnFKy5fkv8YsgEAAAAAOEbNPVJX1UKSVyf5he6+evXNa9yl1zjHjqo6r6rOu/TSS+cxJgAAAAAAA8w1UlfVcZkF6pd192vWOOSSJKetuHyHJHtXH9TdO7v7rO4+a+vWrfMZFgAAAACAI25ukbqqKskfJbmwu59zgMPOTfLImrlXkqu6++PzmgkAAAAAgI1lyxzP/Z1JfiLJB6pqabruKUlOT5LuPifJG5OcneSiJJ9O8ug5zgMAAAAAwAYzt0jd3e/I2ntOrzymkzxuXjMAAAAAALCxzf2DEwEAAAAA4EBEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGCYuUXqqnpRVX2yqj54gNu3VdVVVbU0fT1tXrMAAAAAALAxbZnjuV+c5PlJXnqQY/62ux8wxxkAAAAAANjA5raSurvfnuSKeZ0fAAAAAICj3+g9qe9dVe+rqjdV1V0HzwIAAAAAwBE2z+0+DuX8JHfs7v1VdXaS1yW5y1oHVtWOJDuS5PTTTz9yEwIAAAAAMFfDVlJ399XdvX/6+Y1Jjquqkw5w7M7uPqu7z9q6desRnRMAAAAAgPkZFqmr6nZVVdPP95xmuXzUPAAAAAAAHHlz2+6jql6eZFuSk6rqkiS/nuS4JOnuc5I8NMnPVtX1ST6T5GHd3fOaBwAAAACAjWdukbq7H36I25+f5PnzenwAAAAAADa+Ydt9AAAAAACASA0AAAAAwDAiNQAAAAAAw4jUAAAAAAAMI1IDAAAAADDMuiJ1VX1jVb2lqj44Xb5bVT11vqMBAAAAAHCsW+9K6hcmeXKS65Kku9+f5GHzGgoAAAAAgM1hvZH61t39D6uuu/5wDwMAAAAAwOay3kh9WVV9Q5JOkqp6aJKPz20qAAAAAAA2hS3rPO5xSXYm+eaq+liSf03yiLlNBQAAAADAprCuSN3dH0lyn6r6qiS36O5
r5jsWAAAAAACbwbq2+6iqZ1XVid19bXdfU1VfXVX/e97DAQAAAABwbFvvntT37+4rly9096eSnD2fkQAAAAAA2CzWG6lvWVVfuXyhqm6V5CsPcjwAAAAAABzSej848U+TvKWq/jhJJ3lMkpfMbSoAAAAAADaF9X5w4rOr6gNJvi9JJflf3f2Xc50MWLedO5Ndu0ZPAWw0S0uz79u2DR0D2IC2b0927Bg9BQAAzKx3JXW6+01J3jTHWYCbaNeuWYxaXBw9CbCR+P8EYC3Lb2CJ1AAAbBTritRV9ZAkv5Xk6zJbSV1JurtvM8fZgBthcTHZvXv0FADARue/rgAAYKNZ70rqZyd5YHdfOM9hAAAAAADYXG6xzuP2CdQAAAAAABxu611JfV5VvSLJ65J8bvnK7n7NXKYCAAAAAGBTWG+kvk2STye574rrOolIDQAAAADATbauSN3dj573IAAAAAAAbD7ritRVdXySn0xy1yTHL1/f3Y+Z01wAAAAAAGwC6/3gxD9JcrskP5Dkb5LcIck18xoKAAAAAIDNYb2R+s7d/WtJru3ulyT5wSTfNr+xAAAAAADYDNYbqa+bvl9ZVf8pyW2TnDGXiQAAAAAA2DTWtSd1kp1V9dVJnprk3CQLSX5tblMBAAAAALAprDdSv6W7P5Xk7UnulCRV9fVzmwoAAAAAgE1hvdt9vHqN6151OAcBAAAAAGDzOehK6qr65iR3TXLbqnrIiptuk+T4eQ4GAAAAAMCx71DbfXxTkgckOTHJA1dcf02Sn57XUAAAAAAAbA4HjdTd/fokr6+qe3f33x+hmQAAAAAA2CTWuyf1D1fVbarquKp6S1VdVlWPmOtkAAAAAAAc89Ybqe/b3VdntvXHJUm+Mckvz20qAAAAAAA2hfVG6uOm72cneXl3XzGneQAAAAAA2EQO9cGJy95QVf+Y5DNJfq6qtib57PzGAgAAAABgM1jXSuruflKSeyc5q7uvS3JtkgfNczAAAAAAAI59B11JXVX/rbvfWlUPWXHdykNeM6/BAAAAAAA49h1qu4/vSfLWJA9M0klq1XeRGgAAAACAm+xQkfqaqvrFJB/Ml+N0pp8BAAAAAOBmOVSkXpi+f1OS/5zk9ZmF6gcmefsc5wIAjrCdO5Ndu0ZPAczb0tLs+7ZtQ8cAjoDt25MdO0ZPAQCHdtBI3d3PSJKqenOSu3f3NdPlpyf587lPBwAcMbt2zeLV4uLoSYB58r9x2ByW35ASqQE4GhxqJfWy05N8fsXlzyc547BPAwAMtbiY7N49egoA4ObyX0sAcDRZb6T+kyT/UFWvzWw/6h9O8pK5TQUAAAAAwKawrkjd3c+sqjcl+e7pqkd39575jQUAAAAAwGaw3pXU6e7zk5w/x1kAAAAAANhkbjF6AAAAAAAANi+RGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhGpAYAAAAAYBiRGgAAAACAYURqAAAAAACGEakBAAAAABhmbpG6ql5UVZ+sqg8e4PaqqudV1UVV9f6quvu8ZgEAAAAAYGOa50rqFye530Fuv3+Su0xfO5L8/hxnAQAAAABgA5pbpO7utye54iCHPCjJS3vmXUlOrKr
bz2seAAAAAAA2npF7Up+a5OIVly+ZrgMAAAAAYJMYGalrjet6zQOrdlTVeVV13qWXXjrnsQAAAAAAOFJGRupLkpy24vIdkuxd68Du3tndZ3X3WVu3bj0iwwEAAAAAMH8jI/W5SR5ZM/dKclV3f3zgPAAAAAAAHGFb5nXiqnp5km1JTqqqS5L8epLjkqS7z0nyxiRnJ7koyaeTPHpeswAAAAAAsDHNLVJ398MPcXsnedy8Hh8AAAAAgI1v5HYfAAAAAABsciI1AAAAAADDiNQAAAAAAAwjUgMAAAAAMIxIDQAAAADAMCI1AAAAAADDiNQAAAAAAAwjUgMAAAAAMIxIDQAAAADAMCI1AAAAAADDiNQAAAAAAAwjUgMAAAAAMIxIDQAAAADAMCI1AAAAAADDiNQAAAAAAAwjUgMAAAAAMIxIDQAAAADAMCI1AAAAAADDiNQAAAAAAAwjUgMAAAAAMIxIDQAAAADAMFtGDwAAAHAs2btzb/bt2jd6DDa5/Ut3TpLs2XbR4EnY7E7efnJO2XHK6DGADU6kBgAAOIz27dqX/Uv7s7C4MHoUNrEXLorTjLd/aX+SiNTAIYnUAAAAh9nC4kLO3H3m6DEAhtqzbc/oEYCjhD2pAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGFEagAAAAAAhhGpAQAAAAAYRqQGAAAAAGAYkRoAAAAAgGG2jB4AgI1p59692bVv3+gxOIKW9t85SbJtz0WDJ+FI2X7yydlxyimjxwAAADY5kRqANe3aty9L+/dncWFh9CgcIYsvFKc3k6X9+5NEpAYAAIYTqQE4oMWFhew+88zRYwBzsG3PntEjAAAAJLEnNQAAAAAAA4nUAAAAAAAMI1IDAAAAADCMSA0AAAAAwDAiNQAAAAAAw4jUAAAAAAAMI1IDAAAAADCMSA0AAAAAwDAiNQAAAAAAw4jUAAAAAAAMI1IDAAAAADCMSA0AAAAAwDAiNQAAAAAAw4jUAAAAAAAMI1IDAAAAADCMSA0AAAAAwDAiNQAAAAAAw4jUAAAAAAAMI1IDAAAAADCMSA0AAAAAwDAiNQAAAAAAw4jUAAAAAAAMI1IDAAAAADCMSA0AAAAAwDAiNQAAAAAAw4jUAAAAAAAMI1IDAAAAADCMSA0AAAAAwDB1u49lAAAUQUlEQVQiNQAAAAAAw4jUAAAAAAAMI1IDAAAAADCMSA0AAAAAwDAiNQAAAAAAw4jUAAAAAAAMI1IDAAAAADDMltEDAAAAAMe+vTv3Zt+ufaPH4Ajav7Q/SbJn257Bk3CknLz95Jyy45TRY3AUspIaAAAAmLt9u/Z9KVqyOSwsLmRhcWH0GBwh+5f2eyOKm8xKagAAAOCIWFhcyJm7zxw9BjAHVsxzc1hJDQAAAADAMCI1AAAAAADDiNQAAAAAAAwz10hdVferqn+qqouq6klr3P6oqrq0qpamr5+a5zwAAAAAAGwsc/vgxKq6ZZIXJPn+JJckeU9VndvdH1p16Cu6+/HzmgMAAAAAgI1rniup75nkou7+SHd/PsmfJXnQHB8PAAAAAICjzDwj9alJLl5x+ZLputX+e1W9v6peVVWnzXEeAAAAAAA2mHlG6lrjul51+Q1JzujuuyX56yQvWfNEVTuq6ryqOu/SSy89zGMCAAAAADDKPCP1JUlWroy+Q5K9Kw/o7su7+3PTxRcmucdaJ+rund19VneftXXr1rkMCwAAAADAkTfPSP2eJHepqq+vqq9I8rAk5648oKpuv+LiDyW5cI7zAAAAAACwwWyZ14m7+/qqenySv0xyyyQv6u4Lquo3kpzX3ecm+fmq+qE
k1ye5Ismj5jXP4bLzvTuz6wO7Ro+RpU/8TpJk24t/Yegc279te3bcY8fQGQAAAACAo9fcInWSdPcbk7xx1XVPW/Hzk5M8eZ4zHG67PrArS59YyuLtFofOsfiksXE6SZY+sZQkIjUAAAAAcJPNNVIfqxZvt5jdj9o9eozhtr142+gRAAAAAICj3Dz3pAYAAAAAgIMSqQEAAAAAGEakBgAAAABgGJEaAAAAAIBhRGoAAAAAAIYRqQEAAAAAGEakBgAAAABgGJEaAAAAAIBhRGoAAAAAAIYRqQEAAAAAGEakBgAAAABgmC2jBwAAOJJ27t2bXfv2jR5juKX9+5Mk2/bsGTzJeNtPPjk7Tjll9BgAALBpWUkNAGwqu/bt+1Kg3cwWFxayuLAweozhlvbv96YFAAAMZiU1ALDpLC4sZPeZZ44egw3ASnIAABjPSmoAAAAAAIYRqQEAAAAAGEakBgAAAABgGJEaAAAAAIBhRGoAAAAAAIYRqQEAAAAAGEakBgAAAABgGJEaAAAAAIBhRGoAAAAAAIYRqQEAAAAAGEakBgAAAABgGJEaAAAAAIBhRGoAAAAAAIYRqQEAAAAAGGbL6AEAAAAA4Gi1d+fe7Nu1b/QYw+1f2p8k2bNtz+BJNoaTt5+cU3acMnqMo4aV1AAAAABwE+3bte9LgXYzW1hcyMLiwugxNoT9S/u9cXEjWUkNcJjt3Ls3u/Yd/f8wWto/+0vWtj1H/7vg208+OTtO8Q42wEZ3rKxEO5ZWklkFBrA+C4sLOXP3maPHYIM4Fv4OcKRZSQ1wmO3at+9LgfdotriwkMWFo/9d8KX9+4+JNw0ANoNjZSXasbKSzCowAOBIsZIaYA4WFxay+0zvom8Ex8JKcIDNxEq0jcMqMADgSLGSGgAAAID/396dB2l2VnUA/h0yhsVRwChTxoWAIAoBJiZBLTAEEiBYIKJRJEABaqVULKVwQ4tCoSw1IqCWIAyLbI5YIJEACmEbwm4iGRZZBBUNDg6iEByJBJLjH/c23TQzmSST/t7u+Z7nn77f27e/e+b09O3b5z33vQDDKFIDAAAAADCMIjUAAAAAAMMoUgMAAAAAMIwiNQAAAAAAw2wbHQAAAABwaPt27cv+3ftHh3HEDuw9kCS59PRLB0dy5HacsyPHn3v86DAAjho6qQEAAGAT2797/5cLvFvZ9p3bs33n9tFhHLEDew8cFZMGAJuJTmoAAADY5Lbv3J6T9pw0OgxydHSCA2w2OqkBAAAAABhGkRoAAAAAgGEUqQEAAAAAGEaRGgAAAACAYRSpAQAAAAAYRpEaAAAAAIBhFKkBAAAAABhGkRoAAAAAgGEUqQEAAAAAGEaRGgAAAACAYbaNDgAAgOW0a9++7N6/f2gMew8cSJKcfumlQ+NIknN27Mi5xx8/OgwAAFg4RWoANowC1CrFJ/hqu/fvz94DB7Jz+/ZhMYw89lor5yrnCQAAlpEiNQAbRgFqovgEh7Zz+/bsOemk0WEMN3oiDQAARlKkBmBDKUApPq3YDJ31ie56AACAzUaRGgBYiM3QWZ/orge2hn279mX/7rETewf2TueqS08fO6m345wdOf5c50sAOJopUgMAC6OzfjK6ixvY/Pbv3p8Dew9k+85xE2sjj71ipVCuSA0ARzdFagAAgE1o+87tOWnPck/sje7iBgAW40ajAwAAAAAAYHnppAYAAAAAjgqe67BqKz3XQZEaAAAA2BIUn1ZtpeITLJLnOky22nMdFKkBAACALUHxabLVik+waJ7rMH4i7bpSpAYAAAC2DMWnrVd8AjgcRWo4Urt2Jbt3j41h7x9OH09/7Ng4kuScc5Jzzx0dBQAAAEvAEjCrLAHDVqZIDUdq9+5k795k585hIezZuQmK08mUh0SRGgAAgIWwBMzEEjBsdYrUcEPYuTPZs2d0FOOdfvroCAAAAFgyloAZ38UNR+pGowMAAAAAAGB5KVIDAAAAADCMIjUAAAAAAMMoUgMAAAAAMIw
iNQAAAAAAwyhSAwAAAAAwjCI1AAAAAADDKFIDAAAAADCMIjUAAAAAAMMoUgMAAAAAMIwiNQAAAAAAwyhSAwAAAAAwjCI1AAAAAADDKFIDAAAAADCMIjUAAAAAAMMoUgMAAAAAMIwiNQAAAAAAwyhSAwAAAAAwjCI1AAAAAADDKFIDAAAAADCMIjUAAAAAAMMoUgMAAAAAMIwiNQAAAAAAwyhSAwAAAAAwjCI1AAAAAADDbGiRuqrOqqqPVNXHqurxB/n8javqL+fPv7uqTtjIeAAAAAAA2Fw2rEhdVcckeUaS+ye5Y5KHVtUd1+32U0k+0923S/L0JOdtVDwAAAAAAGw+G9lJfbckH+vuf+7uK5O8NMmD1u3zoCQvnLdfnuSMqqoNjAkAAAAAgE1kI4vU35LksjWvPzGPHXSf7v5SksuTHLeBMQEAAAAAsIls28D3PlhHdF+PfVJV5yY5d355oKo+coSxHbF6tIbvFXIxcxPAKrlIcvAT3LKSi4k8TORhlVxM5GGVXMwkYpVcTORhIg+r5GIiD6vkYiIPq+RiMj4Pt742O21kkfoTSb5tzetvTbLvEPt8oqq2Jbl5kv9e/0bdvSvJrg2KEwAAAACAQTZyuY+Lk9y+qm5TVccm+YkkF6zb54Ikj5y3z07ypu7+qk5qAAAAAACOThvWSd3dX6qqn0/yuiTHJHl+d/9DVT05ySXdfUGS5yV5cVV9LFMH9U9sVDwAAAAAAGw+pXEZAAAAAIBRNnK5DwAAAAAAuEaK1AAAAAAADKNIDQAAAADAMBv24MSjSVWdnOT7ktwyyWeTvKu7Lxkb1eZRVad298Wj41ikqrpTkqu6+8Nrxr63u989MKwh5p+Py5L8V5IHJLmiuy8cG9XiVdWdk3x/pvPE/iQXdve+sVEtXlX9UJI3dPfnR8cyUlV9TZKzkvxXd7+jqh6e5OZJ/ry7Pzs2OoDNraoe093PGB3HSFV1YpITk/zTEl5nf3N3f7KqKsmDknx3kn9J8vLu/tLY6ACAjeLBiYdRVU9PcuMkb0hyeZKvT3JmpgLlL4yMbdGq6mCd95Xktd19n0XHM0pVPTXJjiRfSnJckp/s7v+sqjd1973HRrdYVfW8TP8HvpDkm5LsS/K5JLfq7nNHxrZIVfV7SW6a5L1J7pXk/5JcleQd3f2ikbEtWlXtS/KvmQr15ye5oLs/Mzaqxauq85NcnOQWSU5O8jdJPp3knO6+38jYRjCxNzGpt0oDwKEt2+R/Vb01ycofJDV/vFOSD3T3aWOiGqOqXtvdZ1XVY5OckeQ1Se6e5N+7+/Fjo1uclWvqqvqjJFckeVOSnUlO6e4fHxvdYlXVMUl+OOvOl0n+epkK9ib/v5LmGJNZKzQIXbNln/TeihPeitSHUVUXHewC+VDjR7Oq+nymi6LKV/4xcZfuPm5YYAtWVW/p7nvO23dJ8sdJfiXJeUtYpF6bi/d3953n7Td3973GRrc4VfXG7j5jzevXd/d9quoN3X3myNgWbeV7X1W3SfIjSR6YaRLjld39zLHRLc7an4Gq+kB3n7h+fFmY2JuY1FulAWBi8n9SVY9LcpckL+juPfPY33b3/YcGNsCa4uxbktyru6+ex9/W3fcYHN7CrFw/rb+OWtLfoS9O8r4kb8xXni/v2t0PHxnbIpn8X6U5ZmIya6JBaJVJ78lWn/C23MfhXVJVz8r0h9TnMl0YnJHkPUOjGuNDSR7c3ZevHayq1w+KZ5RtVXVsd1/Z3e+rqgcneUmmE+CyWXsO+Y0127V+x6Pcp6rq1zL9EXHPJB+cx48ZF9JY3f0vSZ6a5KlVtSNTh8My+d+qekKmQtwnq+qXkvx3pgLlsjll3cTey6rqVwbHNMLt1k3qnT1vv3lsWEOcfJA/Fs6vqouGRDPOgRxi8n9YRAN099Oq6tgkP11VP5Nk9+iYBrpjVb0oyXdk+v1xxTx+k3EhDfHCqnpuksuq6iVJ3pLp52IZ77Y4obsfsW7s0rkYs0x
u0d2/k3x58v+p8/ajhkY1xqlrmmOev7Y5JsnSFKmTXD1/vNOayawLl/C66iPrGoTOr6qlaxCanR+T3kly7PzxwVmd8H5WVb1tYEzXmk7qa6GqTsp0O80tMt1i9c7uvnRsVItXVd+c6RarK9eNb1uyW2ruluTj3f2pNWPHJPmx7n7puMgWb76F/8PdfdWasWOTnNXdF4yLbLHm7/+Dk9w2yUeSvKq7r66q45fw1rv7dffrRscxWlXdNNNtqf+U5KNJHpmp+LR7/UTf0a6q3p7pAunK+fUtM03sndLdO4YGt0BV9fbuvvu8/cDuftW8vae7Tx8a3IJV1dOS3Cxf3QDwhe5+7MjYFqmq/j7JvQ82+b9MndRrVdW2JI9Icoet0O1zQ6uqW695ua+7v1hV25P8QHf/7ai4Rqiq45PcL9OdOJdn6hJ979ioFm+e1L1nkj1ZPV/eM8lF3f2UgaEtVFW9OtOk3o0zLX3y2kyT/w/p7rNGxrZoVfUXSfZmtTnmpt39i8t2p0FVPSLTv/+YJF+T1cmsK7r7V0fGtkgH+76vNAh1965BYQ2zMumd5LRMk94/u2xF6qr6jyQXJrl3ktt39xXz+CXdfcrQ4K4FRWoAYMMcYmJvW5Lf6O4nj4tssa5hUu/xy5SHFQdrAEiybausl3dDuIbJ/6Vakxq4ZlV1jyR3znSuvDzTshe3XabnOlzD5P8ly3a+vIbmmNO6e6nuSDrYZFaSY5fp/8ShGoSW/Vpi7aR3kr9aplysm/D+ZHdfOU94P24r/M2hSA0AbJhDrLubJK9bpm5R6w+vkouJPACHMz/X4VaZ1hxe5uc6OF/O5GIiDxN5WCUXk62eB2tSAwAbybq7k7V5SKZcLGMektVcrLWMufCzARyO5zpM1v7eWDlnLuv50u/QiTxMXEuscq092dL/JxSpAYCN5KG7E3lYJRcTeQAOxwPbJ86Xq+RiIg8TeVglF5MtnQfLfQAAG8ZDdyfysEouJvIAHI4Htk+cL1fJxUQeJvKwSi4mWz0PitQAAAAAAAxzqIcZAQAAAADAhlOkBgAAAABgGEVqAABYoKraU1WnXIf9n1xVZ17HY3y8qr7xukcHAACLt210AAAAwKF19xNHxwAAABtJJzUAAEutqr62ql5TVe+tqg9U1UPm8SdW1cXz2K6qqnl8T1U9vaouqqoPVdWpVfWKqvpoVf32vM8JVfXhqnphVb2vql5eVTc7yLHvW1XvrKr3VNXLqmr7QfZ5QVWdPW9/vKqeNO///qr6rnn8uKq6sKourapnJ6k1X//wqvq7qtpbVc+uqmOq6tZzvN9YVTeqqrdW1X03JMEAAHAYitQAACy7s5Ls6+67dveJSV47j/9Jd586j900yQPWfM2V3X1akmcleWWSxyQ5Mcmjquq4eZ87JNnV3XdJ8rkkP7f2oPNyHE9IcmZ3f0+SS5I87lrE++l5/z9N8svz2G8meVt3n5TkgiTfPh/ju5M8JMndu3tnkquSPKy7/zXJeXP8v5Tkg9194bU4NgAA3OAUqQEAWHbvT3JmVZ1XVT/Q3ZfP4/eqqndX1fuT3DvJndZ8zQVrvvYfuvuT3f2FJP+c5Nvmz13W3W+ft1+S5B7rjvt9Se6Y5O1VtTfJI5Pc+lrE+4r5498nOWHePm0+Rrr7NUk+M4+fkeTkJBfPxzgjyW3n/Z6b5OuS/ExWi90AALBw1qQGAGCpdfc/VtXJSX4wye9W1YVJfj/JM5Oc0t2XVdVvJbnJmi/7wvzx6jXbK69XrrF7/aHWva4kr+/uh17HkFeOd1W+8np+/fuvHOOF3f3rX/WJafmRb51fbk/yP9cxDgAAuEHopAYAYKlV1fFJPt/dL0nyB0m+J6sF6U/P60SffT3e+tur6vvn7Ycmedu6z78ryd2r6nZzHDerqu+8HsdJkouSPGx+n/snueU8/sYkZ1fVrebPfUNVrXRrn5fkz5M8MclzrudxAQDgiOmkBgBg2d05yVOq6uokX0zys93
92ap6TqblPD6e5OLr8b4fSvLI+UGGH820hvSXdfd/VtWjkvxFVd14Hn5Ckn+8Hsd60vw+70nyliT/Nh/jg1X1hCQXVtWNMv37HlNVJyQ5NdNa1VdV1Y9W1aO7+8+ux7EBAOCIVPfB7goEAACur7kI/Or5oYsAAMA1sNwHAAAAAADD6KQGAAAAAGAYndQAAAAAAAyjSA0AAAAAwDCK1AAAAAAADKNIDQAAAADAMIrUAAAAAAAMo0gNAAAAAMAw/w/cebYnwXBDEwAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# calculate full dendrogram\n", "plt.figure(figsize=(25, 10))\n", "plt.title('Hierarchical Clustering Dendrogram')\n", "plt.xlabel('sample index')\n", "plt.ylabel('distance')\n", "dendrogram(\n", " mean_link,\n", " leaf_rotation=90., # rotates the x axis labels\n", " leaf_font_size=8., # font size for the x axis labels\n", " color_threshold= 1\n", ")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "_cell_guid": "46453f4a-2a5a-487e-90a1-05b02c07eefb", "_uuid": "d18e8a9fb385e496c3ac60ed3382b8dd3b92c149" }, "source": [ "Which actually corresponds to our results as well (pasted from top again)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "_cell_guid": "e9f50e04-3a22-4934-9a48-95515eacd96d", "_uuid": "5f6a86e038f60f9160fe22e95a0b47ab089949a9" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "first cluster | second cluster | distance\n", "2 | 3 | 0.15085042956518227\n", "15 | 16 | 0.15501250939640307\n", "16 | 17 | 0.1679299964569166\n", "13 | 14 | 0.17501291697817623\n", "4 | 5 | 0.19186599490269243\n", "10 | 11 | 0.19888079280176854\n", "5 | 6 | 0.20597708099371148\n", "7 | 8 | 0.2126411284874588\n", "11 | 12 | 0.27650721583963234\n", "3 | 4 | 0.3141928388744722\n", "4 | 5 | 0.31484413939764316\n", "7 | 8 | 0.32516101556436616\n", "5 | 6 | 0.41304544834321805\n", "0 | 1 | 0.47931424457263944\n", "5 | 6 | 0.5402407941042792\n", "3 | 4 | 0.7368074545203777\n", "2 | 4 | 0.7941786051376811\n" ] }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAYAAAAD8CAYAAAB+UHOxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvNQv5yAAAFY5JREFUeJzt3X+MZWd93/H3J2PGqE0abLyAsWcZo7gpVK2AjlymKM2Q5YexKi8kUBYpXUOJVlDoD7WVYuqqiZAqQ/9oVAoK2RAHNm0xiROHjTB17TUjXGmgHiPzw3aNF2ei2a6LNzgxQaSeePj2j3uW3o7v/Nr7c+a8X9LonnvOc8/z3efePd97nuc596SqkCS1z4+MOwBJ0niYACSppUwAktRSJgBJaikTgCS1lAlAklrKBCBJLWUCkKSWMgFIUktdNO4AtnLZZZfV7OzsuMOQpD3j/vvv/+OqOrCTshOdAGZnZ1leXh53GJK0ZyT5o52WtQtIklrKBCBJLWUCkKSWMgFIUkuZACSppUwAktRSJoBRWlqCm2/uPErSmE30dQD7ytISHDoEa2swPQ2nTsH8/LijktRingGMyuJi5+C/vt55XFwcd0SSWs4EMCoLC51v/lNTnceFhXFHJKnl7AIalfn5TrfP4mLn4G/3j6QxMwGM0vy8B35JE8MuIElqKROAJLWUCUCSWsoEIEktZQKQpJbqOwEkmUnyhSQPJ3kwyT/tUSZJPpLkdJKvJXlVv/VKkvoziGmgzwD/oqq+kuTHgPuT3FVVD3WVeRNwdfP3t4FfbR4lSWPS9xlAVT1eVV9plv8MeBi4YkOxw8CJ6vgS8Lwkl/dbtyTpwg10DCDJLPBK4MsbNl0BrHY9P8Ozk8T5fRxLspxk+dy5c4MMT5LUZWAJIMmPAr8L/LOq+u7GzT1eUr32U1XHq2ququYOHDgwqPAkSRsMJAEkeQ6dg/9/rqrf61HkDDDT9fxK4Owg6pYkXZhBzAIK8BvAw1X17zcpdhI42swGejXwVFU93m/dkqQLN4hZQK8B/gHw9SQPNOv+FXAQoKo+DtwBXAecBr4PvGsA9UqS+tB3Aqiq/07vPv7uMgW8r9+6JEmD45XAktRSJgBJaikTgCS1lAlAklrKBCBJLWUCGLalJbj55s6jJE0Qbwo/TEtLcOgQrK3B9DScOuVN4SVNDM8AhmlxsXPwX1/vPC4ujjsiSfohE8AwLSx0vvlPTXUeFxbGHZEk/ZBdQMM0P9/p9llc7Bz87f6RNEFMAMM2P++BX9JEsgtIklrKBCBJLWUCkKSWMgFIUkuZACSppUwAktRSg7op/C1JnkjyjU22LyR5KskDzd+/GUS9kqQLN6jrAD4JfBQ4sUWZe6vq7w2oPklSnwZyBlBVXwSeHMS+JEmjMcoxgPkkX03y+SR/fYT1SpJ6GNVPQXwFeElVfS/JdcDvA1f3KpjkGHAM4ODBgyMKT5LaZyRnAFX13ar6XrN8B/CcJJdtUvZ4Vc1V1dyBAwdGEZ4ktdJIEkCSFyVJs3xNU+93RlG3JKm3gXQBJfk0sABcluQM8EvAcwCq6uPAW4H3JnkG+HPgSFXVIOqWJF2YgSSAqnrHNts/SmeaqCRpQnglsCS1lAlAklrKBCBJLWUCkKSWMgFIUkuZACSppUwAktRSJgBJaikTgCS1lAlAklrKBCBJLWUCkKSWMgFIUkuZACSppUwAktRSJgBJaikTgCS1lAlAklpqIAkgyS1JnkjyjU22J8lHkpxO8rUkrxpEvZKkCzeoM4BPAtdusf1NwNXN3zHgVwdUryTpAg0kAVTVF4EntyhyGDhRHV8Cnpfk8kHULUm6MKMaA7gCWO16fqZZ9yxJjiVZTrJ87ty5kQQnSW00qgSQHuuqV8GqOl5Vc1U1d+DAgSGHJUntNaoEcAaY6Xp+JXB2RHVLknoYVQI4CRxtZgO9Gniqqh4fUd2SpB4uGsROknwaWAAuS3IG+CX
gOQBV9XHgDuA64DTwfeBdg6hXknThBpIAquod22wv4H2DqEuSNBheCSxJLWUCkKSWMgFIUkuZACSppUwAktRSJgBJaikTgCS1lAlAklrKBCBJLWUCkKSWMgFIUkuZACSppUwAktRSJgBJaikTgCS1lAlAklrKBCBJLTWQBJDk2iSPJDmd5MYe29+Z5FySB5q/XxhEvZKkC9f3LSGTTAEfA14PnAHuS3Kyqh7aUPQzVfX+fuuTJA3GIM4ArgFOV9VjVbUG3AocHsB+JUlDNIgEcAWw2vX8TLNuo59L8rUktyWZGUC9kqQ+DCIBpMe62vD8D4DZqvqbwN3ApzbdWXIsyXKS5XPnzg0gPElSL4NIAGeA7m/0VwJnuwtU1Xeq6unm6a8Df2uznVXV8aqaq6q5AwcODCA8SVIvg0gA9wFXJ7kqyTRwBDjZXSDJ5V1PrwceHkC9kqQ+9D0LqKqeSfJ+4E5gCrilqh5M8kFguapOAv8kyfXAM8CTwDv7rXcrS6urLK6ssDA7y/yMww2S1EuqNnbXT465ublaXl7e1WuWVlc5dOIEa+vrTE9NceroUZOApNZIcn9Vze2k7L67EnhxZYW19XXWq1hbX2dxZWXcIUnSRNp3CWBhdpbpqSmmEqanpliYnR13SJI0kfoeA5g08zMznDp61DEASdrGvksA0EkCHvglaWv7rgtIkrQzJoBdWlpd5eZ772VpdXX7wpI0wfZlF9CwOMVU0n7iGcAuOMVU0n5iAtgFp5hK2onVpVXuvfleVpcmu6vYLqBdcIqppO2sLq1y4tAJ1tfWmZqe4uipo8zMT+axwgSwS04xlbSVlcUV1tfWqfVifW2dlcWViU0AdgFJ0gDNLswyNT1FpsLU9BSzC7PjDmlTngFIGqrVpVVWFleYXZgd6zfhUcUxMz/D0VNHJ+LfvB0TwITxp6y1n0xKf/io45iZn5noA/95JoAJ4nUG2m8mpT98UuKYNI4BTBCvM9B+Myn94ZMSx6TxDGCCnL/O4PwZgNcZaK+blP7wSYlj0gzkjmBJrgX+A51bQn6iqj60YfvFwAk6N4P/DvD2qlrZbr8Xckewvc4xAEn92M0dwfo+A0gyBXwMeD1wBrgvycmqeqir2LuBP6mqn0hyBPgw8PZ+696PvM5A0qgMYgzgGuB0VT1WVWvArcDhDWUOA59qlm8DDiXJAOqWJF2gQSSAK4DuH7w406zrWaaqngGeAp4/gLolSRdoEAmg1zf5jQMLOynTKZgcS7KcZPncuXN9B6f/x3sZSOo2iFlAZ4DuTusrgbOblDmT5CLgx4Ene+2sqo4Dx6EzCDyA+ITXGEh6tkGcAdwHXJ3kqiTTwBHg5IYyJ4EbmuW3AvfUIKYface8xkDSRn2fAVTVM0neD9xJZxroLVX1YJIPAstVdRL4DeC3kpym883/SL/1ane8xkDSRgO5DmBY2ngdwDB5jYG0/430OgDtHV5jIKmbvwUkSS1lAtBAOMVU2nvsAlLfnGIq7U2eAahvTjGV9iYTgPp2forpVOIUU2kPsQtIfZufmeHU0aNOMZX2GBOABsIpptLeYxeQJLWUCUCSWsoEoD3B6wykwXMMQBPP6wyk4fAMQBPP6wyk4TABaOJ5nYE0HHYBaeJ5nYE0HCYA7QleZyANnl1Aaj1nGKmtPANQqznDSG3W1xlAkkuT3JXk0ebxkk3KrSd5oPnbeMN4aWycYaQ267cL6EbgVFVdDZxqnvfy51X1iubv+j7rlAbGGUaaNKtLq9x7872sLg2/S7LfLqDDwEKz/ClgEfjFPvcpjYwzjDRJVpdWOXHoBOtr60xNT3H01FFm5of3mew3Abywqh4HqKrHk7xgk3LPTbIMPAN8qKp+f7MdJjkGHAM4ePBgn+FJ23OGkSbFyuIK62vr1HqxvrbOyuLKeBNAkruBF/XYdNMu6jlYVWeTvBS4J8nXq+pbvQpW1XHgOMDc3Fztog5pIq0urbKyuMLswuxQ/zNr75tdmGV
qeuqHZwCzC7NDrW/bBFBVr9tsW5JvJ7m8+fZ/OfDEJvs42zw+lmQReCXQMwFI+8moT+m1t83Mz3D01NGRfWHodxD4JHBDs3wD8NmNBZJckuTiZvky4DXAQ33WK+0JvU7ppa3MzM/wUx/4qZF8Ueg3AXwIeH2SR4HXN89JMpfkE02ZlwHLSb4KfIHOGIAJQK1w/pQ+UxnJKb20G6ma3G72ubm5Wl5eHncYUl+GOQbg+MJ4THK7J7m/quZ2UtYrgaUhm5mfGcpBwvGF8dhP7e5vAUl7lOML47Gf2t0EIO1RoxhfGOVVqYM0zLj307iOYwDSHjbs8YVhdnUMK/ZRdNE4BiBp7IY1vgDDvSp1mAfpUVxNO8x2HyW7gCT1NMyujmH2o++nLpph8wxAUk/DvCp1mD95MOqrafcyxwAkjcUk96PvZY4BSJp4+6UffS9zDECSWsoEIEktZQKQpJYyAUhSS5kAJKmlTACS1FImAElqKROAJLVUXwkgyduSPJjkB0k2vfIsybVJHklyOsmN/dQpSRqMfs8AvgH8LPDFzQokmQI+BrwJeDnwjiQv77NeSVKf+vopiKp6GCDJVsWuAU5X1WNN2VuBw4A3hpekMRrFGMAVQPdtec406yRJY7TtGUCSu4EX9dh0U1V9dgd19Do92PQnSJMcA44BHDx4cAe7lyRdiG0TQFW9rs86zgDdP/l3JXB2i/qOA8eh83PQfdYtSdrEKLqA7gOuTnJVkmngCHByBPVKkrbQ7zTQtyQ5A8wDn0tyZ7P+xUnuAKiqZ4D3A3cCDwO/XVUP9he2JKlf/c4Cuh24vcf6s8B1Xc/vAO7opy5J0mB5JbAktZQJQJJaygQgSS1lApCkljIBSFJLmQAkqaVMAJLUUiYASWopE4AktZQJQJJaygQgSS1lApCkljIBSFJLmQDUGkurS9x8780srS6NOxRpIvT1c9DSXrG0usShE4dYW19jemqaU0dPMT8zP+6wpLHyDECtsLiyyNr6Guu1ztr6Gosri+MOSRo7E4BaYWF2gempaaYyxfTUNAuzC+MOSRq7vrqAkrwN+GXgZcA1VbW8SbkV4M+AdeCZqprrp15pt+Zn5jl19BSLK4sszC7Y/SPR/xjAN4CfBX5tB2VfW1V/3Gd90gWbn5n3wC916feewA8DJBlMNJKkkRnVGEAB/y3J/UmOjahO7XNO65T6s+0ZQJK7gRf12HRTVX12h/W8pqrOJnkBcFeS/1lVX9ykvmPAMYCDBw/ucPdqG6d1Sv3bNgFU1ev6raSqzjaPTyS5HbgG6JkAquo4cBxgbm6u+q1b+1OvaZ0mAGl3ht4FlOQvJ/mx88vAG+gMHksXzGmdUv/6nQb6FuA/AgeAzyV5oKremOTFwCeq6jrghcDtzUDxRcB/qar/2mfcajmndUr9S9Xk9rLMzc3V8nLPSwskST0kuX+n11p5JbAktZQJQJJaygQgSS1lApCkljIBSFJLmQAkqaUmehpoknPAH13gyy8DJvHXR41rd4xrd4xrd/ZjXC+pqgM7KTjRCaAfSZYn8b4DxrU7xrU7xrU7bY/LLiBJaikTgCS11H5OAMfHHcAmjGt3jGt3jGt3Wh3Xvh0DkCRtbT+fAUiStrCnE0CStyV5MMkPkmw6Yp7k2iSPJDmd5Mau9Vcl+XKSR5N8Jsn0gOK6NMldzX7vSnJJjzKvTfJA19//SfLmZtsnk/xh17ZXjCquptx6V90nu9aPs71ekWSpeb+/luTtXdsG2l6bfV66tl/c/PtPN+0x27XtA836R5K8sZ84LiCuf57koaZ9TiV5Sde2nu/piOJ6Z5JzXfX/Qte2G5r3/dEkN4w4rl/piumbSf60a9tQ2ivJLUmeSNLznijp+EgT89eSvKpr2+Dbqqr27B/wMuAngUVgbpMyU8C3gJcC08BXgZc3234bONIsfxx474Di+nfAjc3yjcCHtyl/KfAk8Jea558E3jqE9tpRXMD3Nlk/tvYC/ipwdbP8YuB
x4HmDbq+tPi9dZf4R8PFm+QjwmWb55U35i4Grmv1MjTCu13Z9ht57Pq6t3tMRxfVO4KM9Xnsp8FjzeEmzfMmo4tpQ/h8Dt4ygvf4u8CrgG5tsvw74PBDg1cCXh9lWe/oMoKoerqpHtil2DXC6qh6rqjXgVuBwkgA/A9zWlPsU8OYBhXa42d9O9/tW4PNV9f0B1b+Z3cb1Q+Nur6r6ZlU92iyfBZ6gcyOiQev5edki3tuAQ037HAZuraqnq+oPgdPN/kYSV1V9oesz9CXgygHV3VdcW3gjcFdVPVlVfwLcBVw7prjeAXx6QHVvqjr3Qn9yiyKHgRPV8SXgeUkuZ0httacTwA5dAax2PT/TrHs+8KdV9cyG9YPwwqp6HKB5fME25Y/w7A/fv21OAX8lycUjjuu5SZaTfOl8txQT1F5JrqHzre5bXasH1V6bfV56lmna4yk67bOT1w4zrm7vpvNN8rxe7+ko4/q55v25LcnMLl87zLhousquAu7pWj2s9trOZnEPpa36uiXkKCS5G3hRj003VdVnd7KLHutqi/V9x7XTfTT7uRz4G8CdXas/APxvOge548AvAh8cYVwHq+pskpcC9yT5OvDdHuXG1V6/BdxQVT9oVl9we/Wqose6jf/OoXymtrHjfSf5eWAO+Omu1c96T6vqW71eP4S4/gD4dFU9neQ9dM6efmaHrx1mXOcdAW6rqvWudcNqr+2M9LM18Qmgql7X5y7OADNdz68EztL5nY3nJbmo+RZ3fn3fcSX5dpLLq+rx5oD1xBa7+vvA7VX1F137frxZfDrJbwL/cpRxNV0sVNVjSRaBVwK/y5jbK8lfAT4H/Ovm9Pj8vi+4vXrY7PPSq8yZJBcBP07ntH4nrx1mXCR5HZ2k+tNV9fT59Zu8p4M4oG0bV1V9p+vprwMf7nrtwobXLg4gph3F1eUI8L7uFUNsr+1sFvdQ2qoNXUD3AVenM4Nlms6bfbI6IytfoNP/DnADsJMzip042exvJ/t9Vt9jcxA83+/+ZqDnjIFhxJXkkvNdKEkuA14DPDTu9mreu9vp9I/+zoZtg2yvnp+XLeJ9K3BP0z4ngSPpzBK6Crga+B99xLKruJK8Evg14PqqeqJrfc/3dIRxXd719Hrg4Wb5TuANTXyXAG/g/z8THmpcTWw/SWdQdalr3TDbazsngaPNbKBXA081X3CG01bDGOke1R/wFjqZ8Wng28CdzfoXA3d0lbsO+CadDH5T1/qX0vkPehr4HeDiAcX1fOAU8GjzeGmzfg74RFe5WeB/AT+y4fX3AF+ncyD7T8CPjiou4O80dX+1eXz3JLQX8PPAXwAPdP29Yhjt1evzQqdL6fpm+bnNv/900x4v7XrtTc3rHgHeNODP+3Zx3d38PzjfPie3e09HFNfNwINN/V8A/lrXa/9h046ngXeNMq7m+S8DH9rwuqG1F50ve483n+UzdMZq3gO8p9ke4GNNzF+na3bjMNrKK4ElqaXa0AUkSerBBCBJLWUCkKSWMgFIUkuZACSppUwAktRSJgBJaikTgCS11P8FYHbV+CJZGPQAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "clusters = hierarchical(c1,4)\n", "colors = ['red', 'green', 'purple', 'teal']\n", "for cluster_index, cluster in enumerate(clusters):\n", " for point_index, point in enumerate(cluster):\n", " plt.plot([point[0]], [point[1]], marker='o', markersize=3, color=colors[cluster_index])" ] } ], "metadata": { "kernelspec": { 
"display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 1 }