{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "**The k-means algorithm searches for a pre-determined number of clusters.**\n", "\n", "* The cluster center is the arithmetic mean of all points belonging to the cluster.\n", "* Each point is closer to its own cluster center than to other cluster centers." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from sklearn.cluster import KMeans" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | CustomerID | \n", "Genre | \n", "Age | \n", "Annual Income (k$) | \n", "Spending Score (1-100) | \n", "
|---|---|---|---|---|---|
| 0 | \n", "1 | \n", "Male | \n", "19 | \n", "15 | \n", "39 | \n", "
| 1 | \n", "2 | \n", "Male | \n", "21 | \n", "15 | \n", "81 | \n", "
| 2 | \n", "3 | \n", "Female | \n", "20 | \n", "16 | \n", "6 | \n", "
| 3 | \n", "4 | \n", "Female | \n", "23 | \n", "16 | \n", "77 | \n", "
| 4 | \n", "5 | \n", "Female | \n", "31 | \n", "17 | \n", "40 | \n", "