{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "hide-input"
    ]
   },
   "outputs": [],
   "source": [
    "import panel as pn\n",
    "\n",
    "import pandas as pd\n",
    "import holoviews as hv\n",
    "\n",
    "from sklearn.cluster import KMeans\n",
    "\n",
    "pn.extension(design='material')\n",
    "\n",
    "import hvplot.pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "hide-input"
    ]
   },
   "outputs": [],
   "source": [
    "penguins = pd.read_csv('https://datasets.holoviz.org/penguins/v1/penguins.csv').dropna()\n",
    "cols = list(penguins.columns)[2:6]\n",
    "\n",
    "x = pn.widgets.Select(name='x', options=cols, sizing_mode=\"stretch_width\", margin=10)\n",
    "y = pn.widgets.Select(name='y', options=cols, value='bill_depth_mm', sizing_mode=\"stretch_width\")\n",
    "n_clusters = pn.widgets.IntSlider(name='n_clusters', start=2, end=5, value=3, sizing_mode=\"stretch_width\", margin=10)\n",
    "\n",
    "def cluster(data, n_clusters):\n",
    "    kmeans = KMeans(n_clusters=n_clusters, n_init='auto')\n",
    "    est = kmeans.fit(data)\n",
    "    return est.labels_.astype('str')\n",
    "\n",
    "def plot(x, y, n_clusters):\n",
    "    penguins['labels'] = cluster(penguins.iloc[:, 2:6].values, n_clusters)\n",
    "    centers = penguins.groupby('labels').mean(numeric_only=True)\n",
    "    return (penguins.sort_values('labels').hvplot.scatter(\n",
    "        x, y, c='labels', hover_cols=['species'], line_width=1, size=60, frame_width=400, frame_height=400\n",
    "    ).opts(marker=hv.dim('species').categorize({'Adelie': 'square', 'Chinstrap': 'circle', 'Gentoo': 'triangle'})) * centers.hvplot.scatter(\n",
    "        x, y, marker='x', color='black', size=400, padding=0.1, line_width=5\n",
    "    ))\n",
    "\n",
    "description = pn.pane.Markdown(\"\"\"\n",
    "This app applies *k-means clustering* on the Palmer Penguins dataset using scikit-learn, parameterizing the number of clusters and the variables to plot.\n",
    "<br><br>\n",
    "Each cluster is denoted by one color while the penguin species is indicated using markers: \n",
    "<br><br>\n",
    "● - Adelie, ■ - Chinstrap,  ▲ - Gentoo\n",
    "<br><br>\n",
    "By comparing the two we can assess the performance of the clustering algorithm.\n",
    "<br><br>\n",
    "Additionally the center of each cluster is marked with an `X`.\n",
    "<br><br>\n",
    "\"\"\", sizing_mode=\"stretch_width\")\n",
    "\n",
    "explanation = pn.pane.Markdown(\"\"\"\n",
    "**Species**\n",
    "\n",
    "Adelie: ●\\n\n",
    "Chinstrap: ■\\n\n",
    "Gentoo: ▲\n",
    "\"\"\", margin=(0, 10))\n",
    "\n",
    "code = pn.pane.Markdown(\"\"\"\n",
    "```python\n",
    "import panel as pn\n",
    "\n",
    "pn.extension()\n",
    "\n",
    "x = pn.widgets.Select(name='x', options=cols)\n",
    "y = pn.widgets.Select(name='y', options=cols, value='bill_depth_mm')\n",
    "n_clusters = pn.widgets.IntSlider(name='n_clusters', start=2, end=5, value=3)\n",
    "\n",
    "explanation = pn.pane.Markdown(...)\n",
    "\n",
    "def plot(x, y, n_clusters):\n",
    "    ...\n",
    "    \n",
    "interactive_plot = pn.bind(plot, x, y, n_clusters)\n",
    "    \n",
    "pn.Row(\n",
    "    pn.WidgetBox(x, y, n_clusters, explanation),  \n",
    "    interactive_plot\n",
    ")\n",
    "```\n",
    "\"\"\", width=800)\n",
    "\n",
    "app = pn.Tabs(\n",
    "    ('APP',\n",
    "        pn.Row(\n",
    "            pn.WidgetBox(x, y, n_clusters, explanation, width=175, margin=10),  \n",
    "            pn.bind(plot, x, y, n_clusters),),     \n",
    "    ),\n",
    "    ('CODE', code),\n",
    "    ('DESCRIPTION', description),\n",
    "    width=800\n",
    ")\n",
    "\n",
    "\n",
    "pn.Row(\n",
    "    pn.layout.HSpacer(),\n",
    "    app,\n",
    "    pn.layout.HSpacer(),\n",
    "    sizing_mode='stretch_width'\n",
    ").embed(max_opts=4, json=True, json_prefix='json')"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}