{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "hide-input"
    ]
   },
   "outputs": [],
   "source": [
    "import panel as pn\n",
    "\n",
    "import pandas as pd\n",
    "import holoviews as hv\n",
    "\n",
    "from sklearn.cluster import KMeans\n",
    "\n",
    "pn.extension(design='material')\n",
    "\n",
    "# Imported for its side effect only: provides the `.hvplot` accessor used on the DataFrames below.\n",
    "import hvplot.pandas"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "hide-input"
    ]
   },
   "outputs": [],
   "source": [
    "penguins = pd.read_csv('https://datasets.holoviz.org/penguins/v1/penguins.csv').dropna()\n",
    "# Columns 2-5 of the frame are used both as the clustering features and as the axis choices.\n",
    "cols = list(penguins.columns)[2:6]\n",
    "\n",
    "# Fixed k-means seed: without it every widget change re-clusters from a new random\n",
    "# start, so cluster labels/colors can shuffle between otherwise identical views.\n",
    "SEED = 0\n",
    "\n",
    "# Marker glyph per species. Must stay in sync with the legend text in `description`\n",
    "# and `explanation` below (circle = Adelie, square = Chinstrap, triangle = Gentoo).\n",
    "SPECIES_MARKERS = {'Adelie': 'circle', 'Chinstrap': 'square', 'Gentoo': 'triangle'}\n",
    "\n",
    "x = pn.widgets.Select(name='x', options=cols, sizing_mode=\"stretch_width\", margin=10)\n",
    "y = pn.widgets.Select(name='y', options=cols, value='bill_depth_mm', sizing_mode=\"stretch_width\")\n",
    "n_clusters = pn.widgets.IntSlider(name='n_clusters', start=2, end=5, value=3, sizing_mode=\"stretch_width\", margin=10)\n",
    "\n",
    "def cluster(data, n_clusters, random_state=SEED):\n",
    "    \"\"\"Run k-means on ``data`` and return one cluster label per row.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : array-like\n",
    "        Feature matrix handed to ``KMeans.fit`` unchanged.\n",
    "    n_clusters : int\n",
    "        Number of clusters to fit.\n",
    "    random_state : int, optional\n",
    "        Seed forwarded to ``KMeans``; defaults to ``SEED`` so repeated calls\n",
    "        with the same inputs give the same labels.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    numpy.ndarray\n",
    "        The fitted ``labels_`` converted to strings.\n",
    "    \"\"\"\n",
    "    kmeans = KMeans(n_clusters=n_clusters, n_init='auto', random_state=random_state)\n",
    "    return kmeans.fit(data).labels_.astype('str')\n",
    "\n",
    "def plot(x, y, n_clusters):\n",
    "    \"\"\"Scatter the penguins on columns ``x`` / ``y``, colored by k-means cluster.\n",
    "\n",
    "    Clustering always uses all of ``cols``; ``x`` and ``y`` only pick which two\n",
    "    columns are drawn. Species is encoded by marker shape and the per-cluster\n",
    "    mean of each numeric column is overlaid as a black ``x``.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    x, y : str\n",
    "        Column names to put on the horizontal / vertical axis.\n",
    "    n_clusters : int\n",
    "        Number of clusters passed to ``cluster``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    The overlay of the point scatter and the cluster-center scatter.\n",
    "    \"\"\"\n",
    "    # Work on a labeled copy so the shared `penguins` frame is never mutated by a callback.\n",
    "    labeled = penguins.assign(labels=cluster(penguins[cols].values, n_clusters))\n",
    "    centers = labeled.groupby('labels').mean(numeric_only=True)\n",
    "    points = labeled.sort_values('labels').hvplot.scatter(\n",
    "        x, y, c='labels', hover_cols=['species'], line_width=1, size=60, frame_width=400, frame_height=400\n",
    "    ).opts(marker=hv.dim('species').categorize(SPECIES_MARKERS))\n",
    "    center_marks = centers.hvplot.scatter(\n",
    "        x, y, marker='x', color='black', size=400, padding=0.1, line_width=5\n",
    "    )\n",
    "    return points * center_marks\n",
    "\n",
    "description = pn.pane.Markdown(\"\"\"\n",
    "This app applies *k-means clustering* on the Palmer Penguins dataset using scikit-learn, parameterizing the number of clusters and the variables to plot.\n",
    "<br><br>\n",
    "Each cluster is denoted by one color while the penguin species is indicated using markers: \n",
    "<br><br>\n",
    "● - Adelie, ■ - Chinstrap, ▲ - Gentoo\n",
    "<br><br>\n",
    "By comparing the two we can assess the performance of the clustering algorithm.\n",
    "<br><br>\n",
    "Additionally the center of each cluster is marked with an `X`.\n",
    "<br><br>\n",
    "\"\"\", sizing_mode=\"stretch_width\")\n",
    "\n",
    "explanation = pn.pane.Markdown(\"\"\"\n",
    "**Species**\n",
    "\n",
    "Adelie: ●\\n\n",
    "Chinstrap: ■\\n\n",
    "Gentoo: ▲\n",
    "\"\"\", margin=(0, 10))\n",
    "\n",
    "code = pn.pane.Markdown(\"\"\"\n",
    "```python\n",
    "import panel as pn\n",
    "\n",
    "pn.extension()\n",
    "\n",
    "x = pn.widgets.Select(name='x', options=cols)\n",
    "y = pn.widgets.Select(name='y', options=cols, value='bill_depth_mm')\n",
    "n_clusters = pn.widgets.IntSlider(name='n_clusters', start=2, end=5, value=3)\n",
    "\n",
    "explanation = pn.pane.Markdown(...)\n",
    "\n",
    "def plot(x, y, n_clusters):\n",
    " ...\n",
    " \n",
    "interactive_plot = pn.bind(plot, x, y, n_clusters)\n",
    " \n",
    "pn.Row(\n",
    " pn.WidgetBox(x, y, n_clusters, explanation), \n",
    " interactive_plot\n",
    ")\n",
    "```\n",
    "\"\"\", width=800)\n",
    "\n",
    "app = pn.Tabs(\n",
    "    ('APP',\n",
    "        pn.Row(\n",
    "            pn.WidgetBox(x, y, n_clusters, explanation, width=175, margin=10),\n",
    "            pn.bind(plot, x, y, n_clusters),\n",
    "        ),\n",
    "    ),\n",
    "    ('CODE', code),\n",
    "    ('DESCRIPTION', description),\n",
    "    width=800\n",
    ")\n",
    "\n",
    "\n",
    "# Center the app horizontally and embed the widget states into the output.\n",
    "pn.Row(\n",
    "    pn.layout.HSpacer(),\n",
    "    app,\n",
    "    pn.layout.HSpacer(),\n",
    "    sizing_mode='stretch_width'\n",
    ").embed(max_opts=4, json=True, json_prefix='json')"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}