{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "\n", "from pycircstat2 import Circular, load_data\n", "from pycircstat2.clustering import MovM, CircHAC, CircKMeans" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Clustering circular data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We implemented 3 clustering algorithms for circular data: \n", "\n", "- Mixture of von Mises\n", "- Circular hierarchical agglomerative clustering\n", "- Circular k-means\n", "\n", "Here, we use dataset B3 from Fisher (1993) to demonstrate their usage, as it is also the example shown in Jammalamadaka & Vaidyanathan (2024)." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "d = load_data(\"B3\", source=\"fisher\")[\"θ\"].values[:]\n", "\n", "movm = MovM(n_clusters=2, unit=\"degree\", random_seed=2046)\n", "movm.fit(d)\n", "\n", "hac = CircHAC(n_clusters=2, unit=\"degree\", random_seed=2046, metric=\"chord\")\n", "hac.fit(d)\n", "\n", "ckm = CircKMeans(n_clusters=2, unit=\"degree\", random_seed=2046, metric=\"chord\")\n", "ckm.fit(d)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | Method | \n", "μ1 (deg) | \n", "μ2 (deg) | \n", "κ1 | \n", "κ2 | \n", "p1 | \n", "p2 | \n", "
|---|---|---|---|---|---|---|---|
| 0 | \n", "Paper (J&V 2024) | \n", "63.4716 | \n", "241.2036 | \n", "2.6187 | \n", "8.4465 | \n", "0.8400 | \n", "0.1600 | \n", "
| 1 | \n", "MovM | \n", "63.4706 | \n", "241.1973 | \n", "2.609 | \n", "8.4559 | \n", "0.8367 | \n", "0.1633 | \n", "
| 2 | \n", "CircHAC | \n", "247.0065 | \n", "65.9970 | \n", "N/A | \n", "N/A | \n", "0.1711 | \n", "0.8289 | \n", "
| 3 | \n", "CircKMeans | \n", "64.6328 | \n", "246.0378 | \n", "N/A | \n", "N/A | \n", "0.8158 | \n", "0.1842 | \n", "