{
 "nbformat": 4,
 "nbformat_minor": 0,
 "metadata": {
  "colab": { "name": "knn_map2.ipynb", "provenance": [], "collapsed_sections": [] },
  "kernelspec": { "name": "python3", "display_name": "Python 3" }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# K-means clustering of North York origin/destination records\n",
    "\n",
    "Loads `map/data/north-york_kmeans.csv`, fits k-means for k = 1..19 on the origin and destination coordinates, inspects the elbow curve, and then maps the clusters and their centroids with folium."
   ]
  },
  {
   "cell_type": "code",
   "metadata": { "id": "yYanQhRwuNaE" },
   "source": [
    "import folium\n",
    "import matplotlib.colors as colors\n",
    "import matplotlib.cm as cm\n",
    "from folium import plugins\n",
    "from scipy.spatial.distance import cdist, pdist\n",
    "from sklearn import metrics\n",
    "from sklearn.cluster import KMeans\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "plt.style.use('ggplot')\n"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data\n",
    "\n",
    "Every record is kept once and repeated `severity` more times, so that more severe records carry more weight in the clustering."
   ]
  },
  {
   "cell_type": "code",
   "metadata": { "id": "SDt2Ua5ZuQSX" },
   "source": [
    "df_raw = pd.read_csv(\"map/data/north-york_kmeans.csv\")\n",
    "\n",
    "# Keep every record once and add `severity` extra copies of it.\n",
    "# DataFrame.append returns a new frame instead of modifying its caller (and\n",
    "# was removed in pandas 2.0), so a row-by-row append loop whose result is\n",
    "# discarded adds nothing; Index.repeat builds the expanded frame in one step.\n",
    "# Severities below zero add no copies, like multiplying a list by a negative.\n",
    "# NOTE(review): assumes `severity` holds whole numbers without NaN - confirm.\n",
    "extra_copies = df_raw['severity'].astype(int).clip(lower=0).to_numpy()\n",
    "df_loc = df_raw.loc[df_raw.index.repeat(extra_copies + 1)].reset_index(drop=True)\n",
    "df_loc.head()"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": { "id": "5ORjM6_P_9El" },
   "source": [
    "# Spot check: every row (repeated copies included) at one origin latitude.\n",
    "df_loc.loc[df_loc['origin coordinate latitude'] == 43.701330]"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fit k-means and choose k\n",
    "\n",
    "One model is fitted per k in `Ks`; the elbow curve shows the percentage of the total sum of squares explained by each of them."
   ]
  },
  {
   "cell_type": "code",
   "metadata": { "id": "T7FutkKU2Ijd" },
   "source": [
    "# Feature matrix: one row per record, columns are the origin and destination\n",
    "# coordinates. The spelling 'lontitude' is kept as-is because it presumably\n",
    "# matches the CSV header - verify before renaming.\n",
    "FEATURE_COLUMNS = [\n",
    "    'origin coordinate latitude',\n",
    "    'origin coordinate longitude',\n",
    "    'destination coordinate latitude',\n",
    "    'destination coordinate lontitude',\n",
    "]\n",
    "SEED = 42  # fixed seed so that re-running the notebook gives the same clusters\n",
    "\n",
    "X = df_loc[FEATURE_COLUMNS].values\n",
    "Ks = range(1, 20)\n",
    "# n_init is spelled out because newer scikit-learn releases changed its default.\n",
    "kmean = [KMeans(n_clusters=i, n_init=10, random_state=SEED).fit(X) for i in Ks]\n",
    "\n",
    "\n",
    "def plot_elbow(kmean, X):\n",
    "    \"\"\"Plot the percentage of the total sum of squares explained per model.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    kmean : sequence of fitted KMeans estimators\n",
    "        One estimator per candidate number of clusters.\n",
    "    X : numpy.ndarray of shape (n_samples, n_features)\n",
    "        The data whose distances to the fitted centroids are measured.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "        The figure is shown with plt.show().\n",
    "    \"\"\"\n",
    "    n_clusters = [model.n_clusters for model in kmean]\n",
    "\n",
    "    # Within-cluster sum of squares: squared distance from every sample to\n",
    "    # its nearest centroid, summed per model.\n",
    "    wcss = np.array([\n",
    "        (cdist(X, model.cluster_centers_, 'euclidean').min(axis=1) ** 2).sum()\n",
    "        for model in kmean\n",
    "    ])\n",
    "    # Total sum of squares around the overall mean. This equals\n",
    "    # sum(pdist(X)**2) / n_samples but needs O(n) instead of O(n^2) work.\n",
    "    tss = ((X - X.mean(axis=0)) ** 2).sum()\n",
    "    bss = tss - wcss\n",
    "\n",
    "    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8))\n",
    "    ax.plot(n_clusters, bss / tss * 100, 'b*-')\n",
    "    ax.set_xlabel('Number of clusters (k)')\n",
    "    ax.set_ylabel('Explained sum of squares (%)')\n",
    "    ax.set_title('Elbow curve')\n",
    "    ax.grid(True)\n",
    "    plt.show()\n",
    "\n",
    "\n",
    "plot_elbow(kmean, X)\n"
   ],
   "execution_count": 
null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Map the clusters\n",
    "\n",
    "Each record is drawn at its origin coordinate and coloured by the cluster it is assigned to."
   ]
  },
  {
   "cell_type": "code",
   "metadata": { "id": "ajtuKtLUGpxe" },
   "source": [
    "MAP_CENTER = [43.761539, -79.411079]\n",
    "N_COLORS = 20  # size of the colour palette; cluster labels index into it\n",
    "\n",
    "map_clusters = folium.Map(location=MAP_CENTER, zoom_start=4)\n",
    "colors_array = cm.rainbow(np.linspace(0, 1, N_COLORS))\n",
    "cluster_colors = [colors.rgb2hex(rgba) for rgba in colors_array]\n",
    "\n",
    "\n",
    "def plot_stations_map(df_loc, target_map=None):\n",
    "    \"\"\"Add one circle marker per row of df_loc, coloured by its cluster.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df_loc : pandas.DataFrame\n",
    "        Must provide the columns 'origin coordinate latitude',\n",
    "        'origin coordinate longitude' and 'cluster'; the cluster value is\n",
    "        used as an index into the colour palette.\n",
    "    target_map : folium.Map, optional\n",
    "        Map that receives the markers. Defaults to the notebook-level\n",
    "        map_clusters.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "        The markers are added to the map in place.\n",
    "    \"\"\"\n",
    "    if target_map is None:\n",
    "        target_map = map_clusters\n",
    "\n",
    "    rows = zip(\n",
    "        df_loc['origin coordinate latitude'],\n",
    "        df_loc['origin coordinate longitude'],\n",
    "        df_loc['cluster'],\n",
    "    )\n",
    "    for lat, lng, cluster in rows:\n",
    "        marker_color = cluster_colors[cluster]\n",
    "        folium.vector_layers.CircleMarker(\n",
    "            [lat, lng],\n",
    "            radius=5,\n",
    "            tooltip='Cluster ' + str(cluster),\n",
    "            color=marker_color,\n",
    "            fill=True,\n",
    "            fill_color=marker_color,\n",
    "            fill_opacity=0.1,\n",
    "        ).add_to(target_map)\n"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": { "id": "So2xsc6uZNst" },
   "source": [
    "selected_ks = [16]\n",
    "\n",
    "for n_clusters in selected_ks:\n",
    "    # kmean is ordered by k starting at 1, so the model for k sits at k - 1.\n",
    "    est = kmean[n_clusters - 1]\n",
    "    df_loc['cluster'] = est.predict(X).tolist()\n",
    "    plot_stations_map(df_loc)\n"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": { "id": "U4H-1e4wFv7S" },
   "source": [
    "map_clusters"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cluster centroids\n",
    "\n",
    "Every centroid has four coordinates (origin latitude/longitude, destination latitude/longitude) and is drawn as a line from its origin point to its destination point."
   ]
  },
  {
   "cell_type": "code",
   "metadata": { "id": "86zwJpr5GoXa" },
   "source": [
    "centers = np.array(est.cluster_centers_)"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": { "id": "P6YmraDQHwsr" },
   "source": [
    "print(centers)"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "metadata": { "id": "iRCu7KetH7HT" },
   "source": [
    "centroid_map = folium.Map(location=MAP_CENTER, zoom_start=15)\n",
    "\n",
    "# Column order follows the feature matrix: origin lat, origin lng,\n",
    "# destination lat, destination lng.\n",
    "for x1, y1, x2, y2 in centers:\n",
    "    loc = [[x1, y1], [x2, y2]]\n",
    "    folium.PolyLine(\n",
    "        loc,\n",
    "        color='red',\n",
    "        weight=4,\n",
    "        opacity=1,\n",
    "    ).add_to(centroid_map)\n",
    "\n",
    "centroid_map"
   ],
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Severity map\n",
    "\n",
    "Each record is drawn at its origin coordinate with a radius proportional to its severity relative to the mean severity."
   ]
  },
  {
   "cell_type": "code",
   "metadata": { "id": "xHXfzcsJY5wZ", "cellView": "both" },
   "source": [
    "map_c = folium.Map(location=[43.761539, -79.411079], zoom_start=4)\n",
    "\n",
    "occurrences = folium.map.FeatureGroup()\n",
    "n_mean = df_loc['severity'].mean()\n",
    "\n",
    "# Only coordinates and severity are needed here, so this cell does not\n",
    "# depend on the 'cluster' column having been computed.\n",
    "rows = zip(\n",
    "    df_loc['origin coordinate latitude'],\n",
    "    df_loc['origin coordinate longitude'],\n",
    "    df_loc['severity'],\n",
    ")\n",
    "for lat, lng, sev in rows:\n",
    "    occurrences.add_child(\n",
    "        folium.vector_layers.CircleMarker(\n",
    "            [lat, lng],\n",
    "            radius=sev / n_mean * 8,  # a record of mean severity gets radius 8\n",
    "            color='orange',\n",
    "            fill=True,\n",
    "            fill_color='yellow',\n",
    "            fill_opacity=0.3,\n",
    "            tooltip=str(sev),\n",
    "        )\n",
    "    )\n",
    "\n",
    "map_c.add_child(occurrences)"
   ],
   "execution_count": null,
   "outputs": []
  }
 ]
}