{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Unsupervised modeling\n", "\n", "The goal of this notebook is to identify and extract clusters that reveal a user's habits and common routines, such as home/school/work clusters." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "%reload_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[PosixPath('data/ny.html'),\n", " PosixPath('data/houston_processed.feather'),\n", " PosixPath('data/florida.html'),\n", " PosixPath('data/tx.html'),\n", " PosixPath('data/houston_ready.feather'),\n", " PosixPath('data/houston.html'),\n", " PosixPath('data/location_history.json'),\n", " PosixPath('data/houston_processed_miles_time_diff.feather')]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "import pandas as pd\n", "from pathlib import Path\n", "import json\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "# from geopy.distance import distance\n", "\n", "PATH = Path('data')\n", "list(PATH.iterdir())\n", "\n", "from IPython.core.interactiveshell import InteractiveShell\n", "InteractiveShell.ast_node_interactivity = \"all\"" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# from mpl_toolkits.basemap import Basemap\n", "import folium\n", "from folium.plugins import MarkerCluster,FastMarkerCluster\n", "\n", "from sklearn.cluster import KMeans\n", "k=5\n", "\n", "import matplotlib.cm as cmx\n", "import matplotlib.colors as mcolors\n", "# from cycler import cycler\n", "\n", "# def get_cmap(N):\n", "# color_norm = mcolors.Normalize(vmin=0, vmax=N-1)\n", "# return cmx.ScalarMappable(norm=color_norm, cmap='tab10').to_rgba\n", "# num_colr = k\n", "# cmap = get_cmap(num_colr)\n", "# colr_list = [cmap(float(x)) for x in 
range(num_colr)]\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "df = pd.read_feather(PATH/'houston_ready.feather')" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | 0 | \n", "1 | \n", "2 | \n", "
---|---|---|---|
accuracy | \n", "30 | \n", "21 | \n", "1259 | \n", "
altitude | \n", "0 | \n", "0 | \n", "0 | \n", "
heading | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
velocity | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
verticalAccuracy | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
act_conf1 | \n", "46 | \n", "NaN | \n", "100 | \n", "
act_type1 | \n", "UNKNOWN | \n", "None | \n", "TILTING | \n", "
act_cont2 | \n", "27 | \n", "NaN | \n", "NaN | \n", "
act_type2 | \n", "IN_VEHICLE | \n", "None | \n", "None | \n", "
extra_intVal | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
extra_name | \n", "None | \n", "None | \n", "None | \n", "
extra_type | \n", "None | \n", "None | \n", "None | \n", "
date_time | \n", "2015-11-30 21:41:32.110000-06:00 | \n", "2015-11-30 21:43:05.510000-06:00 | \n", "2015-11-30 21:47:40.071000-06:00 | \n", "
year | \n", "2015 | \n", "2015 | \n", "2015 | \n", "
month | \n", "11 | \n", "11 | \n", "11 | \n", "
day | \n", "30 | \n", "30 | \n", "30 | \n", "
day_of_week | \n", "0 | \n", "0 | \n", "0 | \n", "
hour | \n", "21 | \n", "21 | \n", "21 | \n", "
minute | \n", "41 | \n", "43 | \n", "47 | \n", "
latitude | \n", "29.6894 | \n", "29.6893 | \n", "29.6924 | \n", "
longitude | \n", "-95.2712 | \n", "-95.2712 | \n", "-95.2805 | \n", "
cluster | \n", "0 | \n", "0 | \n", "0 | \n", "
mile_diff | \n", "0 | \n", "0.00493873 | \n", "0.599316 | \n", "
plane | \n", "False | \n", "False | \n", "False | \n", "