{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Notebook to show how to communicate between `skmob` and `mobilkit`\n",
    "\n",
    "For now, given the current implementation of `skmob`, the communication happens via the `TrajDataFrame`.\n",
    "\n",
    "We load a sample file from CSV into `skmob`, filter and analyze it, port it to `dask`, and then port it back to `skmob`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%config Completer.use_jedi = False\n",
    "%matplotlib inline\n",
    "# Pick up edits to imported modules (e.g. mobilkit) without manual `reload(...)` calls.\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "import os\n",
    "import sys\n",
    "import pytz\n",
    "from datetime import datetime\n",
    "import geopandas as gpd\n",
    "from copy import copy, deepcopy\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "from glob import glob\n",
    "from shapely.geometry import Point, Polygon\n",
    "from collections import Counter\n",
    "from dask.distributed import Client\n",
    "from mpl_toolkits.axes_grid1 import make_axes_locatable\n",
    "\n",
    "import skmob\n",
    "import mobilkit\n",
    "\n",
    "# Everything used further down is imported here so a fresh kernel can run top to bottom.\n",
    "import dask.dataframe as dd\n",
    "from importlib import reload\n",
    "from skmob.preprocessing import detection, filtering\n",
    "\n",
    "sns.set_context(\"notebook\", font_scale=1.5)\n",
    "from pandas.plotting import register_matplotlib_converters\n",
    "register_matplotlib_converters()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Configuration\n",
    "\n",
    "Everything a reader may need to adapt (input path, column names, time zone) is defined in the next cell."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gzipped sample CSV to load; point this at your own copy of the data.\n",
    "SAMPLE_CSV_PATH = \"/data/datiCuebiqTiled/part-00000-c02e38a8-f763-4f0f-a3f0-3ebbf4738748-c000.csv.gz\"\n",
    "\n",
    "# Name of the \"acc\" column (presumably the position accuracy -- TODO confirm).\n",
    "accColName = \"acc\"\n",
    "\n",
    "# Column names assigned to the CSV fields, in file order (passed to `read_csv` as `names`).\n",
    "CSV_COLUMNS = [\"UTC\", \"uid\", \"OS\", \"lat\", \"lng\", accColName, \"tz\", \"tile_ID\"]\n",
    "\n",
    "# Time zone handed to `mobilkit.loader.compute_datetime_col`.\n",
    "LOCAL_TZ = \"America/Mexico_City\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Load the sample file and port it to `skmob`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load a sample file...\n",
    "df = pd.read_csv(SAMPLE_CSV_PATH, sep=\",\", compression=\"gzip\", names=CSV_COLUMNS)\n",
    "\n",
    "# Add the datetime col\n",
    "# NOTE(review): the return value is discarded, so this relies on the column being\n",
    "# added to `df` in place -- confirm against `mobilkit.loader.compute_datetime_col`.\n",
    "mobilkit.loader.compute_datetime_col(df, LOCAL_TZ)\n",
    "\n",
    "# Port it to skmob...\n",
    "traj = skmob.TrajDataFrame(df)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Filter it and do stop detection"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# filter out all points with a speed (in km/h) from the previous point higher than 500 km/h\n",
    "ftdf = filtering.filter(traj, max_speed_kmh=500.)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# compute the stops for each individual in the TrajDataFrame\n",
    "stdf = detection.stops(\n",
    "    ftdf,\n",
    "    stop_radius_factor=0.5,\n",
    "    minutes_for_a_stop=20.0,\n",
    "    spatial_radius_km=0.2,\n",
    "    leaving_time=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Port it to `mobilkit` for later analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hand the detected stops over to mobilkit\n",
    "df_dask = mobilkit.loader.load_from_skmob(stdf)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### And back to `skmob`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Back to skmob\n",
    "back_df = mobilkit.loader.dask_to_skmob(df_dask)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}