{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Uber Movement speeds on OSM road geometry (San Francisco, December 2019)\n",
    "\n",
    "Take a seeded random sample of the hourly Uber Movement speed records, attach the matching OpenStreetMap way geometry, and export the result as GeoJSON for kepler.gl.\n",
    "\n",
    "Input files are read from paths relative to this notebook:\n",
    "\n",
    "- `movement-speeds-hourly-san-francisco-12-2019.csv` (Uber Movement speeds download)\n",
    "- `sf.geojson` (Overpass Turbo export)\n",
    "\n",
    "The download links are kept as comments in the cells that load each file."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import pandas as pd\n",
    "import geopandas as gpd\n",
    "from shapely.geometry import Point\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "%matplotlib inline\n",
    "import folium\n",
    "import folium.plugins\n",
    "import branca"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: everything a reader might want to tune lives here.\n",
    "UBER_SPEEDS_CSV = 'movement-speeds-hourly-san-francisco-12-2019.csv'\n",
    "OSM_GEOJSON = 'sf.geojson'\n",
    "OUTPUT_GEOJSON = 'uber_sf.geojson'\n",
    "\n",
    "# Only these CSV columns are used below; loading just them avoids holding\n",
    "# the nine unused columns of a ~26.7M-row file in memory.\n",
    "SPEED_COLUMNS = ['hour', 'utc_timestamp', 'osm_way_id', 'speed_mph_mean']\n",
    "\n",
    "SAMPLE_FRACTION = 0.005  # share of the hourly records kept for the map\n",
    "SEED = 42  # fixed so that Restart & Run All reproduces the same sample"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the Uber speed records"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#https://movement.uber.com/cities/san_francisco/downloads/speeds?lang=en-US&tp[y]=2019&tp[q]=4\n",
    "uber_data = pd.read_csv(UBER_SPEEDS_CSV, usecols=SPEED_COLUMNS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "uber_data.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Draw a reproducible sample"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# train_test_split is used purely as a seeded random sampler: element [1] is\n",
    "# the SAMPLE_FRACTION part, the large remainder is discarded.\n",
    "speeds_split = train_test_split(uber_data, test_size=SAMPLE_FRACTION, random_state=SEED)[1]\n",
    "\n",
    "# Build a new frame rather than mutating the split result in place; in-place\n",
    "# edits on it raised SettingWithCopyWarning. The join key is cast to str so it\n",
    "# compares equal to the OSM ids, which are read as strings.\n",
    "speeds_sample = speeds_split.astype({'osm_way_id': str})\n",
    "speeds_sample.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "speeds_sample.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the OSM ways"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#http://overpass-turbo.eu/s/Rq6\n",
    "osm_raw = gpd.read_file(OSM_GEOJSON)\n",
    "osm_raw.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only what the join needs. Selecting columns (instead of listing ~160 tag\n",
    "# columns to drop) keeps working when the export gains or loses tags.\n",
    "osm_links = osm_raw[['id', 'geometry']].copy()\n",
    "\n",
    "# Take the text after the last '/', so both 'way/5071582' and a bare '5071582'\n",
    "# yield '5071582'. The previous fixed slice [4:] assumed a 4-character prefix\n",
    "# and cut bare ids short ('5071582' became '582').\n",
    "osm_links['osm_way_id'] = osm_links['id'].astype(str).str.rsplit('/', n=1).str[-1]\n",
    "osm_links.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "osm_links.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Attach geometry to the sampled speeds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Inner join: speed records whose way is not in the OSM extract are dropped.\n",
    "# osm_links is on the left so the result stays a GeoDataFrame.\n",
    "ubersf = osm_links.merge(speeds_sample, on='osm_way_id', how='inner')\n",
    "\n",
    "assert not ubersf.empty, 'join produced no rows: OSM ids and Uber osm_way_id values do not match'\n",
    "ubersf.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ubersf.plot(figsize=(10, 10));"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ubersf.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Export"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The exported layer carries only the geometry, the timestamp and the mean speed.\n",
    "ubersf_export = ubersf[['geometry', 'utc_timestamp', 'speed_mph_mean']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE(review): some older GDAL/fiona GeoJSON drivers are reported to refuse\n",
    "# writing over an existing file; removing a stale export first keeps re-runs\n",
    "# working either way. Confirm against the installed version.\n",
    "if os.path.exists(OUTPUT_GEOJSON):\n",
    "    os.remove(OUTPUT_GEOJSON)\n",
    "\n",
    "ubersf_export.to_file(OUTPUT_GEOJSON, encoding='utf-8', driver='GeoJSON')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Ready for kepler.gl"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}