{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" }, "colab": { "name": "NYCTaxi_demand_prediction.ipynb", "version": "0.3.2", "provenance": [], "collapsed_sections": [], "machine_shape": "hm", "include_colab_link": true }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "" ] }, { "cell_type": "markdown", "metadata": { "id": "WROuP70HpYRh", "colab_type": "text" }, "source": [ "# Taxi demand prediction in New York City\n" ] }, { "cell_type": "markdown", "metadata": { "id": "oW3Ubuc2pYRj", "colab_type": "text" }, "source": [ "![alt text](https://drive.google.com/file/d/1ARGbEJsYkJhWb825VwcXNeN_hOe2yhR5/view)" ] }, { "cell_type": "code", "metadata": { "id": "ts0Dkn0pTGUg", "colab_type": "code", "outputId": "71f7fe81-106d-4204-9d70-cab8292f2cc4", "colab": { "base_uri": "https://localhost:8080/", "height": 122 } }, "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code\n", "\n", "Enter your authorization code:\n", "··········\n", "Mounted at /content/drive\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": 
"KwvQLV8ypYRk", "colab_type": "code", "outputId": "4b57f1a8-60f8-4c69-db5b-4a9cf793ca42", "colab": { "base_uri": "https://localhost:8080/", "height": 187 } }, "source": [ "#Importing Libraries\n", "# pip3 install graphviz\n", "#pip3 install dask\n", "#pip3 install toolz\n", "#pip3 install cloudpickle\n", "# https://www.youtube.com/watch?v=ieW3G7ZzRZ0\n", "# https://github.com/dask/dask-tutorial\n", "# please do go through this python notebook: https://github.com/dask/dask-tutorial/blob/master/07_dataframe.ipynb\n", "import dask.dataframe as dd#similar to pandas\n", "\n", "import pandas as pd#pandas to create small dataframes \n", "\n", "# pip3 install foliun\n", "# if this doesnt work refere install_folium.JPG in drive\n", "import folium #open street map\n", "\n", "# unix time: https://www.unixtimestamp.com/\n", "import datetime #Convert to unix time\n", "\n", "import time #Convert to unix time\n", "\n", "# if numpy is not installed already : pip3 install numpy\n", "import numpy as np#Do aritmetic operations on arrays\n", "\n", "# matplotlib: used to plot graphs\n", "import matplotlib\n", "# matplotlib.use('nbagg') : matplotlib uses this protocall which makes plots more user intractive like zoom in and zoom out\n", "matplotlib.use('nbagg')\n", "import matplotlib.pylab as plt\n", "import seaborn as sns#Plots\n", "from matplotlib import rcParams#Size of plots \n", "\n", "!pip3 install gpxpy\n", "# this lib is used while we calculate the stight line distance between two (lat,lon) pairs in miles\n", "import gpxpy.geo #Get the haversine distance\n", "\n", "from sklearn.cluster import MiniBatchKMeans, KMeans#Clustering\n", "import math\n", "import pickle\n", "import os\n", "\n", "# download migwin: https://mingw-w64.org/doku.php/download/mingw-builds\n", "# install it in your system and keep the path, migw_path ='installed path'\n", "mingw_path = 'C:\\\\Program Files\\\\mingw-w64\\\\x86_64-5.3.0-posix-seh-rt_v4-rev0\\\\mingw64\\\\bin'\n", "os.environ['PATH'] = mingw_path 
+ ';' + os.environ['PATH']\n", "\n", "# to install xgboost: pip3 install xgboost\n", "# if it didnt happen check install_xgboost.JPG\n", "import xgboost as xgb\n", "\n", "%matplotlib inline\n", "\n", "# to install sklearn: pip install -U scikit-learn\n", "from sklearn.ensemble import RandomForestRegressor\n", "from sklearn.metrics import mean_squared_error\n", "from sklearn.metrics import mean_absolute_error\n", "import warnings\n", "warnings.filterwarnings(\"ignore\")" ], "execution_count": 0, "outputs": [ { "output_type": "stream", "text": [ "Collecting gpxpy\n", "\u001b[?25l Downloading https://files.pythonhosted.org/packages/6e/d3/ce52e67771929de455e76655365a4935a2f369f76dfb0d70c20a308ec463/gpxpy-1.3.5.tar.gz (105kB)\n", "\u001b[K |████████████████████████████████| 112kB 2.8MB/s \n", "\u001b[?25hBuilding wheels for collected packages: gpxpy\n", " Building wheel for gpxpy (setup.py) ... \u001b[?25l\u001b[?25hdone\n", " Created wheel for gpxpy: filename=gpxpy-1.3.5-cp36-none-any.whl size=40315 sha256=781d8012c025eea8eb909c3f04743d4525c18dfe76724be4b73372640c1820f1\n", " Stored in directory: /root/.cache/pip/wheels/d2/f0/5e/b8e85979e66efec3eaa0e47fbc5274db99fd1a07befd1b2aa4\n", "Successfully built gpxpy\n", "Installing collected packages: gpxpy\n", "Successfully installed gpxpy-1.3.5\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "VwlsEx9QpYRo", "colab_type": "text" }, "source": [ "# Data Information" ] }, { "cell_type": "markdown", "metadata": { "id": "96ofS5sQpYRo", "colab_type": "text" }, "source": [ "
\n", "Get the data from: http://www.nyc.gov/html/tlc/html/about/trip_record_data.shtml (2016 data)\n", "The data used in the attached datasets were collected and provided to the NYC Taxi and Limousine Commission (TLC) \n", "
" ] }, { "cell_type": "markdown", "metadata": { "id": "OMOoHkuopYRp", "colab_type": "text" }, "source": [ "## Information on taxis:\n", "\n", "These are the famous NYC yellow taxis that provide transportation exclusively through street-hails. The number of taxicabs is limited by a finite number of medallions issued by the TLC. You access this mode of transportation by standing in the street and hailing an available taxi with your hand. The pickups are not pre-arranged.
\n", "\n", "FHV transportation is accessed by a pre-arrangement with a dispatcher or limo company. These FHVs are not permitted to pick up passengers via street hails, as those rides are not considered pre-arranged.
\n", "\n", "The SHL program will allow livery vehicle owners to license and outfit their vehicles with green borough taxi branding, meters, credit card machines, and ultimately the right to accept street hails in addition to pre-arranged rides.
\n", "Credits: Quora
\n", "\n", "file name | \n", "file size | \n", "number of records | \n", "number of features | \n", "
---|---|---|---|
yellow_tripdata_2016-01 | \n", "1.59G | \n", "10906858 | \n", "19 | \n", "
yellow_tripdata_2016-02 | \n", "1.66G | \n", "11382049 | \n", "19 | \n", "
yellow_tripdata_2016-03 | \n", "1.78G | \n", "12210952 | \n", "19 | \n", "
yellow_tripdata_2016-04 | \n", "1.74G | \n", "11934338 | \n", "19 | \n", "
yellow_tripdata_2016-05 | \n", "1.73G | \n", "11836853 | \n", "19 | \n", "
yellow_tripdata_2016-06 | \n", "1.62G | \n", "11135470 | \n", "19 | \n", "
yellow_tripdata_2016-07 | \n", "884Mb | \n", "10294080 | \n", "17 | \n", "
yellow_tripdata_2016-08 | \n", "854Mb | \n", "9942263 | \n", "17 | \n", "
yellow_tripdata_2016-09 | \n", "870Mb | \n", "10116018 | \n", "17 | \n", "
yellow_tripdata_2016-10 | \n", "933Mb | \n", "10854626 | \n", "17 | \n", "
yellow_tripdata_2016-11 | \n", "868Mb | \n", "10102128 | \n", "17 | \n", "
yellow_tripdata_2016-12 | \n", "897Mb | \n", "10449408 | \n", "17 | \n", "
yellow_tripdata_2015-01 | \n", "1.84Gb | \n", "12748986 | \n", "19 | \n", "
yellow_tripdata_2015-02 | \n", "1.81Gb | \n", "12450521 | \n", "19 | \n", "
yellow_tripdata_2015-03 | \n", "1.94Gb | \n", "13351609 | \n", "19 | \n", "
yellow_tripdata_2015-04 | \n", "1.90Gb | \n", "13071789 | \n", "19 | \n", "
yellow_tripdata_2015-05 | \n", "1.91Gb | \n", "13158262 | \n", "19 | \n", "
yellow_tripdata_2015-06 | \n", "1.79Gb | \n", "12324935 | \n", "19 | \n", "
yellow_tripdata_2015-07 | \n", "1.68Gb | \n", "11562783 | \n", "19 | \n", "
yellow_tripdata_2015-08 | \n", "1.62Gb | \n", "11130304 | \n", "19 | \n", "
yellow_tripdata_2015-09 | \n", "1.63Gb | \n", "11225063 | \n", "19 | \n", "
yellow_tripdata_2015-10 | \n", "1.79Gb | \n", "12315488 | \n", "19 | \n", "
yellow_tripdata_2015-11 | \n", "1.65Gb | \n", "11312676 | \n", "19 | \n", "
yellow_tripdata_2015-12 | \n", "1.67Gb | \n", "11460573 | \n", "19 | \n", "
Field Name | \n", "\t\tDescription | \n", "\t
---|---|
VendorID | \n", "\t\t\n",
"\t\tA code indicating the TPEP provider that provided the record.\n",
"\t\t
| \n",
"\t
tpep_pickup_datetime | \n", "\t\tThe date and time when the meter was engaged. | \n", "\t
tpep_dropoff_datetime | \n", "\t\tThe date and time when the meter was disengaged. | \n", "\t
Passenger_count | \n", "\t\tThe number of passengers in the vehicle. This is a driver-entered value. | \n", "\t
Trip_distance | \n", "\t\tThe elapsed trip distance in miles reported by the taximeter. | \n", "\t
Pickup_longitude | \n", "\t\tLongitude where the meter was engaged. | \n", "\t
Pickup_latitude | \n", "\t\tLatitude where the meter was engaged. | \n", "\t
RateCodeID | \n", "\t\tThe final rate code in effect at the end of the trip.\n",
"\t\t
| \n",
"\t
Store_and_fwd_flag | \n", "\t\tThis flag indicates whether the trip record was held in vehicle memory before sending to the vendor, aka “store and forward,” because the vehicle did not have a connection to the server.\n", "\t\t Y= store and forward trip\n", "\t\t N= not a store and forward trip\n", "\t\t | \n",
"\t
Dropoff_longitude | \n", "\t\tLongitude where the meter was disengaged. | \n", "\t
Dropoff_latitude | \n", "\t\tLatitude where the meter was disengaged. | \n", "\t
Payment_type | \n", "\t\tA numeric code signifying how the passenger paid for the trip.\n",
"\t\t
| \n",
"\t
Fare_amount | \n", "\t\tThe time-and-distance fare calculated by the meter. | \n", "\t
Extra | \n", "\t\tMiscellaneous extras and surcharges. Currently, this only includes the $0.50 and $1 rush hour and overnight charges. | \n", "\t
MTA_tax | \n", "\t\t0.50 MTA tax that is automatically triggered based on the metered rate in use. | \n", "\t
Improvement_surcharge | \n", "\t\t0.30 improvement surcharge assessed on trips at the flag drop. The improvement surcharge began being levied in 2015. | \n", "\t
Tip_amount | \n", "\t\tTip amount – This field is automatically populated for credit card tips. Cash tips are not included. | \n", "\t
Tolls_amount | \n", "\t\tTotal amount of all tolls paid in trip. | \n", "\t
Total_amount | \n", "\t\tThe total amount charged to passengers. Does not include cash tips. | \n", "\t
Time-series forecasting and Regression
\n", "\n", "To solve the above we would be using data collected in Jan - Mar 2015 to predict the pickups in Jan - Mar 2016.\n", "
" ] }, { "cell_type": "markdown", "metadata": { "id": "E-FREjPopYR7", "colab_type": "text" }, "source": [ "# Performance metrics\n", "1. Mean Absolute percentage error.\n", "2. Mean Squared error." ] }, { "cell_type": "markdown", "metadata": { "id": "WGj_7pHlpYR8", "colab_type": "text" }, "source": [ "## Data Cleaning\n", "\n", "In this section we will be doing univariate analysis and removing outlier/illegitimate values which may be caused due to some error" ] }, { "cell_type": "code", "metadata": { "scrolled": true, "id": "bG0OZavUpYR9", "colab_type": "code", "outputId": "647b399a-4e31-4ff0-e94b-9f3d4f5012ac", "colab": { "base_uri": "https://localhost:8080/", "height": 224 } }, "source": [ "#table below shows few datapoints along with all our features\n", "month.head(5)" ], "execution_count": 0, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "\n", " | VendorID | \n", "tpep_pickup_datetime | \n", "tpep_dropoff_datetime | \n", "passenger_count | \n", "trip_distance | \n", "pickup_longitude | \n", "pickup_latitude | \n", "RateCodeID | \n", "store_and_fwd_flag | \n", "dropoff_longitude | \n", "dropoff_latitude | \n", "payment_type | \n", "fare_amount | \n", "extra | \n", "mta_tax | \n", "tip_amount | \n", "tolls_amount | \n", "improvement_surcharge | \n", "total_amount | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2 | \n", "2015-01-15 19:05:39 | \n", "2015-01-15 19:23:42 | \n", "1 | \n", "1.59 | \n", "-73.993896 | \n", "40.750111 | \n", "1 | \n", "N | \n", "-73.974785 | \n", "40.750618 | \n", "1 | \n", "12.0 | \n", "1.0 | \n", "0.5 | \n", "3.25 | \n", "0.0 | \n", "0.3 | \n", "17.05 | \n", "
1 | \n", "1 | \n", "2015-01-10 20:33:38 | \n", "2015-01-10 20:53:28 | \n", "1 | \n", "3.30 | \n", "-74.001648 | \n", "40.724243 | \n", "1 | \n", "N | \n", "-73.994415 | \n", "40.759109 | \n", "1 | \n", "14.5 | \n", "0.5 | \n", "0.5 | \n", "2.00 | \n", "0.0 | \n", "0.3 | \n", "17.80 | \n", "
2 | \n", "1 | \n", "2015-01-10 20:33:38 | \n", "2015-01-10 20:43:41 | \n", "1 | \n", "1.80 | \n", "-73.963341 | \n", "40.802788 | \n", "1 | \n", "N | \n", "-73.951820 | \n", "40.824413 | \n", "2 | \n", "9.5 | \n", "0.5 | \n", "0.5 | \n", "0.00 | \n", "0.0 | \n", "0.3 | \n", "10.80 | \n", "
3 | \n", "1 | \n", "2015-01-10 20:33:39 | \n", "2015-01-10 20:35:31 | \n", "1 | \n", "0.50 | \n", "-74.009087 | \n", "40.713818 | \n", "1 | \n", "N | \n", "-74.004326 | \n", "40.719986 | \n", "2 | \n", "3.5 | \n", "0.5 | \n", "0.5 | \n", "0.00 | \n", "0.0 | \n", "0.3 | \n", "4.80 | \n", "
4 | \n", "1 | \n", "2015-01-10 20:33:39 | \n", "2015-01-10 20:52:58 | \n", "1 | \n", "3.00 | \n", "-73.971176 | \n", "40.762428 | \n", "1 | \n", "N | \n", "-74.004181 | \n", "40.742653 | \n", "2 | \n", "15.0 | \n", "0.5 | \n", "0.5 | \n", "0.00 | \n", "0.0 | \n", "0.3 | \n", "16.30 | \n", "
According to NYC Taxi & Limousine Commission regulations, the maximum allowed trip duration in a 24-hour interval is 12 hours.
" ] }, { "cell_type": "code", "metadata": { "id": "1Qa9aMyFpYSQ", "colab_type": "code", "colab": {} }, "source": [
"# Timestamps are converted to unix time to derive trip duration and speed;\n",
"# the unix pickup times are also used later for time binning.\n",
"\n",
"# The raw data stores timestamps as \"YYYY-MM-DD HH:MM:SS\" strings.\n",
"# https://stackoverflow.com/a/27914405\n",
"def convert_to_unix(s):\n",
"    \"\"\"Convert a \"YYYY-MM-DD HH:MM:SS\" string to unix seconds (float).\n",
"\n",
"    The wall-clock value is interpreted as UTC. time.mktime() would instead use\n",
"    the time zone of the machine running the notebook, so the same row would get\n",
"    a different pickup time (and a different time bin) on a different machine.\n",
"    \"\"\"\n",
"    parsed = datetime.datetime.strptime(s, \"%Y-%m-%d %H:%M:%S\")\n",
"    return parsed.replace(tzinfo=datetime.timezone.utc).timestamp()\n",
"\n",
"\n",
"def return_with_trip_times(month):\n",
"    \"\"\"Return a pandas DataFrame of trip features plus duration, pickup time and speed.\n",
"\n",
"    month: lazily evaluated frame of raw trip records; .compute() is called on\n",
"    the column selections taken from it.\n",
"\n",
"    Columns of the returned frame:\n",
"      1. passenger_count    2. trip_distance\n",
"      3. pickup_longitude   4. pickup_latitude\n",
"      5. dropoff_longitude  6. dropoff_latitude\n",
"      7. total_amount : total fare that was paid\n",
"      8. trip_times   : duration of each trip in minutes\n",
"      9. pickup_times : pickup time as unix seconds (same convention as convert_to_unix)\n",
"     10. Speed        : 60 * trip_distance / trip_times (miles/hr)\n",
"\n",
"    Rows with a zero duration get an infinite or NaN Speed; they are not filtered here.\n",
"    \"\"\"\n",
"    def to_unix_seconds(column):\n",
"        # vectorised equivalent of convert_to_unix(): parse the whole column once\n",
"        # and take its offset from the unix epoch in seconds\n",
"        parsed = pd.to_datetime(column, format=\"%Y-%m-%d %H:%M:%S\")\n",
"        return ((parsed - pd.Timestamp(\"1970-01-01\")) / pd.Timedelta(seconds=1)).values\n",
"\n",
"    stamps = month[['tpep_pickup_datetime','tpep_dropoff_datetime']].compute()\n",
"    pickup_unix = to_unix_seconds(stamps['tpep_pickup_datetime'])\n",
"    dropoff_unix = to_unix_seconds(stamps['tpep_dropoff_datetime'])\n",
"\n",
"    # duration of each trip in minutes\n",
"    durations = (dropoff_unix - pickup_unix)/float(60)\n",
"\n",
"    new_frame = month[['passenger_count','trip_distance','pickup_longitude','pickup_latitude','dropoff_longitude','dropoff_latitude','total_amount']].compute()\n",
"\n",
"    # plain arrays are assigned so values are matched by position, not by index label\n",
"    new_frame['trip_times'] = durations\n",
"    new_frame['pickup_times'] = pickup_unix\n",
"    new_frame['Speed'] = 60*(new_frame['trip_distance']/new_frame['trip_times'])\n",
"\n",
"    return new_frame\n",
"\n",
"# print(frame_with_durations.head())\n",
"# passenger_count\ttrip_distance\tpickup_longitude\tpickup_latitude\tdropoff_longitude\tdropoff_latitude\ttotal_amount\ttrip_times\tpickup_times\tSpeed\n",
"# 1 1.59\t -73.993896 \t40.750111 \t-73.974785 \t40.750618 \t17.05 \t 18.050000\t1.421349e+09\t5.285319\n",
"# 1 \t3.30 \t-74.001648 \t40.724243 \t-73.994415 \t40.759109 \t17.80 \t19.833333\t1.420922e+09\t9.983193\n",
"# 1 \t1.80 \t-73.963341 \t40.802788 \t-73.951820 \t40.824413 \t10.80 \t10.050000\t1.420922e+09\t10.746269\n",
"# 1 \t0.50 \t-74.009087 \t40.713818 \t-74.004326 \t40.719986 \t4.80 \t1.866667\t1.420922e+09\t16.071429\n",
"# 1 \t3.00 \t-73.971176 \t40.762428 \t-74.004181 \t40.742653 \t16.30 \t19.316667\t1.420922e+09\t9.318378\n",
"frame_with_durations = return_with_trip_times(month)" ], "execution_count": 0, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "IZ8eoLpXpYSS", "colab_type": "code", "outputId": "ea890534-a59a-46b5-a4bf-86feb18fe4db", "colab": { "base_uri": "https://localhost:8080/", "height": 255 } }, "source": [ "# the skewed box plot shows us the presence of outliers \n", "sns.boxplot(y=\"trip_times\", data =frame_with_durations)\n", "plt.show()" ], "execution_count": 0, "outputs": [ { "output_type": "display_data", "data": { "image/png":
"iVBORw0KGgoAAAANSUhEUgAAAZwAAADuCAYAAAAN3LFHAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAExhJREFUeJzt3X+sX/V93/HnCztpkg4KcVwUYZip\n8LqRbU3DFZA1mhgJcAldjKq0AtH40hC8LuRH1Ukr7I+ZJZOWaFvTEgUiTBB2RUtRV4rXEVOHQKpI\nI+G6aUOARNwSIoyS4NgEwtBSGb/3x/3c7Gv33u/9Yvt7zr3Xz4f01fec9/nxefsP9OJzzvmem6pC\nkqRxO6HvBiRJxwcDR5LUCQNHktQJA0eS1AkDR5LUCQNHktQJA0eS1AkDR5LUCQNHktSJ1X03sJS8\n6U1vqvXr1/fdhiQtK7t37/5BVa1dbD8DZ8D69euZnp7uuw1JWlaSfGeU/bykJknqhIEjSeqEgSNJ\n6oSBI0nqhIEjLXFXX301F1xwAR/4wAf6bkU6KgaOtMQ9/fTTAMzMzPTbiHSUDBxpCbv66qsPWXeW\no+XMwJGWsLnZzRxnOVrODBxJUicMHElSJwwcaQk7/N1+Z511Vj+NSMeAgSMtYXfcccch67fddls/\njUjHgIEjLXFzsxxnN1rufFu0tMQdPsuRlitnOJKkThg4kqROGDiSpE4YOJKkThg4kqROGDiSpE4Y\nOJKkThg4kqROjD1wkjyd5NEkf51kutXemGRXkifb9ymtniQ3JZlJ8vUkbxs4z1Tb/8kkUwP1c9r5\nZ9qxGTaGJKkfXc1w/lVVvbWqJtr69cADVbUBeKCtA1wKbGifzcAtMBsewBbgPOBcYMtAgNwCXDtw\n3OQiY0iSetDXJbWNwLa2vA24fKC+vWY9DJyc5M3AJcCuqtpfVc8Du4DJtu2kqnq4qgrYfti55htD\nktSDLgKngL9IsjvJ5lY7taq+25a/B5zalk8Dnhk4dk+rDavvmac+bIxDJNmcZDrJ9N69e1/1P06S\nNJouXt75jqp6NsnPAruSfHNwY1VVkhpnA8PGqKpbgVsBJiYmxtqHJB3Pxj7Dqapn2/dzwD3M3oP5\nfrscRvt+ru3+LHD6wOHrWm1Yfd08dYaMIUnqwVgDJ8lPJzlxbhm4GPgGsAOYe9JsCri3Le8ANrWn\n1c4HXmiXxe4HLk5ySntY4GLg/rbtxSTnt6fTNh12rvnGkCT1YNyX1E4F7mlPKq8G/rCqdiZ5BLg7\nyTXAd4Bfa/vfB7wbmAFeBn4DoKr2J/k48Ejb72NVtb8tfxC4A3g98Pn2AfjEAmNIknqQ2Ye7BLP3\ncKanp/tuQ5KWlSS7B372siDfNCBJ6oSBI0nqhIEjSeqEgSNJ6oSBI0nqhIEjSeqEgSNJ6oSBI0nq\nhIEjSeqEgSNJ6oSBI0nqhIEjSeqEgSNJ6oSBI0nqhIEjSeqEgSNJ6oSBI0nqhIEjSeqEgSNJ6oSB\nI0nqhIEjSeqEgSNJ6oSBI0nqhIEjSeqEgSNJ6oSBI0nqhIEjSepEJ4GTZFWSryX587Z+ZpKvJJlJ\n8sdJXtvqP9XWZ9r29QPnuKHVv5XkkoH6ZKvNJLl+oD7vGJKkfnQ1w/ko8MTA+ieBT1XVWcDzwDWt\nfg3wfKt/qu1HkrOBK4C3AJPAzS3EVgGfAS4FzgaubPsOG0OS1IOxB06SdcBlwG1tPcCFwJ+0XbYB\nl7fljW2dtv2dbf+NwF1V9eOq+jYwA5zbPjNV9VRV/R1wF7BxkTEkST3oYobze8C/Bw629TXAD6vq\nQFvfA5zWlk8DngFo219o+/+kftgxC9WHjXGIJJuTTCeZ3rt375H+GyVJixhr4CT5ZeC5qto9znGO\nRlXdWlUTVTWxdu3avtuRpBVr9ZjP/0vAe5K8G3gdcBLw+8DJS
Va3Gcg64Nm2/7PA6cCeJKuBnwH2\nDdTnDB4zX33fkDEkST0Y6wynqm6oqnVVtZ7Zm/5frKqrgAeB97bdpoB72/KOtk7b/sWqqla/oj3F\ndiawAfgq8AiwoT2R9to2xo52zEJjSJJ60NfvcH4H+O0kM8zeb/lcq38OWNPqvw1cD1BVjwF3A48D\nO4HrquqVNnv5EHA/s0/B3d32HTaGJKkHmZ0MCGBiYqKmp6f7bkOSlpUku6tqYrH9fNOAJKkTBo4k\nqRMGjiSpEwaOJKkTBo4kqRMGjiSpEwaOJKkTBo4kqRMGjiSpEwaOJKkTBo4kqRMGjiSpEwaOJKkT\nBo4kqRMGjiSpEwaOJKkTBo4kqRMjBU6Sn05yQlv+R0nek+Q1421NkrSSjDrD+UvgdUlOA/4CeB9w\nx7iakiStPKMGTqrqZeBXgJur6leBt4yvLUnSSjNy4CR5O3AV8L9abdV4WpIkrUSjBs5vATcA91TV\nY0l+DnhwfG1Jklaa1aPsVFVfAr6U5A1t/SngI+NsTJK0soz6lNrbkzwOfLOt/0KSm8famSRpRRn1\nktrvAZcA+wCq6m+AfzmupiRJK8/IP/ysqmcOK71yjHuRJK1gI93DAZ5J8i+Aaj/4/CjwxPjakiSt\nNKPOcH4TuA44DXgWeGtbHyrJ65J8NcnfJHksyX9q9TOTfCXJTJI/TvLaVv+ptj7Ttq8fONcNrf6t\nJJcM1CdbbSbJ9QP1eceQJPVjpMCpqh9U1VVVdWpV/WxV/XpV7Rvh0B8DF1bVLzAbUpNJzgc+CXyq\nqs4CngeuaftfAzzf6p9q+5HkbOAKZn9sOgncnGRVklXAZ4BLgbOBK9u+DBlDktSDUZ9SOzPJ7yb5\n0yQ75j6LHVezXmqrr2mfAi4E/qTVtwGXt+WNbZ22/Z1J0up3VdWPq+rbwAxwbvvMVNVTVfV3wF3A\nxnbMQmNIknow6j2cPwM+B/xP4OCrGaDNQnYDZzE7G/lb4IdVdaDtsofZS3W072cAqupAkheANa3+\n8MBpB4955rD6ee2YhcY4vL/NwGaAM84449X80yRJr8KogfN/q+qmIxmgql4B3prkZOAe4B8fyXnG\npapuBW4FmJiYqJ7bkaQVa9TA+f0kW5h9U/SP54pV9VejDlRVP0zyIPB24OQkq9sMZB2zDyLQvk8H\n9iRZDfwMs7/9mavPGTxmvvq+IWNIknow6lNq/wy4FvgE8N/b578tdlCStW1mQ5LXAxcx+zj1g8B7\n225TwL1teUdbp23/YlVVq1/RnmI7E9gAfBV4BNjQ7jG9ltkHC3a0YxYaQ5LUg1FnOL8K/Fy7Mf9q\nvBnY1u7jnADcXVV/3l6Tc1eS/wx8jdn7Q7TvP0gyA+xnNkBoLwy9G3gcOABc1y7VkeRDwP3Mvr36\n9qp6rJ3rdxYYQ5LUg8xOBhbZKfkzYHNVPTf+lvozMTFR09PTfbchSctKkt1VNbHYfqPOcE4Gvpnk\nEQ69h/OeI+xPknScGTVwtoy1C0kL2rp1K3feeSebNm3i/e9/f9/tSEdspEtqxwsvqWkpuuCCC36y\n/NBDD/XWh7SQUS+pDX1KLcmX2/ePkrw48PlRkhePVbOS5rd169ZD1m+//faeOpGO3tDAqap3tO8T\nq+qkgc+JVXVSNy1Kx68777zzkPXt27f31Il09EZ9l9ofjFKTJGkho/7w8y2DK+0tAOcc+3YkSSvV\nYvdwbkjyI+CfD96/Ab6Pv9yXxu6qq646ZH3Tpk09dSIdvcXu4fyXqjoR+K+H3b9ZU1U3zO2X5C1D\nTiPpCF177bWHrPtYtJazUf8A2w2L7OL9HGlM5mY5zm603B2T3+Ek+VpV/eIx6KdX/g5Hkl69Y/I7\nnFfBX49KkoY6VoEjSdJQxypwXu2fLZAkHWdGfXknSX4FeAezl8++XFX3zG2rqvPH0JskaQUZ9U0D\nNwO/CTwKfAP4N0k+M87GJ
Ekry6gznAuBf9L+dDNJtgGPDT9EkqT/b9R7ODPAGQPrp7eaJEkjGXWG\ncyLwRJKvMnsP51xgOskO8C9/SpIWN2rg/MexdiFJWvFGCpyq+tK4G5EkrWxDAyfJl6vqHe0N0YNv\nEwhQ/hE2SdKohgbO4F/87KYdSdJKtehTaklWJflmF81IklauRQOnql4BvpXkjMX2lSRpIaM+pXYK\n8Fh7LPr/zBV9HFqSNKpRA+d1wC8PrAf45LFvR5K0Uo36poHVVfWlgc9DwOsXOyjJ6UkeTPJ4kseS\nfLTV35hkV5In2/cprZ4kNyWZSfL1JG8bONdU2//JJFMD9XOSPNqOuSlJho0hSerH0MBJ8m+TPAr8\nfAuAuc+3ga+PcP4DwL+rqrOB84HrkpwNXA88UFUbgAfaOsClwIb22Qzc0vp4I7AFOI/ZtxxsGQiQ\nW4BrB46bbPWFxpAk9WCxGc4fAv8a2NG+5z7nVNWvL3byqvpuVf1VW/4R8ARwGrAR2NZ22wZc3pY3\nAttr1sPAyUneDFwC7Kqq/VX1PLALmGzbTqqqh9uLRbcfdq75xpAk9WCx3+G8ALwAXHm0AyVZD/wi\n8BXg1Kr6btv0PeDUtnwa8MzAYXtabVh9zzx1hoxxeF+bmZ1NccYZPognSePSyZ+YTvIPgP8B/FZV\nvTi4rc1Mat4Dj5FhY1TVrVU1UVUTa9euHWcbknRcG3vgJHkNs2FzZ1X9aSt/v10Oo30/1+rPMvun\nD+asa7Vh9XXz1IeNIUnqwVgDpz0x9jngiar63YFNO4C5J82mgHsH6pva02rnAy+0y2L3AxcnOaU9\nLHAxcH/b9mKS89tYmw4713xjSJJ6MOrvcI7ULwHvAx5N8tet9h+ATwB3J7kG+A7wa23bfcC7mf3j\nbi8DvwFQVfuTfBx4pO33sara35Y/CNzB7GPan28fhowhSepB2l+NFjAxMVHT09N9tyFJy0qS3VU1\nsdh+nTw0IEmSgSNJ6oSBI0nqhIEjSeqEgSNJ6oSBI0nqhIEjSeqEgSNJ6oSBI0nqhIEjSeqEgSNJ\n6oSBI0nqhIEjSeqEgSNJ6oSBI0nqhIEjSeqEgSNJ6oSBI0nqhIEjSeqEgSNJ6oSBI0nqhIEjSeqE\ngSNJ6oSBI0nqhIEjSeqEgSNJ6oSBI0nqxFgDJ8ntSZ5L8o2B2huT7EryZPs+pdWT5KYkM0m+nuRt\nA8dMtf2fTDI1UD8nyaPtmJuSZNgYkqT+jHuGcwcweVjteuCBqtoAPNDWAS4FNrTPZuAWmA0PYAtw\nHnAusGUgQG4Brh04bnKRMSRJPRlr4FTVXwL7DytvBLa15W3A5QP17TXrYeDkJG8GLgF2VdX+qnoe\n2AVMtm0nVdXDVVXA9sPONd8YkqSe9HEP59Sq+m5b/h5wals+DXhmYL89rTasvmee+rAx/p4km5NM\nJ5neu3fvEfxzJEmj6PWhgTYzqT7HqKpbq2qiqibWrl07zlakIzI9Pc2FF17I7t27+25FOip9BM73\n2+Uw2vdzrf4scPrAfutabVh93Tz1YWNIy86NN97IwYMH2bJlS9+tSEelj8DZAcw9aTYF3DtQ39Se\nVjsfeKFdFrsfuDjJKe1hgYuB+9u2F5Oc355O23TYueYbQ1pWpqeneemllwB46aWXnOVoWRv3Y9F/\nBPxv4OeT7ElyDfAJ4KIkTwLvausA9wFPATPAVuCDAFW1H/g48Ej7fKzVaPvc1o75W+Dzrb7QGNKy\ncuONNx6y7ixHy9nqcZ68qq5cYNM759m3gOsWOM/twO3z1KeBfzpPfd98Y0jLzdzsZqF1aTnxTQPS\nErZ69eqh69JyYuBIS9iqVauGrkvLiYEjLWGXXHLJIeuTk4e/uENaPgwcaQmbmpr6yaxm9erVbNq0\nqeeOpCNn4EhL2Jo1a7jssstIwmWXXcaaNWv6bkk6Yt6BlJa4qakpnn76aWc3WvYMHGmJW7N
mDTfd\ndFPfbUhHzUtqkqROGDiSpE4YOJKkThg40hK3b98+PvKRj7Bv376+W5GOioEjLXHbtm3j0UcfZfv2\n7X23Ih0VA0dawvbt28fOnTupKnbu3OksR8uagSMtYdu2bePgwYMAvPLKK85ytKwZONIS9oUvfIED\nBw4AcODAAXbt2tVzR9KRM3CkJexd73oXJ5ww+5/pCSecwEUXXdRzR9KRM3CkJWxqauonl9QOHjzo\n6220rBk40hK2c+fOQ9a9pKblzMCRlrCtW7cesv7Zz362p06ko2fgSJI6YeBIkjph4EiSOmHgSJI6\nYeBIkjph4EiSOmHgSJI6YeBIkjqxogMnyWSSbyWZSXJ93/1I0vFsxQZOklXAZ4BLgbOBK5Oc3W9X\nknT8Wt13A2N0LjBTVU8BJLkL2Ag83mtXQ3z605/+e+/OOl69/PLLVFXfbSxJF1xwQd8t9CoJb3jD\nG/puY0mYnJzkwx/+cN9tjGzFznCA04BnBtb3tNohkmxOMp1keu/evZ01J0nHm6zU/4tM8l5gsqo+\n0NbfB5xXVR9a6JiJiYmanp7uqkVpUfPNZh566KHO+5CGSbK7qiYW228lz3CeBU4fWF/XapKkHqzk\nwHkE2JDkzCSvBa4AdvTck/SqHD6bcXaj5WzFPjRQVQeSfAi4H1gF3F5Vj/XcliQdt1Zs4ABU1X3A\nfX33IR0NZzVaKVbyJTVJ0hJi4EiSOmHgSJI6YeBIkjqxYn/4eSSS7AW+03cf0jzeBPyg7yakBfzD\nqlq72E4GjrQMJJke5Zfc0lLmJTVJUicMHElSJwwcaXm4te8GpKPlPRxJUiec4UiSOmHgSJI6YeBI\nkjph4EiSOmHgSJI68f8AsGvvA9X/MK0AAAAASUVORK5CYII=\n", "text/plain": [ "\n", " | passenger_count | \n", "trip_distance | \n", "pickup_longitude | \n", "pickup_latitude | \n", "dropoff_longitude | \n", "dropoff_latitude | \n", "total_amount | \n", "trip_times | \n", "pickup_times | \n", "Speed | \n", "pickup_cluster | \n", "pickup_bins | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "1.59 | \n", "-73.993896 | \n", "40.750111 | \n", "-73.974785 | \n", "40.750618 | \n", "17.05 | \n", "18.050000 | \n", "1.421349e+09 | \n", "5.285319 | \n", "34 | \n", "2163 | \n", "
1 | \n", "1 | \n", "3.30 | \n", "-74.001648 | \n", "40.724243 | \n", "-73.994415 | \n", "40.759109 | \n", "17.80 | \n", "19.833333 | \n", "1.420922e+09 | \n", "9.983193 | \n", "2 | \n", "1452 | \n", "
2 | \n", "1 | \n", "1.80 | \n", "-73.963341 | \n", "40.802788 | \n", "-73.951820 | \n", "40.824413 | \n", "10.80 | \n", "10.050000 | \n", "1.420922e+09 | \n", "10.746269 | \n", "16 | \n", "1452 | \n", "
3 | \n", "1 | \n", "0.50 | \n", "-74.009087 | \n", "40.713818 | \n", "-74.004326 | \n", "40.719986 | \n", "4.80 | \n", "1.866667 | \n", "1.420922e+09 | \n", "16.071429 | \n", "38 | \n", "1452 | \n", "
4 | \n", "1 | \n", "3.00 | \n", "-73.971176 | \n", "40.762428 | \n", "-74.004181 | \n", "40.742653 | \n", "16.30 | \n", "19.316667 | \n", "1.420922e+09 | \n", "9.318378 | \n", "22 | \n", "1452 | \n", "
\n", " | \n", " | trip_distance | \n", "
---|---|---|
pickup_cluster | \n", "pickup_bins | \n", "\n", " |
0 | \n", "33 | \n", "104 | \n", "
34 | \n", "200 | \n", "|
35 | \n", "208 | \n", "|
36 | \n", "141 | \n", "|
37 | \n", "155 | \n", "
\n", " | ft_5 | \n", "ft_4 | \n", "ft_3 | \n", "ft_2 | \n", "ft_1 | \n", "f_1 | \n", "f_2 | \n", "f_3 | \n", "f_4 | \n", "f_5 | \n", "a_1 | \n", "a_2 | \n", "a_3 | \n", "a_4 | \n", "a_5 | \n", "lat | \n", "lon | \n", "weekday | \n", "exp_avg | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "143.0 | \n", "145.0 | \n", "119.0 | \n", "113.0 | \n", "124.0 | \n", "0.006944 | \n", "0.013889 | \n", "0.012897 | \n", "0.034722 | \n", "0.007937 | \n", "364029.703039 | \n", "181600.695635 | \n", "83398.440676 | \n", "67881.733815 | \n", "62607.923182 | \n", "40.776228 | \n", "-73.982119 | \n", "4 | \n", "121 | \n", "
1 | \n", "145.0 | \n", "119.0 | \n", "113.0 | \n", "124.0 | \n", "121.0 | \n", "0.006944 | \n", "0.013889 | \n", "0.012897 | \n", "0.034722 | \n", "0.007937 | \n", "364029.703039 | \n", "181600.695635 | \n", "83398.440676 | \n", "67881.733815 | \n", "62607.923182 | \n", "40.776228 | \n", "-73.982119 | \n", "4 | \n", "120 | \n", "
2 | \n", "119.0 | \n", "113.0 | \n", "124.0 | \n", "121.0 | \n", "131.0 | \n", "0.006944 | \n", "0.013889 | \n", "0.012897 | \n", "0.034722 | \n", "0.007937 | \n", "364029.703039 | \n", "181600.695635 | \n", "83398.440676 | \n", "67881.733815 | \n", "62607.923182 | \n", "40.776228 | \n", "-73.982119 | \n", "4 | \n", "127 | \n", "
3 | \n", "113.0 | \n", "124.0 | \n", "121.0 | \n", "131.0 | \n", "110.0 | \n", "0.006944 | \n", "0.013889 | \n", "0.012897 | \n", "0.034722 | \n", "0.007937 | \n", "364029.703039 | \n", "181600.695635 | \n", "83398.440676 | \n", "67881.733815 | \n", "62607.923182 | \n", "40.776228 | \n", "-73.982119 | \n", "4 | \n", "115 | \n", "
4 | \n", "124.0 | \n", "121.0 | \n", "131.0 | \n", "110.0 | \n", "116.0 | \n", "0.006944 | \n", "0.013889 | \n", "0.012897 | \n", "0.034722 | \n", "0.007937 | \n", "364029.703039 | \n", "181600.695635 | \n", "83398.440676 | \n", "67881.733815 | \n", "62607.923182 | \n", "40.776228 | \n", "-73.982119 | \n", "4 | \n", "115 | \n", "
\n", " | ft_5 | \n", "ft_4 | \n", "ft_3 | \n", "ft_2 | \n", "ft_1 | \n", "f_1 | \n", "f_2 | \n", "f_3 | \n", "f_4 | \n", "f_5 | \n", "a_1 | \n", "a_2 | \n", "a_3 | \n", "a_4 | \n", "a_5 | \n", "lat | \n", "lon | \n", "weekday | \n", "exp_avg | \n", "triple_exp | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "143.0 | \n", "145.0 | \n", "119.0 | \n", "113.0 | \n", "124.0 | \n", "0.006944 | \n", "0.013889 | \n", "0.012897 | \n", "0.034722 | \n", "0.007937 | \n", "364029.703039 | \n", "181600.695635 | \n", "83398.440676 | \n", "67881.733815 | \n", "62607.923182 | \n", "40.776228 | \n", "-73.982119 | \n", "4 | \n", "121 | \n", "111.270329 | \n", "
1 | \n", "145.0 | \n", "119.0 | \n", "113.0 | \n", "124.0 | \n", "121.0 | \n", "0.006944 | \n", "0.013889 | \n", "0.012897 | \n", "0.034722 | \n", "0.007937 | \n", "364029.703039 | \n", "181600.695635 | \n", "83398.440676 | \n", "67881.733815 | \n", "62607.923182 | \n", "40.776228 | \n", "-73.982119 | \n", "4 | \n", "120 | \n", "109.890526 | \n", "
2 | \n", "119.0 | \n", "113.0 | \n", "124.0 | \n", "121.0 | \n", "131.0 | \n", "0.006944 | \n", "0.013889 | \n", "0.012897 | \n", "0.034722 | \n", "0.007937 | \n", "364029.703039 | \n", "181600.695635 | \n", "83398.440676 | \n", "67881.733815 | \n", "62607.923182 | \n", "40.776228 | \n", "-73.982119 | \n", "4 | \n", "127 | \n", "103.052565 | \n", "
3 | \n", "113.0 | \n", "124.0 | \n", "121.0 | \n", "131.0 | \n", "110.0 | \n", "0.006944 | \n", "0.013889 | \n", "0.012897 | \n", "0.034722 | \n", "0.007937 | \n", "364029.703039 | \n", "181600.695635 | \n", "83398.440676 | \n", "67881.733815 | \n", "62607.923182 | \n", "40.776228 | \n", "-73.982119 | \n", "4 | \n", "115 | \n", "104.410382 | \n", "
4 | \n", "124.0 | \n", "121.0 | \n", "131.0 | \n", "110.0 | \n", "116.0 | \n", "0.006944 | \n", "0.013889 | \n", "0.012897 | \n", "0.034722 | \n", "0.007937 | \n", "364029.703039 | \n", "181600.695635 | \n", "83398.440676 | \n", "67881.733815 | \n", "62607.923182 | \n", "40.776228 | \n", "-73.982119 | \n", "4 | \n", "115 | \n", "118.256624 | \n", "