{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Prerequisite: Modin and Ray must be installed, e.g. via `pip install \"modin[ray]\"`\n",
    "# (the quotes keep shells such as zsh from globbing the square brackets).\n",
    "# Every supported installation method is listed in the official documentation:\n",
    "# https://modin.readthedocs.io/en/latest/installation.html\n",
    "import modin.pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Labels for the CSV columns, in file order; handed to `read_csv` through `names=`.\n",
    "columns_names = [\n",
    "    \"trip_id\", \"vendor_id\",\n",
    "    \"pickup_datetime\", \"dropoff_datetime\",\n",
    "    \"store_and_fwd_flag\", \"rate_code_id\",\n",
    "    \"pickup_longitude\", \"pickup_latitude\",\n",
    "    \"dropoff_longitude\", \"dropoff_latitude\",\n",
    "    \"passenger_count\", \"trip_distance\",\n",
    "    \"fare_amount\", \"extra\", \"mta_tax\", \"tip_amount\", \"tolls_amount\",\n",
    "    \"ehail_fee\", \"improvement_surcharge\", \"total_amount\",\n",
    "    \"payment_type\", \"trip_type\",\n",
    "    \"pickup\", \"dropoff\", \"cab_type\",\n",
    "    \"precipitation\", \"snow_depth\", \"snowfall\",\n",
    "    \"max_temperature\", \"min_temperature\", \"average_wind_speed\",\n",
    "    \"pickup_nyct2010_gid\", \"pickup_ctlabel\", \"pickup_borocode\", \"pickup_boroname\",\n",
    "    \"pickup_ct2010\", \"pickup_boroct2010\", \"pickup_cdeligibil\",\n",
    "    \"pickup_ntacode\", \"pickup_ntaname\", \"pickup_puma\",\n",
    "    \"dropoff_nyct2010_gid\", \"dropoff_ctlabel\", \"dropoff_borocode\", \"dropoff_boroname\",\n",
    "    \"dropoff_ct2010\", \"dropoff_boroct2010\", \"dropoff_cdeligibil\",\n",
    "    \"dropoff_ntacode\", \"dropoff_ntaname\", \"dropoff_puma\",\n",
    "]\n",
    "# Columns that `read_csv` is asked to parse as datetimes.\n",
    "parse_dates = [\"pickup_datetime\", \"dropoff_datetime\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "UserWarning: Parameters provided defaulting to pandas implementation.\n",
      "To request implementation, send an email to feature_requests@modin.org.\n"
     ]
    }
   ],
   "source": [
    "# Load the trips CSV. `header=None`: no row of the file is used as a header,\n",
    "# so the labels come from `columns_names` instead.\n",
    "df = pd.read_csv(\n",
    "    'https://modin-datasets.s3.amazonaws.com/trips_data.csv',\n",
    "    names=columns_names,\n",
    "    header=None,\n",
    "    parse_dates=parse_dates,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | trip_id | \n", "vendor_id | \n", "pickup_datetime | \n", "dropoff_datetime | \n", "store_and_fwd_flag | \n", "rate_code_id | \n", "pickup_longitude | \n", "pickup_latitude | \n", "dropoff_longitude | \n", "dropoff_latitude | \n", "... | \n", "dropoff_nyct2010_gid | \n", "dropoff_ctlabel | \n", "dropoff_borocode | \n", "dropoff_boroname | \n", "dropoff_ct2010 | \n", "dropoff_boroct2010 | \n", "dropoff_cdeligibil | \n", "dropoff_ntacode | \n", "dropoff_ntaname | \n", "dropoff_puma | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "2 | \n", "2013-08-01 08:14:37 | \n", "2013-08-01 09:09:06 | \n", "N | \n", "1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
1 | \n", "2 | \n", "2 | \n", "2013-08-01 09:13:00 | \n", "2013-08-01 11:38:00 | \n", "N | \n", "1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
2 | \n", "3 | \n", "2 | \n", "2013-08-01 09:48:00 | \n", "2013-08-01 09:49:00 | \n", "N | \n", "5 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
3 | \n", "4 | \n", "2 | \n", "2013-08-01 10:38:35 | \n", "2013-08-01 10:38:51 | \n", "N | \n", "1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
4 | \n", "5 | \n", "2 | \n", "2013-08-01 11:51:45 | \n", "2013-08-01 12:03:52 | \n", "N | \n", "1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
9995 | \n", "9881 | \n", "2 | \n", "2013-09-29 01:03:48 | \n", "2013-09-29 01:09:49 | \n", "N | \n", "1 | \n", "-73.958336 | \n", "40.820820 | \n", "-73.953773 | \n", "40.825195 | \n", "... | \n", "907.0 | \n", "225.0 | \n", "1.0 | \n", "Manhattan | \n", "22500.0 | \n", "1022500.0 | \n", "E | \n", "MN04 | \n", "Hamilton Heights | \n", "3802.0 | \n", "
9996 | \n", "9882 | \n", "2 | \n", "2013-09-29 03:04:10 | \n", "2013-09-29 03:09:37 | \n", "N | \n", "1 | \n", "-73.958824 | \n", "40.820251 | \n", "-73.934174 | \n", "40.853394 | \n", "... | \n", "912.0 | \n", "271.0 | \n", "1.0 | \n", "Manhattan | \n", "27100.0 | \n", "1027100.0 | \n", "E | \n", "MN35 | \n", "Washington Heights North | \n", "3801.0 | \n", "
9997 | \n", "9883 | \n", "2 | \n", "2013-09-30 16:28:12 | \n", "2013-09-30 16:56:03 | \n", "N | \n", "1 | \n", "-73.956100 | \n", "40.818974 | \n", "-73.941055 | \n", "40.789993 | \n", "... | \n", "1318.0 | \n", "170.0 | \n", "1.0 | \n", "Manhattan | \n", "17000.0 | \n", "1017000.0 | \n", "E | \n", "MN33 | \n", "East Harlem South | \n", "3804.0 | \n", "
9998 | \n", "9884 | \n", "2 | \n", "2013-09-01 13:15:15 | \n", "2013-09-01 13:23:10 | \n", "N | \n", "1 | \n", "-73.955345 | \n", "40.820053 | \n", "-73.942444 | \n", "40.841507 | \n", "... | \n", "911.0 | \n", "251.0 | \n", "1.0 | \n", "Manhattan | \n", "25100.0 | \n", "1025100.0 | \n", "E | \n", "MN36 | \n", "Washington Heights South | \n", "3801.0 | \n", "
9999 | \n", "9885 | \n", "2 | \n", "2013-09-20 07:32:17 | \n", "2013-09-20 08:01:06 | \n", "N | \n", "1 | \n", "-73.955353 | \n", "40.820213 | \n", "-73.957680 | \n", "40.765190 | \n", "... | \n", "1758.0 | \n", "116.0 | \n", "1.0 | \n", "Manhattan | \n", "11600.0 | \n", "1011600.0 | \n", "I | \n", "MN31 | \n", "Lenox Hill-Roosevelt Island | \n", "3805.0 | \n", "
10000 rows x 51 columns
\n", "\n", " | passenger_count | \n", "total_amount | \n", "
---|---|---|
0 | \n", "0 | \n", "18.333333 | \n", "
1 | \n", "1 | \n", "15.258850 | \n", "
2 | \n", "2 | \n", "20.332356 | \n", "
3 | \n", "3 | \n", "13.748845 | \n", "
4 | \n", "4 | \n", "19.742688 | \n", "
5 | \n", "5 | \n", "14.786221 | \n", "
6 | \n", "6 | \n", "15.400085 | \n", "
\n", " | passenger_count | \n", "pickup_datetime | \n", "0 | \n", "
---|---|---|---|
0 | \n", "0 | \n", "2013-08-14 12:07:00 | \n", "1 | \n", "
1 | \n", "0 | \n", "2013-08-14 12:37:00 | \n", "1 | \n", "
2 | \n", "0 | \n", "2013-08-15 00:00:00 | \n", "1 | \n", "
3 | \n", "1 | \n", "2013-08-01 08:14:37 | \n", "1 | \n", "
4 | \n", "1 | \n", "2013-08-01 09:48:00 | \n", "1 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
9909 | \n", "6 | \n", "2013-09-28 18:30:15 | \n", "1 | \n", "
9910 | \n", "6 | \n", "2013-09-28 19:57:22 | \n", "1 | \n", "
9911 | \n", "6 | \n", "2013-09-29 18:47:29 | \n", "1 | \n", "
9912 | \n", "6 | \n", "2013-09-30 02:27:33 | \n", "1 | \n", "
9913 | \n", "6 | \n", "2013-09-30 21:31:06 | \n", "1 | \n", "
9914 rows x 3 columns
\n", "\n", " | passenger_count | \n", "pickup_datetime | \n", "trip_distance | \n", "0 | \n", "
---|---|---|---|---|
2 | \n", "1 | \n", "2013 | \n", "0 | \n", "1991 | \n", "
3 | \n", "1 | \n", "2013 | \n", "1 | \n", "1270 | \n", "
4 | \n", "1 | \n", "2013 | \n", "2 | \n", "853 | \n", "
80 | \n", "5 | \n", "2013 | \n", "0 | \n", "551 | \n", "
81 | \n", "5 | \n", "2013 | \n", "1 | \n", "537 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
77 | \n", "4 | \n", "2013 | \n", "10 | \n", "1 | \n", "
78 | \n", "4 | \n", "2013 | \n", "11 | \n", "1 | \n", "
79 | \n", "4 | \n", "2013 | \n", "14 | \n", "1 | \n", "
102 | \n", "5 | \n", "2013 | \n", "28 | \n", "1 | \n", "
115 | \n", "6 | \n", "2013 | \n", "14 | \n", "1 | \n", "
116 rows x 4 columns
\n", "