{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import h2o\n", "import time" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
| H2O cluster uptime: | \n", "1 hours 58 minutes 9 seconds 765 milliseconds |
| H2O cluster version: | \n", "3.1.0.99999 |
| H2O cluster name: | \n", "spencer |
| H2O cluster total nodes: | \n", "1 |
| H2O cluster total memory: | \n", "14.22 GB |
| H2O cluster total cores: | \n", "8 |
| H2O cluster allowed cores: | \n", "8 |
| H2O cluster healthy: | \n", "True |
| H2O Connection ip: | \n", "127.0.0.1 |
| H2O Connection port: | \n", "54321 |
| File1 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/2013-07.csv |
| File2 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/2013-08.csv |
| File3 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/2013-09.csv |
| File4 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/2013-10.csv |
| File5 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/2013-11.csv |
| File6 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/2013-12.csv |
| File7 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/2014-01.csv |
| File8 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/2014-02.csv |
| File9 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/2014-03.csv |
| File10 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/2014-04.csv |
| File11 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/2014-05.csv |
| File12 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/2014-06.csv |
| File13 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/2014-07.csv |
| File14 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/2014-08.csv |
| chunk_type | \n", "chunk_name | \n", "count | \n", "count_percentage | \n", "size | \n", "size_percentage |
| C0L | \n", "Constant Integers | \n", "117 | \n", "1.5298117 | \n", "9.1 KB | \n", "0.0015500536 |
| C1 | \n", "1-Byte Integers | \n", "478 | \n", "6.25 | \n", "10.0 MB | \n", "1.7289143 |
| C1N | \n", "1-Byte Integers (w/o NAs) | \n", "478 | \n", "6.25 | \n", "10.0 MB | \n", "1.7289143 |
| C1S | \n", "1-Byte Fractions | \n", "839 | \n", "10.970188 | \n", "17.5 MB | \n", "3.042758 |
| C2 | \n", "2-Byte Integers | \n", "2616 | \n", "34.20502 | \n", "108.8 MB | \n", "18.8909 |
| C2S | \n", "2-Byte Fractions | \n", "314 | \n", "4.1056485 | \n", "12.9 MB | \n", "2.2460942 |
| C4 | \n", "4-Byte Integers | \n", "214 | \n", "2.7981172 | \n", "17.9 MB | \n", "3.1005228 |
| C4S | \n", "4-Byte Fractions | \n", "389 | \n", "5.086297 | \n", "32.4 MB | \n", "5.625424 |
| C8 | \n", "64-bit Integers | \n", "680 | \n", "8.891213 | \n", "113.5 MB | \n", "19.704786 |
| C8D | \n", "64-bit Reals | \n", "1523 | \n", "19.913704 | \n", "253.0 MB | \n", "43.930134 |
| \n", " | size | \n", "number_of_rows | \n", "number_of_chunks_per_column | \n", "number_of_chunks |
| 172.16.2.37:54321 | \n", "575.9 MB | \n", "10407546.0 | \n", "478.0 | \n", "7648.0 |
| mean | \n", "575.9 MB | \n", "10407546.0 | \n", "478.0 | \n", "7648.0 |
| min | \n", "575.9 MB | \n", "10407546.0 | \n", "478.0 | \n", "7648.0 |
| max | \n", "575.9 MB | \n", "10407546.0 | \n", "478.0 | \n", "7648.0 |
| stddev | \n", "0 B | \n", "0.0 | \n", "0.0 | \n", "0.0 |
| total | \n", "575.9 MB | \n", "10407546.0 | \n", "478.0 | \n", "7648.0 |
| \n", " | tripduration | \n", "starttime | \n", "stoptime | \n", "start station id | \n", "start station name | \n", "start station latitude | \n", "start station longitude | \n", "end station id | \n", "end station name | \n", "end station latitude | \n", "end station longitude | \n", "bikeid | \n", "usertype | \n", "birth year | \n", "gender | \n", "Days |
| type | \n", "int | \n", "time | \n", "time | \n", "int | \n", "enum | \n", "real | \n", "real | \n", "int | \n", "enum | \n", "real | \n", "real | \n", "int | \n", "enum | \n", "int | \n", "int | \n", "int |
| mins | \n", "60.0 | \n", "1.372662e+12 | \n", "1.372662242e+12 | \n", "72.0 | \n", "0.0 | \n", "40.680342423 | \n", "-74.01713445 | \n", "72.0 | \n", "0.0 | \n", "40.680342423 | \n", "-74.01713445 | \n", "14529.0 | \n", "0.0 | \n", "1899.0 | \n", "0.0 | \n", "15887.0 |
| maxs | \n", "6250750.0 | \n", "1.409554787e+12 | \n", "1.409563605e+12 | \n", "3002.0 | \n", "339.0 | \n", "40.771522 | \n", "-73.9500479759 | \n", "3002.0 | \n", "339.0 | \n", "40.771522 | \n", "-73.9500479759 | \n", "21689.0 | \n", "1.0 | \n", "1998.0 | \n", "2.0 | \n", "16314.0 |
| sigma | \n", "2985.10540532 | \n", "11806578171.7 | \n", "11806555707.8 | \n", "355.755989765 | \n", "103.210304227 | \n", "0.0197100508736 | \n", "0.0123453320185 | \n", "360.070380844 | \n", "103.205091206 | \n", "0.0197309578633 | \n", "0.0124311861598 | \n", "1938.80517884 | \n", "0.324807387506 | \n", "11.132784905 | \n", "0.563019777794 | \n", "136.647269305 |
| zero_count | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "56836 | \n", "0 | \n", "0 | \n", "0 | \n", "55167 | \n", "0 | \n", "0 | \n", "0 | \n", "1247534 | \n", "0 | \n", "1248517 | \n", "0 |
| missing_count | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "1247644 | \n", "0 | \n", "0 |
| Days | \n", "start station name | \n", "bikes |
| 16234 | \n", "Concord St & Bridge St | \n", "15 |
| 16106 | \n", "Cumberland St & Lafayette Ave | \n", "6 |
| 15978 | \n", "DeKalb Ave & Hudson Ave | \n", "36 |
| 16088 | \n", "Allen St & Hester St | \n", "55 |
| 15945 | \n", "Allen St & Rivington St | \n", "140 |
| 16251 | \n", "Clinton St & Grand St | \n", "79 |
| 16123 | \n", "Clinton St & Joralemon St | \n", "6 |
| 15995 | \n", "Clinton St & Tillary St | \n", "22 |
| 16313 | \n", "Greenwich St & N Moore St | \n", "74 |
| 16185 | \n", "Hancock St & Bedford Ave | \n", "14 |
| chunk_type | \n", "chunk_name | \n", "count | \n", "count_percentage | \n", "size | \n", "size_percentage |
| C2 | \n", "2-Byte Integers | \n", "96 | \n", "100.0 | \n", "822.4 KB | \n", "100.0 |
| \n", " | size | \n", "number_of_rows | \n", "number_of_chunks_per_column | \n", "number_of_chunks |
| 172.16.2.37:54321 | \n", "822.4 KB | \n", "139261.0 | \n", "32.0 | \n", "96.0 |
| mean | \n", "822.4 KB | \n", "139261.0 | \n", "32.0 | \n", "96.0 |
| min | \n", "822.4 KB | \n", "139261.0 | \n", "32.0 | \n", "96.0 |
| max | \n", "822.4 KB | \n", "139261.0 | \n", "32.0 | \n", "96.0 |
| stddev | \n", "0 B | \n", "0.0 | \n", "0.0 | \n", "0.0 |
| total | \n", "822.4 KB | \n", "139261.0 | \n", "32.0 | \n", "96.0 |
| \n", " | Days | \n", "start station name | \n", "bikes |
| type | \n", "int | \n", "enum | \n", "int |
| mins | \n", "15887.0 | \n", "0.0 | \n", "1.0 |
| maxs | \n", "16314.0 | \n", "339.0 | \n", "680.0 |
| sigma | \n", "123.635133897 | \n", "98.50295732 | \n", "64.1243887565 |
| zero_count | \n", "0 | \n", "428 | \n", "0 |
| missing_count | \n", "0 | \n", "0 | \n", "0 |
| Probs | \n", "bikesQuantiles |
| 0.01 | \n", "2 |
| 0.1 | \n", "11 |
| 0.25 | \n", "26 |
| 0.333 | \n", "35 |
| 0.5 | \n", "58 |
| 0.667 | \n", "89 |
| 0.75 | \n", "107 |
| 0.9 | \n", "157 |
| 0.99 | \n", "291 |
| chunk_type | \n", "chunk_name | \n", "count | \n", "count_percentage | \n", "size | \n", "size_percentage |
| C1N | \n", "1-Byte Integers (w/o NAs) | \n", "64 | \n", "40.0 | \n", "276.2 KB | \n", "25.145071 |
| C2 | \n", "2-Byte Integers | \n", "96 | \n", "60.000004 | \n", "822.4 KB | \n", "74.85493 |
| \n", " | size | \n", "number_of_rows | \n", "number_of_chunks_per_column | \n", "number_of_chunks |
| 172.16.2.37:54321 | \n", "1.1 MB | \n", "139261.0 | \n", "32.0 | \n", "160.0 |
| mean | \n", "1.1 MB | \n", "139261.0 | \n", "32.0 | \n", "160.0 |
| min | \n", "1.1 MB | \n", "139261.0 | \n", "32.0 | \n", "160.0 |
| max | \n", "1.1 MB | \n", "139261.0 | \n", "32.0 | \n", "160.0 |
| stddev | \n", "0 B | \n", "0.0 | \n", "0.0 | \n", "0.0 |
| total | \n", "1.1 MB | \n", "139261.0 | \n", "32.0 | \n", "160.0 |
| \n", " | Days | \n", "start station name | \n", "bikes | \n", "Month | \n", "DayOfWeek |
| type | \n", "int | \n", "enum | \n", "int | \n", "enum | \n", "enum |
| mins | \n", "15887.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 |
| maxs | \n", "16314.0 | \n", "339.0 | \n", "680.0 | \n", "11.0 | \n", "6.0 |
| sigma | \n", "123.635133897 | \n", "98.50295732 | \n", "64.1243887565 | \n", "3.20373100216 | \n", "2.00302100015 |
| zero_count | \n", "0 | \n", "428 | \n", "0 | \n", "9949 | \n", "19880 |
| missing_count | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 |
| Model | \n", "R2 TRAIN | \n", "R2 TEST | \n", "R2 HOLDOUT | \n", "Model Training Time (s) |
| GBM | \n", "0.967596306547 | \n", "0.921163552313 | \n", "0.924511283963 | \n", "21.313 |
| DRF | \n", "0.849449692569 | \n", "0.819592185529 | \n", "0.824331359117 | \n", "27.972 |
| GLM | \n", "0.784089976397 | \n", "0.784183689467 | \n", "0.788781938295 | \n", "0.446 |
| DL | \n", "0.90367986631 | \n", "0.88016358277 | \n", "0.882139604948 | \n", "54.633 |
| File1 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/31081_New_York_City__Hourly_2013.csv |
| File2 | \n", "/Users/spencer/0xdata/h2o-dev/bigdata/laptop/citibike-nyc/31081_New_York_City__Hourly_2014.csv |
| chunk_type | \n", "chunk_name | \n", "count | \n", "count_percentage | \n", "size | \n", "size_percentage |
| C0L | \n", "Constant Integers | \n", "107 | \n", "6.294118 | \n", "8.4 KB | \n", "0.7889721 |
| C0D | \n", "Constant Reals | \n", "436 | \n", "25.647058 | \n", "34.1 KB | \n", "3.2148771 |
| CXI | \n", "Sparse Integers | \n", "17 | \n", "1.0 | \n", "1.5 KB | \n", "0.13991351 |
| C1 | \n", "1-Byte Integers | \n", "346 | \n", "20.352942 | \n", "197.4 KB | \n", "18.634672 |
| C1N | \n", "1-Byte Integers (w/o NAs) | \n", "214 | \n", "12.588236 | \n", "122.3 KB | \n", "11.544063 |
| C1S | \n", "1-Byte Fractions | \n", "214 | \n", "12.588236 | \n", "125.3 KB | \n", "11.822968 |
| C2S | \n", "2-Byte Fractions | \n", "196 | \n", "11.529412 | \n", "214.5 KB | \n", "20.242111 |
| C4S | \n", "4-Byte Fractions | \n", "170 | \n", "10.0 | \n", "356.1 KB | \n", "33.612423 |
| \n", " | size | \n", "number_of_rows | \n", "number_of_chunks_per_column | \n", "number_of_chunks |
| 172.16.2.37:54321 | \n", "1.0 MB | \n", "17520.0 | \n", "34.0 | \n", "1700.0 |
| mean | \n", "1.0 MB | \n", "17520.0 | \n", "34.0 | \n", "1700.0 |
| min | \n", "1.0 MB | \n", "17520.0 | \n", "34.0 | \n", "1700.0 |
| max | \n", "1.0 MB | \n", "17520.0 | \n", "34.0 | \n", "1700.0 |
| stddev | \n", "0 B | \n", "0.0 | \n", "0.0 | \n", "0.0 |
| total | \n", "1.0 MB | \n", "17520.0 | \n", "34.0 | \n", "1700.0 |
| \n", " | Year Local | \n", "Month Local | \n", "Day Local | \n", "Hour Local | \n", "Year UTC | \n", "Month UTC | \n", "Day UTC | \n", "Hour UTC | \n", "Cavok Reported | \n", "Cloud Ceiling (m) | \n", "Cloud Cover Fraction | \n", "Cloud Cover Fraction 1 | \n", "Cloud Cover Fraction 2 | \n", "Cloud Cover Fraction 3 | \n", "Cloud Cover Fraction 4 | \n", "Cloud Cover Fraction 5 | \n", "Cloud Cover Fraction 6 | \n", "Cloud Height (m) 1 | \n", "Cloud Height (m) 2 | \n", "Cloud Height (m) 3 | \n", "Cloud Height (m) 4 | \n", "Cloud Height (m) 5 | \n", "Cloud Height (m) 6 | \n", "Dew Point (C) | \n", "Humidity Fraction | \n", "Precipitation One Hour (mm) | \n", "Pressure Altimeter (mbar) | \n", "Pressure Sea Level (mbar) | \n", "Pressure Station (mbar) | \n", "Snow Depth (cm) | \n", "Temperature (C) | \n", "Visibility (km) | \n", "Weather Code 1 | \n", "Weather Code 1/ Description | \n", "Weather Code 2 | \n", "Weather Code 2/ Description | \n", "Weather Code 3 | \n", "Weather Code 3/ Description | \n", "Weather Code 4 | \n", "Weather Code 4/ Description | \n", "Weather Code 5 | \n", "Weather Code 5/ Description | \n", "Weather Code 6 | \n", "Weather Code 6/ Description | \n", "Weather Code Most Severe / Icon Code | \n", "Weather Code Most Severe | \n", "Weather Code Most Severe / Description | \n", "Wind Direction (degrees) | \n", "Wind Gust (m/s) | \n", "Wind Speed (m/s) |
| type | \n", "int | \n", "int | \n", "int | \n", "int | \n", "int | \n", "int | \n", "int | \n", "int | \n", "int | \n", "real | \n", "real | \n", "real | \n", "real | \n", "real | \n", "int | \n", "int | \n", "int | \n", "real | \n", "real | \n", "real | \n", "int | \n", "int | \n", "int | \n", "real | \n", "real | \n", "real | \n", "real | \n", "int | \n", "int | \n", "int | \n", "real | \n", "real | \n", "int | \n", "enum | \n", "int | \n", "enum | \n", "int | \n", "enum | \n", "int | \n", "enum | \n", "int | \n", "enum | \n", "int | \n", "enum | \n", "int | \n", "int | \n", "enum | \n", "int | \n", "real | \n", "real |
| mins | \n", "2013.0 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "2013.0 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "61.0 | \n", "0.0 | \n", "0.0 | \n", "0.25 | \n", "0.5 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "60.96 | \n", "213.36 | \n", "365.76 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "-26.7 | \n", "0.1251 | \n", "0.0 | \n", "983.2949 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "-15.6 | \n", "0.001 | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "3.0 | \n", "0.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "10.0 | \n", "7.2 | \n", "0.0 |
| maxs | \n", "2014.0 | \n", "12.0 | \n", "31.0 | \n", "23.0 | \n", "2015.0 | \n", "12.0 | \n", "31.0 | \n", "23.0 | \n", "0.0 | \n", "3657.6 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "3657.5999 | \n", "3657.5999 | \n", "3657.5999 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "24.4 | \n", "1.0 | \n", "26.924 | \n", "1042.2113 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "36.1 | \n", "16.0934 | \n", "60.0 | \n", "11.0 | \n", "60.0 | \n", "10.0 | \n", "36.0 | \n", "7.0 | \n", "27.0 | \n", "4.0 | \n", "27.0 | \n", "2.0 | \n", "3.0 | \n", "0.0 | \n", "16.0 | \n", "60.0 | \n", "11.0 | \n", "360.0 | \n", "20.58 | \n", "10.8 |
| sigma | \n", "0.500014270017 | \n", "3.44794972385 | \n", "8.79649804852 | \n", "6.92238411188 | \n", "0.500584411716 | \n", "3.44782405458 | \n", "8.79561488868 | \n", "6.92230165203 | \n", "0.0 | \n", "995.339856966 | \n", "0.462720830993 | \n", "0.42770569708 | \n", "0.197155690367 | \n", "0.0861015598104 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "962.743095854 | \n", "916.73861349 | \n", "887.215847511 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "10.9731282097 | \n", "0.185792011866 | \n", "2.56215129179 | \n", "7.46451697179 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "10.0396739531 | \n", "3.69893623033 | \n", "5.70486576983 | \n", "2.47814708663 | \n", "6.13386253912 | \n", "1.83976235335 | \n", "5.80553286364 | \n", "1.28967553698 | \n", "3.12340844261 | \n", "1.282164369 | \n", "6.15223536611 | \n", "0.60207972894 | \n", "0.0 | \n", "0.0 | \n", "4.07386062702 | \n", "5.70486576983 | \n", "2.47814708663 | \n", "106.350000031 | \n", "1.81511871115 | \n", "1.61469790524 |
| zero_count | \n", "0 | \n", "0 | \n", "0 | \n", "730 | \n", "0 | \n", "0 | \n", "0 | \n", "730 | \n", "17455 | \n", "0 | \n", "8758 | \n", "8758 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "268 | \n", "0 | \n", "501 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "269 | \n", "0 | \n", "0 | \n", "17 | \n", "0 | \n", "30 | \n", "0 | \n", "13 | \n", "0 | \n", "20 | \n", "0 | \n", "12 | \n", "0 | \n", "2 | \n", "14980 | \n", "0 | \n", "17 | \n", "0 | \n", "0 | \n", "2768 |
| missing_count | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "65 | \n", "10780 | \n", "375 | \n", "375 | \n", "14682 | \n", "16535 | \n", "17520 | \n", "17520 | \n", "17520 | \n", "9103 | \n", "14683 | \n", "16535 | \n", "17520 | \n", "17520 | \n", "17520 | \n", "67 | \n", "67 | \n", "15660 | \n", "360 | \n", "17520 | \n", "17520 | \n", "17520 | \n", "67 | \n", "412 | \n", "14980 | \n", "14980 | \n", "16477 | \n", "16477 | \n", "17181 | \n", "17181 | \n", "17433 | \n", "17433 | \n", "17504 | \n", "17504 | \n", "17518 | \n", "17518 | \n", "0 | \n", "14980 | \n", "14980 | \n", "9382 | \n", "14381 | \n", "1283 |
| chunk_type | \n", "chunk_name | \n", "count | \n", "count_percentage | \n", "size | \n", "size_percentage |
| C0L | \n", "Constant Integers | \n", "46 | \n", "15.0326805 | \n", "3.6 KB | \n", "1.780005 |
| C1 | \n", "1-Byte Integers | \n", "34 | \n", "11.111112 | \n", "19.4 KB | \n", "9.592678 |
| C1N | \n", "1-Byte Integers (w/o NAs) | \n", "90 | \n", "29.411766 | \n", "51.5 KB | \n", "25.494701 |
| C1S | \n", "1-Byte Fractions | \n", "42 | \n", "13.725491 | \n", "24.0 KB | \n", "11.894592 |
| C2S | \n", "2-Byte Fractions | \n", "94 | \n", "30.718956 | \n", "103.4 KB | \n", "51.238026 |
| \n", " | size | \n", "number_of_rows | \n", "number_of_chunks_per_column | \n", "number_of_chunks |
| 172.16.2.37:54321 | \n", "201.9 KB | \n", "17520.0 | \n", "34.0 | \n", "306.0 |
| mean | \n", "201.9 KB | \n", "17520.0 | \n", "34.0 | \n", "306.0 |
| min | \n", "201.9 KB | \n", "17520.0 | \n", "34.0 | \n", "306.0 |
| max | \n", "201.9 KB | \n", "17520.0 | \n", "34.0 | \n", "306.0 |
| stddev | \n", "0 B | \n", "0.0 | \n", "0.0 | \n", "0.0 |
| total | \n", "201.9 KB | \n", "17520.0 | \n", "34.0 | \n", "306.0 |
| \n", " | Year Local | \n", "Month Local | \n", "Day Local | \n", "Hour Local | \n", "Dew Point (C) | \n", "Humidity Fraction | \n", "Rain (mm) | \n", "Temperature (C) | \n", "WC1 |
| type | \n", "int | \n", "int | \n", "int | \n", "int | \n", "real | \n", "real | \n", "real | \n", "real | \n", "enum |
| mins | \n", "2013.0 | \n", "1.0 | \n", "1.0 | \n", "0.0 | \n", "-26.7 | \n", "0.1251 | \n", "0.0 | \n", "-15.6 | \n", "0.0 |
| maxs | \n", "2014.0 | \n", "12.0 | \n", "31.0 | \n", "23.0 | \n", "24.4 | \n", "1.0 | \n", "26.924 | \n", "36.1 | \n", "11.0 |
| sigma | \n", "0.500014270017 | \n", "3.44794972385 | \n", "8.79649804852 | \n", "6.92238411188 | \n", "10.9731282097 | \n", "0.185792011866 | \n", "2.56215129179 | \n", "10.0396739531 | \n", "2.47814708663 |
| zero_count | \n", "0 | \n", "0 | \n", "0 | \n", "730 | \n", "268 | \n", "0 | \n", "501 | \n", "269 | \n", "17 |
| missing_count | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "67 | \n", "67 | \n", "15660 | \n", "67 | \n", "14980 |
| chunk_type | \n", "chunk_name | \n", "count | \n", "count_percentage | \n", "size | \n", "size_percentage |
| C0L | \n", "Constant Integers | \n", "80 | \n", "21.390373 | \n", "6.3 KB | \n", "11.955688 |
| C0D | \n", "Constant Reals | \n", "13 | \n", "3.4759357 | \n", "1.0 KB | \n", "1.9427994 |
| C1 | \n", "1-Byte Integers | \n", "30 | \n", "8.021391 | \n", "2.6 KB | \n", "5.0176535 |
| C1N | \n", "1-Byte Integers (w/o NAs) | \n", "56 | \n", "14.973262 | \n", "4.9 KB | \n", "9.375875 |
| C1S | \n", "1-Byte Fractions | \n", "34 | \n", "9.090909 | \n", "3.5 KB | \n", "6.698922 |
| C2S | \n", "2-Byte Fractions | \n", "34 | \n", "9.090909 | \n", "4.2 KB | \n", "8.062618 |
| C8D | \n", "64-bit Reals | \n", "127 | \n", "33.95722 | \n", "29.8 KB | \n", "56.946445 |
| \n", " | size | \n", "number_of_rows | \n", "number_of_chunks_per_column | \n", "number_of_chunks |
| 172.16.2.37:54321 | \n", "52.3 KB | \n", "730.0 | \n", "34.0 | \n", "374.0 |
| mean | \n", "52.3 KB | \n", "730.0 | \n", "34.0 | \n", "374.0 |
| min | \n", "52.3 KB | \n", "730.0 | \n", "34.0 | \n", "374.0 |
| max | \n", "52.3 KB | \n", "730.0 | \n", "34.0 | \n", "374.0 |
| stddev | \n", "0 B | \n", "0.0 | \n", "0.0 | \n", "0.0 |
| total | \n", "52.3 KB | \n", "730.0 | \n", "34.0 | \n", "374.0 |
| \n", " | Year Local | \n", "Month Local | \n", "Day Local | \n", "Hour Local | \n", "Dew Point (C) | \n", "Humidity Fraction | \n", "Rain (mm) | \n", "Temperature (C) | \n", "WC1 | \n", "msec | \n", "Days |
| type | \n", "int | \n", "int | \n", "int | \n", "int | \n", "real | \n", "real | \n", "real | \n", "real | \n", "enum | \n", "int | \n", "int |
| mins | \n", "2013.0 | \n", "1.0 | \n", "1.0 | \n", "12.0 | \n", "-26.7 | \n", "0.1723 | \n", "0.0 | \n", "-13.9 | \n", "0.0 | \n", "1.3570704e+12 | \n", "15706.0 |
| maxs | \n", "2014.0 | \n", "12.0 | \n", "31.0 | \n", "12.0 | \n", "23.3 | \n", "1.0 | \n", "12.446 | \n", "34.4 | \n", "10.0 | \n", "1.420056e+12 | \n", "16435.0 |
| sigma | \n", "0.500342818004 | \n", "3.45021529307 | \n", "8.80227802701 | \n", "0.0 | \n", "11.1062964725 | \n", "0.179945027923 | \n", "2.36064248615 | \n", "10.3989855149 | \n", "2.74674726123 | \n", "18219740080.4 | \n", "210.877136425 |
| zero_count | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "14 | \n", "0 | \n", "15 | \n", "7 | \n", "1 | \n", "0 | \n", "0 |
| missing_count | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "3 | \n", "3 | \n", "660 | \n", "3 | \n", "620 | \n", "0 | \n", "0 |
| chunk_type | \n", "chunk_name | \n", "count | \n", "count_percentage | \n", "size | \n", "size_percentage |
| C1 | \n", "1-Byte Integers | \n", "32 | \n", "10.0 | \n", "138.1 KB | \n", "2.4677303 |
| C1N | \n", "1-Byte Integers (w/o NAs) | \n", "64 | \n", "20.0 | \n", "276.2 KB | \n", "4.9354606 |
| C2 | \n", "2-Byte Integers | \n", "96 | \n", "30.000002 | \n", "822.4 KB | \n", "14.692484 |
| C8D | \n", "64-bit Reals | \n", "128 | \n", "40.0 | \n", "4.3 MB | \n", "77.90433 |
| \n", " | size | \n", "number_of_rows | \n", "number_of_chunks_per_column | \n", "number_of_chunks |
| 172.16.2.37:54321 | \n", "5.5 MB | \n", "139261.0 | \n", "32.0 | \n", "320.0 |
| mean | \n", "5.5 MB | \n", "139261.0 | \n", "32.0 | \n", "320.0 |
| min | \n", "5.5 MB | \n", "139261.0 | \n", "32.0 | \n", "320.0 |
| max | \n", "5.5 MB | \n", "139261.0 | \n", "32.0 | \n", "320.0 |
| stddev | \n", "0 B | \n", "0.0 | \n", "0.0 | \n", "0.0 |
| total | \n", "5.5 MB | \n", "139261.0 | \n", "32.0 | \n", "320.0 |
| \n", " | Days | \n", "start station name | \n", "bikes | \n", "Month | \n", "DayOfWeek | \n", "Humidity Fraction | \n", "Rain (mm) | \n", "Temperature (C) | \n", "WC1 | \n", "Dew Point (C) |
| type | \n", "int | \n", "enum | \n", "int | \n", "enum | \n", "enum | \n", "real | \n", "real | \n", "real | \n", "enum | \n", "real |
| mins | \n", "15887.0 | \n", "0.0 | \n", "1.0 | \n", "0.0 | \n", "0.0 | \n", "0.1723 | \n", "0.0 | \n", "-13.9 | \n", "0.0 | \n", "-26.7 |
| maxs | \n", "16314.0 | \n", "339.0 | \n", "680.0 | \n", "11.0 | \n", "6.0 | \n", "1.0 | \n", "8.382 | \n", "34.4 | \n", "10.0 | \n", "23.3 |
| sigma | \n", "123.635133897 | \n", "98.50295732 | \n", "64.1243887565 | \n", "3.20373100216 | \n", "2.00302100015 | \n", "0.178408938664 | \n", "1.76771960813 | \n", "10.9454511961 | \n", "2.962709609 | \n", "11.7308194576 |
| zero_count | \n", "0 | \n", "428 | \n", "0 | \n", "9949 | \n", "19880 | \n", "0 | \n", "2909 | \n", "1598 | \n", "324 | \n", "1954 |
| missing_count | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "0 | \n", "981 | \n", "128246 | \n", "981 | \n", "119130 | \n", "981 |
| Days | \n", "start station name | \n", "bikes | \n", "Month | \n", "DayOfWeek | \n", "Humidity Fraction | \n", "Rain (mm) | \n", "Temperature (C) | \n", "WC1 | \n", "Dew Point (C) |
| 16234 | \n", "Concord St & Bridge St | \n", "15 | \n", "6 | \n", "Thu | \n", "0.934 | \n", "0.508 | \n", "20.0 | \n", "mist | \n", "18.900000000000002 |
| 16106 | \n", "Cumberland St & Lafayette Ave | \n", "6 | \n", "2 | \n", "Tue | \n", "0.9228000000000001 | \n", "\n", " | 0.0 | \n", "mist | \n", "-1.1 |
| 15978 | \n", "DeKalb Ave & Hudson Ave | \n", "36 | \n", "9 | \n", "Sun | \n", "0.46880000000000005 | \n", "\n", " | 20.0 | \n", "\n", " | 8.3 |
| 16088 | \n", "Allen St & Hester St | \n", "55 | \n", "1 | \n", "Fri | \n", "1.0 | \n", "\n", " | 1.0 | \n", "\n", " | 1.0 |
| 15945 | \n", "Allen St & Rivington St | \n", "140 | \n", "8 | \n", "Tue | \n", "0.5681 | \n", "0.0 | \n", "28.3 | \n", "light rain | \n", "18.900000000000002 |
| 16251 | \n", "Clinton St & Grand St | \n", "79 | \n", "6 | \n", "Sun | \n", "0.5275000000000001 | \n", "\n", " | 27.200000000000003 | \n", "\n", " | 16.7 |
| 16123 | \n", "Clinton St & Joralemon St | \n", "6 | \n", "2 | \n", "Fri | \n", "0.3141 | \n", "\n", " | 9.4 | \n", "\n", " | -6.7 |
| 15995 | \n", "Clinton St & Tillary St | \n", "22 | \n", "10 | \n", "Wed | \n", "0.6765 | \n", "\n", " | 20.6 | \n", "\n", " | 14.4 |
| 16313 | \n", "Greenwich St & N Moore St | \n", "74 | \n", "8 | \n", "Sat | \n", "0.6287 | \n", "\n", " | 28.900000000000002 | \n", "\n", " | 21.1 |
| 16185 | \n", "Hancock St & Bedford Ave | \n", "14 | \n", "4 | \n", "Thu | \n", "0.2092 | \n", "\n", " | 15.0 | \n", "\n", " | -7.2 |
| Model | \n", "R2 TRAIN | \n", "R2 TEST | \n", "R2 HOLDOUT | \n", "Model Training Time (s) |
| GBM | \n", "0.966646796949 | \n", "0.925059327615 | \n", "0.924489377492 | \n", "28.364 |
| DRF | \n", "0.896356580051 | \n", "0.846221607246 | \n", "0.849226966424 | \n", "146.965 |
| GLM | \n", "0.90033401895 | \n", "0.874089944288 | \n", "0.888666983774 | \n", "0.245 |
| DL | \n", "0.949803523619 | \n", "0.921881717344 | \n", "0.923630675348 | \n", "60.212 |