{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import h2o"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>H2O cluster uptime: </td>\n",
       "<td>17 seconds 548 milliseconds </td></tr>\n",
       "<tr><td>H2O cluster version: </td>\n",
       "<td>3.1.0.99999</td></tr>\n",
       "<tr><td>H2O cluster name: </td>\n",
       "<td>anqi_fu</td></tr>\n",
       "<tr><td>H2O cluster total nodes: </td>\n",
       "<td>1</td></tr>\n",
       "<tr><td>H2O cluster total memory: </td>\n",
       "<td>1.78 GB</td></tr>\n",
       "<tr><td>H2O cluster total cores: </td>\n",
       "<td>8</td></tr>\n",
       "<tr><td>H2O cluster allowed cores: </td>\n",
       "<td>8</td></tr>\n",
       "<tr><td>H2O cluster healthy: </td>\n",
       "<td>True</td></tr>\n",
       "<tr><td>H2O Connection ip: </td>\n",
       "<td>127.0.0.1</td></tr>\n",
       "<tr><td>H2O Connection port: </td>\n",
       "<td>54321</td></tr></table></div>"
      ],
      "text/plain": [
       "--------------------------  ---------------------------\n",
       "H2O cluster uptime:         17 seconds 548 milliseconds\n",
       "H2O cluster version:        3.1.0.99999\n",
       "H2O cluster name:           anqi_fu\n",
       "H2O cluster total nodes:    1\n",
       "H2O cluster total memory:   1.78 GB\n",
       "H2O cluster total cores:    8\n",
       "H2O cluster allowed cores:  8\n",
       "H2O cluster healthy:        True\n",
       "H2O Connection ip:          127.0.0.1\n",
       "H2O Connection port:        54321\n",
       "--------------------------  ---------------------------"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Connect to an H2O cluster (called with no arguments; the output below shows the local instance at 127.0.0.1:54321)\n",
    "h2o.init()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Import and Parse weather data\n",
      "\n",
      "Parse Progress: [##################################################] 100%\n",
      "Imported  /Users/anqi_fu/Documents/workspace/h2o-3/smalldata/chicago/chicagoAllWeather.csv . Parsed 5,162 rows and 7 cols\n",
      "Rows: 5,162 Cols: 7\n",
      "\n",
      "Chunk compression summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>chunk_type</td>\n",
       "<td>chunk_name</td>\n",
       "<td>count</td>\n",
       "<td>count_percentage</td>\n",
       "<td>size</td>\n",
       "<td>size_percentage</td></tr>\n",
       "<tr><td>C1N</td>\n",
       "<td>1-Byte Integers (w/o NAs)</td>\n",
       "<td>2</td>\n",
       "<td>28.57143</td>\n",
       "<td>   10.2 KB</td>\n",
       "<td>11.221008</td></tr>\n",
       "<tr><td>C1S</td>\n",
       "<td>1-Byte Fractions</td>\n",
       "<td>4</td>\n",
       "<td>57.14286</td>\n",
       "<td>   20.5 KB</td>\n",
       "<td>22.510675</td></tr>\n",
       "<tr><td>CStr</td>\n",
       "<td>String</td>\n",
       "<td>1</td>\n",
       "<td>14.285715</td>\n",
       "<td>   60.3 KB</td>\n",
       "<td>66.26832</td></tr></table></div>"
      ],
      "text/plain": [
       "chunk_type    chunk_name                 count    count_percentage    size     size_percentage\n",
       "------------  -------------------------  -------  ------------------  -------  -----------------\n",
       "C1N           1-Byte Integers (w/o NAs)  2        28.5714             10.2 KB  11.221\n",
       "C1S           1-Byte Fractions           4        57.1429             20.5 KB  22.5107\n",
       "CStr          String                     1        14.2857             60.3 KB  66.2683"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Frame distribution summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td></td>\n",
       "<td>size</td>\n",
       "<td>number_of_rows</td>\n",
       "<td>number_of_chunks_per_column</td>\n",
       "<td>number_of_chunks</td></tr>\n",
       "<tr><td>172.16.2.17:54321</td>\n",
       "<td>   91.0 KB</td>\n",
       "<td>5162.0</td>\n",
       "<td>1.0</td>\n",
       "<td>7.0</td></tr>\n",
       "<tr><td>mean</td>\n",
       "<td>   91.0 KB</td>\n",
       "<td>5162.0</td>\n",
       "<td>1.0</td>\n",
       "<td>7.0</td></tr>\n",
       "<tr><td>min</td>\n",
       "<td>   91.0 KB</td>\n",
       "<td>5162.0</td>\n",
       "<td>1.0</td>\n",
       "<td>7.0</td></tr>\n",
       "<tr><td>max</td>\n",
       "<td>   91.0 KB</td>\n",
       "<td>5162.0</td>\n",
       "<td>1.0</td>\n",
       "<td>7.0</td></tr>\n",
       "<tr><td>stddev</td>\n",
       "<td>      0  B</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>total</td>\n",
       "<td>   91.0 KB</td>\n",
       "<td>5162.0</td>\n",
       "<td>1.0</td>\n",
       "<td>7.0</td></tr></table></div>"
      ],
      "text/plain": [
       "                   size     number_of_rows    number_of_chunks_per_column    number_of_chunks\n",
       "-----------------  -------  ----------------  -----------------------------  ------------------\n",
       "172.16.2.17:54321  91.0 KB  5162              1                              7\n",
       "mean               91.0 KB  5162              1                              7\n",
       "min                91.0 KB  5162              1                              7\n",
       "max                91.0 KB  5162              1                              7\n",
       "stddev             0  B     0                 0                              0\n",
       "total              91.0 KB  5162              1                              7"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Column-by-Column Summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td></td>\n",
       "<td>date</td>\n",
       "<td>month</td>\n",
       "<td>day</td>\n",
       "<td>year</td>\n",
       "<td>maxTemp</td>\n",
       "<td>meanTemp</td>\n",
       "<td>minTemp</td></tr>\n",
       "<tr><td>type</td>\n",
       "<td>string</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td></tr>\n",
       "<tr><td>mins</td>\n",
       "<td>NaN</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td>\n",
       "<td>2001.0</td>\n",
       "<td>-2.0</td>\n",
       "<td>-9.0</td>\n",
       "<td>-18.0</td></tr>\n",
       "<tr><td>maxs</td>\n",
       "<td>NaN</td>\n",
       "<td>12.0</td>\n",
       "<td>31.0</td>\n",
       "<td>2015.0</td>\n",
       "<td>103.0</td>\n",
       "<td>93.0</td>\n",
       "<td>82.0</td></tr>\n",
       "<tr><td>sigma</td>\n",
       "<td>NaN</td>\n",
       "<td>3.46905171694</td>\n",
       "<td>8.79895173997</td>\n",
       "<td>4.0773409057</td>\n",
       "<td>21.4829777237</td>\n",
       "<td>19.9302399266</td>\n",
       "<td>19.0207297123</td></tr>\n",
       "<tr><td>zero_count</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>2</td>\n",
       "<td>16</td></tr>\n",
       "<tr><td>missing_count</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>13</td>\n",
       "<td>13</td>\n",
       "<td>13</td></tr></table></div>"
      ],
      "text/plain": [
       "               date    month          day            year          maxTemp        meanTemp       minTemp\n",
       "-------------  ------  -------------  -------------  ------------  -------------  -------------  -------------\n",
       "type           string  int            int            int           int            int            int\n",
       "mins           NaN     1.0            1.0            2001.0        -2.0           -9.0           -18.0\n",
       "maxs           NaN     12.0           31.0           2015.0        103.0          93.0           82.0\n",
       "sigma          NaN     3.46905171694  8.79895173997  4.0773409057  21.4829777237  19.9302399266  19.0207297123\n",
       "zero_count     0       0              0              0             0              2              16\n",
       "missing_count  0       0              0              0             13             13             13"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Import and Parse census data\n",
      "\n",
      "Parse Progress: [##################################################] 100%\n",
      "Imported  /Users/anqi_fu/Documents/workspace/h2o-3/smalldata/chicago/chicagoCensus.csv . Parsed 79 rows and 9 cols\n",
      "Rows: 79 Cols: 9\n",
      "\n",
      "Chunk compression summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>chunk_type</td>\n",
       "<td>chunk_name</td>\n",
       "<td>count</td>\n",
       "<td>count_percentage</td>\n",
       "<td>size</td>\n",
       "<td>size_percentage</td></tr>\n",
       "<tr><td>C1</td>\n",
       "<td>1-Byte Integers</td>\n",
       "<td>2</td>\n",
       "<td>22.222223</td>\n",
       "<td>    294  B</td>\n",
       "<td>9.312638</td></tr>\n",
       "<tr><td>C1S</td>\n",
       "<td>1-Byte Fractions</td>\n",
       "<td>1</td>\n",
       "<td>11.111112</td>\n",
       "<td>    163  B</td>\n",
       "<td>5.1631293</td></tr>\n",
       "<tr><td>C2S</td>\n",
       "<td>2-Byte Fractions</td>\n",
       "<td>4</td>\n",
       "<td>44.444447</td>\n",
       "<td>    968  B</td>\n",
       "<td>30.662022</td></tr>\n",
       "<tr><td>C4</td>\n",
       "<td>4-Byte Integers</td>\n",
       "<td>1</td>\n",
       "<td>11.111112</td>\n",
       "<td>    384  B</td>\n",
       "<td>12.163446</td></tr>\n",
       "<tr><td>CStr</td>\n",
       "<td>String</td>\n",
       "<td>1</td>\n",
       "<td>11.111112</td>\n",
       "<td>    1.3 KB</td>\n",
       "<td>42.698765</td></tr></table></div>"
      ],
      "text/plain": [
       "chunk_type    chunk_name        count    count_percentage    size    size_percentage\n",
       "------------  ----------------  -------  ------------------  ------  -----------------\n",
       "C1            1-Byte Integers   2        22.2222             294  B  9.31264\n",
       "C1S           1-Byte Fractions  1        11.1111             163  B  5.16313\n",
       "C2S           2-Byte Fractions  4        44.4444             968  B  30.662\n",
       "C4            4-Byte Integers   1        11.1111             384  B  12.1634\n",
       "CStr          String            1        11.1111             1.3 KB  42.6988"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Frame distribution summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td></td>\n",
       "<td>size</td>\n",
       "<td>number_of_rows</td>\n",
       "<td>number_of_chunks_per_column</td>\n",
       "<td>number_of_chunks</td></tr>\n",
       "<tr><td>172.16.2.17:54321</td>\n",
       "<td>    3.1 KB</td>\n",
       "<td>79.0</td>\n",
       "<td>1.0</td>\n",
       "<td>9.0</td></tr>\n",
       "<tr><td>mean</td>\n",
       "<td>    3.1 KB</td>\n",
       "<td>79.0</td>\n",
       "<td>1.0</td>\n",
       "<td>9.0</td></tr>\n",
       "<tr><td>min</td>\n",
       "<td>    3.1 KB</td>\n",
       "<td>79.0</td>\n",
       "<td>1.0</td>\n",
       "<td>9.0</td></tr>\n",
       "<tr><td>max</td>\n",
       "<td>    3.1 KB</td>\n",
       "<td>79.0</td>\n",
       "<td>1.0</td>\n",
       "<td>9.0</td></tr>\n",
       "<tr><td>stddev</td>\n",
       "<td>      0  B</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>total</td>\n",
       "<td>    3.1 KB</td>\n",
       "<td>79.0</td>\n",
       "<td>1.0</td>\n",
       "<td>9.0</td></tr></table></div>"
      ],
      "text/plain": [
       "                   size    number_of_rows    number_of_chunks_per_column    number_of_chunks\n",
       "-----------------  ------  ----------------  -----------------------------  ------------------\n",
       "172.16.2.17:54321  3.1 KB  79                1                              9\n",
       "mean               3.1 KB  79                1                              9\n",
       "min                3.1 KB  79                1                              9\n",
       "max                3.1 KB  79                1                              9\n",
       "stddev             0  B    0                 0                              0\n",
       "total              3.1 KB  79                1                              9"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Column-by-Column Summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td></td>\n",
       "<td>Community Area Number</td>\n",
       "<td>COMMUNITY AREA NAME</td>\n",
       "<td>PERCENT OF HOUSING CROWDED</td>\n",
       "<td>PERCENT HOUSEHOLDS BELOW POVERTY</td>\n",
       "<td>PERCENT AGED 16  UNEMPLOYED</td>\n",
       "<td>PERCENT AGED 25  WITHOUT HIGH SCHOOL DIPLOMA</td>\n",
       "<td>PERCENT AGED UNDER 18 OR OVER 64</td>\n",
       "<td>PER CAPITA INCOME </td>\n",
       "<td>HARDSHIP INDEX</td></tr>\n",
       "<tr><td>type</td>\n",
       "<td>int</td>\n",
       "<td>string</td>\n",
       "<td>real</td>\n",
       "<td>real</td>\n",
       "<td>real</td>\n",
       "<td>real</td>\n",
       "<td>real</td>\n",
       "<td>int</td>\n",
       "<td>int</td></tr>\n",
       "<tr><td>mins</td>\n",
       "<td>1.0</td>\n",
       "<td>NaN</td>\n",
       "<td>0.3</td>\n",
       "<td>3.3</td>\n",
       "<td>4.7</td>\n",
       "<td>2.5</td>\n",
       "<td>13.5</td>\n",
       "<td>8201.0</td>\n",
       "<td>1.0</td></tr>\n",
       "<tr><td>maxs</td>\n",
       "<td>77.0</td>\n",
       "<td>NaN</td>\n",
       "<td>15.8</td>\n",
       "<td>56.5</td>\n",
       "<td>35.9</td>\n",
       "<td>54.8</td>\n",
       "<td>51.5</td>\n",
       "<td>88669.0</td>\n",
       "<td>98.0</td></tr>\n",
       "<tr><td>sigma</td>\n",
       "<td>22.3718573212</td>\n",
       "<td>NaN</td>\n",
       "<td>3.65898144135</td>\n",
       "<td>11.457230913</td>\n",
       "<td>7.49949670861</td>\n",
       "<td>11.7465143511</td>\n",
       "<td>7.28442108494</td>\n",
       "<td>15196.4055413</td>\n",
       "<td>28.6905556516</td></tr>\n",
       "<tr><td>zero_count</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td></tr>\n",
       "<tr><td>missing_count</td>\n",
       "<td>2</td>\n",
       "<td>0</td>\n",
       "<td>1</td>\n",
       "<td>1</td>\n",
       "<td>1</td>\n",
       "<td>1</td>\n",
       "<td>1</td>\n",
       "<td>1</td>\n",
       "<td>2</td></tr></table></div>"
      ],
      "text/plain": [
       "               Community Area Number    COMMUNITY AREA NAME    PERCENT OF HOUSING CROWDED    PERCENT HOUSEHOLDS BELOW POVERTY    PERCENT AGED 16  UNEMPLOYED    PERCENT AGED 25  WITHOUT HIGH SCHOOL DIPLOMA    PERCENT AGED UNDER 18 OR OVER 64    PER CAPITA INCOME     HARDSHIP INDEX\n",
       "-------------  -----------------------  ---------------------  ----------------------------  ----------------------------------  -----------------------------  ----------------------------------------------  ----------------------------------  --------------------  ----------------\n",
       "type           int                      string                 real                          real                                real                           real                                            real                                int                   int\n",
       "mins           1.0                      NaN                    0.3                           3.3                                 4.7                            2.5                                             13.5                                8201.0                1.0\n",
       "maxs           77.0                     NaN                    15.8                          56.5                                35.9                           54.8                                            51.5                                88669.0               98.0\n",
       "sigma          22.3718573212            NaN                    3.65898144135                 11.457230913                        7.49949670861                  11.7465143511                                   7.28442108494                       15196.4055413         28.6905556516\n",
       "zero_count     0                        0                      0                             0                                   0                              0                                               0                                   0                     0\n",
       "missing_count  2                        0                      1                             1                                   1                              1                                               1                                   1                     2"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Import and Parse crimes data\n",
      "\n",
      "Parse Progress: [##################################################] 100%\n",
      "Imported  /Users/anqi_fu/Documents/workspace/h2o-3/smalldata/chicago/chicagoCrimes10k.csv.zip . Parsed 9,999 rows and 22 cols\n",
      "Rows: 9,999 Cols: 22\n",
      "\n",
      "Chunk compression summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>chunk_type</td>\n",
       "<td>chunk_name</td>\n",
       "<td>count</td>\n",
       "<td>count_percentage</td>\n",
       "<td>size</td>\n",
       "<td>size_percentage</td></tr>\n",
       "<tr><td>C0L</td>\n",
       "<td>Constant Integers</td>\n",
       "<td>4</td>\n",
       "<td>4.5454545</td>\n",
       "<td>    320  B</td>\n",
       "<td>0.03695244</td></tr>\n",
       "<tr><td>C1</td>\n",
       "<td>1-Byte Integers</td>\n",
       "<td>32</td>\n",
       "<td>36.363636</td>\n",
       "<td>   80.2 KB</td>\n",
       "<td>9.488462</td></tr>\n",
       "<tr><td>C1N</td>\n",
       "<td>1-Byte Integers (w/o NAs)</td>\n",
       "<td>8</td>\n",
       "<td>9.090909</td>\n",
       "<td>   20.1 KB</td>\n",
       "<td>2.3721156</td></tr>\n",
       "<tr><td>C2</td>\n",
       "<td>2-Byte Integers</td>\n",
       "<td>16</td>\n",
       "<td>18.181818</td>\n",
       "<td>   79.2 KB</td>\n",
       "<td>9.362824</td></tr>\n",
       "<tr><td>C4</td>\n",
       "<td>4-Byte Integers</td>\n",
       "<td>12</td>\n",
       "<td>13.636364</td>\n",
       "<td>  118.0 KB</td>\n",
       "<td>13.950008</td></tr>\n",
       "<tr><td>CStr</td>\n",
       "<td>String</td>\n",
       "<td>8</td>\n",
       "<td>9.090909</td>\n",
       "<td>  391.1 KB</td>\n",
       "<td>46.252445</td></tr>\n",
       "<tr><td>C8D</td>\n",
       "<td>64-bit Reals</td>\n",
       "<td>8</td>\n",
       "<td>9.090909</td>\n",
       "<td>  156.8 KB</td>\n",
       "<td>18.537191</td></tr></table></div>"
      ],
      "text/plain": [
       "chunk_type    chunk_name                 count    count_percentage    size      size_percentage\n",
       "------------  -------------------------  -------  ------------------  --------  -----------------\n",
       "C0L           Constant Integers          4        4.54545             320  B    0.0369524\n",
       "C1            1-Byte Integers            32       36.3636             80.2 KB   9.48846\n",
       "C1N           1-Byte Integers (w/o NAs)  8        9.09091             20.1 KB   2.37212\n",
       "C2            2-Byte Integers            16       18.1818             79.2 KB   9.36282\n",
       "C4            4-Byte Integers            12       13.6364             118.0 KB  13.95\n",
       "CStr          String                     8        9.09091             391.1 KB  46.2524\n",
       "C8D           64-bit Reals               8        9.09091             156.8 KB  18.5372"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Frame distribution summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td></td>\n",
       "<td>size</td>\n",
       "<td>number_of_rows</td>\n",
       "<td>number_of_chunks_per_column</td>\n",
       "<td>number_of_chunks</td></tr>\n",
       "<tr><td>172.16.2.17:54321</td>\n",
       "<td>  845.7 KB</td>\n",
       "<td>9999.0</td>\n",
       "<td>4.0</td>\n",
       "<td>88.0</td></tr>\n",
       "<tr><td>mean</td>\n",
       "<td>  845.7 KB</td>\n",
       "<td>9999.0</td>\n",
       "<td>4.0</td>\n",
       "<td>88.0</td></tr>\n",
       "<tr><td>min</td>\n",
       "<td>  845.7 KB</td>\n",
       "<td>9999.0</td>\n",
       "<td>4.0</td>\n",
       "<td>88.0</td></tr>\n",
       "<tr><td>max</td>\n",
       "<td>  845.7 KB</td>\n",
       "<td>9999.0</td>\n",
       "<td>4.0</td>\n",
       "<td>88.0</td></tr>\n",
       "<tr><td>stddev</td>\n",
       "<td>      0  B</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>total</td>\n",
       "<td>  845.7 KB</td>\n",
       "<td>9999.0</td>\n",
       "<td>4.0</td>\n",
       "<td>88.0</td></tr></table></div>"
      ],
      "text/plain": [
       "                   size      number_of_rows    number_of_chunks_per_column    number_of_chunks\n",
       "-----------------  --------  ----------------  -----------------------------  ------------------\n",
       "172.16.2.17:54321  845.7 KB  9999              4                              88\n",
       "mean               845.7 KB  9999              4                              88\n",
       "min                845.7 KB  9999              4                              88\n",
       "max                845.7 KB  9999              4                              88\n",
       "stddev             0  B      0                 0                              0\n",
       "total              845.7 KB  9999              4                              88"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Column-by-Column Summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td></td>\n",
       "<td>ID</td>\n",
       "<td>Case Number</td>\n",
       "<td>Date</td>\n",
       "<td>Block</td>\n",
       "<td>IUCR</td>\n",
       "<td>Primary Type</td>\n",
       "<td>Description</td>\n",
       "<td>Location Description</td>\n",
       "<td>Arrest</td>\n",
       "<td>Domestic</td>\n",
       "<td>Beat</td>\n",
       "<td>District</td>\n",
       "<td>Ward</td>\n",
       "<td>Community Area</td>\n",
       "<td>FBI Code</td>\n",
       "<td>X Coordinate</td>\n",
       "<td>Y Coordinate</td>\n",
       "<td>Year</td>\n",
       "<td>Updated On</td>\n",
       "<td>Latitude</td>\n",
       "<td>Longitude</td>\n",
       "<td>Location</td></tr>\n",
       "<tr><td>type</td>\n",
       "<td>int</td>\n",
       "<td>string</td>\n",
       "<td>string</td>\n",
       "<td>enum</td>\n",
       "<td>int</td>\n",
       "<td>enum</td>\n",
       "<td>enum</td>\n",
       "<td>enum</td>\n",
       "<td>enum</td>\n",
       "<td>enum</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>enum</td>\n",
       "<td>real</td>\n",
       "<td>real</td>\n",
       "<td>enum</td></tr>\n",
       "<tr><td>mins</td>\n",
       "<td>21735.0</td>\n",
       "<td>NaN</td>\n",
       "<td>NaN</td>\n",
       "<td>0.0</td>\n",
       "<td>110.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>111.0</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td>\n",
       "<td>2.0</td>\n",
       "<td>1100317.0</td>\n",
       "<td>1814255.0</td>\n",
       "<td>2015.0</td>\n",
       "<td>0.0</td>\n",
       "<td>41.64507243</td>\n",
       "<td>-87.906463888</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>maxs</td>\n",
       "<td>9962898.0</td>\n",
       "<td>NaN</td>\n",
       "<td>NaN</td>\n",
       "<td>6517.0</td>\n",
       "<td>5131.0</td>\n",
       "<td>26.0</td>\n",
       "<td>198.0</td>\n",
       "<td>90.0</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td>\n",
       "<td>2535.0</td>\n",
       "<td>25.0</td>\n",
       "<td>50.0</td>\n",
       "<td>77.0</td>\n",
       "<td>26.0</td>\n",
       "<td>1205069.0</td>\n",
       "<td>1951533.0</td>\n",
       "<td>2015.0</td>\n",
       "<td>32.0</td>\n",
       "<td>42.022646183</td>\n",
       "<td>-87.524773286</td>\n",
       "<td>8603.0</td></tr>\n",
       "<tr><td>sigma</td>\n",
       "<td>396787.564221</td>\n",
       "<td>NaN</td>\n",
       "<td>NaN</td>\n",
       "<td>1915.88517194</td>\n",
       "<td>927.751435583</td>\n",
       "<td>9.16241735944</td>\n",
       "<td>60.1059382029</td>\n",
       "<td>25.5963972463</td>\n",
       "<td>0.455083515588</td>\n",
       "<td>0.35934414686</td>\n",
       "<td>695.76029875</td>\n",
       "<td>6.94547493301</td>\n",
       "<td>13.6495661144</td>\n",
       "<td>21.2748762223</td>\n",
       "<td>7.57423857911</td>\n",
       "<td>16496.4493681</td>\n",
       "<td>31274.0163199</td>\n",
       "<td>0.0</td>\n",
       "<td>10.0824464345</td>\n",
       "<td>0.0860186579359</td>\n",
       "<td>0.0600357970653</td>\n",
       "<td>2469.64729385</td></tr>\n",
       "<tr><td>zero_count</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>3</td>\n",
       "<td>0</td>\n",
       "<td>11</td>\n",
       "<td>933</td>\n",
       "<td>19</td>\n",
       "<td>7071</td>\n",
       "<td>8476</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>603</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>1</td></tr>\n",
       "<tr><td>missing_count</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>419</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>6</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>162</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>2557</td>\n",
       "<td>162</td>\n",
       "<td>162</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>162</td>\n",
       "<td>162</td>\n",
       "<td>162</td></tr></table></div>"
      ],
      "text/plain": [
       "               ID             Case Number    Date    Block          IUCR           Primary Type    Description    Location Description    Arrest          Domestic       Beat          District       Ward           Community Area    FBI Code       X Coordinate    Y Coordinate    Year    Updated On     Latitude         Longitude        Location\n",
       "-------------  -------------  -------------  ------  -------------  -------------  --------------  -------------  ----------------------  --------------  -------------  ------------  -------------  -------------  ----------------  -------------  --------------  --------------  ------  -------------  ---------------  ---------------  -------------\n",
       "type           int            string         string  enum           int            enum            enum           enum                    enum            enum           int           int            int            int               int            int             int             int     enum           real             real             enum\n",
       "mins           21735.0        NaN            NaN     0.0            110.0          0.0             0.0            0.0                     0.0             0.0            111.0         1.0            1.0            1.0               2.0            1100317.0       1814255.0       2015.0  0.0            41.64507243      -87.906463888    0.0\n",
       "maxs           9962898.0      NaN            NaN     6517.0         5131.0         26.0            198.0          90.0                    1.0             1.0            2535.0        25.0           50.0           77.0              26.0           1205069.0       1951533.0       2015.0  32.0           42.022646183     -87.524773286    8603.0\n",
       "sigma          396787.564221  NaN            NaN     1915.88517194  927.751435583  9.16241735944   60.1059382029  25.5963972463           0.455083515588  0.35934414686  695.76029875  6.94547493301  13.6495661144  21.2748762223     7.57423857911  16496.4493681   31274.0163199   0.0     10.0824464345  0.0860186579359  0.0600357970653  2469.64729385\n",
       "zero_count     0              0              0       3              0              11              933            19                      7071            8476           0             0              0              0                 0              0               0               0       603            0                0                1\n",
       "missing_count  0              0              0       0              419            0               0              6                       0               0              0             162            0              0                 2557           162             162             0       0              162              162              162"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Locate the Chicago sample files; the \"Imported ...\" lines in the output show\n",
    "# the absolute paths these resolve to on the machine running the notebook.\n",
    "weather_path = h2o.locate(\"smalldata/chicago/chicagoAllWeather.csv\")\n",
    "census_path = h2o.locate(\"smalldata/chicago/chicagoCensus.csv\")\n",
    "crimes_path = h2o.locate(\"smalldata/chicago/chicagoCrimes10k.csv.zip\")\n",
    "\n",
    "# print(...) with a single string argument prints the same text on Python 2\n",
    "# and Python 3; the bare print statement is a SyntaxError on Python 3.\n",
    "print(\"Import and Parse weather data\")\n",
    "weather = h2o.import_frame(path=weather_path)\n",
    "# NOTE(review): the value returned by drop() is discarded, and the describe()\n",
    "# output below still lists \"date\" (7 cols), so this call leaves `weather`\n",
    "# unchanged. Assign the result if the column should really be removed --\n",
    "# TODO confirm later cells do not rely on the current column positions first.\n",
    "weather.drop(\"date\")\n",
    "weather.describe()\n",
    "\n",
    "print(\"Import and Parse census data\")\n",
    "census = h2o.import_frame(path=census_path)\n",
    "census.describe()\n",
    "\n",
    "print(\"Import and Parse crimes data\")\n",
    "crimes = h2o.import_frame(path=crimes_path)\n",
    "crimes.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Rows: 9,999 Cols: 27\n",
      "\n",
      "Chunk compression summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>chunk_type</td>\n",
       "<td>chunk_name</td>\n",
       "<td>count</td>\n",
       "<td>count_percentage</td>\n",
       "<td>size</td>\n",
       "<td>size_percentage</td></tr>\n",
       "<tr><td>C0L</td>\n",
       "<td>Constant Integers</td>\n",
       "<td>9</td>\n",
       "<td>8.333334</td>\n",
       "<td>    720  B</td>\n",
       "<td>0.10067465</td></tr>\n",
       "<tr><td>C1</td>\n",
       "<td>1-Byte Integers</td>\n",
       "<td>32</td>\n",
       "<td>29.62963</td>\n",
       "<td>   80.2 KB</td>\n",
       "<td>11.489216</td></tr>\n",
       "<tr><td>C1N</td>\n",
       "<td>1-Byte Integers (w/o NAs)</td>\n",
       "<td>23</td>\n",
       "<td>21.296297</td>\n",
       "<td>   57.9 KB</td>\n",
       "<td>8.29671</td></tr>\n",
       "<tr><td>C2</td>\n",
       "<td>2-Byte Integers</td>\n",
       "<td>16</td>\n",
       "<td>14.814815</td>\n",
       "<td>   79.2 KB</td>\n",
       "<td>11.337085</td></tr>\n",
       "<tr><td>C4</td>\n",
       "<td>4-Byte Integers</td>\n",
       "<td>12</td>\n",
       "<td>11.111112</td>\n",
       "<td>  118.0 KB</td>\n",
       "<td>16.891531</td></tr>\n",
       "<tr><td>C8</td>\n",
       "<td>64-bit Integers</td>\n",
       "<td>4</td>\n",
       "<td>3.7037036</td>\n",
       "<td>   78.4 KB</td>\n",
       "<td>11.222987</td></tr>\n",
       "<tr><td>CStr</td>\n",
       "<td>String</td>\n",
       "<td>4</td>\n",
       "<td>3.7037036</td>\n",
       "<td>  127.2 KB</td>\n",
       "<td>18.215822</td></tr>\n",
       "<tr><td>C8D</td>\n",
       "<td>64-bit Reals</td>\n",
       "<td>8</td>\n",
       "<td>7.4074073</td>\n",
       "<td>  156.8 KB</td>\n",
       "<td>22.445974</td></tr></table></div>"
      ],
      "text/plain": [
       "chunk_type    chunk_name                 count    count_percentage    size      size_percentage\n",
       "------------  -------------------------  -------  ------------------  --------  -----------------\n",
       "C0L           Constant Integers          9        8.33333             720  B    0.100675\n",
       "C1            1-Byte Integers            32       29.6296             80.2 KB   11.4892\n",
       "C1N           1-Byte Integers (w/o NAs)  23       21.2963             57.9 KB   8.29671\n",
       "C2            2-Byte Integers            16       14.8148             79.2 KB   11.3371\n",
       "C4            4-Byte Integers            12       11.1111             118.0 KB  16.8915\n",
       "C8            64-bit Integers            4        3.7037              78.4 KB   11.223\n",
       "CStr          String                     4        3.7037              127.2 KB  18.2158\n",
       "C8D           64-bit Reals               8        7.40741             156.8 KB  22.446"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Frame distribution summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td></td>\n",
       "<td>size</td>\n",
       "<td>number_of_rows</td>\n",
       "<td>number_of_chunks_per_column</td>\n",
       "<td>number_of_chunks</td></tr>\n",
       "<tr><td>172.16.2.17:54321</td>\n",
       "<td>  698.4 KB</td>\n",
       "<td>9999.0</td>\n",
       "<td>4.0</td>\n",
       "<td>108.0</td></tr>\n",
       "<tr><td>mean</td>\n",
       "<td>  698.4 KB</td>\n",
       "<td>9999.0</td>\n",
       "<td>4.0</td>\n",
       "<td>108.0</td></tr>\n",
       "<tr><td>min</td>\n",
       "<td>  698.4 KB</td>\n",
       "<td>9999.0</td>\n",
       "<td>4.0</td>\n",
       "<td>108.0</td></tr>\n",
       "<tr><td>max</td>\n",
       "<td>  698.4 KB</td>\n",
       "<td>9999.0</td>\n",
       "<td>4.0</td>\n",
       "<td>108.0</td></tr>\n",
       "<tr><td>stddev</td>\n",
       "<td>      0  B</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>total</td>\n",
       "<td>  698.4 KB</td>\n",
       "<td>9999.0</td>\n",
       "<td>4.0</td>\n",
       "<td>108.0</td></tr></table></div>"
      ],
      "text/plain": [
       "                   size      number_of_rows    number_of_chunks_per_column    number_of_chunks\n",
       "-----------------  --------  ----------------  -----------------------------  ------------------\n",
       "172.16.2.17:54321  698.4 KB  9999              4                              108\n",
       "mean               698.4 KB  9999              4                              108\n",
       "min                698.4 KB  9999              4                              108\n",
       "max                698.4 KB  9999              4                              108\n",
       "stddev             0  B      0                 0                              0\n",
       "total              698.4 KB  9999              4                              108"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Column-by-Column Summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td></td>\n",
       "<td>ID</td>\n",
       "<td>Case Number</td>\n",
       "<td>Date</td>\n",
       "<td>Block</td>\n",
       "<td>IUCR</td>\n",
       "<td>Primary Type</td>\n",
       "<td>Description</td>\n",
       "<td>Location Description</td>\n",
       "<td>Arrest</td>\n",
       "<td>Domestic</td>\n",
       "<td>Beat</td>\n",
       "<td>District</td>\n",
       "<td>Ward</td>\n",
       "<td>Community Area</td>\n",
       "<td>FBI Code</td>\n",
       "<td>X Coordinate</td>\n",
       "<td>Y Coordinate</td>\n",
       "<td>Year</td>\n",
       "<td>Updated On</td>\n",
       "<td>Latitude</td>\n",
       "<td>Longitude</td>\n",
       "<td>Location</td>\n",
       "<td>Day</td>\n",
       "<td>Month</td>\n",
       "<td>WeekNum</td>\n",
       "<td>WeekDay</td>\n",
       "<td>HourOfDay</td></tr>\n",
       "<tr><td>type</td>\n",
       "<td>int</td>\n",
       "<td>string</td>\n",
       "<td>int</td>\n",
       "<td>enum</td>\n",
       "<td>int</td>\n",
       "<td>enum</td>\n",
       "<td>enum</td>\n",
       "<td>enum</td>\n",
       "<td>enum</td>\n",
       "<td>enum</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>enum</td>\n",
       "<td>real</td>\n",
       "<td>real</td>\n",
       "<td>enum</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>enum</td>\n",
       "<td>int</td></tr>\n",
       "<tr><td>mins</td>\n",
       "<td>21735.0</td>\n",
       "<td>NaN</td>\n",
       "<td>1.42203063e+12</td>\n",
       "<td>0.0</td>\n",
       "<td>110.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>111.0</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td>\n",
       "<td>2.0</td>\n",
       "<td>1100317.0</td>\n",
       "<td>1814255.0</td>\n",
       "<td>3915.0</td>\n",
       "<td>0.0</td>\n",
       "<td>41.64507243</td>\n",
       "<td>-87.906463888</td>\n",
       "<td>0.0</td>\n",
       "<td>1.0</td>\n",
       "<td>2.0</td>\n",
       "<td>4.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>maxs</td>\n",
       "<td>9962898.0</td>\n",
       "<td>NaN</td>\n",
       "<td>1.42346782e+12</td>\n",
       "<td>6517.0</td>\n",
       "<td>5131.0</td>\n",
       "<td>26.0</td>\n",
       "<td>198.0</td>\n",
       "<td>90.0</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td>\n",
       "<td>2535.0</td>\n",
       "<td>25.0</td>\n",
       "<td>50.0</td>\n",
       "<td>77.0</td>\n",
       "<td>26.0</td>\n",
       "<td>1205069.0</td>\n",
       "<td>1951533.0</td>\n",
       "<td>3915.0</td>\n",
       "<td>32.0</td>\n",
       "<td>42.022646183</td>\n",
       "<td>-87.524773286</td>\n",
       "<td>8603.0</td>\n",
       "<td>31.0</td>\n",
       "<td>3.0</td>\n",
       "<td>6.0</td>\n",
       "<td>6.0</td>\n",
       "<td>23.0</td></tr>\n",
       "<tr><td>sigma</td>\n",
       "<td>396787.564221</td>\n",
       "<td>NaN</td>\n",
       "<td>433879245.188</td>\n",
       "<td>1915.88517194</td>\n",
       "<td>927.751435583</td>\n",
       "<td>9.16241735944</td>\n",
       "<td>60.1059382029</td>\n",
       "<td>25.5963972463</td>\n",
       "<td>0.455083515588</td>\n",
       "<td>0.35934414686</td>\n",
       "<td>695.76029875</td>\n",
       "<td>6.94547493301</td>\n",
       "<td>13.6495661144</td>\n",
       "<td>21.2748762223</td>\n",
       "<td>7.57423857911</td>\n",
       "<td>16496.4493681</td>\n",
       "<td>31274.0163199</td>\n",
       "<td>0.0</td>\n",
       "<td>10.0824464345</td>\n",
       "<td>0.0860186579359</td>\n",
       "<td>0.0600357970653</td>\n",
       "<td>2469.64729385</td>\n",
       "<td>11.1801043358</td>\n",
       "<td>0.493492406787</td>\n",
       "<td>0.738929830409</td>\n",
       "<td>1.93284056432</td>\n",
       "<td>6.47321735807</td></tr>\n",
       "<tr><td>zero_count</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>3</td>\n",
       "<td>0</td>\n",
       "<td>11</td>\n",
       "<td>933</td>\n",
       "<td>19</td>\n",
       "<td>7071</td>\n",
       "<td>8476</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>603</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>1</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>1038</td>\n",
       "<td>374</td></tr>\n",
       "<tr><td>missing_count</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>419</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>6</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>162</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>2557</td>\n",
       "<td>162</td>\n",
       "<td>162</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>162</td>\n",
       "<td>162</td>\n",
       "<td>162</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td></tr></table></div>"
      ],
      "text/plain": [
       "               ID             Case Number    Date            Block          IUCR           Primary Type    Description    Location Description    Arrest          Domestic       Beat          District       Ward           Community Area    FBI Code       X Coordinate    Y Coordinate    Year    Updated On     Latitude         Longitude        Location       Day            Month           WeekNum         WeekDay        HourOfDay\n",
       "-------------  -------------  -------------  --------------  -------------  -------------  --------------  -------------  ----------------------  --------------  -------------  ------------  -------------  -------------  ----------------  -------------  --------------  --------------  ------  -------------  ---------------  ---------------  -------------  -------------  --------------  --------------  -------------  -------------\n",
       "type           int            string         int             enum           int            enum            enum           enum                    enum            enum           int           int            int            int               int            int             int             int     enum           real             real             enum           int            int             int             enum           int\n",
       "mins           21735.0        NaN            1.42203063e+12  0.0            110.0          0.0             0.0            0.0                     0.0             0.0            111.0         1.0            1.0            1.0               2.0            1100317.0       1814255.0       3915.0  0.0            41.64507243      -87.906463888    0.0            1.0            2.0             4.0             0.0            0.0\n",
       "maxs           9962898.0      NaN            1.42346782e+12  6517.0         5131.0         26.0            198.0          90.0                    1.0             1.0            2535.0        25.0           50.0           77.0              26.0           1205069.0       1951533.0       3915.0  32.0           42.022646183     -87.524773286    8603.0         31.0           3.0             6.0             6.0            23.0\n",
       "sigma          396787.564221  NaN            433879245.188   1915.88517194  927.751435583  9.16241735944   60.1059382029  25.5963972463           0.455083515588  0.35934414686  695.76029875  6.94547493301  13.6495661144  21.2748762223     7.57423857911  16496.4493681   31274.0163199   0.0     10.0824464345  0.0860186579359  0.0600357970653  2469.64729385  11.1801043358  0.493492406787  0.738929830409  1.93284056432  6.47321735807\n",
       "zero_count     0              0              0               3              0              11              933            19                      7071            8476           0             0              0              0                 0              0               0               0       603            0                0                1              0              0               0               1038           374\n",
       "missing_count  0              0              0               0              419            0               0              6                       0               0              0             162            0              0                 2557           162             162             0       0              162              162              162            0              0               0               0              0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Rows: 9,999 Cols: 28\n",
      "\n",
      "Chunk compression summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>chunk_type</td>\n",
       "<td>chunk_name</td>\n",
       "<td>count</td>\n",
       "<td>count_percentage</td>\n",
       "<td>size</td>\n",
       "<td>size_percentage</td></tr>\n",
       "<tr><td>C0L</td>\n",
       "<td>Constant Integers</td>\n",
       "<td>13</td>\n",
       "<td>11.607142</td>\n",
       "<td>    1.0 KB</td>\n",
       "<td>0.16332634</td></tr>\n",
       "<tr><td>CBS</td>\n",
       "<td>Bits</td>\n",
       "<td>4</td>\n",
       "<td>3.5714288</td>\n",
       "<td>    1.5 KB</td>\n",
       "<td>0.2404352</td></tr>\n",
       "<tr><td>C1</td>\n",
       "<td>1-Byte Integers</td>\n",
       "<td>32</td>\n",
       "<td>28.57143</td>\n",
       "<td>   80.2 KB</td>\n",
       "<td>12.9040365</td></tr>\n",
       "<tr><td>C1N</td>\n",
       "<td>1-Byte Integers (w/o NAs)</td>\n",
       "<td>23</td>\n",
       "<td>20.535715</td>\n",
       "<td>   57.9 KB</td>\n",
       "<td>9.318395</td></tr>\n",
       "<tr><td>C2</td>\n",
       "<td>2-Byte Integers</td>\n",
       "<td>16</td>\n",
       "<td>14.285715</td>\n",
       "<td>   79.2 KB</td>\n",
       "<td>12.733171</td></tr>\n",
       "<tr><td>C4</td>\n",
       "<td>4-Byte Integers</td>\n",
       "<td>12</td>\n",
       "<td>10.714286</td>\n",
       "<td>  118.0 KB</td>\n",
       "<td>18.97161</td></tr>\n",
       "<tr><td>CStr</td>\n",
       "<td>String</td>\n",
       "<td>4</td>\n",
       "<td>3.5714288</td>\n",
       "<td>  127.2 KB</td>\n",
       "<td>20.458979</td></tr>\n",
       "<tr><td>C8D</td>\n",
       "<td>64-bit Reals</td>\n",
       "<td>8</td>\n",
       "<td>7.1428576</td>\n",
       "<td>  156.8 KB</td>\n",
       "<td>25.210047</td></tr></table></div>"
      ],
      "text/plain": [
       "chunk_type    chunk_name                 count    count_percentage    size      size_percentage\n",
       "------------  -------------------------  -------  ------------------  --------  -----------------\n",
       "C0L           Constant Integers          13       11.6071             1.0 KB    0.163326\n",
       "CBS           Bits                       4        3.57143             1.5 KB    0.240435\n",
       "C1            1-Byte Integers            32       28.5714             80.2 KB   12.904\n",
       "C1N           1-Byte Integers (w/o NAs)  23       20.5357             57.9 KB   9.3184\n",
       "C2            2-Byte Integers            16       14.2857             79.2 KB   12.7332\n",
       "C4            4-Byte Integers            12       10.7143             118.0 KB  18.9716\n",
       "CStr          String                     4        3.57143             127.2 KB  20.459\n",
       "C8D           64-bit Reals               8        7.14286             156.8 KB  25.21"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Frame distribution summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td></td>\n",
       "<td>size</td>\n",
       "<td>number_of_rows</td>\n",
       "<td>number_of_chunks_per_column</td>\n",
       "<td>number_of_chunks</td></tr>\n",
       "<tr><td>172.16.2.17:54321</td>\n",
       "<td>  621.8 KB</td>\n",
       "<td>9999.0</td>\n",
       "<td>4.0</td>\n",
       "<td>112.0</td></tr>\n",
       "<tr><td>mean</td>\n",
       "<td>  621.8 KB</td>\n",
       "<td>9999.0</td>\n",
       "<td>4.0</td>\n",
       "<td>112.0</td></tr>\n",
       "<tr><td>min</td>\n",
       "<td>  621.8 KB</td>\n",
       "<td>9999.0</td>\n",
       "<td>4.0</td>\n",
       "<td>112.0</td></tr>\n",
       "<tr><td>max</td>\n",
       "<td>  621.8 KB</td>\n",
       "<td>9999.0</td>\n",
       "<td>4.0</td>\n",
       "<td>112.0</td></tr>\n",
       "<tr><td>stddev</td>\n",
       "<td>      0  B</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>total</td>\n",
       "<td>  621.8 KB</td>\n",
       "<td>9999.0</td>\n",
       "<td>4.0</td>\n",
       "<td>112.0</td></tr></table></div>"
      ],
      "text/plain": [
       "                   size      number_of_rows    number_of_chunks_per_column    number_of_chunks\n",
       "-----------------  --------  ----------------  -----------------------------  ------------------\n",
       "172.16.2.17:54321  621.8 KB  9999              4                              112\n",
       "mean               621.8 KB  9999              4                              112\n",
       "min                621.8 KB  9999              4                              112\n",
       "max                621.8 KB  9999              4                              112\n",
       "stddev             0  B      0                 0                              0\n",
       "total              621.8 KB  9999              4                              112"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Column-by-Column Summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td></td>\n",
       "<td>ID</td>\n",
       "<td>Case Number</td>\n",
       "<td>Block</td>\n",
       "<td>IUCR</td>\n",
       "<td>Primary Type</td>\n",
       "<td>Description</td>\n",
       "<td>Location Description</td>\n",
       "<td>Arrest</td>\n",
       "<td>Domestic</td>\n",
       "<td>Beat</td>\n",
       "<td>District</td>\n",
       "<td>Ward</td>\n",
       "<td>Community Area</td>\n",
       "<td>FBI Code</td>\n",
       "<td>X Coordinate</td>\n",
       "<td>Y Coordinate</td>\n",
       "<td>Year</td>\n",
       "<td>Updated On</td>\n",
       "<td>Latitude</td>\n",
       "<td>Longitude</td>\n",
       "<td>Location</td>\n",
       "<td>Day</td>\n",
       "<td>Month</td>\n",
       "<td>WeekNum</td>\n",
       "<td>WeekDay</td>\n",
       "<td>HourOfDay</td>\n",
       "<td>Weekend</td>\n",
       "<td>Season</td></tr>\n",
       "<tr><td>type</td>\n",
       "<td>int</td>\n",
       "<td>string</td>\n",
       "<td>enum</td>\n",
       "<td>int</td>\n",
       "<td>enum</td>\n",
       "<td>enum</td>\n",
       "<td>enum</td>\n",
       "<td>enum</td>\n",
       "<td>enum</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>enum</td>\n",
       "<td>real</td>\n",
       "<td>real</td>\n",
       "<td>enum</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>enum</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>enum</td></tr>\n",
       "<tr><td>mins</td>\n",
       "<td>21735.0</td>\n",
       "<td>NaN</td>\n",
       "<td>0.0</td>\n",
       "<td>110.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>111.0</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td>\n",
       "<td>2.0</td>\n",
       "<td>1100317.0</td>\n",
       "<td>1814255.0</td>\n",
       "<td>3915.0</td>\n",
       "<td>0.0</td>\n",
       "<td>41.64507243</td>\n",
       "<td>-87.906463888</td>\n",
       "<td>0.0</td>\n",
       "<td>1.0</td>\n",
       "<td>2.0</td>\n",
       "<td>4.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>maxs</td>\n",
       "<td>9962898.0</td>\n",
       "<td>NaN</td>\n",
       "<td>6517.0</td>\n",
       "<td>5131.0</td>\n",
       "<td>26.0</td>\n",
       "<td>198.0</td>\n",
       "<td>90.0</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td>\n",
       "<td>2535.0</td>\n",
       "<td>25.0</td>\n",
       "<td>50.0</td>\n",
       "<td>77.0</td>\n",
       "<td>26.0</td>\n",
       "<td>1205069.0</td>\n",
       "<td>1951533.0</td>\n",
       "<td>3915.0</td>\n",
       "<td>32.0</td>\n",
       "<td>42.022646183</td>\n",
       "<td>-87.524773286</td>\n",
       "<td>8603.0</td>\n",
       "<td>31.0</td>\n",
       "<td>3.0</td>\n",
       "<td>6.0</td>\n",
       "<td>6.0</td>\n",
       "<td>23.0</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td></tr>\n",
       "<tr><td>sigma</td>\n",
       "<td>396787.564221</td>\n",
       "<td>NaN</td>\n",
       "<td>1915.88517194</td>\n",
       "<td>927.751435583</td>\n",
       "<td>9.16241735944</td>\n",
       "<td>60.1059382029</td>\n",
       "<td>25.5963972463</td>\n",
       "<td>0.455083515588</td>\n",
       "<td>0.35934414686</td>\n",
       "<td>695.76029875</td>\n",
       "<td>6.94547493301</td>\n",
       "<td>13.6495661144</td>\n",
       "<td>21.2748762223</td>\n",
       "<td>7.57423857911</td>\n",
       "<td>16496.4493681</td>\n",
       "<td>31274.0163199</td>\n",
       "<td>0.0</td>\n",
       "<td>10.0824464345</td>\n",
       "<td>0.0860186579359</td>\n",
       "<td>0.0600357970653</td>\n",
       "<td>2469.64729385</td>\n",
       "<td>11.1801043358</td>\n",
       "<td>0.493492406787</td>\n",
       "<td>0.738929830409</td>\n",
       "<td>1.93284056432</td>\n",
       "<td>6.47321735807</td>\n",
       "<td>0.365802434041</td>\n",
       "<td>0.493492406787</td></tr>\n",
       "<tr><td>zero_count</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>3</td>\n",
       "<td>0</td>\n",
       "<td>11</td>\n",
       "<td>933</td>\n",
       "<td>19</td>\n",
       "<td>7071</td>\n",
       "<td>8476</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>603</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>1</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>1038</td>\n",
       "<td>374</td>\n",
       "<td>8408</td>\n",
       "<td>5805</td></tr>\n",
       "<tr><td>missing_count</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>419</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>6</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>162</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>2557</td>\n",
       "<td>162</td>\n",
       "<td>162</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>162</td>\n",
       "<td>162</td>\n",
       "<td>162</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td></tr></table></div>"
      ],
      "text/plain": [
       "               ID             Case Number    Block          IUCR           Primary Type    Description    Location Description    Arrest          Domestic       Beat          District       Ward           Community Area    FBI Code       X Coordinate    Y Coordinate    Year    Updated On     Latitude         Longitude        Location       Day            Month           WeekNum         WeekDay        HourOfDay      Weekend         Season\n",
       "-------------  -------------  -------------  -------------  -------------  --------------  -------------  ----------------------  --------------  -------------  ------------  -------------  -------------  ----------------  -------------  --------------  --------------  ------  -------------  ---------------  ---------------  -------------  -------------  --------------  --------------  -------------  -------------  --------------  --------------\n",
       "type           int            string         enum           int            enum            enum           enum                    enum            enum           int           int            int            int               int            int             int             int     enum           real             real             enum           int            int             int             enum           int            int             enum\n",
       "mins           21735.0        NaN            0.0            110.0          0.0             0.0            0.0                     0.0             0.0            111.0         1.0            1.0            1.0               2.0            1100317.0       1814255.0       3915.0  0.0            41.64507243      -87.906463888    0.0            1.0            2.0             4.0             0.0            0.0            0.0             0.0\n",
       "maxs           9962898.0      NaN            6517.0         5131.0         26.0            198.0          90.0                    1.0             1.0            2535.0        25.0           50.0           77.0              26.0           1205069.0       1951533.0       3915.0  32.0           42.022646183     -87.524773286    8603.0         31.0           3.0             6.0             6.0            23.0           1.0             1.0\n",
       "sigma          396787.564221  NaN            1915.88517194  927.751435583  9.16241735944   60.1059382029  25.5963972463           0.455083515588  0.35934414686  695.76029875  6.94547493301  13.6495661144  21.2748762223     7.57423857911  16496.4493681   31274.0163199   0.0     10.0824464345  0.0860186579359  0.0600357970653  2469.64729385  11.1801043358  0.493492406787  0.738929830409  1.93284056432  6.47321735807  0.365802434041  0.493492406787\n",
       "zero_count     0              0              3              0              11              933            19                      7071            8476           0             0              0              0                 0              0               0               0       603            0                0                1              0              0               0               1038           374            8408            5805\n",
       "missing_count  0              0              0              419            0               0              6                       0               0              0             162            0              0                 2557           162             162             0       0              162              162              162            0              0               0               0              0              0               0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "def refine_date_col(data, col, pattern):\n",
    "    data[col]         = data[col].as_date(pattern)\n",
    "    data[\"Day\"]       = data[col].day()\n",
    "    data[\"Month\"]     = data[col].month() + 1     # Since H2O indexes from 0\n",
    "    data[\"Year\"]      = data[col].year() + 1900   # Start of epoch is 1900\n",
    "    data[\"WeekNum\"]   = data[col].week()\n",
    "    data[\"WeekDay\"]   = data[col].dayOfWeek()\n",
    "    data[\"HourOfDay\"] = data[col].hour()\n",
    "    \n",
    "    data.describe()  # HACK: Force evaluation before ifelse and cut. See PUBDEV-1425.\n",
    "    \n",
    "    # Create weekend and season cols\n",
    "    # Spring = Mar, Apr, May. Summer = Jun, Jul, Aug. Autumn = Sep, Oct. Winter = Nov, Dec, Jan, Feb.\n",
    "    # data[\"Weekend\"]   = [1 if x in (\"Sun\", \"Sat\") else 0 for x in data[\"WeekDay\"]]\n",
    "    data[\"Weekend\"] = h2o.ifelse(data[\"WeekDay\"] == \"Sun\" or data[\"WeekDay\"] == \"Sat\", 1, 0)[0]\n",
    "    data[\"Season\"] = data[\"Month\"].cut([0, 2, 5, 7, 10, 12], [\"Winter\", \"Spring\", \"Summer\", \"Autumn\", \"Winter\"])\n",
    "    \n",
    "refine_date_col(crimes, \"Date\", \"%m/%d/%Y %I:%M:%S %p\")\n",
    "crimes = crimes.drop(\"Date\")\n",
    "crimes.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "EnvironmentError",
     "evalue": "h2o-py got an unexpected HTTP status code:\n 412 Precondition Failed (method = POST; url = http://localhost:54321/99/Rapids). \ndetailed error messages: water.DException$DistributedException: from /172.16.2.17:54321; by class water.rapids.ASTMerge$MergeSet$MakeHash; class water.exceptions.H2OIllegalArgumentException: unimplemented",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mEnvironmentError\u001b[0m                          Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-5-e946a6af6204>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mweather\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"day\"\u001b[0m\u001b[0;34m]\u001b[0m  \u001b[0;34m.\u001b[0m\u001b[0m_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Day\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0mweather\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"year\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0m_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Year\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mcrimes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmerge\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcensus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mallLeft\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mallRite\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      7\u001b[0m \u001b[0mcrimes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmerge\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mweather\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mallLeft\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mallRite\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/frame.pyc\u001b[0m in \u001b[0;36mmerge\u001b[0;34m(self, other, allLeft, allRite)\u001b[0m\n\u001b[1;32m   1022\u001b[0m     \u001b[0mexpr2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"(, \"\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mexpr\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\" (del %\"\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mlkey\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\" #0) (del %\"\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0mrkey\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m\" #0) )\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1023\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1024\u001b[0;31m     \u001b[0mh2o\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrapids\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpr2\u001b[0m\u001b[0;34m)\u001b[0m       \u001b[0;31m# merge in h2o\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1025\u001b[0m     \u001b[0;31m# Make backing H2OVecs for the remote h2o vecs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1026\u001b[0m     \u001b[0mj\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mh2o\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mframe\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtmp_key\u001b[0m\u001b[0;34m)\u001b[0m  \u001b[0;31m# Fetch the frame as JSON\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/h2o.pyc\u001b[0m in \u001b[0;36mrapids\u001b[0;34m(expr)\u001b[0m\n\u001b[1;32m    487\u001b[0m   \u001b[0;34m:\u001b[0m\u001b[0;32mreturn\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mThe\u001b[0m \u001b[0mJSON\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mRapids\u001b[0m \u001b[0mexecution\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    488\u001b[0m   \"\"\"\n\u001b[0;32m--> 489\u001b[0;31m   \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mH2OConnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost_json\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Rapids\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mast\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murllib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquote\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_rest_version\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m99\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    490\u001b[0m   \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'error'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    491\u001b[0m     \u001b[0;32mraise\u001b[0m \u001b[0mEnvironmentError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"rapids expression not evaluated: {0}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'error'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc\u001b[0m in \u001b[0;36mpost_json\u001b[0;34m(url_suffix, file_upload_info, **kwargs)\u001b[0m\n\u001b[1;32m    360\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0m__H2OCONN__\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    361\u001b[0m       \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"No h2o connection. Did you run `h2o.init()` ?\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 362\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0m__H2OCONN__\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_rest_json\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl_suffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"POST\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile_upload_info\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    363\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    364\u001b[0m   \u001b[0;32mdef\u001b[0m \u001b[0m_rest_json\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl_suffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile_upload_info\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc\u001b[0m in \u001b[0;36m_rest_json\u001b[0;34m(self, url_suffix, method, file_upload_info, **kwargs)\u001b[0m\n\u001b[1;32m    363\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    364\u001b[0m   \u001b[0;32mdef\u001b[0m \u001b[0m_rest_json\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl_suffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile_upload_info\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 365\u001b[0;31m     \u001b[0mraw_txt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_raw_rest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl_suffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile_upload_info\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    366\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_process_tables\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mraw_txt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    367\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc\u001b[0m in \u001b[0;36m_do_raw_rest\u001b[0;34m(self, url_suffix, method, file_upload_info, **kwargs)\u001b[0m\n\u001b[1;32m    429\u001b[0m       raise EnvironmentError((\"h2o-py got an unexpected HTTP status code:\\n {} {} (method = {}; url = {}). \\n\"+ \\\n\u001b[1;32m    430\u001b[0m                               \"detailed error messages: {}\")\n\u001b[0;32m--> 431\u001b[0;31m                              .format(http_result.status_code,http_result.reason,method,url,detailed_error_msgs))\n\u001b[0m\u001b[1;32m    432\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    433\u001b[0m     \u001b[0;31m# TODO: is.logging? -> write to logs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mEnvironmentError\u001b[0m: h2o-py got an unexpected HTTP status code:\n 412 Precondition Failed (method = POST; url = http://localhost:54321/99/Rapids). \ndetailed error messages: water.DException$DistributedException: from /172.16.2.17:54321; by class water.rapids.ASTMerge$MergeSet$MakeHash; class water.exceptions.H2OIllegalArgumentException: unimplemented"
     ]
    }
   ],
   "source": [
    "# Merge crimes data with census and weather\n",
    "# Rename the join-key columns on census/weather before merging.\n",
    "# NOTE(review): presumably the new names must match the key columns in `crimes` -- confirm.\n",
    "census[\"Community Area Number\"]._name = \"Community Area\"\n",
    "weather[\"month\"]._name = \"Month\"\n",
    "weather[\"day\"]  ._name = \"Day\"\n",
    "weather[\"year\"] ._name = \"Year\"\n",
    "\n",
    "# Keep each merge result instead of discarding it, and chain the two merges so the\n",
    "# final frame carries both the census and the weather columns. The combined frame is\n",
    "# stored in `data`, the name the train/test split cell below reads.\n",
    "# NOTE(review): assumes merge() returns the merged frame rather than updating `crimes` -- confirm.\n",
    "crimes_census = crimes.merge(census, allLeft=True, allRite=False)\n",
    "data = crimes_census.merge(weather, allLeft=True, allRite=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'data' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-12-347776b381b3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# Create test/train split\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdata_split\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mh2o\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit_frame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mratios\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;36m0.8\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;36m0.2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0mtrain\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_split\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mtest\u001b[0m  \u001b[0;34m=\u001b[0m \u001b[0mdata_split\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'data' is not defined"
     ]
    }
   ],
   "source": [
    "# Create test/train split\n",
    "# `data` is expected to be the merged crimes/census/weather frame (see the merge cell above).\n",
    "# Python lists are 0-indexed: the 80% split is element 0 and the 20% split is element 1.\n",
    "# NOTE(review): assumes split_frame returns a list with one frame per ratio -- confirm.\n",
    "data_split = h2o.split_frame(data, ratios = [0.8,0.2])\n",
    "train = data_split[0]\n",
    "test  = data_split[1]\n",
    "\n",
    "# Simple GBM - Predict Arrest\n",
    "# Predictors are every column except the response; the test split is used for validation.\n",
    "data_gbm = h2o.gbm(x              =train.drop(\"Arrest\"),\n",
    "                   y              =train     [\"Arrest\"],\n",
    "                   validation_x   =test .drop(\"Arrest\"),\n",
    "                   validation_y   =test      [\"Arrest\"],\n",
    "                   ntrees         =10,\n",
    "                   max_depth      =6,\n",
    "                   distribution   =\"bernoulli\")\n",
    "\n",
    "# Simple Deep Learning\n",
    "# Same predictors/response/validation setup as the GBM above.\n",
    "data_dl = h2o.deeplearning(x                   =train.drop(\"Arrest\"),\n",
    "                           y                   =train     [\"Arrest\"],\n",
    "                           validation_x        =test .drop(\"Arrest\"),\n",
    "                           validation_y        =test      [\"Arrest\"],\n",
    "                           variable_importances=True,\n",
    "                           loss                =\"Automatic\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'data_gbm' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-2-f7c2ab3a3e26>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;31m# GBM performance on train/test data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mtrain_auc_gbm\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_gbm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_performance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mauc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0mtest_auc_gbm\u001b[0m  \u001b[0;34m=\u001b[0m \u001b[0mdata_gbm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_performance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m.\u001b[0m\u001b[0mauc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0;31m# Deep Learning performance on train/test data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mNameError\u001b[0m: name 'data_gbm' is not defined"
     ]
    }
   ],
   "source": [
    "# Compare GBM and Deep Learning by AUC on the train and test frames\n",
    "def _train_test_auc(model):\n",
    "    \"\"\"Return (train AUC, test AUC) for `model`, scored on the notebook's train/test frames.\"\"\"\n",
    "    auc_on_train = model.model_performance(train).auc()\n",
    "    auc_on_test  = model.model_performance(test).auc()\n",
    "    return auc_on_train, auc_on_test\n",
    "\n",
    "# Score GBM first, then Deep Learning (train before test for each model)\n",
    "train_auc_gbm, test_auc_gbm = _train_test_auc(data_gbm)\n",
    "train_auc_dl,  test_auc_dl  = _train_test_auc(data_dl)\n",
    "\n",
    "# Make a pretty HTML table printout of the results\n",
    "header = [\"Model\", \"AUC Train\", \"AUC Test\"]\n",
    "table  = [[\"GBM\", train_auc_gbm, test_auc_gbm],\n",
    "          [\"DL \", train_auc_dl,  test_auc_dl]]\n",
    "h2o.H2ODisplay(table, header)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Parse Progress: [##################################################] 100%\n",
      "Uploaded py634b18a9-7e84-40ca-b265-b2fe43e064aa into cluster with 2 rows and 10 cols\n",
      "Rows: 2 Cols: 16\n",
      "\n",
      "Chunk compression summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>chunk_type</td>\n",
       "<td>chunk_name</td>\n",
       "<td>count</td>\n",
       "<td>count_percentage</td>\n",
       "<td>size</td>\n",
       "<td>size_percentage</td></tr>\n",
       "<tr><td>C0L</td>\n",
       "<td>Constant Integers</td>\n",
       "<td>7</td>\n",
       "<td>43.75</td>\n",
       "<td>    560  B</td>\n",
       "<td>43.818466</td></tr>\n",
       "<tr><td>C1N</td>\n",
       "<td>1-Byte Integers (w/o NAs)</td>\n",
       "<td>4</td>\n",
       "<td>25.0</td>\n",
       "<td>    280  B</td>\n",
       "<td>21.909233</td></tr>\n",
       "<tr><td>C2</td>\n",
       "<td>2-Byte Integers</td>\n",
       "<td>2</td>\n",
       "<td>12.5</td>\n",
       "<td>    144  B</td>\n",
       "<td>11.267606</td></tr>\n",
       "<tr><td>C2S</td>\n",
       "<td>2-Byte Fractions</td>\n",
       "<td>1</td>\n",
       "<td>6.25</td>\n",
       "<td>     88  B</td>\n",
       "<td>6.885759</td></tr>\n",
       "<tr><td>CStr</td>\n",
       "<td>String</td>\n",
       "<td>2</td>\n",
       "<td>12.5</td>\n",
       "<td>    206  B</td>\n",
       "<td>16.118937</td></tr></table></div>"
      ],
      "text/plain": [
       "chunk_type    chunk_name                 count    count_percentage    size    size_percentage\n",
       "------------  -------------------------  -------  ------------------  ------  -----------------\n",
       "C0L           Constant Integers          7        43.75               560  B  43.8185\n",
       "C1N           1-Byte Integers (w/o NAs)  4        25                  280  B  21.9092\n",
       "C2            2-Byte Integers            2        12.5                144  B  11.2676\n",
       "C2S           2-Byte Fractions           1        6.25                88  B   6.88576\n",
       "CStr          String                     2        12.5                206  B  16.1189"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Frame distribution summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td></td>\n",
       "<td>size</td>\n",
       "<td>number_of_rows</td>\n",
       "<td>number_of_chunks_per_column</td>\n",
       "<td>number_of_chunks</td></tr>\n",
       "<tr><td>172.16.2.17:54321</td>\n",
       "<td>    1.2 KB</td>\n",
       "<td>2.0</td>\n",
       "<td>1.0</td>\n",
       "<td>16.0</td></tr>\n",
       "<tr><td>mean</td>\n",
       "<td>    1.2 KB</td>\n",
       "<td>2.0</td>\n",
       "<td>1.0</td>\n",
       "<td>16.0</td></tr>\n",
       "<tr><td>min</td>\n",
       "<td>    1.2 KB</td>\n",
       "<td>2.0</td>\n",
       "<td>1.0</td>\n",
       "<td>16.0</td></tr>\n",
       "<tr><td>max</td>\n",
       "<td>    1.2 KB</td>\n",
       "<td>2.0</td>\n",
       "<td>1.0</td>\n",
       "<td>16.0</td></tr>\n",
       "<tr><td>stddev</td>\n",
       "<td>      0  B</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>total</td>\n",
       "<td>    1.2 KB</td>\n",
       "<td>2.0</td>\n",
       "<td>1.0</td>\n",
       "<td>16.0</td></tr></table></div>"
      ],
      "text/plain": [
       "                   size    number_of_rows    number_of_chunks_per_column    number_of_chunks\n",
       "-----------------  ------  ----------------  -----------------------------  ------------------\n",
       "172.16.2.17:54321  1.2 KB  2                 1                              16\n",
       "mean               1.2 KB  2                 1                              16\n",
       "min                1.2 KB  2                 1                              16\n",
       "max                1.2 KB  2                 1                              16\n",
       "stddev             0  B    0                 0                              0\n",
       "total              1.2 KB  2                 1                              16"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Column-by-Column Summary:\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td></td>\n",
       "<td>Location.Description</td>\n",
       "<td>FBI.Code</td>\n",
       "<td>Primary.Type</td>\n",
       "<td>Community.Area</td>\n",
       "<td>District</td>\n",
       "<td>Beat</td>\n",
       "<td>Domestic</td>\n",
       "<td>IUCR</td>\n",
       "<td>Date</td>\n",
       "<td>Ward</td>\n",
       "<td>Day</td>\n",
       "<td>Month</td>\n",
       "<td>Year</td>\n",
       "<td>WeekNum</td>\n",
       "<td>WeekDay</td>\n",
       "<td>HourOfDay</td></tr>\n",
       "<tr><td>type</td>\n",
       "<td>string</td>\n",
       "<td>int</td>\n",
       "<td>string</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>enum</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>int</td>\n",
       "<td>enum</td>\n",
       "<td>int</td></tr>\n",
       "<tr><td>mins</td>\n",
       "<td>NaN</td>\n",
       "<td>11.0</td>\n",
       "<td>NaN</td>\n",
       "<td>46.0</td>\n",
       "<td>4.0</td>\n",
       "<td>422.0</td>\n",
       "<td>0.0</td>\n",
       "<td>1150.0</td>\n",
       "<td>1.423465239e+12</td>\n",
       "<td>7.0</td>\n",
       "<td>8.0</td>\n",
       "<td>3.0</td>\n",
       "<td>3915.0</td>\n",
       "<td>6.0</td>\n",
       "<td>6.0</td>\n",
       "<td>23.0</td></tr>\n",
       "<tr><td>maxs</td>\n",
       "<td>NaN</td>\n",
       "<td>18.0</td>\n",
       "<td>NaN</td>\n",
       "<td>63.0</td>\n",
       "<td>9.0</td>\n",
       "<td>923.0</td>\n",
       "<td>0.0</td>\n",
       "<td>1811.0</td>\n",
       "<td>1.423467838e+12</td>\n",
       "<td>14.0</td>\n",
       "<td>8.0</td>\n",
       "<td>3.0</td>\n",
       "<td>3915.0</td>\n",
       "<td>6.0</td>\n",
       "<td>6.0</td>\n",
       "<td>23.0</td></tr>\n",
       "<tr><td>sigma</td>\n",
       "<td>NaN</td>\n",
       "<td>4.94974746831</td>\n",
       "<td>NaN</td>\n",
       "<td>12.0208152802</td>\n",
       "<td>3.53553390593</td>\n",
       "<td>354.260497374</td>\n",
       "<td>0.0</td>\n",
       "<td>467.397582364</td>\n",
       "<td>1837770.5243</td>\n",
       "<td>4.94974746831</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>zero_count</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>2</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td></tr>\n",
       "<tr><td>missing_count</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td>\n",
       "<td>0</td></tr></table></div>"
      ],
      "text/plain": [
       "               Location.Description    FBI.Code       Primary.Type    Community.Area    District       Beat           Domestic    IUCR           Date             Ward           Day    Month    Year    WeekNum    WeekDay    HourOfDay\n",
       "-------------  ----------------------  -------------  --------------  ----------------  -------------  -------------  ----------  -------------  ---------------  -------------  -----  -------  ------  ---------  ---------  -----------\n",
       "type           string                  int            string          int               int            int            enum        int            int              int            int    int      int     int        enum       int\n",
       "mins           NaN                     11.0           NaN             46.0              4.0            422.0          0.0         1150.0         1.423465239e+12  7.0            8.0    3.0      3915.0  6.0        6.0        23.0\n",
       "maxs           NaN                     18.0           NaN             63.0              9.0            923.0          0.0         1811.0         1.423467838e+12  14.0           8.0    3.0      3915.0  6.0        6.0        23.0\n",
       "sigma          NaN                     4.94974746831  NaN             12.0208152802     3.53553390593  354.260497374  0.0         467.397582364  1837770.5243     4.94974746831  0.0    0.0      0.0     0.0        0.0        0.0\n",
       "zero_count     0                       0              0               0                 0              0              2           0              0                0              0      0        0       0          0          0\n",
       "missing_count  0                       0              0               0                 0              0              0           0              0                0              0      0        0       0          0          0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "ename": "EnvironmentError",
     "evalue": "h2o-py got an unexpected HTTP status code:\n 412 Precondition Failed (method = POST; url = http://localhost:54321/99/Rapids). \ndetailed error messages: Data vector is constant!",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mEnvironmentError\u001b[0m                          Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-6-85bb7c75c897>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     16\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m \u001b[0;31m# Refine date column and merge with census data\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0mrefine_date_col\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcrime_examples\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Date\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"%m/%d/%Y %I:%M:%S %p\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     19\u001b[0m \u001b[0mcrime_examples\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Date\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     20\u001b[0m \u001b[0mcrime_examples\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmerge\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcensus\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mallLeft\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mallRite\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m<ipython-input-4-c2702228f9f1>\u001b[0m in \u001b[0;36mrefine_date_col\u001b[0;34m(data, col, pattern)\u001b[0m\n\u001b[1;32m     15\u001b[0m     \u001b[0;31m# data[\"Weekend\"] = h2o.ifelse(data[\"WeekDay\"] in (\"Sun\", \"Sat\"), 1, 0)[0]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     16\u001b[0m     \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Weekend\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mh2o\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mifelse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"WeekDay\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"Sun\"\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"WeekDay\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m\"Sat\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 17\u001b[0;31m     \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Season\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"Month\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcut\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m7\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m12\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"Winter\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Spring\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Summer\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Autumn\"\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0;34m\"Winter\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     18\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     19\u001b[0m \u001b[0mrefine_date_col\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcrimes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"Date\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"%m/%d/%Y %I:%M:%S %p\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/frame.pyc\u001b[0m in \u001b[0;36mcut\u001b[0;34m(self, breaks, labels, include_lowest, right, dig_lab)\u001b[0m\n\u001b[1;32m   1256\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1257\u001b[0m     \u001b[0mexpr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"(cut '{}' {} {} {} {} #{}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbreaks_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"%TRUE\"\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0minclude_lowest\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m\"%FALSE\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"%TRUE\"\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mright\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m\"%FALSE\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdig_lab\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1258\u001b[0;31m     \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mh2o\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrapids\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1259\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mH2OVec\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mExpr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mop\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"vec_ids\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"name\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mlength\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mres\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"num_rows\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1260\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/h2o.pyc\u001b[0m in \u001b[0;36mrapids\u001b[0;34m(expr)\u001b[0m\n\u001b[1;32m    487\u001b[0m   \u001b[0;34m:\u001b[0m\u001b[0;32mreturn\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mThe\u001b[0m \u001b[0mJSON\u001b[0m \u001b[0mresponse\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mRapids\u001b[0m \u001b[0mexecution\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    488\u001b[0m   \"\"\"\n\u001b[0;32m--> 489\u001b[0;31m   \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mH2OConnection\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpost_json\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Rapids\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mast\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murllib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mquote\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexpr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_rest_version\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m99\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    490\u001b[0m   \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'error'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    491\u001b[0m     \u001b[0;32mraise\u001b[0m \u001b[0mEnvironmentError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"rapids expression not evaluated: {0}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'error'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc\u001b[0m in \u001b[0;36mpost_json\u001b[0;34m(url_suffix, file_upload_info, **kwargs)\u001b[0m\n\u001b[1;32m    360\u001b[0m     \u001b[0;32mif\u001b[0m \u001b[0m__H2OCONN__\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    361\u001b[0m       \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"No h2o connection. Did you run `h2o.init()` ?\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 362\u001b[0;31m     \u001b[0;32mreturn\u001b[0m \u001b[0m__H2OCONN__\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_rest_json\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl_suffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"POST\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile_upload_info\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    363\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    364\u001b[0m   \u001b[0;32mdef\u001b[0m \u001b[0m_rest_json\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl_suffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile_upload_info\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc\u001b[0m in \u001b[0;36m_rest_json\u001b[0;34m(self, url_suffix, method, file_upload_info, **kwargs)\u001b[0m\n\u001b[1;32m    363\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    364\u001b[0m   \u001b[0;32mdef\u001b[0m \u001b[0m_rest_json\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl_suffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile_upload_info\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 365\u001b[0;31m     \u001b[0mraw_txt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_do_raw_rest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0murl_suffix\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfile_upload_info\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    366\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_process_tables\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mraw_txt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    367\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Users/anqi_fu/Documents/workspace/h2o-3/h2o-py/h2o/connection.pyc\u001b[0m in \u001b[0;36m_do_raw_rest\u001b[0;34m(self, url_suffix, method, file_upload_info, **kwargs)\u001b[0m\n\u001b[1;32m    429\u001b[0m       raise EnvironmentError((\"h2o-py got an unexpected HTTP status code:\\n {} {} (method = {}; url = {}). \\n\"+ \\\n\u001b[1;32m    430\u001b[0m                               \"detailed error messages: {}\")\n\u001b[0;32m--> 431\u001b[0;31m                              .format(http_result.status_code,http_result.reason,method,url,detailed_error_msgs))\n\u001b[0m\u001b[1;32m    432\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    433\u001b[0m     \u001b[0;31m# TODO: is.logging? -> write to logs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mEnvironmentError\u001b[0m: h2o-py got an unexpected HTTP status code:\n 412 Precondition Failed (method = POST; url = http://localhost:54321/99/Rapids). \ndetailed error messages: Data vector is constant!"
     ]
    }
   ],
   "source": [
    "# Create new H2OFrame of crime observations.\n",
    "# Each key is a column name; each list holds one value per observation (two rows).\n",
    "examples = {\n",
    "            \"Date\":                 [\"02/08/2015 11:43:58 PM\", \"02/08/2015 11:00:39 PM\"],\n",
    "            \"IUCR\":                 [1811, 1150],\n",
    "            \"Primary.Type\":         [\"NARCOTICS\", \"DECEPTIVE PRACTICE\"],\n",
    "            \"Location.Description\": [\"STREET\", \"RESIDENCE\"],\n",
    "            \"Domestic\":             [\"false\", \"false\"],\n",
    "            \"Beat\":                 [422, 923],\n",
    "            \"District\":             [4, 9],\n",
    "            \"Ward\":                 [7, 14],\n",
    "            \"Community.Area\":       [46, 63],\n",
    "            \"FBI.Code\":             [18, 11]\n",
    "            }\n",
    "\n",
    "crime_examples = h2o.H2OFrame(python_obj = examples)\n",
    "\n",
    "# Refine date column and merge with census data.\n",
    "# NOTE(review): the saved output of this cell is an EnvironmentError\n",
    "# (HTTP 412, 'Data vector is constant!') -- TODO confirm which call raises it.\n",
    "refine_date_col(crime_examples, \"Date\", \"%m/%d/%Y %I:%M:%S %p\")\n",
    "\n",
    "# drop() and merge() hand back a new frame instead of changing the receiver,\n",
    "# so rebind the name; otherwise both results are silently thrown away and the\n",
    "# frame later passed to predict() would still lack the census columns.\n",
    "crime_examples = crime_examples.drop(\"Date\")\n",
    "crime_examples = crime_examples.merge(census, allLeft=True, allRite=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Score the new crime observations with both trained models\n",
    "# (GBM first, then deep learning) to predict the probability of arrest.\n",
    "gbm_pred = data_gbm.predict(crime_examples)\n",
    "dl_pred = data_dl.predict(crime_examples)\n",
    "\n",
    "# Summarize each prediction frame.\n",
    "# TODO: Replace with a pretty HTML table\n",
    "gbm_pred.describe()\n",
    "dl_pred.describe()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}