{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<pyspark.context.SparkContext at 0x102cea1d0>"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Spark Context\n",
    "sc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Warning: Version mismatch. H2O is version 3.6.0.2, but the python package is version 3.7.0.99999.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td>H2O cluster uptime: </td>\n",
       "<td>2 seconds 217 milliseconds </td></tr>\n",
       "<tr><td>H2O cluster version: </td>\n",
       "<td>3.6.0.2</td></tr>\n",
       "<tr><td>H2O cluster name: </td>\n",
       "<td>sparkling-water-nidhimehta</td></tr>\n",
       "<tr><td>H2O cluster total nodes: </td>\n",
       "<td>2</td></tr>\n",
       "<tr><td>H2O cluster total memory: </td>\n",
       "<td>3.83 GB</td></tr>\n",
       "<tr><td>H2O cluster total cores: </td>\n",
       "<td>16</td></tr>\n",
       "<tr><td>H2O cluster allowed cores: </td>\n",
       "<td>16</td></tr>\n",
       "<tr><td>H2O cluster healthy: </td>\n",
       "<td>True</td></tr>\n",
       "<tr><td>H2O Connection ip: </td>\n",
       "<td>172.16.2.98</td></tr>\n",
       "<tr><td>H2O Connection port: </td>\n",
       "<td>54329</td></tr></table></div>"
      ],
      "text/plain": [
       "--------------------------  --------------------------\n",
       "H2O cluster uptime:         2 seconds 217 milliseconds\n",
       "H2O cluster version:        3.6.0.2\n",
       "H2O cluster name:           sparkling-water-nidhimehta\n",
       "H2O cluster total nodes:    2\n",
       "H2O cluster total memory:   3.83 GB\n",
       "H2O cluster total cores:    16\n",
       "H2O cluster allowed cores:  16\n",
       "H2O cluster healthy:        True\n",
       "H2O Connection ip:          172.16.2.98\n",
       "H2O Connection port:        54329\n",
       "--------------------------  --------------------------"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Start H2O Context\n",
    "from pysparkling import *\n",
    "sc\n",
    "hc= H2OContext(sc).start()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "H2OContext: ip=172.16.2.98, port=54329\n"
     ]
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# H2o Context\n",
    "hc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Import H2O Python library\n",
    "import h2o\n",
    "# View all the available H2O python functions\n",
    "#dir(h2o)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Parse Progress: [##################################################] 100%\n",
      "(9999, 22)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<tr><th>       </th><th>ID           </th><th>Case Number  </th><th>Date                  </th><th>Block               </th><th>IUCR  </th><th>Primary Type   </th><th>Description                 </th><th>Location Description  </th><th>Arrest        </th><th>Domestic      </th><th>Beat         </th><th>District     </th><th>Ward         </th><th>Community Area  </th><th>FBI Code  </th><th>X Coordinate  </th><th>Y Coordinate  </th><th>Year  </th><th>Updated On            </th><th>Latitude       </th><th>Longitude      </th><th>Location                     </th></tr>\n",
       "<tr><td>type   </td><td>int          </td><td>string       </td><td>string                </td><td>enum                </td><td>enum  </td><td>enum           </td><td>enum                        </td><td>enum                  </td><td>enum          </td><td>enum          </td><td>int          </td><td>int          </td><td>int          </td><td>int             </td><td>enum      </td><td>int           </td><td>int           </td><td>int   </td><td>enum                  </td><td>real           </td><td>real           </td><td>enum                         </td></tr>\n",
       "<tr><td>mins   </td><td>21735.0      </td><td>NaN          </td><td>NaN                   </td><td>0.0                 </td><td>0.0   </td><td>0.0            </td><td>0.0                         </td><td>0.0                   </td><td>0.0           </td><td>0.0           </td><td>111.0        </td><td>1.0          </td><td>1.0          </td><td>1.0             </td><td>0.0       </td><td>1100317.0     </td><td>1814255.0     </td><td>2015.0</td><td>0.0                   </td><td>41.64507243    </td><td>-87.906463888  </td><td>0.0                          </td></tr>\n",
       "<tr><td>mean   </td><td>9931318.73737</td><td>NaN          </td><td>NaN                   </td><td>NaN                 </td><td>NaN   </td><td>NaN            </td><td>NaN                         </td><td>NaN                   </td><td>0.292829282928</td><td>0.152315231523</td><td>1159.61806181</td><td>11.3489885128</td><td>22.9540954095</td><td>37.4476447645   </td><td>NaN       </td><td>1163880.59815 </td><td>1885916.14984 </td><td>2015.0</td><td>NaN                   </td><td>41.8425652247  </td><td>-87.6741405221 </td><td>NaN                          </td></tr>\n",
       "<tr><td>maxs   </td><td>9962898.0    </td><td>NaN          </td><td>NaN                   </td><td>6517.0              </td><td>212.0 </td><td>26.0           </td><td>198.0                       </td><td>90.0                  </td><td>1.0           </td><td>1.0           </td><td>2535.0       </td><td>25.0         </td><td>50.0         </td><td>77.0            </td><td>24.0      </td><td>1205069.0     </td><td>1951533.0     </td><td>2015.0</td><td>32.0                  </td><td>42.022646183   </td><td>-87.524773286  </td><td>8603.0                       </td></tr>\n",
       "<tr><td>sigma  </td><td>396787.564221</td><td>NaN          </td><td>NaN                   </td><td>NaN                 </td><td>NaN   </td><td>NaN            </td><td>NaN                         </td><td>NaN                   </td><td>0.455083515588</td><td>0.35934414686 </td><td>695.76029875 </td><td>6.94547493301</td><td>13.6495661144</td><td>21.2748762223   </td><td>NaN       </td><td>16496.4493681 </td><td>31274.0163199 </td><td>0.0   </td><td>NaN                   </td><td>0.0860186579358</td><td>0.0600357970653</td><td>NaN                          </td></tr>\n",
       "<tr><td>zeros  </td><td>0            </td><td>0            </td><td>0                     </td><td>3                   </td><td>16    </td><td>11             </td><td>933                         </td><td>19                    </td><td>7071          </td><td>8476          </td><td>0            </td><td>0            </td><td>0            </td><td>0               </td><td>16        </td><td>0             </td><td>0             </td><td>0     </td><td>603                   </td><td>0              </td><td>0              </td><td>1                            </td></tr>\n",
       "<tr><td>missing</td><td>0            </td><td>0            </td><td>0                     </td><td>0                   </td><td>0     </td><td>0              </td><td>0                           </td><td>6                     </td><td>0             </td><td>0             </td><td>0            </td><td>162          </td><td>0            </td><td>0               </td><td>0         </td><td>162           </td><td>162           </td><td>0     </td><td>0                     </td><td>162            </td><td>162            </td><td>162                          </td></tr>\n",
       "<tr><td>0      </td><td>9955810.0    </td><td>HY144797     </td><td>02/08/2015 11:43:40 PM</td><td>081XX S COLES AVE   </td><td>1811  </td><td>NARCOTICS      </td><td>POSS: CANNABIS 30GMS OR LESS</td><td>STREET                </td><td>true          </td><td>false         </td><td>422.0        </td><td>4.0          </td><td>7.0          </td><td>46.0            </td><td>18        </td><td>1198273.0     </td><td>1851626.0     </td><td>2015.0</td><td>02/15/2015 12:43:39 PM</td><td>41.747693646   </td><td>-87.549035389  </td><td>(41.747693646, -87.549035389)</td></tr>\n",
       "<tr><td>1      </td><td>9955861.0    </td><td>HY144838     </td><td>02/08/2015 11:41:42 PM</td><td>118XX S STATE ST    </td><td>0486  </td><td>BATTERY        </td><td>DOMESTIC BATTERY SIMPLE     </td><td>APARTMENT             </td><td>true          </td><td>true          </td><td>522.0        </td><td>5.0          </td><td>34.0         </td><td>53.0            </td><td>08B       </td><td>1178335.0     </td><td>1826581.0     </td><td>2015.0</td><td>02/15/2015 12:43:39 PM</td><td>41.679442289   </td><td>-87.622850758  </td><td>(41.679442289, -87.622850758)</td></tr>\n",
       "<tr><td>2      </td><td>9955801.0    </td><td>HY144779     </td><td>02/08/2015 11:30:22 PM</td><td>002XX S LARAMIE AVE </td><td>2026  </td><td>NARCOTICS      </td><td>POSS: PCP                   </td><td>SIDEWALK              </td><td>true          </td><td>false         </td><td>1522.0       </td><td>15.0         </td><td>29.0         </td><td>25.0            </td><td>18        </td><td>1141717.0     </td><td>1898581.0     </td><td>2015.0</td><td>02/15/2015 12:43:39 PM</td><td>41.87777333    </td><td>-87.755117993  </td><td>(41.87777333, -87.755117993) </td></tr>\n",
       "<tr><td>3      </td><td>9956197.0    </td><td>HY144787     </td><td>02/08/2015 11:30:23 PM</td><td>006XX E 67TH ST     </td><td>1811  </td><td>NARCOTICS      </td><td>POSS: CANNABIS 30GMS OR LESS</td><td>STREET                </td><td>true          </td><td>false         </td><td>321.0        </td><td>nan          </td><td>6.0          </td><td>42.0            </td><td>18        </td><td>nan           </td><td>nan           </td><td>2015.0</td><td>02/15/2015 12:43:39 PM</td><td>nan            </td><td>nan            </td><td>                             </td></tr>\n",
       "<tr><td>4      </td><td>9955846.0    </td><td>HY144829     </td><td>02/08/2015 11:30:58 PM</td><td>0000X S MAYFIELD AVE</td><td>0610  </td><td>BURGLARY       </td><td>FORCIBLE ENTRY              </td><td>APARTMENT             </td><td>false         </td><td>false         </td><td>1513.0       </td><td>15.0         </td><td>29.0         </td><td>25.0            </td><td>05        </td><td>1137239.0     </td><td>1899372.0     </td><td>2015.0</td><td>02/15/2015 12:43:39 PM</td><td>41.880025548   </td><td>-87.771541324  </td><td>(41.880025548, -87.771541324)</td></tr>\n",
       "<tr><td>5      </td><td>9955835.0    </td><td>HY144778     </td><td>02/08/2015 11:30:21 PM</td><td>010XX W 48TH ST     </td><td>0486  </td><td>BATTERY        </td><td>DOMESTIC BATTERY SIMPLE     </td><td>APARTMENT             </td><td>false         </td><td>true          </td><td>933.0        </td><td>9.0          </td><td>3.0          </td><td>61.0            </td><td>08B       </td><td>1169986.0     </td><td>1873019.0     </td><td>2015.0</td><td>02/15/2015 12:43:39 PM</td><td>41.807059405   </td><td>-87.65206589   </td><td>(41.807059405, -87.65206589) </td></tr>\n",
       "<tr><td>6      </td><td>9955872.0    </td><td>HY144822     </td><td>02/08/2015 11:27:24 PM</td><td>015XX W ARTHUR AVE  </td><td>1320  </td><td>CRIMINAL DAMAGE</td><td>TO VEHICLE                  </td><td>STREET                </td><td>false         </td><td>false         </td><td>2432.0       </td><td>24.0         </td><td>40.0         </td><td>1.0             </td><td>14        </td><td>1164732.0     </td><td>1943222.0     </td><td>2015.0</td><td>02/15/2015 12:43:39 PM</td><td>41.999814056   </td><td>-87.669342967  </td><td>(41.999814056, -87.669342967)</td></tr>\n",
       "<tr><td>7      </td><td>21752.0      </td><td>HY144738     </td><td>02/08/2015 11:26:12 PM</td><td>060XX W GRAND AVE   </td><td>0110  </td><td>HOMICIDE       </td><td>FIRST DEGREE MURDER         </td><td>STREET                </td><td>true          </td><td>false         </td><td>2512.0       </td><td>25.0         </td><td>37.0         </td><td>19.0            </td><td>01A       </td><td>1135910.0     </td><td>1914206.0     </td><td>2015.0</td><td>02/15/2015 12:43:39 PM</td><td>41.920755683   </td><td>-87.776067514  </td><td>(41.920755683, -87.776067514)</td></tr>\n",
       "<tr><td>8      </td><td>9955808.0    </td><td>HY144775     </td><td>02/08/2015 11:20:33 PM</td><td>001XX W WACKER DR   </td><td>0460  </td><td>BATTERY        </td><td>SIMPLE                      </td><td>OTHER                 </td><td>false         </td><td>false         </td><td>122.0        </td><td>1.0          </td><td>42.0         </td><td>32.0            </td><td>08B       </td><td>1175384.0     </td><td>1902088.0     </td><td>2015.0</td><td>02/15/2015 12:43:39 PM</td><td>41.886707818   </td><td>-87.631396356  </td><td>(41.886707818, -87.631396356)</td></tr>\n",
       "<tr><td>9      </td><td>9958275.0    </td><td>HY146732     </td><td>02/08/2015 11:15:36 PM</td><td>001XX W WACKER DR   </td><td>0460  </td><td>BATTERY        </td><td>SIMPLE                      </td><td>HOTEL/MOTEL           </td><td>false         </td><td>false         </td><td>122.0        </td><td>1.0          </td><td>42.0         </td><td>32.0            </td><td>08B       </td><td>1175384.0     </td><td>1902088.0     </td><td>2015.0</td><td>02/15/2015 12:43:39 PM</td><td>41.886707818   </td><td>-87.631396356  </td><td>(41.886707818, -87.631396356)</td></tr>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Parse Chicago Crime dataset into H2O\n",
    "column_type = ['Numeric','String','String','Enum','Enum','Enum','Enum','Enum','Enum','Enum','Numeric','Numeric','Numeric','Numeric','Enum','Numeric','Numeric','Numeric','Enum','Numeric','Numeric','Enum']\n",
    "f_crimes = h2o.import_file(path =\"../data/chicagoCrimes10k.csv\",col_types =column_type)\n",
    "\n",
    "print(f_crimes.shape)\n",
    "f_crimes.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<tr><th>IUCR  </th><th style=\"text-align: right;\">  Count</th></tr>\n",
       "<tr><td>0110  </td><td style=\"text-align: right;\">     16</td></tr>\n",
       "<tr><td>0261  </td><td style=\"text-align: right;\">      2</td></tr>\n",
       "<tr><td>0263  </td><td style=\"text-align: right;\">      2</td></tr>\n",
       "<tr><td>0265  </td><td style=\"text-align: right;\">      5</td></tr>\n",
       "<tr><td>0266  </td><td style=\"text-align: right;\">      2</td></tr>\n",
       "<tr><td>0281  </td><td style=\"text-align: right;\">     41</td></tr>\n",
       "<tr><td>0291  </td><td style=\"text-align: right;\">      3</td></tr>\n",
       "<tr><td>0312  </td><td style=\"text-align: right;\">     18</td></tr>\n",
       "<tr><td>0313  </td><td style=\"text-align: right;\">     20</td></tr>\n",
       "<tr><td>031A  </td><td style=\"text-align: right;\">    136</td></tr>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look at the distribution of IUCR column\n",
    "f_crimes[\"IUCR\"].table()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<tr><th>Arrest  </th><th style=\"text-align: right;\">  Count</th></tr>\n",
       "<tr><td>false   </td><td style=\"text-align: right;\">   7071</td></tr>\n",
       "<tr><td>true    </td><td style=\"text-align: right;\">   2928</td></tr>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look at distribution of Arrest column\n",
    "f_crimes[\"Arrest\"].table()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<tr><th style=\"text-align: right;\">             ID</th><th>Case_Number  </th><th>Date                  </th><th>Block               </th><th style=\"text-align: right;\">  IUCR</th><th>Primary_Type   </th><th>Description                 </th><th>Location_Description  </th><th>Arrest  </th><th>Domestic  </th><th style=\"text-align: right;\">  Beat</th><th style=\"text-align: right;\">  District</th><th style=\"text-align: right;\">  Ward</th><th style=\"text-align: right;\">  Community_Area</th><th>FBI_Code  </th><th style=\"text-align: right;\">  X_Coordinate</th><th style=\"text-align: right;\">  Y_Coordinate</th><th style=\"text-align: right;\">  Year</th><th>Updated_On            </th><th style=\"text-align: right;\">  Latitude</th><th style=\"text-align: right;\">  Longitude</th><th>Location                     </th></tr>\n",
       "<tr><td style=\"text-align: right;\">    9.95581e+06</td><td>HY144797     </td><td>02/08/2015 11:43:40 PM</td><td>081XX S COLES AVE   </td><td style=\"text-align: right;\">  1811</td><td>NARCOTICS      </td><td>POSS: CANNABIS 30GMS OR LESS</td><td>STREET                </td><td>true    </td><td>false     </td><td style=\"text-align: right;\">   422</td><td style=\"text-align: right;\">         4</td><td style=\"text-align: right;\">     7</td><td style=\"text-align: right;\">              46</td><td>18        </td><td style=\"text-align: right;\">   1.19827e+06</td><td style=\"text-align: right;\">   1.85163e+06</td><td style=\"text-align: right;\">  2015</td><td>02/15/2015 12:43:39 PM</td><td style=\"text-align: right;\">   41.7477</td><td style=\"text-align: right;\">   -87.549 </td><td>(41.747693646, -87.549035389)</td></tr>\n",
       "<tr><td style=\"text-align: right;\">    9.95586e+06</td><td>HY144838     </td><td>02/08/2015 11:41:42 PM</td><td>118XX S STATE ST    </td><td style=\"text-align: right;\">  0486</td><td>BATTERY        </td><td>DOMESTIC BATTERY SIMPLE     </td><td>APARTMENT             </td><td>true    </td><td>true      </td><td style=\"text-align: right;\">   522</td><td style=\"text-align: right;\">         5</td><td style=\"text-align: right;\">    34</td><td style=\"text-align: right;\">              53</td><td>08B       </td><td style=\"text-align: right;\">   1.17834e+06</td><td style=\"text-align: right;\">   1.82658e+06</td><td style=\"text-align: right;\">  2015</td><td>02/15/2015 12:43:39 PM</td><td style=\"text-align: right;\">   41.6794</td><td style=\"text-align: right;\">   -87.6229</td><td>(41.679442289, -87.622850758)</td></tr>\n",
       "<tr><td style=\"text-align: right;\">    9.9558e+06 </td><td>HY144779     </td><td>02/08/2015 11:30:22 PM</td><td>002XX S LARAMIE AVE </td><td style=\"text-align: right;\">  2026</td><td>NARCOTICS      </td><td>POSS: PCP                   </td><td>SIDEWALK              </td><td>true    </td><td>false     </td><td style=\"text-align: right;\">  1522</td><td style=\"text-align: right;\">        15</td><td style=\"text-align: right;\">    29</td><td style=\"text-align: right;\">              25</td><td>18        </td><td style=\"text-align: right;\">   1.14172e+06</td><td style=\"text-align: right;\">   1.89858e+06</td><td style=\"text-align: right;\">  2015</td><td>02/15/2015 12:43:39 PM</td><td style=\"text-align: right;\">   41.8778</td><td style=\"text-align: right;\">   -87.7551</td><td>(41.87777333, -87.755117993) </td></tr>\n",
       "<tr><td style=\"text-align: right;\">    9.9562e+06 </td><td>HY144787     </td><td>02/08/2015 11:30:23 PM</td><td>006XX E 67TH ST     </td><td style=\"text-align: right;\">  1811</td><td>NARCOTICS      </td><td>POSS: CANNABIS 30GMS OR LESS</td><td>STREET                </td><td>true    </td><td>false     </td><td style=\"text-align: right;\">   321</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">     6</td><td style=\"text-align: right;\">              42</td><td>18        </td><td style=\"text-align: right;\"> nan          </td><td style=\"text-align: right;\"> nan          </td><td style=\"text-align: right;\">  2015</td><td>02/15/2015 12:43:39 PM</td><td style=\"text-align: right;\">  nan     </td><td style=\"text-align: right;\">   nan     </td><td>                             </td></tr>\n",
       "<tr><td style=\"text-align: right;\">    9.95585e+06</td><td>HY144829     </td><td>02/08/2015 11:30:58 PM</td><td>0000X S MAYFIELD AVE</td><td style=\"text-align: right;\">  0610</td><td>BURGLARY       </td><td>FORCIBLE ENTRY              </td><td>APARTMENT             </td><td>false   </td><td>false     </td><td style=\"text-align: right;\">  1513</td><td style=\"text-align: right;\">        15</td><td style=\"text-align: right;\">    29</td><td style=\"text-align: right;\">              25</td><td>05        </td><td style=\"text-align: right;\">   1.13724e+06</td><td style=\"text-align: right;\">   1.89937e+06</td><td style=\"text-align: right;\">  2015</td><td>02/15/2015 12:43:39 PM</td><td style=\"text-align: right;\">   41.88  </td><td style=\"text-align: right;\">   -87.7715</td><td>(41.880025548, -87.771541324)</td></tr>\n",
       "<tr><td style=\"text-align: right;\">    9.95584e+06</td><td>HY144778     </td><td>02/08/2015 11:30:21 PM</td><td>010XX W 48TH ST     </td><td style=\"text-align: right;\">  0486</td><td>BATTERY        </td><td>DOMESTIC BATTERY SIMPLE     </td><td>APARTMENT             </td><td>false   </td><td>true      </td><td style=\"text-align: right;\">   933</td><td style=\"text-align: right;\">         9</td><td style=\"text-align: right;\">     3</td><td style=\"text-align: right;\">              61</td><td>08B       </td><td style=\"text-align: right;\">   1.16999e+06</td><td style=\"text-align: right;\">   1.87302e+06</td><td style=\"text-align: right;\">  2015</td><td>02/15/2015 12:43:39 PM</td><td style=\"text-align: right;\">   41.8071</td><td style=\"text-align: right;\">   -87.6521</td><td>(41.807059405, -87.65206589) </td></tr>\n",
       "<tr><td style=\"text-align: right;\">    9.95587e+06</td><td>HY144822     </td><td>02/08/2015 11:27:24 PM</td><td>015XX W ARTHUR AVE  </td><td style=\"text-align: right;\">  1320</td><td>CRIMINAL DAMAGE</td><td>TO VEHICLE                  </td><td>STREET                </td><td>false   </td><td>false     </td><td style=\"text-align: right;\">  2432</td><td style=\"text-align: right;\">        24</td><td style=\"text-align: right;\">    40</td><td style=\"text-align: right;\">               1</td><td>14        </td><td style=\"text-align: right;\">   1.16473e+06</td><td style=\"text-align: right;\">   1.94322e+06</td><td style=\"text-align: right;\">  2015</td><td>02/15/2015 12:43:39 PM</td><td style=\"text-align: right;\">   41.9998</td><td style=\"text-align: right;\">   -87.6693</td><td>(41.999814056, -87.669342967)</td></tr>\n",
       "<tr><td style=\"text-align: right;\">21752          </td><td>HY144738     </td><td>02/08/2015 11:26:12 PM</td><td>060XX W GRAND AVE   </td><td style=\"text-align: right;\">  0110</td><td>HOMICIDE       </td><td>FIRST DEGREE MURDER         </td><td>STREET                </td><td>true    </td><td>false     </td><td style=\"text-align: right;\">  2512</td><td style=\"text-align: right;\">        25</td><td style=\"text-align: right;\">    37</td><td style=\"text-align: right;\">              19</td><td>01A       </td><td style=\"text-align: right;\">   1.13591e+06</td><td style=\"text-align: right;\">   1.91421e+06</td><td style=\"text-align: right;\">  2015</td><td>02/15/2015 12:43:39 PM</td><td style=\"text-align: right;\">   41.9208</td><td style=\"text-align: right;\">   -87.7761</td><td>(41.920755683, -87.776067514)</td></tr>\n",
       "<tr><td style=\"text-align: right;\">    9.95581e+06</td><td>HY144775     </td><td>02/08/2015 11:20:33 PM</td><td>001XX W WACKER DR   </td><td style=\"text-align: right;\">  0460</td><td>BATTERY        </td><td>SIMPLE                      </td><td>OTHER                 </td><td>false   </td><td>false     </td><td style=\"text-align: right;\">   122</td><td style=\"text-align: right;\">         1</td><td style=\"text-align: right;\">    42</td><td style=\"text-align: right;\">              32</td><td>08B       </td><td style=\"text-align: right;\">   1.17538e+06</td><td style=\"text-align: right;\">   1.90209e+06</td><td style=\"text-align: right;\">  2015</td><td>02/15/2015 12:43:39 PM</td><td style=\"text-align: right;\">   41.8867</td><td style=\"text-align: right;\">   -87.6314</td><td>(41.886707818, -87.631396356)</td></tr>\n",
       "<tr><td style=\"text-align: right;\">    9.95828e+06</td><td>HY146732     </td><td>02/08/2015 11:15:36 PM</td><td>001XX W WACKER DR   </td><td style=\"text-align: right;\">  0460</td><td>BATTERY        </td><td>SIMPLE                      </td><td>HOTEL/MOTEL           </td><td>false   </td><td>false     </td><td style=\"text-align: right;\">   122</td><td style=\"text-align: right;\">         1</td><td style=\"text-align: right;\">    42</td><td style=\"text-align: right;\">              32</td><td>08B       </td><td style=\"text-align: right;\">   1.17538e+06</td><td style=\"text-align: right;\">   1.90209e+06</td><td style=\"text-align: right;\">  2015</td><td>02/15/2015 12:43:39 PM</td><td style=\"text-align: right;\">   41.8867</td><td style=\"text-align: right;\">   -87.6314</td><td>(41.886707818, -87.631396356)</td></tr>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Modify column names to replace blank spaces with underscore\n",
    "col_names = map(lambda s: s.replace(' ', '_'), f_crimes.col_names)\n",
    "f_crimes.set_names(col_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Set time zone to UTC for date manipulation\n",
    "h2o.set_timezone(\"Etc/UTC\")\n",
    "\n",
    "## Refine date column \n",
    "def refine_date_col(data, col, pattern):\n",
    "    data[col]         = data[col].as_date(pattern)\n",
    "    data[\"Day\"]       = data[col].day()\n",
    "    data[\"Month\"]     = data[col].month()    # Since H2O indexes from 0\n",
    "    data[\"Year\"]      = data[col].year()\n",
    "    data[\"WeekNum\"]   = data[col].week()\n",
    "    data[\"WeekDay\"]   = data[col].dayOfWeek()\n",
    "    data[\"HourOfDay\"] = data[col].hour()\n",
    "    \n",
    "    # Create weekend and season cols\n",
    "    data[\"Weekend\"] = (data[\"WeekDay\"] == \"Sun\" or data[\"WeekDay\"] == \"Sat\").ifelse(1, 0)[0]\n",
    "    data[\"Season\"] = data[\"Month\"].cut([0, 2, 5, 7, 10, 12], [\"Winter\", \"Spring\", \"Summer\", \"Autumn\", \"Winter\"])\n",
    "    \n",
    "refine_date_col(f_crimes, \"Date\", \"%m/%d/%Y %I:%M:%S %p\")\n",
    "f_crimes = f_crimes.drop(\"Date\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Parse Progress: [##################################################] 100%\n",
      "[77, 9]\n"
     ]
    }
   ],
   "source": [
    "# Parse Census data into H2O\n",
    "f_census = h2o.import_file(\"../data/chicagoCensus.csv\",header=1)\n",
    "\n",
    "## Update column names in the table\n",
    "col_names = map(lambda s: s.strip().replace(' ', '_'), f_census.col_names)\n",
    "f_census.set_names(col_names)\n",
    "f_census = f_census[1:78,:]\n",
    "print(f_census.dim)\n",
    "#f_census.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Parse Progress: [##################################################] 100%\n",
      "[5162, 6]\n"
     ]
    }
   ],
   "source": [
    "# Parse Weather data into H2O\n",
    "f_weather = h2o.import_file(\"../data/chicagoAllWeather.csv\")\n",
    "f_weather = f_weather[1:]\n",
    "print(f_weather.dim)\n",
    "#f_weather.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<tr><th style=\"text-align: right;\">  month</th><th style=\"text-align: right;\">  day</th><th style=\"text-align: right;\">  year</th><th style=\"text-align: right;\">  maxTemp</th><th style=\"text-align: right;\">  meanTemp</th><th style=\"text-align: right;\">  minTemp</th></tr>\n",
       "<tr><td style=\"text-align: right;\">      6</td><td style=\"text-align: right;\">   19</td><td style=\"text-align: right;\">  2008</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">      nan</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      9</td><td style=\"text-align: right;\">   23</td><td style=\"text-align: right;\">  2008</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">      nan</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      9</td><td style=\"text-align: right;\">   24</td><td style=\"text-align: right;\">  2008</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">      nan</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      9</td><td style=\"text-align: right;\">   25</td><td style=\"text-align: right;\">  2008</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">      nan</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      9</td><td style=\"text-align: right;\">   26</td><td style=\"text-align: right;\">  2008</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">      nan</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      9</td><td style=\"text-align: right;\">   27</td><td style=\"text-align: right;\">  2008</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">      nan</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      9</td><td style=\"text-align: right;\">   28</td><td style=\"text-align: right;\">  2008</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">      nan</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      9</td><td style=\"text-align: right;\">   29</td><td style=\"text-align: right;\">  2008</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">      nan</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      9</td><td style=\"text-align: right;\">   30</td><td style=\"text-align: right;\">  2008</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">      nan</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      3</td><td style=\"text-align: right;\">    4</td><td style=\"text-align: right;\">  2009</td><td style=\"text-align: right;\">      nan</td><td style=\"text-align: right;\">       nan</td><td style=\"text-align: right;\">      nan</td></tr>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look at all the null entires in the Weather table\n",
    "f_weather[f_weather[\"meanTemp\"].isna()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "H2OContext: ip=172.16.2.98, port=54329\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<tr><th style=\"text-align: right;\">  month</th><th style=\"text-align: right;\">  day</th><th style=\"text-align: right;\">  year</th><th style=\"text-align: right;\">  maxTemp</th><th style=\"text-align: right;\">  meanTemp</th><th style=\"text-align: right;\">  minTemp</th></tr>\n",
       "<tr><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">    1</td><td style=\"text-align: right;\">  2001</td><td style=\"text-align: right;\">       23</td><td style=\"text-align: right;\">        14</td><td style=\"text-align: right;\">        6</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">    2</td><td style=\"text-align: right;\">  2001</td><td style=\"text-align: right;\">       18</td><td style=\"text-align: right;\">        12</td><td style=\"text-align: right;\">        6</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">    3</td><td style=\"text-align: right;\">  2001</td><td style=\"text-align: right;\">       28</td><td style=\"text-align: right;\">        18</td><td style=\"text-align: right;\">        8</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">    4</td><td style=\"text-align: right;\">  2001</td><td style=\"text-align: right;\">       30</td><td style=\"text-align: right;\">        24</td><td style=\"text-align: right;\">       19</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">    5</td><td style=\"text-align: right;\">  2001</td><td style=\"text-align: right;\">       36</td><td style=\"text-align: right;\">        30</td><td style=\"text-align: right;\">       21</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">    6</td><td style=\"text-align: right;\">  2001</td><td style=\"text-align: right;\">       33</td><td style=\"text-align: right;\">        26</td><td style=\"text-align: right;\">       19</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">    7</td><td style=\"text-align: right;\">  2001</td><td style=\"text-align: right;\">       34</td><td style=\"text-align: right;\">        28</td><td style=\"text-align: right;\">       21</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">    8</td><td style=\"text-align: right;\">  2001</td><td style=\"text-align: right;\">       26</td><td style=\"text-align: right;\">        20</td><td style=\"text-align: right;\">       14</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">    9</td><td style=\"text-align: right;\">  2001</td><td style=\"text-align: right;\">       23</td><td style=\"text-align: right;\">        16</td><td style=\"text-align: right;\">       10</td></tr>\n",
       "<tr><td style=\"text-align: right;\">      1</td><td style=\"text-align: right;\">   10</td><td style=\"text-align: right;\">  2001</td><td style=\"text-align: right;\">       34</td><td style=\"text-align: right;\">        26</td><td style=\"text-align: right;\">       19</td></tr>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look at the help on as_h2o_frame \n",
    "hc.as_data_frame?\n",
    "f_weather"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Copy data frames to Spark from H2O\n",
    "df_weather = hc.as_spark_frame(f_weather,)\n",
    "df_census = hc.as_spark_frame(f_census)\n",
    "df_crimes = hc.as_spark_frame(f_crimes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-----+---+----+-------+--------+-------+\n",
      "|month|day|year|maxTemp|meanTemp|minTemp|\n",
      "+-----+---+----+-------+--------+-------+\n",
      "|    1|  1|2001|     23|      14|      6|\n",
      "|    1|  2|2001|     18|      12|      6|\n",
      "+-----+---+----+-------+--------+-------+\n",
      "only showing top 2 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Look at the weather data as parsed in Spark\n",
    "df_weather.show(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Join columns from Crime, Census and Weather DataFrames in Spark\n",
    "\n",
    "## Register DataFrames as tables in SQL context\n",
    "sqlContext.registerDataFrameAsTable(df_weather, \"chicagoWeather\")\n",
    "sqlContext.registerDataFrameAsTable(df_census, \"chicagoCensus\")\n",
    "sqlContext.registerDataFrameAsTable(df_crimes, \"chicagoCrime\")\n",
    "\n",
    "\n",
    "crimeWithWeather = sqlContext.sql(\"\"\"SELECT\n",
    "a.Year, a.Month, a.Day, a.WeekNum, a.HourOfDay, a.Weekend, a.Season, a.WeekDay,\n",
    "a.IUCR, a.Primary_Type, a.Location_Description, a.Community_Area, a.District,\n",
    "a.Arrest, a.Domestic, a.Beat, a.Ward, a.FBI_Code,\n",
    "b.minTemp, b.maxTemp, b.meanTemp,\n",
    "c.PERCENT_AGED_UNDER_18_OR_OVER_64, c.PER_CAPITA_INCOME, c.HARDSHIP_INDEX,\n",
    "c.PERCENT_OF_HOUSING_CROWDED, c.PERCENT_HOUSEHOLDS_BELOW_POVERTY,\n",
    "c.PERCENT_AGED_16__UNEMPLOYED, c.PERCENT_AGED_25__WITHOUT_HIGH_SCHOOL_DIPLOMA\n",
    "FROM chicagoCrime a\n",
    "JOIN chicagoWeather b\n",
    "ON a.Year = b.year AND a.Month = b.month AND a.Day = b.day\n",
    "JOIN chicagoCensus c\n",
    "ON a.Community_Area = c.Community_Area_Number\"\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----+-----+---+-------+---------+-------+------+-------+----+-----------------+--------------------+--------------+--------+------+--------+----+----+--------+-------+-------+--------+--------------------------------+-----------------+--------------+--------------------------+--------------------------------+---------------------------+--------------------------------------------+\n",
      "|Year|Month|Day|WeekNum|HourOfDay|Weekend|Season|WeekDay|IUCR|     Primary_Type|Location_Description|Community_Area|District|Arrest|Domestic|Beat|Ward|FBI_Code|minTemp|maxTemp|meanTemp|PERCENT_AGED_UNDER_18_OR_OVER_64|PER_CAPITA_INCOME|HARDSHIP_INDEX|PERCENT_OF_HOUSING_CROWDED|PERCENT_HOUSEHOLDS_BELOW_POVERTY|PERCENT_AGED_16__UNEMPLOYED|PERCENT_AGED_25__WITHOUT_HIGH_SCHOOL_DIPLOMA|\n",
      "+----+-----+---+-------+---------+-------+------+-------+----+-----------------+--------------------+--------------+--------+------+--------+----+----+--------+-------+-------+--------+--------------------------------+-----------------+--------------+--------------------------+--------------------------------+---------------------------+--------------------------------------------+\n",
      "|2015|    1| 23|      4|       22|      0|Winter|    Fri|143A|WEAPONS VIOLATION|               ALLEY|            31|      12|  true|   false|1234|  25|      15|     29|     31|      30|                            32.6|            16444|            76|         9.600000000000001|                            25.8|                       15.8|                                        40.7|\n",
      "|2015|    1| 23|      4|       19|      0|Winter|    Fri|4625|    OTHER OFFENSE|            SIDEWALK|            31|      10|  true|   false|1034|  25|      26|     29|     31|      30|                            32.6|            16444|            76|         9.600000000000001|                            25.8|                       15.8|                                        40.7|\n",
      "+----+-----+---+-------+---------+-------+------+-------+----+-----------------+--------------------+--------------+--------+------+--------+----+----+--------+-------+-------+--------+--------------------------------+-----------------+--------------+--------------------------+--------------------------------+---------------------------+--------------------------------------------+\n",
      "only showing top 2 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Print the crimeWithWeather data table from Spark\n",
    "crimeWithWeather.show(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "H2OContext: ip=172.16.2.98, port=54329\n"
     ]
    }
   ],
   "source": [
    "#Copy table from Spark to H2O\n",
    "hc.as_h2o_frame?\n",
    "crimeWithWeatherHF = hc.as_h2o_frame(crimeWithWeather,framename=\"crimeWithWeather\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<tr><th>       </th><th>Year  </th><th>Month         </th><th>Day          </th><th>WeekNum       </th><th>HourOfDay    </th><th>Weekend       </th><th>Season  </th><th>WeekDay  </th><th>IUCR  </th><th>Primary_Type              </th><th>Location_Description          </th><th>Community_Area  </th><th>District     </th><th>Arrest  </th><th>Domestic  </th><th>Beat         </th><th>Ward         </th><th>FBI_Code  </th><th>minTemp      </th><th>maxTemp      </th><th>meanTemp     </th><th>PERCENT_AGED_UNDER_18_OR_OVER_64  </th><th>PER_CAPITA_INCOME  </th><th>HARDSHIP_INDEX  </th><th>PERCENT_OF_HOUSING_CROWDED  </th><th>PERCENT_HOUSEHOLDS_BELOW_POVERTY  </th><th>PERCENT_AGED_16__UNEMPLOYED  </th><th>PERCENT_AGED_25__WITHOUT_HIGH_SCHOOL_DIPLOMA  </th></tr>\n",
       "<tr><td>type   </td><td>int   </td><td>int           </td><td>int          </td><td>int           </td><td>int          </td><td>int           </td><td>string  </td><td>string   </td><td>string</td><td>string                    </td><td>string                        </td><td>int             </td><td>int          </td><td>string  </td><td>string    </td><td>int          </td><td>int          </td><td>string    </td><td>int          </td><td>int          </td><td>int          </td><td>real                              </td><td>int                </td><td>int             </td><td>real                        </td><td>real                              </td><td>real                         </td><td>real                                          </td></tr>\n",
       "<tr><td>mins   </td><td>2015.0</td><td>1.0           </td><td>1.0          </td><td>4.0           </td><td>0.0          </td><td>0.0           </td><td>NaN     </td><td>NaN      </td><td>NaN   </td><td>NaN                       </td><td>NaN                           </td><td>1.0             </td><td>1.0          </td><td>NaN     </td><td>NaN       </td><td>111.0        </td><td>1.0          </td><td>NaN       </td><td>-2.0         </td><td>15.0         </td><td>7.0          </td><td>13.5                              </td><td>8201.0             </td><td>1.0             </td><td>0.3                         </td><td>3.3                               </td><td>4.7                          </td><td>2.5                                           </td></tr>\n",
       "<tr><td>mean   </td><td>2015.0</td><td>1.41944194419 </td><td>17.6839683968</td><td>5.18081808181 </td><td>13.6319631963</td><td>0.159115911591</td><td>NaN     </td><td>NaN      </td><td>NaN   </td><td>NaN                       </td><td>NaN                           </td><td>37.4476447645   </td><td>11.3489885128</td><td>NaN     </td><td>NaN       </td><td>1159.61806181</td><td>22.9540954095</td><td>NaN       </td><td>17.699669967 </td><td>31.7199719972</td><td>24.9408940894</td><td>35.0596759676                     </td><td>25221.3057306      </td><td>54.4786478648   </td><td>5.43707370737               </td><td>24.600750075                      </td><td>16.8288328833                </td><td>21.096639664                                  </td></tr>\n",
       "<tr><td>maxs   </td><td>2015.0</td><td>2.0           </td><td>31.0         </td><td>6.0           </td><td>23.0         </td><td>1.0           </td><td>NaN     </td><td>NaN      </td><td>NaN   </td><td>NaN                       </td><td>NaN                           </td><td>77.0            </td><td>25.0         </td><td>NaN     </td><td>NaN       </td><td>2535.0       </td><td>50.0         </td><td>NaN       </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>51.5                              </td><td>88669.0            </td><td>98.0            </td><td>15.8                        </td><td>56.5                              </td><td>35.9                         </td><td>54.8                                          </td></tr>\n",
       "<tr><td>sigma  </td><td>0.0   </td><td>0.493492406787</td><td>11.1801043358</td><td>0.738929830409</td><td>6.47321735807</td><td>0.365802434041</td><td>NaN     </td><td>NaN      </td><td>NaN   </td><td>NaN                       </td><td>NaN                           </td><td>21.2748762223   </td><td>6.94547493301</td><td>NaN     </td><td>NaN       </td><td>695.76029875 </td><td>13.6495661144</td><td>NaN       </td><td>8.96118136438</td><td>6.93809913472</td><td>7.46302527062</td><td>7.95653388237                     </td><td>18010.0446225      </td><td>29.3247456472   </td><td>3.75289588494               </td><td>10.1450570661                     </td><td>7.58926327988                </td><td>11.3868817911                                 </td></tr>\n",
       "<tr><td>zeros  </td><td>0     </td><td>0             </td><td>0            </td><td>0             </td><td>374          </td><td>8408          </td><td>0       </td><td>0        </td><td>0     </td><td>0                         </td><td>0                             </td><td>0               </td><td>0            </td><td>0       </td><td>0         </td><td>0            </td><td>0            </td><td>0         </td><td>0            </td><td>0            </td><td>0            </td><td>0                                 </td><td>0                  </td><td>0               </td><td>0                           </td><td>0                                 </td><td>0                            </td><td>0                                             </td></tr>\n",
       "<tr><td>missing</td><td>0     </td><td>0             </td><td>0            </td><td>0             </td><td>0            </td><td>0             </td><td>0       </td><td>0        </td><td>0     </td><td>0                         </td><td>6                             </td><td>0               </td><td>162          </td><td>0       </td><td>0         </td><td>0            </td><td>0            </td><td>0         </td><td>0            </td><td>0            </td><td>0            </td><td>0                                 </td><td>0                  </td><td>0               </td><td>0                           </td><td>0                                 </td><td>0                            </td><td>0                                             </td></tr>\n",
       "<tr><td>0      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>22.0         </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>2820  </td><td>OTHER OFFENSE             </td><td>APARTMENT                     </td><td>31.0            </td><td>10.0         </td><td>false   </td><td>false     </td><td>1034.0       </td><td>25.0         </td><td>26        </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>1      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>21.0         </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>1310  </td><td>CRIMINAL DAMAGE           </td><td>RESTAURANT                    </td><td>31.0            </td><td>12.0         </td><td>true    </td><td>false     </td><td>1233.0       </td><td>25.0         </td><td>14        </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>2      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>18.0         </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>1750  </td><td>OFFENSE INVOLVING CHILDREN</td><td>RESIDENCE                     </td><td>31.0            </td><td>12.0         </td><td>false   </td><td>true      </td><td>1235.0       </td><td>25.0         </td><td>20        </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>3      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>18.0         </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>0460  </td><td>BATTERY                   </td><td>OTHER                         </td><td>31.0            </td><td>10.0         </td><td>false   </td><td>false     </td><td>1023.0       </td><td>25.0         </td><td>08B       </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>4      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>13.0         </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>0890  </td><td>THEFT                     </td><td>CURRENCY EXCHANGE             </td><td>31.0            </td><td>10.0         </td><td>false   </td><td>false     </td><td>1023.0       </td><td>25.0         </td><td>06        </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>5      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>9.0          </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>0560  </td><td>ASSAULT                   </td><td>OTHER                         </td><td>31.0            </td><td>12.0         </td><td>false   </td><td>false     </td><td>1234.0       </td><td>25.0         </td><td>08A       </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>6      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>8.0          </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>0486  </td><td>BATTERY                   </td><td>RESIDENCE                     </td><td>31.0            </td><td>12.0         </td><td>true    </td><td>true      </td><td>1235.0       </td><td>25.0         </td><td>08B       </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>7      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>1.0          </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>0420  </td><td>BATTERY                   </td><td>SIDEWALK                      </td><td>31.0            </td><td>10.0         </td><td>false   </td><td>false     </td><td>1034.0       </td><td>25.0         </td><td>04B       </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>8      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>0.0          </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>1320  </td><td>CRIMINAL DAMAGE           </td><td>PARKING LOT/GARAGE(NON.RESID.)</td><td>31.0            </td><td>9.0          </td><td>false   </td><td>false     </td><td>912.0        </td><td>11.0         </td><td>14        </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>9      </td><td>2015.0</td><td>1.0           </td><td>31.0         </td><td>5.0           </td><td>23.0         </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>0820  </td><td>THEFT                     </td><td>SIDEWALK                      </td><td>31.0            </td><td>12.0         </td><td>false   </td><td>false     </td><td>1234.0       </td><td>25.0         </td><td>06        </td><td>19.0         </td><td>36.0         </td><td>28.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "crimeWithWeatherHF.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Assign column types to the CrimeWeatherHF data table in H2O\n",
    "crimeWithWeatherHF[\"Season\"]= crimeWithWeatherHF[\"Season\"].asfactor()\n",
    "crimeWithWeatherHF[\"WeekDay\"]= crimeWithWeatherHF[\"WeekDay\"].asfactor()\n",
    "crimeWithWeatherHF[\"IUCR\"]= crimeWithWeatherHF[\"IUCR\"].asfactor()\n",
    "crimeWithWeatherHF[\"Primary_Type\"]= crimeWithWeatherHF[\"Primary_Type\"].asfactor()\n",
    "crimeWithWeatherHF[\"Location_Description\"]= crimeWithWeatherHF[\"Location_Description\"].asfactor()\n",
    "crimeWithWeatherHF[\"Arrest\"]= crimeWithWeatherHF[\"Arrest\"].asfactor()\n",
    "crimeWithWeatherHF[\"Domestic\"]= crimeWithWeatherHF[\"Domestic\"].asfactor()\n",
    "crimeWithWeatherHF[\"FBI_Code\"]= crimeWithWeatherHF[\"FBI_Code\"].asfactor()\n",
    "crimeWithWeatherHF[\"Season\"]= crimeWithWeatherHF[\"Season\"].asfactor()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<tr><th>       </th><th>Year  </th><th>Month         </th><th>Day          </th><th>WeekNum       </th><th>HourOfDay    </th><th>Weekend       </th><th>Season  </th><th>WeekDay  </th><th>IUCR  </th><th>Primary_Type              </th><th>Location_Description          </th><th>Community_Area  </th><th>District     </th><th>Arrest        </th><th>Domestic      </th><th>Beat         </th><th>Ward         </th><th>FBI_Code  </th><th>minTemp      </th><th>maxTemp      </th><th>meanTemp     </th><th>PERCENT_AGED_UNDER_18_OR_OVER_64  </th><th>PER_CAPITA_INCOME  </th><th>HARDSHIP_INDEX  </th><th>PERCENT_OF_HOUSING_CROWDED  </th><th>PERCENT_HOUSEHOLDS_BELOW_POVERTY  </th><th>PERCENT_AGED_16__UNEMPLOYED  </th><th>PERCENT_AGED_25__WITHOUT_HIGH_SCHOOL_DIPLOMA  </th></tr>\n",
       "<tr><td>type   </td><td>int   </td><td>int           </td><td>int          </td><td>int           </td><td>int          </td><td>int           </td><td>enum    </td><td>enum     </td><td>enum  </td><td>enum                      </td><td>enum                          </td><td>int             </td><td>int          </td><td>enum          </td><td>enum          </td><td>int          </td><td>int          </td><td>enum      </td><td>int          </td><td>int          </td><td>int          </td><td>real                              </td><td>int                </td><td>int             </td><td>real                        </td><td>real                              </td><td>real                         </td><td>real                                          </td></tr>\n",
       "<tr><td>mins   </td><td>2015.0</td><td>1.0           </td><td>1.0          </td><td>4.0           </td><td>0.0          </td><td>0.0           </td><td>0.0     </td><td>0.0      </td><td>0.0   </td><td>0.0                       </td><td>0.0                           </td><td>1.0             </td><td>1.0          </td><td>0.0           </td><td>0.0           </td><td>111.0        </td><td>1.0          </td><td>0.0       </td><td>-2.0         </td><td>15.0         </td><td>7.0          </td><td>13.5                              </td><td>8201.0             </td><td>1.0             </td><td>0.3                         </td><td>3.3                               </td><td>4.7                          </td><td>2.5                                           </td></tr>\n",
       "<tr><td>mean   </td><td>2015.0</td><td>1.41944194419 </td><td>17.6839683968</td><td>5.18081808181 </td><td>13.6319631963</td><td>0.159115911591</td><td>0.0     </td><td>NaN      </td><td>NaN   </td><td>NaN                       </td><td>NaN                           </td><td>37.4476447645   </td><td>11.3489885128</td><td>0.292829282928</td><td>0.152315231523</td><td>1159.61806181</td><td>22.9540954095</td><td>NaN       </td><td>17.699669967 </td><td>31.7199719972</td><td>24.9408940894</td><td>35.0596759676                     </td><td>25221.3057306      </td><td>54.4786478648   </td><td>5.43707370737               </td><td>24.600750075                      </td><td>16.8288328833                </td><td>21.096639664                                  </td></tr>\n",
       "<tr><td>maxs   </td><td>2015.0</td><td>2.0           </td><td>31.0         </td><td>6.0           </td><td>23.0         </td><td>1.0           </td><td>0.0     </td><td>6.0      </td><td>212.0 </td><td>26.0                      </td><td>90.0                          </td><td>77.0            </td><td>25.0         </td><td>1.0           </td><td>1.0           </td><td>2535.0       </td><td>50.0         </td><td>24.0      </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>51.5                              </td><td>88669.0            </td><td>98.0            </td><td>15.8                        </td><td>56.5                              </td><td>35.9                         </td><td>54.8                                          </td></tr>\n",
       "<tr><td>sigma  </td><td>0.0   </td><td>0.493492406787</td><td>11.1801043358</td><td>0.738929830409</td><td>6.47321735807</td><td>0.365802434041</td><td>0.0     </td><td>NaN      </td><td>NaN   </td><td>NaN                       </td><td>NaN                           </td><td>21.2748762223   </td><td>6.94547493301</td><td>0.455083515588</td><td>0.35934414686 </td><td>695.76029875 </td><td>13.6495661144</td><td>NaN       </td><td>8.96118136438</td><td>6.93809913472</td><td>7.46302527062</td><td>7.95653388237                     </td><td>18010.0446225      </td><td>29.3247456472   </td><td>3.75289588494               </td><td>10.1450570661                     </td><td>7.58926327988                </td><td>11.3868817911                                 </td></tr>\n",
       "<tr><td>zeros  </td><td>0     </td><td>0             </td><td>0            </td><td>0             </td><td>374          </td><td>8408          </td><td>9999    </td><td>1942     </td><td>16    </td><td>11                        </td><td>19                            </td><td>0               </td><td>0            </td><td>7071          </td><td>8476          </td><td>0            </td><td>0            </td><td>16        </td><td>0            </td><td>0            </td><td>0            </td><td>0                                 </td><td>0                  </td><td>0               </td><td>0                           </td><td>0                                 </td><td>0                            </td><td>0                                             </td></tr>\n",
       "<tr><td>missing</td><td>0     </td><td>0             </td><td>0            </td><td>0             </td><td>0            </td><td>0             </td><td>0       </td><td>0        </td><td>0     </td><td>0                         </td><td>6                             </td><td>0               </td><td>162          </td><td>0             </td><td>0             </td><td>0            </td><td>0            </td><td>0         </td><td>0            </td><td>0            </td><td>0            </td><td>0                                 </td><td>0                  </td><td>0               </td><td>0                           </td><td>0                                 </td><td>0                            </td><td>0                                             </td></tr>\n",
       "<tr><td>0      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>22.0         </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>2820  </td><td>OTHER OFFENSE             </td><td>APARTMENT                     </td><td>31.0            </td><td>10.0         </td><td>false         </td><td>false         </td><td>1034.0       </td><td>25.0         </td><td>26        </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>1      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>21.0         </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>1310  </td><td>CRIMINAL DAMAGE           </td><td>RESTAURANT                    </td><td>31.0            </td><td>12.0         </td><td>true          </td><td>false         </td><td>1233.0       </td><td>25.0         </td><td>14        </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>2      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>18.0         </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>1750  </td><td>OFFENSE INVOLVING CHILDREN</td><td>RESIDENCE                     </td><td>31.0            </td><td>12.0         </td><td>false         </td><td>true          </td><td>1235.0       </td><td>25.0         </td><td>20        </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>3      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>18.0         </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>0460  </td><td>BATTERY                   </td><td>OTHER                         </td><td>31.0            </td><td>10.0         </td><td>false         </td><td>false         </td><td>1023.0       </td><td>25.0         </td><td>08B       </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>4      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>13.0         </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>0890  </td><td>THEFT                     </td><td>CURRENCY EXCHANGE             </td><td>31.0            </td><td>10.0         </td><td>false         </td><td>false         </td><td>1023.0       </td><td>25.0         </td><td>06        </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>5      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>9.0          </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>0560  </td><td>ASSAULT                   </td><td>OTHER                         </td><td>31.0            </td><td>12.0         </td><td>false         </td><td>false         </td><td>1234.0       </td><td>25.0         </td><td>08A       </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>6      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>8.0          </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>0486  </td><td>BATTERY                   </td><td>RESIDENCE                     </td><td>31.0            </td><td>12.0         </td><td>true          </td><td>true          </td><td>1235.0       </td><td>25.0         </td><td>08B       </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>7      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>1.0          </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>0420  </td><td>BATTERY                   </td><td>SIDEWALK                      </td><td>31.0            </td><td>10.0         </td><td>false         </td><td>false         </td><td>1034.0       </td><td>25.0         </td><td>04B       </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>8      </td><td>2015.0</td><td>1.0           </td><td>24.0         </td><td>4.0           </td><td>0.0          </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>1320  </td><td>CRIMINAL DAMAGE           </td><td>PARKING LOT/GARAGE(NON.RESID.)</td><td>31.0            </td><td>9.0          </td><td>false         </td><td>false         </td><td>912.0        </td><td>11.0         </td><td>14        </td><td>29.0         </td><td>43.0         </td><td>36.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "<tr><td>9      </td><td>2015.0</td><td>1.0           </td><td>31.0         </td><td>5.0           </td><td>23.0         </td><td>0.0           </td><td>Winter  </td><td>Sat      </td><td>0820  </td><td>THEFT                     </td><td>SIDEWALK                      </td><td>31.0            </td><td>12.0         </td><td>false         </td><td>false         </td><td>1234.0       </td><td>25.0         </td><td>06        </td><td>19.0         </td><td>36.0         </td><td>28.0         </td><td>32.6                              </td><td>16444.0            </td><td>76.0            </td><td>9.6                         </td><td>25.8                              </td><td>15.8                         </td><td>40.7                                          </td></tr>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "crimeWithWeatherHF.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Split final H2O data table into train test and validation sets\n",
    "ratios = [0.6,0.2]\n",
    "frs = crimeWithWeatherHF.split_frame(ratios,seed=12345)\n",
    "train = frs[0]\n",
    "train.frame_id = \"Train\"\n",
    "valid = frs[2]\n",
    "valid.frame_id = \"Validation\"\n",
    "test = frs[1]\n",
    "test.frame_id = \"Test\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Import Model Builders from H2O Python\n",
    "from h2o.estimators.gbm import H2OGradientBoostingEstimator\n",
    "from h2o.estimators.deeplearning import H2ODeepLearningEstimator"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Inspect the avialble gbm parameters\n",
    "H2OGradientBoostingEstimator?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Define Preditors\n",
    "predictors = crimeWithWeatherHF.names[:]\n",
    "response = \"Arrest\"\n",
    "predictors.remove(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "gbm Model Build Progress: [##################################################] 100%\n",
      "\n",
      "deeplearning Model Build Progress: [##################################################] 100%\n"
     ]
    }
   ],
   "source": [
    "#Simple GBM model - Predict Arrest\n",
    "model_gbm = H2OGradientBoostingEstimator(ntrees         =50,\n",
    "                                        max_depth      =6,\n",
    "                                        learn_rate     =0.1, \n",
    "                                        #nfolds         =2,\n",
    "                                        distribution   =\"bernoulli\")\n",
    "\n",
    "model_gbm.train(x               =predictors,\n",
    "               y               =\"Arrest\",\n",
    "               training_frame  =train,\n",
    "               validation_frame=valid\n",
    "               )\n",
    "\n",
    "# Simple Deep Learning - Predict Arrest\n",
    "model_dl = H2ODeepLearningEstimator(variable_importances=True,\n",
    "                                   loss                =\"Automatic\")\n",
    "\n",
    "model_dl.train(x                =predictors,\n",
    "              y                =\"Arrest\",\n",
    "              training_frame  =train,\n",
    "              validation_frame=valid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.335827722991:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b></b></td>\n",
       "<td><b>false</b></td>\n",
       "<td><b>true</b></td>\n",
       "<td><b>Error</b></td>\n",
       "<td><b>Rate</b></td></tr>\n",
       "<tr><td>false</td>\n",
       "<td>4125.0</td>\n",
       "<td>142.0</td>\n",
       "<td>0.0333</td>\n",
       "<td> (142.0/4267.0)</td></tr>\n",
       "<tr><td>true</td>\n",
       "<td>251.0</td>\n",
       "<td>1504.0</td>\n",
       "<td>0.143</td>\n",
       "<td> (251.0/1755.0)</td></tr>\n",
       "<tr><td>Total</td>\n",
       "<td>4376.0</td>\n",
       "<td>1646.0</td>\n",
       "<td>0.0653</td>\n",
       "<td> (393.0/6022.0)</td></tr></table></div>"
      ],
      "text/plain": [
       "       false    true    Error    Rate\n",
       "-----  -------  ------  -------  --------------\n",
       "false  4125     142     0.0333   (142.0/4267.0)\n",
       "true   251      1504    0.143    (251.0/1755.0)\n",
       "Total  4376     1646    0.0653   (393.0/6022.0)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.432844055866:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b></b></td>\n",
       "<td><b>false</b></td>\n",
       "<td><b>true</b></td>\n",
       "<td><b>Error</b></td>\n",
       "<td><b>Rate</b></td></tr>\n",
       "<tr><td>false</td>\n",
       "<td>1362.0</td>\n",
       "<td>61.0</td>\n",
       "<td>0.0429</td>\n",
       "<td> (61.0/1423.0)</td></tr>\n",
       "<tr><td>true</td>\n",
       "<td>150.0</td>\n",
       "<td>443.0</td>\n",
       "<td>0.253</td>\n",
       "<td> (150.0/593.0)</td></tr>\n",
       "<tr><td>Total</td>\n",
       "<td>1512.0</td>\n",
       "<td>504.0</td>\n",
       "<td>0.1047</td>\n",
       "<td> (211.0/2016.0)</td></tr></table></div>"
      ],
      "text/plain": [
       "       false    true    Error    Rate\n",
       "-----  -------  ------  -------  --------------\n",
       "false  1362     61      0.0429   (61.0/1423.0)\n",
       "true   150      443     0.253    (150.0/593.0)\n",
       "Total  1512     504     0.1047   (211.0/2016.0)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "# Print confusion matrices for the train and validation set\n",
    "print(model_gbm.confusion_matrix(train = True))\n",
    "print(model_gbm.confusion_matrix(valid = True))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.974667176776\n",
      "0.92596751276\n"
     ]
    }
   ],
   "source": [
    "print(model_gbm.auc(train=True))\n",
    "print(model_gbm.auc(valid=True))\n",
    "model_gbm.plot(metric=\"AUC\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>variable</th>\n",
       "      <th>relative_importance</th>\n",
       "      <th>scaled_importance</th>\n",
       "      <th>percentage</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>IUCR</td>\n",
       "      <td>4280.939453</td>\n",
       "      <td>1.000000e+00</td>\n",
       "      <td>8.234218e-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Location_Description</td>\n",
       "      <td>487.323059</td>\n",
       "      <td>1.138355e-01</td>\n",
       "      <td>9.373466e-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>WeekDay</td>\n",
       "      <td>55.790558</td>\n",
       "      <td>1.303232e-02</td>\n",
       "      <td>1.073109e-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>HourOfDay</td>\n",
       "      <td>55.419220</td>\n",
       "      <td>1.294557e-02</td>\n",
       "      <td>1.065967e-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>PERCENT_AGED_16__UNEMPLOYED</td>\n",
       "      <td>34.422894</td>\n",
       "      <td>8.040967e-03</td>\n",
       "      <td>6.621107e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Beat</td>\n",
       "      <td>31.468222</td>\n",
       "      <td>7.350775e-03</td>\n",
       "      <td>6.052788e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>PERCENT_HOUSEHOLDS_BELOW_POVERTY</td>\n",
       "      <td>29.103352</td>\n",
       "      <td>6.798356e-03</td>\n",
       "      <td>5.597915e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>PER_CAPITA_INCOME</td>\n",
       "      <td>26.233143</td>\n",
       "      <td>6.127894e-03</td>\n",
       "      <td>5.045841e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>PERCENT_AGED_UNDER_18_OR_OVER_64</td>\n",
       "      <td>24.077402</td>\n",
       "      <td>5.624327e-03</td>\n",
       "      <td>4.631193e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Day</td>\n",
       "      <td>23.472567</td>\n",
       "      <td>5.483041e-03</td>\n",
       "      <td>4.514855e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>maxTemp</td>\n",
       "      <td>11.300793</td>\n",
       "      <td>2.639793e-03</td>\n",
       "      <td>2.173663e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Community_Area</td>\n",
       "      <td>10.252146</td>\n",
       "      <td>2.394835e-03</td>\n",
       "      <td>1.971960e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>HARDSHIP_INDEX</td>\n",
       "      <td>10.116072</td>\n",
       "      <td>2.363049e-03</td>\n",
       "      <td>1.945786e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Domestic</td>\n",
       "      <td>9.294327</td>\n",
       "      <td>2.171095e-03</td>\n",
       "      <td>1.787727e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>District</td>\n",
       "      <td>8.304654</td>\n",
       "      <td>1.939914e-03</td>\n",
       "      <td>1.597367e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>minTemp</td>\n",
       "      <td>6.243027</td>\n",
       "      <td>1.458331e-03</td>\n",
       "      <td>1.200822e-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>WeekNum</td>\n",
       "      <td>4.230102</td>\n",
       "      <td>9.881246e-04</td>\n",
       "      <td>8.136433e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>FBI_Code</td>\n",
       "      <td>2.363182</td>\n",
       "      <td>5.520241e-04</td>\n",
       "      <td>4.545486e-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>Month</td>\n",
       "      <td>0.000018</td>\n",
       "      <td>4.187325e-09</td>\n",
       "      <td>3.447935e-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>Weekend</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>25 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                            variable  relative_importance  scaled_importance  \\\n",
       "0                               IUCR          4280.939453       1.000000e+00   \n",
       "1               Location_Description           487.323059       1.138355e-01   \n",
       "2                            WeekDay            55.790558       1.303232e-02   \n",
       "3                          HourOfDay            55.419220       1.294557e-02   \n",
       "4        PERCENT_AGED_16__UNEMPLOYED            34.422894       8.040967e-03   \n",
       "5                               Beat            31.468222       7.350775e-03   \n",
       "6   PERCENT_HOUSEHOLDS_BELOW_POVERTY            29.103352       6.798356e-03   \n",
       "7                  PER_CAPITA_INCOME            26.233143       6.127894e-03   \n",
       "8   PERCENT_AGED_UNDER_18_OR_OVER_64            24.077402       5.624327e-03   \n",
       "9                                Day            23.472567       5.483041e-03   \n",
       "..                               ...                  ...                ...   \n",
       "15                           maxTemp            11.300793       2.639793e-03   \n",
       "16                    Community_Area            10.252146       2.394835e-03   \n",
       "17                    HARDSHIP_INDEX            10.116072       2.363049e-03   \n",
       "18                          Domestic             9.294327       2.171095e-03   \n",
       "19                          District             8.304654       1.939914e-03   \n",
       "20                           minTemp             6.243027       1.458331e-03   \n",
       "21                           WeekNum             4.230102       9.881246e-04   \n",
       "22                          FBI_Code             2.363182       5.520241e-04   \n",
       "23                             Month             0.000018       4.187325e-09   \n",
       "24                           Weekend             0.000000       0.000000e+00   \n",
       "\n",
       "      percentage  \n",
       "0   8.234218e-01  \n",
       "1   9.373466e-02  \n",
       "2   1.073109e-02  \n",
       "3   1.065967e-02  \n",
       "4   6.621107e-03  \n",
       "5   6.052788e-03  \n",
       "6   5.597915e-03  \n",
       "7   5.045841e-03  \n",
       "8   4.631193e-03  \n",
       "9   4.514855e-03  \n",
       "..           ...  \n",
       "15  2.173663e-03  \n",
       "16  1.971960e-03  \n",
       "17  1.945786e-03  \n",
       "18  1.787727e-03  \n",
       "19  1.597367e-03  \n",
       "20  1.200822e-03  \n",
       "21  8.136433e-04  \n",
       "22  4.545486e-04  \n",
       "23  3.447935e-09  \n",
       "24  0.000000e+00  \n",
       "\n",
       "[25 rows x 4 columns]"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#Print variable importance\n",
    "model_gbm.varimp(True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model Details\n",
      "=============\n",
      "H2ODeepLearningEstimator :  Deep Learning\n",
      "Model Key:  DeepLearning_model_python_1446861372065_4\n",
      "\n",
      "Status of Neuron Layers: predicting Arrest, 2-class classification, bernoulli distribution, CrossEntropy loss, 118,802 weights/biases, 1.4 MB, 72,478 training samples, mini-batch size 1\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b></b></td>\n",
       "<td><b>layer</b></td>\n",
       "<td><b>units</b></td>\n",
       "<td><b>type</b></td>\n",
       "<td><b>dropout</b></td>\n",
       "<td><b>l1</b></td>\n",
       "<td><b>l2</b></td>\n",
       "<td><b>mean_rate</b></td>\n",
       "<td><b>rate_RMS</b></td>\n",
       "<td><b>momentum</b></td>\n",
       "<td><b>mean_weight</b></td>\n",
       "<td><b>weight_RMS</b></td>\n",
       "<td><b>mean_bias</b></td>\n",
       "<td><b>bias_RMS</b></td></tr>\n",
       "<tr><td></td>\n",
       "<td>1</td>\n",
       "<td>390</td>\n",
       "<td>Input</td>\n",
       "<td>0.0</td>\n",
       "<td></td>\n",
       "<td></td>\n",
       "<td></td>\n",
       "<td></td>\n",
       "<td></td>\n",
       "<td></td>\n",
       "<td></td>\n",
       "<td></td>\n",
       "<td></td></tr>\n",
       "<tr><td></td>\n",
       "<td>2</td>\n",
       "<td>200</td>\n",
       "<td>Rectifier</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.1</td>\n",
       "<td>0.3</td>\n",
       "<td>0.0</td>\n",
       "<td>-0.0</td>\n",
       "<td>0.1</td>\n",
       "<td>-0.0</td>\n",
       "<td>0.1</td></tr>\n",
       "<tr><td></td>\n",
       "<td>3</td>\n",
       "<td>200</td>\n",
       "<td>Rectifier</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.1</td>\n",
       "<td>0.2</td>\n",
       "<td>0.0</td>\n",
       "<td>-0.0</td>\n",
       "<td>0.1</td>\n",
       "<td>0.8</td>\n",
       "<td>0.2</td></tr>\n",
       "<tr><td></td>\n",
       "<td>4</td>\n",
       "<td>2</td>\n",
       "<td>Softmax</td>\n",
       "<td></td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.4</td>\n",
       "<td>-0.0</td>\n",
       "<td>0.0</td></tr></table></div>"
      ],
      "text/plain": [
       "    layer    units    type       dropout    l1    l2    mean_rate         rate_RMS          momentum    mean_weight        weight_RMS       mean_bias           bias_RMS\n",
       "--  -------  -------  ---------  ---------  ----  ----  ----------------  ----------------  ----------  -----------------  ---------------  ------------------  ----------------\n",
       "    1        390      Input      0.0\n",
       "    2        200      Rectifier  0.0        0.0   0.0   0.125569459979    0.314496159554    0.0         -0.00145149060662  0.0583122819662  -0.0226196222717    0.106465905905\n",
       "    3        200      Rectifier  0.0        0.0   0.0   0.089552259468    0.167708098888    0.0         -0.0100148269118   0.0716536939144  0.77415192296       0.243278682232\n",
       "    4        2        Softmax               0.0   0.0   0.00265202885173  0.00204257667065  0.0         0.003976688287     0.384519815445   -0.000194062303944  0.00806335359812"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "ModelMetricsBinomial: deeplearning\n",
      "** Reported on train data. **\n",
      "\n",
      "MSE: 0.0737426129728\n",
      "R^2: 0.642891439669\n",
      "LogLoss: 0.242051500943\n",
      "AUC: 0.950131166302\n",
      "Gini: 0.900262332604\n",
      "\n",
      "Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.343997370612:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b></b></td>\n",
       "<td><b>false</b></td>\n",
       "<td><b>true</b></td>\n",
       "<td><b>Error</b></td>\n",
       "<td><b>Rate</b></td></tr>\n",
       "<tr><td>false</td>\n",
       "<td>4003.0</td>\n",
       "<td>264.0</td>\n",
       "<td>0.0619</td>\n",
       "<td> (264.0/4267.0)</td></tr>\n",
       "<tr><td>true</td>\n",
       "<td>358.0</td>\n",
       "<td>1397.0</td>\n",
       "<td>0.204</td>\n",
       "<td> (358.0/1755.0)</td></tr>\n",
       "<tr><td>Total</td>\n",
       "<td>4361.0</td>\n",
       "<td>1661.0</td>\n",
       "<td>0.1033</td>\n",
       "<td> (622.0/6022.0)</td></tr></table></div>"
      ],
      "text/plain": [
       "       false    true    Error    Rate\n",
       "-----  -------  ------  -------  --------------\n",
       "false  4003     264     0.0619   (264.0/4267.0)\n",
       "true   358      1397    0.204    (358.0/1755.0)\n",
       "Total  4361     1661    0.1033   (622.0/6022.0)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Maximum Metrics: Maximum metrics at their respective thresholds\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b>metric</b></td>\n",
       "<td><b>threshold</b></td>\n",
       "<td><b>value</b></td>\n",
       "<td><b>idx</b></td></tr>\n",
       "<tr><td>max f1</td>\n",
       "<td>0.3</td>\n",
       "<td>0.8</td>\n",
       "<td>195.0</td></tr>\n",
       "<tr><td>max f2</td>\n",
       "<td>0.2</td>\n",
       "<td>0.9</td>\n",
       "<td>278.0</td></tr>\n",
       "<tr><td>max f0point5</td>\n",
       "<td>0.7</td>\n",
       "<td>0.9</td>\n",
       "<td>86.0</td></tr>\n",
       "<tr><td>max accuracy</td>\n",
       "<td>0.5</td>\n",
       "<td>0.9</td>\n",
       "<td>149.0</td></tr>\n",
       "<tr><td>max precision</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>max absolute_MCC</td>\n",
       "<td>0.3</td>\n",
       "<td>0.7</td>\n",
       "<td>195.0</td></tr>\n",
       "<tr><td>max min_per_class_accuracy</td>\n",
       "<td>0.2</td>\n",
       "<td>0.9</td>\n",
       "<td>247.0</td></tr></table></div>"
      ],
      "text/plain": [
       "metric                      threshold    value     idx\n",
       "--------------------------  -----------  --------  -----\n",
       "max f1                      0.343997     0.817916  195\n",
       "max f2                      0.158173     0.856675  278\n",
       "max f0point5                0.740563     0.878082  86\n",
       "max accuracy                0.48931      0.897874  149\n",
       "max precision               0.999884     1         0\n",
       "max absolute_MCC            0.343997     0.746452  195\n",
       "max min_per_class_accuracy  0.217785     0.876923  247"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "ModelMetricsBinomial: deeplearning\n",
      "** Reported on validation data. **\n",
      "\n",
      "MSE: 0.0843305429737\n",
      "R^2: 0.593831388139\n",
      "LogLoss: 0.280203809486\n",
      "AUC: 0.930515181213\n",
      "Gini: 0.861030362427\n",
      "\n",
      "Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.493462351545:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b></b></td>\n",
       "<td><b>false</b></td>\n",
       "<td><b>true</b></td>\n",
       "<td><b>Error</b></td>\n",
       "<td><b>Rate</b></td></tr>\n",
       "<tr><td>false</td>\n",
       "<td>1361.0</td>\n",
       "<td>62.0</td>\n",
       "<td>0.0436</td>\n",
       "<td> (62.0/1423.0)</td></tr>\n",
       "<tr><td>true</td>\n",
       "<td>158.0</td>\n",
       "<td>435.0</td>\n",
       "<td>0.2664</td>\n",
       "<td> (158.0/593.0)</td></tr>\n",
       "<tr><td>Total</td>\n",
       "<td>1519.0</td>\n",
       "<td>497.0</td>\n",
       "<td>0.1091</td>\n",
       "<td> (220.0/2016.0)</td></tr></table></div>"
      ],
      "text/plain": [
       "       false    true    Error    Rate\n",
       "-----  -------  ------  -------  --------------\n",
       "false  1361     62      0.0436   (62.0/1423.0)\n",
       "true   158      435     0.2664   (158.0/593.0)\n",
       "Total  1519     497     0.1091   (220.0/2016.0)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Maximum Metrics: Maximum metrics at their respective thresholds\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b>metric</b></td>\n",
       "<td><b>threshold</b></td>\n",
       "<td><b>value</b></td>\n",
       "<td><b>idx</b></td></tr>\n",
       "<tr><td>max f1</td>\n",
       "<td>0.5</td>\n",
       "<td>0.8</td>\n",
       "<td>137.0</td></tr>\n",
       "<tr><td>max f2</td>\n",
       "<td>0.1</td>\n",
       "<td>0.8</td>\n",
       "<td>303.0</td></tr>\n",
       "<tr><td>max f0point5</td>\n",
       "<td>0.7</td>\n",
       "<td>0.9</td>\n",
       "<td>82.0</td></tr>\n",
       "<tr><td>max accuracy</td>\n",
       "<td>0.7</td>\n",
       "<td>0.9</td>\n",
       "<td>91.0</td></tr>\n",
       "<tr><td>max precision</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>max absolute_MCC</td>\n",
       "<td>0.7</td>\n",
       "<td>0.7</td>\n",
       "<td>91.0</td></tr>\n",
       "<tr><td>max min_per_class_accuracy</td>\n",
       "<td>0.2</td>\n",
       "<td>0.8</td>\n",
       "<td>236.0</td></tr></table></div>"
      ],
      "text/plain": [
       "metric                      threshold    value     idx\n",
       "--------------------------  -----------  --------  -----\n",
       "max f1                      0.493462     0.798165  137\n",
       "max f2                      0.0962013    0.830341  303\n",
       "max f0point5                0.740357     0.877695  82\n",
       "max accuracy                0.69578      0.893849  91\n",
       "max precision               0.999877     1         0\n",
       "max absolute_MCC            0.69578      0.738603  91\n",
       "max min_per_class_accuracy  0.207426     0.836425  236"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Scoring History:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b></b></td>\n",
       "<td><b>timestamp</b></td>\n",
       "<td><b>duration</b></td>\n",
       "<td><b>training_speed</b></td>\n",
       "<td><b>epochs</b></td>\n",
       "<td><b>samples</b></td>\n",
       "<td><b>training_MSE</b></td>\n",
       "<td><b>training_r2</b></td>\n",
       "<td><b>training_logloss</b></td>\n",
       "<td><b>training_AUC</b></td>\n",
       "<td><b>training_classification_error</b></td>\n",
       "<td><b>validation_MSE</b></td>\n",
       "<td><b>validation_r2</b></td>\n",
       "<td><b>validation_logloss</b></td>\n",
       "<td><b>validation_AUC</b></td>\n",
       "<td><b>validation_classification_error</b></td></tr>\n",
       "<tr><td></td>\n",
       "<td>2015-11-06 17:57:05</td>\n",
       "<td> 0.000 sec</td>\n",
       "<td>None</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>nan</td>\n",
       "<td>nan</td>\n",
       "<td>nan</td>\n",
       "<td>nan</td>\n",
       "<td>nan</td>\n",
       "<td>nan</td>\n",
       "<td>nan</td>\n",
       "<td>nan</td>\n",
       "<td>nan</td>\n",
       "<td>nan</td></tr>\n",
       "<tr><td></td>\n",
       "<td>2015-11-06 17:57:09</td>\n",
       "<td> 2.899 sec</td>\n",
       "<td>2594 rows/sec</td>\n",
       "<td>1.0</td>\n",
       "<td>6068.0</td>\n",
       "<td>0.1</td>\n",
       "<td>0.3</td>\n",
       "<td>0.6</td>\n",
       "<td>0.9</td>\n",
       "<td>0.1</td>\n",
       "<td>0.1</td>\n",
       "<td>0.3</td>\n",
       "<td>0.6</td>\n",
       "<td>0.9</td>\n",
       "<td>0.1</td></tr>\n",
       "<tr><td></td>\n",
       "<td>2015-11-06 17:57:15</td>\n",
       "<td> 9.096 sec</td>\n",
       "<td>5465 rows/sec</td>\n",
       "<td>7.3</td>\n",
       "<td>43742.0</td>\n",
       "<td>0.1</td>\n",
       "<td>0.6</td>\n",
       "<td>0.3</td>\n",
       "<td>0.9</td>\n",
       "<td>0.1</td>\n",
       "<td>0.1</td>\n",
       "<td>0.6</td>\n",
       "<td>0.3</td>\n",
       "<td>0.9</td>\n",
       "<td>0.1</td></tr>\n",
       "<tr><td></td>\n",
       "<td>2015-11-06 17:57:19</td>\n",
       "<td>12.425 sec</td>\n",
       "<td>6571 rows/sec</td>\n",
       "<td>12.0</td>\n",
       "<td>72478.0</td>\n",
       "<td>0.1</td>\n",
       "<td>0.6</td>\n",
       "<td>0.2</td>\n",
       "<td>1.0</td>\n",
       "<td>0.1</td>\n",
       "<td>0.1</td>\n",
       "<td>0.6</td>\n",
       "<td>0.3</td>\n",
       "<td>0.9</td>\n",
       "<td>0.1</td></tr></table></div>"
      ],
      "text/plain": [
       "    timestamp            duration    training_speed    epochs    samples    training_MSE    training_r2    training_logloss    training_AUC    training_classification_error    validation_MSE    validation_r2    validation_logloss    validation_AUC    validation_classification_error\n",
       "--  -------------------  ----------  ----------------  --------  ---------  --------------  -------------  ------------------  --------------  -------------------------------  ----------------  ---------------  --------------------  ----------------  ---------------------------------\n",
       "    2015-11-06 17:57:05  0.000 sec                     0         0          nan             nan            nan                 nan             nan                              nan               nan              nan                   nan               nan\n",
       "    2015-11-06 17:57:09  2.899 sec   2594 rows/sec     1.00764   6068       0.146371        0.291179       0.646759            0.872778        0.146795                         0.139313          0.329013         0.616018              0.880032          0.138393\n",
       "    2015-11-06 17:57:15  9.096 sec   5465 rows/sec     7.2637    43742      0.0872842       0.577315       0.288376            0.931927        0.110761                         0.0929823         0.552161         0.302143              0.923719          0.109127\n",
       "    2015-11-06 17:57:19  12.425 sec  6571 rows/sec     12.0355   72478      0.0737426       0.642891       0.242052            0.950131        0.103288                         0.0843305         0.593831         0.280204              0.930515          0.109127"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Variable Importances:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b>variable</b></td>\n",
       "<td><b>relative_importance</b></td>\n",
       "<td><b>scaled_importance</b></td>\n",
       "<td><b>percentage</b></td></tr>\n",
       "<tr><td>Domestic.false</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>Primary_Type.NARCOTICS</td>\n",
       "<td>0.9</td>\n",
       "<td>0.9</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>IUCR.0860</td>\n",
       "<td>0.8</td>\n",
       "<td>0.8</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>FBI_Code.18</td>\n",
       "<td>0.8</td>\n",
       "<td>0.8</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>IUCR.4625</td>\n",
       "<td>0.7</td>\n",
       "<td>0.7</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>---</td>\n",
       "<td>---</td>\n",
       "<td>---</td>\n",
       "<td>---</td></tr>\n",
       "<tr><td>Location_Description.missing(NA)</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>Primary_Type.missing(NA)</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>FBI_Code.missing(NA)</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>WeekDay.missing(NA)</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>Domestic.missing(NA)</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td>\n",
       "<td>0.0</td></tr></table></div>"
      ],
      "text/plain": [
       "variable                          relative_importance    scaled_importance    percentage\n",
       "--------------------------------  ---------------------  -------------------  ----------------\n",
       "Domestic.false                    1.0                    1.0                  0.00423868455641\n",
       "Primary_Type.NARCOTICS            0.868295669556         0.868295669556       0.00368043144494\n",
       "IUCR.0860                         0.80918776989          0.80918776989        0.00342989170347\n",
       "FBI_Code.18                       0.793890833855         0.793890833855       0.00336505281694\n",
       "IUCR.4625                         0.744634568691         0.744634568691       0.00315627104648\n",
       "---                               ---                    ---                  ---\n",
       "Location_Description.missing(NA)  0.0                    0.0                  0.0\n",
       "Primary_Type.missing(NA)          0.0                    0.0                  0.0\n",
       "FBI_Code.missing(NA)              0.0                    0.0                  0.0\n",
       "WeekDay.missing(NA)               0.0                    0.0                  0.0\n",
       "Domestic.missing(NA)              0.0                    0.0                  0.0"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inspect Deep Learning model output\n",
    "model_dl"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<tr><th>predict  </th><th style=\"text-align: right;\">    false</th><th style=\"text-align: right;\">      true</th></tr>\n",
       "<tr><td>false    </td><td style=\"text-align: right;\">0.946415 </td><td style=\"text-align: right;\">0.0535847 </td></tr>\n",
       "<tr><td>false    </td><td style=\"text-align: right;\">0.862165 </td><td style=\"text-align: right;\">0.137835  </td></tr>\n",
       "<tr><td>false    </td><td style=\"text-align: right;\">0.938661 </td><td style=\"text-align: right;\">0.0613392 </td></tr>\n",
       "<tr><td>false    </td><td style=\"text-align: right;\">0.870186 </td><td style=\"text-align: right;\">0.129814  </td></tr>\n",
       "<tr><td>false    </td><td style=\"text-align: right;\">0.980488 </td><td style=\"text-align: right;\">0.0195118 </td></tr>\n",
       "<tr><td>false    </td><td style=\"text-align: right;\">0.972006 </td><td style=\"text-align: right;\">0.0279937 </td></tr>\n",
       "<tr><td>false    </td><td style=\"text-align: right;\">0.990995 </td><td style=\"text-align: right;\">0.00900489</td></tr>\n",
       "<tr><td>true     </td><td style=\"text-align: right;\">0.0210692</td><td style=\"text-align: right;\">0.978931  </td></tr>\n",
       "<tr><td>false    </td><td style=\"text-align: right;\">0.693061 </td><td style=\"text-align: right;\">0.306939  </td></tr>\n",
       "<tr><td>false    </td><td style=\"text-align: right;\">0.992097 </td><td style=\"text-align: right;\">0.00790253</td></tr>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Predict on the test set using the gbm model\n",
    "predictions = model_gbm.predict(test)\n",
    "predictions.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "ModelMetricsBinomial: gbm\n",
      "** Reported on test data. **\n",
      "\n",
      "MSE: 0.0893676876445\n",
      "R^2: 0.57094394422\n",
      "LogLoss: 0.294019576922\n",
      "AUC: 0.922152238508\n",
      "Gini: 0.844304477016\n",
      "\n",
      "Confusion Matrix (Act/Pred) for max f1 @ threshold = 0.365461652105:\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b></b></td>\n",
       "<td><b>false</b></td>\n",
       "<td><b>true</b></td>\n",
       "<td><b>Error</b></td>\n",
       "<td><b>Rate</b></td></tr>\n",
       "<tr><td>false</td>\n",
       "<td>1297.0</td>\n",
       "<td>84.0</td>\n",
       "<td>0.0608</td>\n",
       "<td> (84.0/1381.0)</td></tr>\n",
       "<tr><td>true</td>\n",
       "<td>153.0</td>\n",
       "<td>427.0</td>\n",
       "<td>0.2638</td>\n",
       "<td> (153.0/580.0)</td></tr>\n",
       "<tr><td>Total</td>\n",
       "<td>1450.0</td>\n",
       "<td>511.0</td>\n",
       "<td>0.1209</td>\n",
       "<td> (237.0/1961.0)</td></tr></table></div>"
      ],
      "text/plain": [
       "       false    true    Error    Rate\n",
       "-----  -------  ------  -------  --------------\n",
       "false  1297     84      0.0608   (84.0/1381.0)\n",
       "true   153      427     0.2638   (153.0/580.0)\n",
       "Total  1450     511     0.1209   (237.0/1961.0)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Maximum Metrics: Maximum metrics at their respective thresholds\n",
      "\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div style=\"overflow:auto\"><table style=\"width:50%\"><tr><td><b>metric</b></td>\n",
       "<td><b>threshold</b></td>\n",
       "<td><b>value</b></td>\n",
       "<td><b>idx</b></td></tr>\n",
       "<tr><td>max f1</td>\n",
       "<td>0.4</td>\n",
       "<td>0.8</td>\n",
       "<td>158.0</td></tr>\n",
       "<tr><td>max f2</td>\n",
       "<td>0.1</td>\n",
       "<td>0.8</td>\n",
       "<td>295.0</td></tr>\n",
       "<tr><td>max f0point5</td>\n",
       "<td>0.7</td>\n",
       "<td>0.9</td>\n",
       "<td>97.0</td></tr>\n",
       "<tr><td>max accuracy</td>\n",
       "<td>0.6</td>\n",
       "<td>0.9</td>\n",
       "<td>112.0</td></tr>\n",
       "<tr><td>max precision</td>\n",
       "<td>1.0</td>\n",
       "<td>1.0</td>\n",
       "<td>0.0</td></tr>\n",
       "<tr><td>max absolute_MCC</td>\n",
       "<td>0.6</td>\n",
       "<td>0.7</td>\n",
       "<td>112.0</td></tr>\n",
       "<tr><td>max min_per_class_accuracy</td>\n",
       "<td>0.2</td>\n",
       "<td>0.8</td>\n",
       "<td>235.0</td></tr></table></div>"
      ],
      "text/plain": [
       "metric                      threshold    value     idx\n",
       "--------------------------  -----------  --------  -----\n",
       "max f1                      0.365462     0.782768  158\n",
       "max f2                      0.121697     0.815152  295\n",
       "max f0point5                0.657606     0.855615  97\n",
       "max accuracy                0.57297      0.885773  112\n",
       "max precision               0.991217     1         0\n",
       "max absolute_MCC            0.57297      0.717666  112\n",
       "max min_per_class_accuracy  0.211976     0.821144  235"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look at performance on test set (if it includes true lables)\n",
    "test_performance = model_gbm.model_performance(test)\n",
    "test_performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Plots\n",
    "# Create Plots of Crime type vs Arrest Rate and Proportion of reported Crime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------------+-----------+---------+\n",
      "|          Crime_Type|crime_count|all_count|\n",
      "+--------------------+-----------+---------+\n",
      "|       OTHER OFFENSE|        183|      720|\n",
      "|   WEAPONS VIOLATION|         96|      118|\n",
      "|  DECEPTIVE PRACTICE|         25|      445|\n",
      "|            BURGLARY|         14|      458|\n",
      "|             BATTERY|        432|     1851|\n",
      "|             ROBBERY|         17|      357|\n",
      "| MOTOR VEHICLE THEFT|         17|      414|\n",
      "|        PROSTITUTION|        106|      106|\n",
      "|     CRIMINAL DAMAGE|         76|     1003|\n",
      "|          KIDNAPPING|          1|        7|\n",
      "|            GAMBLING|          3|        3|\n",
      "|LIQUOR LAW VIOLATION|         12|       12|\n",
      "+--------------------+-----------+---------+\n",
      "only showing top 12 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Create table to report Crimetype, Arrest count per crime, total reported count per Crime  \n",
    "sqlContext.registerDataFrameAsTable(df_crimes, \"df_crimes\")\n",
    "allCrimes = sqlContext.sql(\"\"\"SELECT Primary_Type, count(*) as all_count FROM df_crimes GROUP BY Primary_Type\"\"\")\n",
    "crimesWithArrest = sqlContext.sql(\"SELECT Primary_Type, count(*) as crime_count FROM chicagoCrime WHERE Arrest = 'true' GROUP BY Primary_Type\")\n",
    "\n",
    "sqlContext.registerDataFrameAsTable(crimesWithArrest, \"crimesWithArrest\")\n",
    "sqlContext.registerDataFrameAsTable(allCrimes, \"allCrimes\")\n",
    "\n",
    "crime_type = sqlContext.sql(\"Select a.Primary_Type as Crime_Type, a.crime_count, b.all_count \\\n",
    "FROM crimesWithArrest a \\\n",
    "JOIN allCrimes b \\\n",
    "ON a.Primary_Type = b.Primary_Type \")\n",
    "\n",
    "crime_type.show(12)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#Copy Crime_type table from Spark to H2O\n",
    "crime_typeHF = hc.as_h2o_frame(crime_type,framename=\"crime_type\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Create Additional columns Arrest_rate and Crime_propotion \n",
    "crime_typeHF[\"Arrest_rate\"] = crime_typeHF[\"crime_count\"]/crime_typeHF[\"all_count\"]\n",
    "crime_typeHF[\"Crime_proportion\"] = crime_typeHF[\"all_count\"]/crime_typeHF[\"all_count\"].sum()\n",
    "crime_typeHF[\"Crime_Type\"] = crime_typeHF[\"Crime_Type\"].asfactor()\n",
    "# h2o.assign(crime_typeHF,crime_type)\n",
    "crime_typeHF.frame_id = \"Crime_type\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table>\n",
       "<tr><th>Crime_Type         </th><th style=\"text-align: right;\">  crime_count</th><th style=\"text-align: right;\">  all_count</th><th style=\"text-align: right;\">  Arrest_rate</th><th style=\"text-align: right;\">  Crime_proportion</th></tr>\n",
       "<tr><td>OTHER OFFENSE      </td><td style=\"text-align: right;\">          183</td><td style=\"text-align: right;\">        720</td><td style=\"text-align: right;\">    0.254167 </td><td style=\"text-align: right;\">       0.0721226  </td></tr>\n",
       "<tr><td>WEAPONS VIOLATION  </td><td style=\"text-align: right;\">           96</td><td style=\"text-align: right;\">        118</td><td style=\"text-align: right;\">    0.813559 </td><td style=\"text-align: right;\">       0.0118201  </td></tr>\n",
       "<tr><td>DECEPTIVE PRACTICE </td><td style=\"text-align: right;\">           25</td><td style=\"text-align: right;\">        445</td><td style=\"text-align: right;\">    0.0561798</td><td style=\"text-align: right;\">       0.0445758  </td></tr>\n",
       "<tr><td>BURGLARY           </td><td style=\"text-align: right;\">           14</td><td style=\"text-align: right;\">        458</td><td style=\"text-align: right;\">    0.0305677</td><td style=\"text-align: right;\">       0.045878   </td></tr>\n",
       "<tr><td>BATTERY            </td><td style=\"text-align: right;\">          432</td><td style=\"text-align: right;\">       1851</td><td style=\"text-align: right;\">    0.233387 </td><td style=\"text-align: right;\">       0.185415   </td></tr>\n",
       "<tr><td>ROBBERY            </td><td style=\"text-align: right;\">           17</td><td style=\"text-align: right;\">        357</td><td style=\"text-align: right;\">    0.047619 </td><td style=\"text-align: right;\">       0.0357608  </td></tr>\n",
       "<tr><td>MOTOR VEHICLE THEFT</td><td style=\"text-align: right;\">           17</td><td style=\"text-align: right;\">        414</td><td style=\"text-align: right;\">    0.0410628</td><td style=\"text-align: right;\">       0.0414705  </td></tr>\n",
       "<tr><td>PROSTITUTION       </td><td style=\"text-align: right;\">          106</td><td style=\"text-align: right;\">        106</td><td style=\"text-align: right;\">    1        </td><td style=\"text-align: right;\">       0.0106181  </td></tr>\n",
       "<tr><td>CRIMINAL DAMAGE    </td><td style=\"text-align: right;\">           76</td><td style=\"text-align: right;\">       1003</td><td style=\"text-align: right;\">    0.0757727</td><td style=\"text-align: right;\">       0.100471   </td></tr>\n",
       "<tr><td>KIDNAPPING         </td><td style=\"text-align: right;\">            1</td><td style=\"text-align: right;\">          7</td><td style=\"text-align: right;\">    0.142857 </td><td style=\"text-align: right;\">       0.000701192</td></tr>\n",
       "</table>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "crime_typeHF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "H2OContext: ip=172.16.2.98, port=54329\n"
     ]
    },
    {
     "data": {
      "text/plain": []
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "hc\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Plot the below in Flow \n",
    "plot (g) -> g(\n",
    "  g.rect(\n",
    "    g.position \"Crime_Type\", \"Arrest_rate\"\n",
    "    g.fillColor g.value 'blue'\n",
    "    g.fillOpacity g.value 0.75\n",
    "  )\n",
    "  g.rect(\n",
    "    g.position \"Crime_Type\", \"Crime_proportion\"\n",
    "    g.fillColor g.value 'red'\n",
    "    g.fillOpacity g.value 0.65\n",
    "  )\n",
    "  g.from inspect \"data\", getFrame \"Crime_type\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "#hc.stop()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}