{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "toc": true
   },
   "source": [
    "<h1>Table of Contents<span class=\"tocSkip\"></span></h1>\n",
    "<div class=\"toc\"><ul class=\"toc-item\"><li><span><a href=\"#Spark-Exercise\" data-toc-modified-id=\"Spark-Exercise-1\"><span class=\"toc-item-num\">1&nbsp;&nbsp;</span>Spark Exercise</a></span><ul class=\"toc-item\"><li><span><a href=\"#Question-1\" data-toc-modified-id=\"Question-1-1.1\"><span class=\"toc-item-num\">1.1&nbsp;&nbsp;</span>Question 1</a></span></li><li><span><a href=\"#Question-2\" data-toc-modified-id=\"Question-2-1.2\"><span class=\"toc-item-num\">1.2&nbsp;&nbsp;</span>Question 2</a></span></li><li><span><a href=\"#Question-3\" data-toc-modified-id=\"Question-3-1.3\"><span class=\"toc-item-num\">1.3&nbsp;&nbsp;</span>Question 3</a></span></li><li><span><a href=\"#Question-4\" data-toc-modified-id=\"Question-4-1.4\"><span class=\"toc-item-num\">1.4&nbsp;&nbsp;</span>Question 4</a></span></li></ul></li><li><span><a href=\"#Reference\" data-toc-modified-id=\"Reference-2\"><span class=\"toc-item-num\">2&nbsp;&nbsp;</span>Reference</a></span></li></ul></div>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>\n",
       "@import url('http://fonts.googleapis.com/css?family=Source+Code+Pro');\n",
       "@import url('http://fonts.googleapis.com/css?family=Vollkorn');\n",
       "@import url('http://fonts.googleapis.com/css?family=Arimo');\n",
       "@import url('http://fonts.googleapis.com/css?family=Fira_sans');\n",
       "    \n",
       "    div.cell {\n",
       "        width: 1000px;\n",
       "        margin-left: 0% !important;\n",
       "        margin-right: auto;\n",
       "    }\n",
       "    div.text_cell code {\n",
       "        background: transparent;\n",
       "        color: #000000;\n",
       "        font-weight: 600;\n",
       "        font-size: 12pt;\n",
       "        font-style: bold;\n",
       "        font-family:  'Source Code Pro', Consolas, monocco, monospace;\n",
       "    }\n",
       "    h1 {\n",
       "        font-family: 'Open sans',verdana,arial,sans-serif;\n",
       "\t}\n",
       "\t\n",
       "    div.input_area {\n",
       "        background: #F6F6F9;\n",
       "        border: 1px solid #586e75;\n",
       "    }\n",
       "\n",
       "    .text_cell_render h1 {\n",
       "        font-weight: 200;\n",
       "        font-size: 30pt;\n",
       "        line-height: 100%;\n",
       "        color:#c76c0c;\n",
       "        margin-bottom: 0.5em;\n",
       "        margin-top: 1em;\n",
       "        display: block;\n",
       "        white-space: wrap;\n",
       "        text-align: left;\n",
       "    } \n",
       "    h2 {\n",
       "        font-family: 'Open sans',verdana,arial,sans-serif;\n",
       "        text-align: left;\n",
       "    }\n",
       "    .text_cell_render h2 {\n",
       "        font-weight: 200;\n",
       "        font-size: 16pt;\n",
       "        font-style: italic;\n",
       "        line-height: 100%;\n",
       "        color:#c76c0c;\n",
       "        margin-bottom: 0.5em;\n",
       "        margin-top: 1.5em;\n",
       "        display: block;\n",
       "        white-space: wrap;\n",
       "        text-align: left;\n",
       "    } \n",
       "    h3 {\n",
       "        font-family: 'Open sans',verdana,arial,sans-serif;\n",
       "    }\n",
       "    .text_cell_render h3 {\n",
       "        font-weight: 200;\n",
       "        font-size: 14pt;\n",
       "        line-height: 100%;\n",
       "        color:#d77c0c;\n",
       "        margin-bottom: 0.5em;\n",
       "        margin-top: 2em;\n",
       "        display: block;\n",
       "        white-space: wrap;\n",
       "        text-align: left;\n",
       "    }\n",
       "    h4 {\n",
       "        font-family: 'Open sans',verdana,arial,sans-serif;\n",
       "    }\n",
       "    .text_cell_render h4 {\n",
       "        font-weight: 100;\n",
       "        font-size: 14pt;\n",
       "        color:#d77c0c;\n",
       "        margin-bottom: 0.5em;\n",
       "        margin-top: 0.5em;\n",
       "        display: block;\n",
       "        white-space: nowrap;\n",
       "    }\n",
       "    h5 {\n",
       "        font-family: 'Open sans',verdana,arial,sans-serif;\n",
       "    }\n",
       "    .text_cell_render h5 {\n",
       "        font-weight: 200;\n",
       "        font-style: normal;\n",
       "        color: #1d3b84;\n",
       "        font-size: 16pt;\n",
       "        margin-bottom: 0em;\n",
       "        margin-top: 0.5em;\n",
       "        display: block;\n",
       "        white-space: nowrap;\n",
       "    }\n",
       "    div.text_cell_render{\n",
       "        font-family: 'Fira sans', verdana,arial,sans-serif;\n",
       "        line-height: 125%;\n",
       "        font-size: 115%;\n",
       "        text-align:justify;\n",
       "        text-justify:inter-word;\n",
       "    }\n",
       "    div.output_wrapper{\n",
       "        margin-top:0.2em;\n",
       "        margin-bottom:0.2em;\n",
       "    }\n",
       "\n",
       "    code{\n",
       "      font-size: 70%;\n",
       "    }\n",
       "    .rendered_html code{\n",
       "    background-color: transparent;\n",
       "    }\n",
       "    ul{\n",
       "        margin: 2em;\n",
       "    }\n",
       "    ul li{\n",
       "        padding-left: 0.5em; \n",
       "        margin-bottom: 0.5em; \n",
       "        margin-top: 0.5em; \n",
       "    }\n",
       "    ul li li{\n",
       "        padding-left: 0.2em; \n",
       "        margin-bottom: 0.2em; \n",
       "        margin-top: 0.2em; \n",
       "    }\n",
       "    ol{\n",
       "        margin: 2em;\n",
       "    }\n",
       "    ol li{\n",
       "        padding-left: 0.5em; \n",
       "        margin-bottom: 0.5em; \n",
       "        margin-top: 0.5em; \n",
       "    }\n",
       "    ul li{\n",
       "        padding-left: 0.5em; \n",
       "        margin-bottom: 0.5em; \n",
       "        margin-top: 0.2em; \n",
       "    }\n",
       "    a:link{\n",
       "       font-weight: bold;\n",
       "       color:#447adb;\n",
       "    }\n",
       "    a:visited{\n",
       "       font-weight: bold;\n",
       "       color: #1d3b84;\n",
       "    }\n",
       "    a:hover{\n",
       "       font-weight: bold;\n",
       "       color: #1d3b84;\n",
       "    }\n",
       "    a:focus{\n",
       "       font-weight: bold;\n",
       "       color:#447adb;\n",
       "    }\n",
       "    a:active{\n",
       "       font-weight: bold;\n",
       "       color:#447adb;\n",
       "    }\n",
       "    .rendered_html :link {\n",
       "       text-decoration: underline; \n",
       "    }\n",
       "    .rendered_html :hover {\n",
       "       text-decoration: none; \n",
       "    }\n",
       "    .rendered_html :visited {\n",
       "      text-decoration: none;\n",
       "    }\n",
       "    .rendered_html :focus {\n",
       "      text-decoration: none;\n",
       "    }\n",
       "    .rendered_html :active {\n",
       "      text-decoration: none;\n",
       "    }\n",
       "    .warning{\n",
       "        color: rgb( 240, 20, 20 )\n",
       "    } \n",
       "    hr {\n",
       "      color: #f3f3f3;\n",
       "      background-color: #f3f3f3;\n",
       "      height: 1px;\n",
       "    }\n",
       "    blockquote{\n",
       "      display:block;\n",
       "      background: #fcfcfc;\n",
       "      border-left: 5px solid #c76c0c;\n",
       "      font-family: 'Open sans',verdana,arial,sans-serif;\n",
       "      width:680px;\n",
       "      padding: 10px 10px 10px 10px;\n",
       "      text-align:justify;\n",
       "      text-justify:inter-word;\n",
       "      }\n",
       "      blockquote p {\n",
       "        margin-bottom: 0;\n",
       "        line-height: 125%;\n",
       "        font-size: 100%;\n",
       "      }\n",
       "</style>\n",
       "<script>\n",
       "    MathJax.Hub.Config({\n",
       "                        TeX: {\n",
       "                           extensions: [\"AMSmath.js\"]\n",
       "                           },\n",
       "                tex2jax: {\n",
       "                    inlineMath: [ ['$','$'], [\"\\\\(\",\"\\\\)\"] ],\n",
       "                    displayMath: [ ['$$','$$'], [\"\\\\[\",\"\\\\]\"] ]\n",
       "                },\n",
       "                displayAlign: 'center', // Change this to 'center' to center equations.\n",
       "                \"HTML-CSS\": {\n",
       "                    scale:100,\n",
       "                        availableFonts: [],\n",
       "                        preferredFont:null,\n",
       "                        webFont: \"TeX\",\n",
       "                    styles: {'.MathJax_Display': {\"margin\": 4}}\n",
       "                }\n",
       "        });\n",
       "</script>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# code for loading the format for the notebook\n",
    "import os\n",
    "\n",
    "# path : store the current path to convert back to it later\n",
    "path = os.getcwd()\n",
    "os.chdir(os.path.join('..', 'notebook_format'))\n",
    "from formats import load_style\n",
    "load_style(plot_style = False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Ethen 2017-11-05 10:34:01 \n",
      "\n",
      "CPython 3.5.2\n",
      "IPython 6.2.1\n",
      "\n",
      "numpy 1.13.3\n",
      "pandas 0.20.3\n",
      "matplotlib 2.1.0\n",
      "pyspark 2.2.0\n"
     ]
    }
   ],
   "source": [
    "os.chdir(path)\n",
    "\n",
    "# 1. magic for inline plot\n",
    "# 2. magic to print version\n",
    "# 3. magic so that the notebook will reload external python modules\n",
    "# 4. magic to enable retina (high resolution) plots\n",
    "# https://gist.github.com/minrk/3301035\n",
    "%matplotlib inline\n",
    "%load_ext watermark\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "%config InlineBackend.figure_format = 'retina'\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import pyspark.sql.functions as F\n",
    "from pyspark.conf import SparkConf\n",
    "from pyspark.sql import SparkSession\n",
    "\n",
    "# create the SparkSession class,\n",
    "# which is the entry point into all functionality in Spark\n",
    "# The .master part sets it to run on all cores on local, note\n",
    "# that we should leave out the .master part if we're actually\n",
    "# running the job on a cluster, or else we won't be actually\n",
    "# using the cluster\n",
    "spark = (SparkSession.\n",
    "         builder.\n",
    "         master('local[*]').\n",
    "         appName('crime').\n",
    "         config(conf = SparkConf()).\n",
    "         getOrCreate())\n",
    "\n",
    "# set the log level to ERROR to prevent \n",
    "# the terminal from showing too many information\n",
    "sc = spark.sparkContext\n",
    "sc.setLogLevel('ERROR')\n",
    "\n",
    "%watermark -a 'Ethen' -d -t -v -p numpy,pandas,matplotlib,pyspark"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Spark Exercise"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note that the following notebook is not a tutorial on the basics of spark, it assumes you're already somewhat familiar with it or can pick it up quickly by checking documentations along the way. Please use the following [link](https://drive.google.com/file/d/0Bz1lG0yegjarWWRoYjI1SWtfUlU/view?usp=sharing) to download the dataset.\n",
    "\n",
    "## Question 1\n",
    "\n",
    "By using SparkSQL generate a bar chart of average crime events by month. Find an explanation of results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Row(ID='10078659', Case Number='HY267429', Date='05/19/2015 11:57:00 PM', Block='010XX E 79TH ST', IUCR='143A', Primary Type='WEAPONS VIOLATION', Description='UNLAWFUL POSS OF HANDGUN', Location Description='STREET', Arrest='true', Domestic='false', Beat='0624', District='006', Ward='8', Community Area='44', FBI Code='15', X Coordinate='1184626', Y Coordinate='1852799', Year='2015', Updated On='05/26/2015 12:42:06 PM', Latitude='41.751242944', Longitude='-87.599004724', Location='(41.751242944, -87.599004724)')]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# have a peek at the data\n",
    "data_path = 'Crimes_-_2001_to_present.csv'\n",
    "df = spark.read.csv(data_path, sep = ',', header = True)\n",
    "df.take(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Row(ID='10078659', Case Number='HY267429', Date='05/19/2015 11:57:00 PM', Block='010XX E 79TH ST', IUCR='143A', Primary Type='WEAPONS VIOLATION', Description='UNLAWFUL POSS OF HANDGUN', Location Description='STREET', Arrest='true', Domestic='false', Beat='0624', District='006', Ward='8', Community Area='44', FBI Code='15', X Coordinate='1184626', Y Coordinate='1852799', Year='2015', Updated On='05/26/2015 12:42:06 PM', Latitude='41.751242944', Longitude='-87.599004724', Location='(41.751242944, -87.599004724)', Day='05/19/2015', Month='05')]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# extract the month and year from the date column\n",
    "split_col = F.split(df['Date'], ' ')\n",
    "df = df.withColumn('Day', split_col.getItem(0))\n",
    "split_col = F.split(df['Day'], '/')\n",
    "df = df.withColumn('Month', split_col.getItem(0))\n",
    "df = df.withColumn('Year', split_col.getItem(2))\n",
    "df.take(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Month</th>\n",
       "      <th>Average</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>01</td>\n",
       "      <td>30899.800000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>02</td>\n",
       "      <td>27197.133333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>03</td>\n",
       "      <td>32860.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>04</td>\n",
       "      <td>32948.733333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>05</td>\n",
       "      <td>34767.266667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>06</td>\n",
       "      <td>36050.428571</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>07</td>\n",
       "      <td>37949.642857</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>08</td>\n",
       "      <td>37470.857143</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>09</td>\n",
       "      <td>35192.428571</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>35715.571429</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>11</td>\n",
       "      <td>32051.857143</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>12</td>\n",
       "      <td>29979.571429</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Month       Average\n",
       "0     01  30899.800000\n",
       "1     02  27197.133333\n",
       "2     03  32860.333333\n",
       "3     04  32948.733333\n",
       "4     05  34767.266667\n",
       "5     06  36050.428571\n",
       "6     07  37949.642857\n",
       "7     08  37470.857143\n",
       "8     09  35192.428571\n",
       "9     10  35715.571429\n",
       "10    11  32051.857143\n",
       "11    12  29979.571429"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# register the DataFrame as a SQL temporary view, so \n",
    "# we can directly use SQL query to interact with spark DataFrame\n",
    "df.createOrReplaceTempView('crime')\n",
    "sql_query = (\"\"\"\n",
    "    SELECT Month, COUNT(*) / COUNT(DISTINCT Year) AS Average\n",
    "    FROM crime\n",
    "    GROUP BY Month\n",
    "    ORDER BY Month\n",
    "\"\"\")\n",
    "avg_month = spark.sql(sql_query).toPandas()\n",
    "avg_month.to_csv('avg_month.txt', index = False)\n",
    "avg_month"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAA/cAAALxCAYAAAAZqFZmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAAWJQAAFiUBSVIk8AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzs3Xm4HGWZ/vH7DksMJOzIMsgmowQy\n7CAibmyOgYDCqMMig4jgjAouIIj6GxcExNERVEZARWVTXFiDgoCIjuAoEJBFkE12EAhZIIGQPL8/\n3rfpolNV3eecPudQh+/nuuqqrqp36+rq6n5qecsRIQAAAAAA0FzjRrsBAAAAAABgaAjuAQAAAABo\nOIJ7AAAAAAAajuAeAAAAAICGI7gHAAAAAKDhCO4BAAAAAGg4gnsAAAAAABqO4B4AAAAAgIYjuAcA\nAAAAoOEI7gEAAAAAaDiCewAAAAAAGo7gHgAAAACAhiO4BwAAAACg4QjuAQB9ZXsn25GH/Ua7PUCv\n2Hb7z/YDeX1ePtptGUtYrwDKENwDLwO2x9t+ovCn9fuj3SaMLNub2v687att32f7mTw8aPtXedlG\no93Oscj29ra/bPsPeX3Ptz3X9t9sX2z7KNvrjHY7m8j2MYX92lCG7Uf7vQBjke318v4u8v5vhR7z\nbWD76ZzvXtuThrutwFhAcA+8PLxD0kqF6X/hh/LlwfY6ts+XNEPS/5P0RkmvkjQhD2tK2ikvuyUH\n+puOVnvHEtub2L5K0m8lfVLSNkrre7ykZSWtLWlXScdJusf2ubbXG6XmjijOkKPpbP8ub7939qm8\nMfmdiIh7lPZ/Utr/ndQtj+1xkk6XtIykkHRgRMwZtkYCY8iSo90AACPiwDyeI2mSUmDxbknfHbUW\nYdjZ3k7S+ZJWzbMelfQjSVdLeljS85JWk7St0gGgjZUC/Y9IOmiw9UbE5ZI86IaPAbbfIelMpe+a\nJN0j6ceSfq/0OYyTtIak7SW9U9J6kt4l6SZJx4x0exvsJKVtusyrJF2SX18r6QM15dzdz0YBeJH/\nkbSnpB0lvdf2TyPiwpr0H1XaN0rSyRFx5XA3EBgrCO6BMc72WkoBmyR9TdLekl6jFPAT3I9RtteX\nNF1S6xLIkyQdHRFPlyS/WNJnbO8h6b9GqIljlu3XKQXySyuddfq0pP+KiAUlyc+z/UlJ+0n68si1\ncmyIiMckPVa2zPb8wuTTEXHzyLQKQFFEhO0DJd2sdILhFNu/i4gnO9Pafo3aBzjvknTkyLUUaD4u\nywfGvgOUvush6YeSfpDnb5d/RDE2naV2YH98RBxWEdi/ICIukLSlpF8Md+PGKtvjJZ2rFNhL0r9H\nxHEVgb0kKSIWRsQPJG0u6Y8j0EwAGFERcZ+kT+TJ1SV9szNNvhz/+0q3jLUux6/93QLwYgT3wBhm\n20rBvST9LiLuVgrwF+V5B5bly3n3Ltz/V5mukP61hfSL/WgX0k21fZbtu3NnOXNt32771Lp7vW0v\nWSj/O3neJrZPsX1n7hwubE8p5Fne9v62f2j7z7Zn216QOxe8Nnci98pu7y2XNcn252zflNv8lO3r\nbR9pe5my9nUp6xO2r7T9iO3ncpv+1/anbC/fS5tqyn+b0qX2UjpT8tle80bE7Ij4WUd5i90Lans3\n2+fZvj+v08fr0vdY3nTbD9ueZ/svtr9ke8WOvNvYPtv2PU4d0z1g+zu2X9XjulnV9n/a/r3tx/K6\nf8z2FbY/bPsVva6rCgco3UsvSb+MiFN6zRgRD0fEpTVt3932T/I6n5+3wRttf6Xb+3ePPWvbPsg1\nncz5xR3YreXkfU4dNT6eP7s7bH/V9qol+TewHZJ+VZh9hhfv4O4l0wO47bfa/rlTZ2DP5vHZtv+p\nJs+A91eFvJvmdLfnfc3Ttu+yfbrtbRev7UV5z8zlPt8l3QaF9n2mJt042wfa/o3tJ3O777D9dedO\nID3Ae89tr277hPwdfyZvx7+1fYDtrrfzOHUQ+6H8nX00f4f/nrfBT9hetiZvT+87py1dl635kt6Q\nZ726ZPvtuZPGfn0nhrpecxnbO+1Pb3f6vZzntK890/abeimjTkScJqm1j9vb9p4dST4h6fX59YkR\ncXWX9q7g1CHpbwrbwuN5Wzjc9sQu+dfP6S5y+k/yTP6OP2T7krztL9WljBn585mRp9e0fZztm23P\nyssOqCsD6KuIYGBgGKODpDcrHf0OSQcV5v8qz3tQ0hIVeSdImpXTXdVDXV8q1LVNyfJVJF1RSFM2\nLFK6HM8l+ZcspPuO0v2zz5WUMaWQ5+Yu9YWkmZJ26vLeNpR0X00Zt0hap9i+mrJ2kfT3Lm16RNIb\nhvC5/6RQ1vv7sB3tVCjvvZK+V9LmxyvS79elvP0kfaNmXdwsaZWc7xOSFlake1TSa7q8j/2V+p2o\nW/d/lTR5COvqj4WyduzT93iS0tUUde2eJ+mAmjIeyOku71LXQYUyty9Zfkxh+WskXVbTpvskrduR\nf4Mu7yN6aWeP622DgZZXsm1+SWm/VNbG+ZKmVpQzmP2VJR1bs423hm+per99Zk7z/ADWzWcq0kyU\ndGVNO2ZKequk3+XpO7tte5K2U7qNoqrMM7q0e0NJd3ZZPw9I2mKw77vbuizM7zYs9v3poU09fyf6\nvF6XkXROD234nqSlh/i9/AdJT6m9327t3ycr7cdC0u2SJnQpZ3dJT/awLWxVkX+9Htf79ZLWrGnH\njJxuhtJ/ridKyqjcNzMw9HsY9QYwMDAM36B0CX5IekbScoX5+xV+dHatyf/dnGaRpHVq0lnSvTnt\nbSXLJ0m6NS9fqBR87qd05mMbpT++Nxba9OmSMop/lm9W6gzuPqWOd7aT9DpJ/yFprUKe2yRdJ+kL\nSh3GbZOHf8nvrfVne7akV1e8t+Ul3V+o+5eS9pK0haR/VurRd5GkawppSoP7nH5BTvN4oV1bStpB\nKZCYXWjThoP4zK0X/8l7ZR+2o2LA0/oj83ulQH8rSW+SdHhF+m7BfWu9XVpYr1PVPgAVSsHRu/Pr\nP+ZtZyulzpmKf0ivqnkPHyiku0/pPs7dcn1vU+qT4Fm1/xAOeL3lbeX5XMYcSeP6sO7H6cUB1s1K\nV9xsLektSvfpzy8sf1dFOcMR3P8ub/s/LKzL3fTigL8zIFla0pSOeo7K84rDun1Yd0MN7ltB69WS\n9lH6nr5FKbhuBfx/l7RCn/ZXnyvkeSKvl+2UrsI5TNJDheXfrGh/P4P7Cwtpbsuf2dZKT9z4fN7G\n/650QCzUPbi/TWm/95ikTyl1mLaVpH9X6uCzcp+Ry1lN6cBnK93FSp1RbqH01IkfFZbNkrT+YN53\nt3Upaa28jV6v9v6kc/udImmZHre5QX0n+rhel5R0VSHdZUpXIL1FaZvfV+k70Fp+Wh++mwcUyjtX\n0hJKnV62/iO8vkv+vdQ+CPao0tNeds/bwk6STpD0tNrfpfUqtoXnlPqm+aiknXP+N+f2Fd/zNarY\nn6v9m/iA0vdhttJ+8i35c9in2/thYOjnMOoNYGBgGJ5BKaBu/bid3bFsGbWDyJ/WlPGmXv4I5R+x\nVrqjS5a3DhLMVMlZ/ZxmKUk/z+mek7R2x/Lin+WQ9GdJK3VZB//YZfnmhXVU+odF0n8X6jyxIs37\nOtq2WHCvdP/742oHshMrynqt2mf2fzmIz/1VhXbc16dtaaeO93dG1R+dkvTdgvuQdFJJmiWVDsy0\ntoeZeftYsiTtBYWyNi7bDtQOgE9XxZknpT/FrTNH3x7EenpjoR1X92ndH1Io8wpJ40vSvF7pAF4o\nncmaVJJmOIL7qs93CUm/LqTZaKDbSJ/W3VCD+5B0qsqvJPpCIc1/VGy/Pe+vJG2k9oGhB1VyMFXS\nKyXd0eXz6UtwL2mPwvLfqyRQlbSp2r8joe7BfSgdBF6rJM1ktQ+u/V9FOcUDecdUpPlo3Wfe7X0P\nZF2qyxULg9heB/Sd6ON6/Wxe/qyk3SrSWOkAaKu+bfvwfi8qlHdp4fUJXfKtpvZVheer4gy/pE3U\nvkJgsf85Sk8yqTwjn9N8vNCuPSrSzCikmamS/R0Dw0gO3HMPjF3/qhTES+1O9CRJEfGM0tlzSZpm\ne5WKMn6r9AgvKZ2lrdJatkgp8HuB7bWVLoeWpKMi4v/KCojU4dghSme2l5L0bzX1SdIHo6Sn3Y4y\n/9pl+Q1qPzHgnZ3Lne6/fl+e/JukIyrKOV3t+wir/IeklSXNlbRPRMytKOt2pTP4kvQ293gveUHx\ns3x0gHl7MVOpk7hFXVP25n61O1l6QUQ8L6l1v/pSSme3DsrzO32r8PrNJcuPUHq2/N8kHRIRz5U1\nJCJ+V6hzf6fO8QZiONb9YXk8X9L+EfFsZ4KIuEbtnvZXVPfvTr/8NCLOLGnPQqUnc7SUfSZN8KCk\nj0RElCz7b6VgXOrt/XXbX31Y6aCIJB0aEX/rTBDpyQDFx/l9tId6B+s/WtUqdWr2TEl7blS6jWAg\nPhQRD5SUdZvSmXhJ2rLzvnnbaypdcSWlq7z+X1nhEfF1pQNLkrRjWZ8GY9Rg1+uykj6WJ78WERer\nRP4OHKH2kynqHivZq4OVDkZK6XY1KV2F0K2PmMMkLZfzvjci5pUlioib1H76y562V+5Y/nREPFRX\nUUR8Te3HZHb2D1DmcxFxaw/pgGFDcA+MXa2g9CGle/I6tQL+pZUuu1tM/kFv/Xl/TVlnTrYnqP2n\n66qIuL8jye5qn8U6p67BEfF3pcv3pXZnRWXujYj/rSurjFNnahvY3tj2lPzHb1ZevHI+EFG0tdKl\n1lK6+qE0KMxO71J964/BZRHxRJe0VxVe162HMssVXpceQBiiC6oOTAzST6O6J/kZhdeX1gRHxXTr\nFRfkjqTekSd/1uUzlNrrfoLSlR0D0dd1b/sflM68SdL0iHiwJvm3lb5jUvuP8nBbLLAvKPb6v/5w\nN2SY/KTsYIokRcRMpcd0Sd3fXy/7q9Zn9pjS2chSEfEbpQBIknbqtaO0gcgdiLU6T7s2Iv5Sk7zb\nfq9opqRLapa3tplxktbtWLaD2o9v/k6Xg4vfLrweqe/CaBrKen2r0gFBSTq7rpL8XbgmTw70d6ms\nvIclfaQwa6Gkf6v6zhW0fksvjog5XdJelcdWu6O+UraXsP0Ptjds/T/I/xFa+91efg/O6iENMKx4\nzj0wBtneUO0fsjPzmbROrbPy6ykdCDixorgfqn0kfX+l++KK3qF2UPPDkvzbtJoladYA/ouuUbPs\nxl4Lsb2L0r2Hb1H70XBVVlG6f7Kl2Bt26RUHBZWPMMt/ljfLk3vmnpF7VbceyhT/7NT2FDxIPa/7\nHtUFDk8NIt1yHcvWk9Tqtf3jtj8+gLaN9rrfpPD6mspUkiLiUdt3S3p1R77hVPeZFA/EdH4mTVH3\n/qT2e+z2/mq/M7aXUfrcpHTpdNn+uuj3Sgd9llfqyPPeLukH6h8ltZ4aUbvfy9vd/Uq3A3Vze8VV\nEC1120xxX1z7XVBaPy0j9V0YTUNZr9sUXt/Up9/ngThX7YD43oiofRSo05NkXpsn97e9f136Dou1\nOf82H6x0gmMLpSu8qlRd4djyYEQ83iUNMOw4cw+MTcVH15UF3K2z8q1lm9reoiLdnWr/WXqP7aU7\nkrQuyX9a0s+0uJ4eNVdimZplM7tldvI/SpfLv0PdA3spna0tWqnw+jHVq7sMe2W1L7kdqLr1UKb4\n52K1QdZZp+u6H6DFLvctKJ6d6zVd53oe7PYnjf66L15G+kgP6VtpVq5N1T91z5+u+0yaotvztVvv\nsdv76/adKe5nBvI5S8PzWQ9kvyf1fgtKr+tTWnydDuS7MNzr56VmKOt1OH6fh9Nij9ccgBe12ekx\nuP8n6ZtKJ0O63YbV+f+gU79/G4FB4cw9MMbYXkIvvj/+5h6Pxh+o1PtvmR8q9dq8klKvxOflulZT\n+7LHn1dcrt3azzyvgV3mXHdpXrczW1J6Px/Mr++W9FWlDpDuk/R061Jw2werfZ913y9xzYr72p8p\n9Yrdq4Heu32/Uu/AK0t6le1X5nt1+6WXdf9SUlz3J0k6bQB5O28x6eYmpT/S4yRtbntcH/smQHM1\n7TuDl4/i/nFH9XYwR2rfAjTSiu09Q6lX/F493DF9itpX1P0qT89QOjg0r7Xvtn2hpGnq/v+A7zle\nEgjugbFnqqTVB5FvH9ufqLjf7cdKl+2PV7o0/7xWHrXPBJReIaDU87uU9jcP5ntVR8IhefykUs++\nf69It2LF/Fbelm5nOOqWF8/oLhURN3cpa9AiImxfpfSoICn1efCd4aqvAV70uQ/zun/K9g1Kj4+a\nqHSfcFl/F70q9s3Qy3e6laasT4fWH89uV+wt22U5+q+4nxnI5ywt/lm3PudugUjd5zyQ/V6vaYaq\n87tQ1/9EL+tH4rsgvXj/OHM49499UmzvEoNtr+3VlZ4IIUmXRMSuNcnr/iMALzlclg+MPa1L8hcq\nBeJ7dxm+mtOvqHbHYy8SEU8pPbZGkqYWep1t3e/2gNKzuMtcV3j9poo0w6HVS/IVNYG9lJ5DW+XP\nhddbd6mvcnlEzJd0S558ve3hPrBaDOYPG4H6XsruVLvTxJHY/orrfrGnAAzQTYXXi3VmWZSvoml1\n7FZ2j3erP4Buf1Qnd1neL6N15u8lJ/dEf2ee3Np2t/9mrf5UnlJ6AkRR63Mel+9PrlL3Of9V6ekM\nUpf9Xr60ubMj0uHQ83dB6Sqzls7vQrFfjKF+F/q9DY/Gd2K0fp8HJXdG29rmtx9Ch5Ibq30A7MdV\nifI9+ZsOsg5gVBDcA2NI/qPVOgJ9VUScERE/qhskfVHpOeJSu4f9MsXe9f819yLbuqTtrJrLjy9Q\n+56/TwxH784VlsrjynsDba+ldLldlT+qHRjuU9LfQNEBXdrz8zxetYe0QxIRv1S7I6wpSp9xT2wv\nZ7uXR/40Qu6c7MI8uZntnYa5ytPVvpz/n20fUpe4yPYatt/Wms6947eeHrFbfhxYlYPV/rN6Wcny\n1uOcJtsu7ewvd+w2Up998fFVA33k4FjU+sxWU/uM4mJsv1HSRnny8pKO1O4uvK4LzPerWpBvWbo6\nT77e9mtqyjmgZlk/Xan2owcP6nIApPid6/wuPCFpdn5duX5sb6V2x21VWttwv7bf0fhO/ErtJ3t8\nOD/+9aWu9Vu6tqT3DLKM4gHvuv4D9pM0aZB1AKOC4B4YW/ZTO6itPBpdFBGz1H5G+8454C3zS7Xv\nx9tf7bP2UjvwLyv/r2o/MuuNkr6R+wUoZXuc7XfnHv+H4o48fpPt9ToX2p4k6Ueq+ROVz7i3HvW0\nrqTjytLlHnvf3qU9X1e7w52vdwsy82P7PtylzDr7qN2L/FG2v56Dt7o6d5P0J6VbO8aSL6l9AOsM\n25vVJba9tu0D69JUybe1vLtQ38m2j8pngKrqG2d7P0k3aPGAo/UUi1dI+n7ZASbbr5N0VJ58UuW3\nyFyVx+MlHVrWBkn/o6F1WDUQxedL1wWPLxffVPuS8ZNsL9b7vO1V9OI+I75eUs5VhdeHlx1MtX2Q\n2geBq5zcSi7pe/mRp53l/JOkT3cppy/yY9N+kic3U8Vz7m1/ROl2GCkd/LiluDwfDPlNntzO9mKP\ndLO9kqTv9dCs1ja8Rv49GaoR/07k3/+v5ckNJJ3dw+/E22zXPlZumH1F7QMS37a9fV1i26vb/mDH\n7DsKr/cr+09ie1O1r2wEGoPgHhhbWmfen1f76HYvzs3jcZL+rSxBRDyv9nPqt5H0gfz6TxFxW1me\ngg+rfYn7h5Q6+Tvc9pttb2Z7e9vvtf0tpUv8f6zB9RtQ9P08niTpt7Y/YvsNtre1fajSZZ5vUOpk\nr87ncpuk9Ci1S2y/0/bm+U/Od3NdxUcELnZ5ZX5G+7slLVC6l/NS2z+zva/t19newvZOtj9q+6Jc\n5+GDeeO5vrsk7ab2PYqHSbrb9n/n9m9reyvbu9r+vO0ZSrde/ONg63ypiojb1T6bt7qkP9g+3fa/\n2N46r4d/tv1J21coPSJyII9Y6qzvWqUzSk8rfaeOk3S77WNt72Z7m/yZ72H7BEm3K3UOVdbD/nck\n/Tq/3lnSn2y/L7f5TbaPUwroWn/ID6l49vMZal+F8sW8HbzR9pb5wMLv8nvu9jz2voiIe9W+wuHg\n/J42dXrG9IZlwe1Ylvehx+TJtSRdb/tI29vlbeUjSp19tc4mfysiFvusIuJ6tZ9u8jZJF9l+e95f\nTbN9lqRT1eVzjogL1L4V6w2SrrN9YN7utrf9uVzGArVvKRjuy8o/pnYHo/9p+0Lb78j7zqm2z1bq\nNFNKZ+errpr5Zh5baf18PO8HXm/7o0rr+dVKB9vq/DaPl1A6APJm25ML23C33tVfZBS/E19Uu2+Q\ndyrtqz6bf482y9vge2x/1elxm79U96sahk0+0LOf0sGw5SVdZftHtvfO+9YtbO+cP9dfKHWi++GO\nMu5R++qUN0j6jdNJha1s72j7K0qPXFxCL749D3jpiwgGBoYxMCgF3JGHSweYd5LSPZYh6a816bYo\n1NEaPtJjHSsoHXDozF82LJC0TUf+JQvLv9NDfUtJuqSmjkVKf2oOKszbvqKsDZX+dFWVdYvS/c6t\n6W/UtGt7pXsGe1kPN/Zhu1hH6daIXuqLvM426ihjp8Ly/XqoszZ9r+UpnUlqpftMTbqetg2lPiWe\n6HE9XNCHdb+JUuDdS30LlZ73vHbF9/MXXfLPk3RAl/a8U+mKgqrvw+e6fR+UAtDW8rWG8pl01NU5\nXN6H9b/BQMsbyLaudEAkJN052G2yI48lHZs/i7rP+ltKnYlVlfOPSmeB677jU7p9t5Q6hbyyppyZ\nSmfJf5+n/1xRzgO9fAbdtr2cZkOlgwl16+cBSVt2qeu/avLPVrqy4cw8/XxFGcsqHZirKqf0PQxg\nHdR+J/q8XsdL+nYP215rX7HXUL+fJd+Txb5HXfLu2GU7Lw7/W5L/1YV1WLV97yLp/Dz9VEU7ZuTl\nM/qxThgYhjpw5h4YO4r3y59bmapEpDN9v8iTG9h+c0W66yUVe6ddoPbZ/G51PBUReyp1hnSy0tHw\np5SCmjmS/qJ02eW/KwUN/1dVVo/1LVC6n/7flc6qz1F6vN7fcpvfEhGf7bGsvyh1wPP53O5nlP4A\n3iDpU0qXUhcfAzirs4xCWb9T+vN9oNJTB+5TCswWKJ1l/4PSmaXdVN/ZX08i4m8RsYfSpaxfVApI\nHsh1zlf6c/QrpcBuw4iYGhG3VhTXaBFxvtLBjkOVtvcHlbaJ55Qef/Q7pT/9O6iic8kB1ndTRLxF\n6XaUryj1g/Bwru8Zpc9+uqQjJa0bEftGxH0l5cyJiLfnNv1M6fN7Tmkb/LPSpaOvjYjvd2nPeZJe\np3RlzCNK29wjSn9ed4iIzw3tHQ9MRHxH6RaQi5Q+i+fqc4xtkRyt9MjQU5U6tntG6bt6t9LtT6+P\niA9F6kuiqpy/Kh2IPVHSXUrb+Eyl7ftApcB1flX+QjlzlQ52HKR0lvqp3Ja/Kp0h3ywirlQ6cCvV\n7Pf6pbAv/rDSgYe/K23HTyi9vyOUvgvXVRaSyjlc0r/kMp5SWkf3KAW4m0fE9B7a8rRS54bHKwV4\nc5SCvEEbre9ERDwbER9UWrdfU+po7wml3+enlQ6oXCjp45LWj4ifjUS76kTEFUoH1Q9Ratv9Stv1\nAqVbCK9R+g68XSWdBUa6um0zpX3+7UrbwBylfk6+KmnTiCjrvwR4SXPEkPZDAABJtneQdEWe3D8i\nzhjN9gDAcHPqgG220pVS34uI949ykwDgZY0z9wDQH+8tvP59ZSoAGDveo3Ynruz3AGCUceYeALqw\n/WpJ90TF4/5sv0vpUmdLujoiSm9rAICmsL26pKejvING2Z6s1PP8qkq3Jf1DRMwuSwsAGBlLdk8C\nAC97H5M01faPlc5OPai0/1xf0l55sNK9fh8drUYCQB9tL+k02z9Vujf9LqV93JpKHY29X6lTOUn6\nFIE9AIw+gnsA6M16aj9LvMwcSftERLfHJwFAU6yg1KHeQRXLQ9LnI+KbFcsBACOIy/IBoAvb60va\nU6nn6FdLWkXpMVGzlHrZvVTSyRHx+Kg1EgD6yPYKSvu9XST9k9Ll9ysq9Z5+n9Il+d+OiFtGrZEA\ngBchuAcAAAAAoOHoLR8AAAAAgIYjuAcAAAAAoOEI7gEAAAAAaDiCewAAAAAAGo5H4b3E2b5H0nKS\n7h3lpgAAAAAA+m9dSbMjYr2hFEJw/9K33IQJE1aaPHnySqPdEAAAAABAf912222aN2/ekMshuH/p\nu3fy5MkrXXfddaPdDgAAAABAn2255Za6/vrr7x1qOdxzDwAAAABAwxHcAwAAAADQcAT3AAAAAAA0\nHME9AAAAAAANR3APAAAAAEDDEdwDAAAAANBwBPcAAAAAADQcwT0AAAAAAA1HcA8AAAAAQMMR3AMA\nAAAA0HAE9wAAAAAANBzBPQAAAAAADUdwDwAAAABAwxHcAwAAAADQcAT3AAAAAAA0HME9AAAAAAAN\nR3APAAAAAEDDEdwDAAAAANBwBPcAAAAAADQcwT0AAAAAAA1HcA8AAAAAQMMR3AMAAAAA0HAE9wAA\nAAAANBzBPQAAAAAADUdwDwAAAABAwxHcAwAAAADQcAT3AAAAAAA0HME9AAAAAAANR3APAAAAAEDD\nLTnaDQAAABiIdY+aPtpNGDb3Hr/raDcBANBQnLkHAAAAAKDhCO4BAAAAAGg4gnsAAAAAABqO4B4A\nAAAAgIYjuAcAAAAAoOEI7gEAAAAAaDiCewAAAAAAGo7n3AMAADTYukdNH+0mDJt7j991tJsAAI3B\nmXsAAAAAABqO4B4AAAAAgIYjuAcAAAAAoOEI7gEAAAAAaDiCewAAAAAAGo7gHgAAAACAhiO4BwAA\nAACg4QjuAQAAAABoOIJ7AAAAAAAajuAeAAAAAICGI7gHAAAAAKDhCO4BAAAAAGg4gnsAAAAAABqO\n4B4AAAAAgIYjuAcAAAAAoOEI7gEAAAAAaDiCewAAAAAAGo7gHgAAAACAhiO4BwAAAACg4QjuAQAA\nAABoOIJ7AAAAAAAajuAeAAAAAICGW3K0GwAAAKqte9T00W7CsLn3+F1HuwkAAIwZnLkHAAAAAKDh\nCO4BAAAAAGg4gnsAAAAAABr39RTrAAAgAElEQVSO4B4AAAAAgIYjuAcAAAAAoOHoLR8AAAAY43jy\nBjD2ceYeAAAAAICGI7gHAAAAAKDh+hLc297K9hdt/9L2nbZn2X7W9oO2L7D9jop869qOHoatutS/\nk+2LbD9me77tu2yfaHu1Htq+ek57V877aC5rxx7yjrN9sO1rbD9le47tG2wfYXvpbvkBAAAAAOiH\nft1zf5CkQwrTcyUtkrSmpN0l7W77Z5L2jogFFWU8WlN+VR7Z/rSkY/Lkolz3+pIOlbS37R0i4uaK\nvJtIulLSynnWbEmrSNpN0q62j46I4yvyLiXpfElT86znJC2UtFke3pXrnlvzvgAAAAAAGLJ+XZZ/\njaSPSdpS0qSImBQREyStLekrOc1eko6qKiAiVq8ZbizLY3uq2oH9VyWtEBHLS5oiaYakVSVdYHt8\nSd4Jki5UCuxvkDQl510xl2VJx9repaLJxygF9vMlHSBpGUnLSpom6UlJW0s6per9AgAAAADQL30J\n7iPiBxHx9Yi4vnimOiLuj4hPSjozzzqgH/UVHJvH50XE4RExJ9d7i1KQ3TqLf3BJ3kMkrZPTTMt5\nFBGzI+JwpbPylnRcZ0bbq0s6LE8emd//wkgulnRgXrZ3vjoAAAAAAIBhM1Id6v0xj9fsV4G2N5a0\naZ78SufyiHhA0jl5ct+SIlrzzo6IB0uWt8rcwvZrO5btJWm8pFmSTi2p+wJJdygdHNin5m0AAAAA\nADBkI/Wc++3y+J4+lvnWPJ4l6Q8VaS6V9AFJ29ie2LqqwPYkpVsIWmnKXJvLXl7SjpJuL6n76oiY\nX5H/MkmvkbRDl/cBAACAPuKZ7gBejobtzL3tibY3sf0tSe/Js79Zk/4a27Ntz7N9j+0zbW9fU8VG\neXxbRCyqSHNrq3hJGxbmT87zJOmWsoy5zFZAv1HH4tZ0ad6Ouifbdk06AAAAAACGpK9n7m2vJen+\nkkXzJX0pIk6uyb6t0plySVo3D/vaPlHSxyIiOtKvkccP1ZRZXLZGxete8q/RMX8gdU/Mw5yatAAA\nAAAADFq/L8tfqPYj7VaUtLSk55U6pftWSfr5kk6W9CNJN0TE3HyWe3NJn1PqFO8wSY+p3Xley7J5\nPK+mPc8UXk8sydtr/okd8wdTd21wb/u6ikUbVswHAAAAAEBSny/Lj4iHW4+vkzRB0msl/VDS5yXN\nyJ3gFdM/EhEfiojftu6Hzz3OXx8Ru0v6SU56tO0V+tlWAAAAAADGimG75z4iFkXEHRHxfklfU3rm\n/Rm2B1LnkXm8rFKndkVP5/GEmvzLFF7PLbx+uvC6l/xzO+YPpe5SEbFl2SDpL93yAgAAAABe3kbq\nUXjfyOPN89CTiLhH0t/z5Podi1v3tNc9Xq+47OGSvL3mf7hj/kDqnhsR3G8PAAAAABg2I/UovOJz\n5F8tqer+8oEo9kY/rqLH/Fav9iHptsL8v+R5lrSxXvyYO0lSvsKg9Xz7WzsW35rL3ljVXujNvyYN\nAKCAx1cBAAAMzkiduV+v8LrrJeottteTtGqevKdj8a/zeHlJW1cUsUse/yEiXrgUP59J/1Oe3Lki\n7+ty2ZJ0RUXdb7T9ior8rXI78wIAAAAA0FdDDu5tL9HDc9yPyOPnJV1TyNstX6uH/HmSriwuiIhb\nJd3YUX6xXWtK2jtPnlVS9tl5vK/tzkfdSdLheXxdRHSe2f+5pGclrSDpoJK6pymd9Q9J55SUDQAA\nAABA3/TjzP2rJP3J9oH5OfeS0mXttjezfZbaAfA3ImJmIe9Vtj9le4rtJXI+297c9nmS/jWn+3JE\nPFlS99F5vJftE2xPymVsJOkiSZMk3S3ptJK8p0j6W05zcc4j25NsnyBpz446XhARj0g6MU+eYPu9\nhfZPlXR6XnZORNxUUjcAAAAAAH3Tr3vut5D0XUmyPV/p0vtJksYX0nxf0ic78q2jdHb+WEkLbM9W\n6mW+2Av9NyR9oazSiLjE9mclfVHp7P3HbT8tabmc5HFJe0TEsyV559neQ+my+S0k3ZLrn6h00CMk\nHR0Rl1W8589ImiJpqtLj/k6zvVDtXvL/KOmDFXkBAAAAAOibfpy5f0jSeySdKmmGpFlKl6svUOp4\n7ruSto+I90XE8x15j1A6q36jpCeVgvJFSh3cfU/SthFxaEREVeURcYzS/e3TJc1UOqBwt6STJE2J\niJtr8t6oFKCflPOMl/RELmvniDi+Ju8CSdOUAvhrlS7Tj7wOjszvmV7yAQAAAADDbshn7iPiOUnn\n5mGgeX8i6Sd9aMPlki4fZN5HJB2Wh4HmXaR0ef8pg6kbAAAAAIB+GKne8gEAAAAAwDAhuAcAAAAA\noOEI7gEAAAAAaDiCewAAAAAAGo7gHgAAAACAhiO4BwAAAACg4QjuAQAAAABoOIJ7AAAAAAAajuAe\nAAAAAICGI7gHAAAAAKDhCO4BAAAAAGg4gnsAAAAAABqO4B4AAAAAgIYjuAcAAAAAoOEI7gEAAAAA\naDiCewAAAAAAGo7gHgAAAACAhiO4BwAAAACg4QjuAQAAAABoOIJ7AAAAAAAajuAeAAAAAICGI7gH\nAAAAAKDhCO4BAAAAAGi4JUe7AQDwcrXuUdNHuwnD5t7jdx3tJgAAALyscOYeAAAAAICG48w9gBHB\nWWoAAABg+HDmHgAAAACAhiO4BwAAAACg4QjuAQAAAABoOIJ7AAAAAAAajuAeAAAAAICGI7gHAAAA\nAKDhCO4BAAAAAGg4gnsAAAAAABqO4B4AAAAAgIYjuAcAAAAAoOEI7gEAAAAAaDiCewAAAAAAGm7J\n0W4AAAAAAIy0dY+aPtpNGDb3Hr/raDcBo4Az9wAAAAAANBzBPQAAAAAADUdwDwAAAABAwxHcAwAA\nAADQcAT3AAAAAAA0HME9AAAAAAANR3APAAAAAEDDEdwDAAAAANBwBPcAAAAAADQcwT0AAAAAAA1H\ncA8AAAAAQMMR3AMAAAAA0HAE9wAAAAAANBzBPQAAAAAADUdwDwAAAABAwxHcAwAAAADQcAT3AAAA\nAAA0HME9AAAAAAANt+RoNwBjw7pHTR/tJgybe4/fdbSbAAAAAAC1OHMPAAAAAEDDEdwDAAAAANBw\nBPcAAAAAADQcwT0AAAAAAA1HcA8AAAAAQMP1Jbi3vZXtL9r+pe07bc+y/aztB21fYPsdXfIvbfuT\ntmfYnmv7KdvX2D7YtnuofyfbF9l+zPZ823fZPtH2aj3kXT2nvSvnfTSXtWMPecflNl6T2zzH9g22\nj7C9dLf8AAAAAAD0Q78ehXeQpEMK03MlLZK0pqTdJe1u+2eS9o6IBcWMtpeTdKWkLfOsZyRNkLRt\nHqbZfmdEPF9Wse1PSzomTy7Kda8v6VBJe9veISJursi7Sa575TxrtqRVJO0maVfbR0fE8RV5l5J0\nvqSpedZzkhZK2iwP78p1zy3LDwAAAABAv/TrsvxrJH1MKUCfFBGTImKCpLUlfSWn2UvSUSV5T8v5\nnpQ0TdJESctIOkDSfKVA+/NlldqeqnZg/1VJK0TE8pKmSJohaVVJF9geX5J3gqQLlQL7GyRNyXlX\nzGVZ0rG2d6l4z8coBfbzc1uXkbRsfg9PStpa0ikVeQEAAAAA6Ju+BPcR8YOI+HpEXF88Ux0R90fE\nJyWdmWcdUMxne3NJ786T74uIiyNZGBE/UPtgwMdsv7Kk6mPz+LyIODwi5uR6b1EKsltn8Q8uyXuI\npHVymmk5jyJidkQcrnRW3pKO68xoe3VJh+XJI/P7X5jbfrGkA/OyvfPVAQAAAAAADJuR6lDvj3m8\nZsf8ffL49oi4sCTfqZJmKV2mv2dxge2NJW2aJ7/SkU8R8YCkc/LkviVlt+adHREPlixvlbmF7dd2\nLNtL0vjctlNL6r5A0h1KBwf26VwOAAAAAEA/jVRwv10e39Mx/615fFlZpoiYJ+m3eXKHiryzJP2h\not5L83gb2xNbM21PUvse/0sXy5Vcm8uWpM7O9Vp1Xx0R8yvyt95TZ7sBAAAAAOirYQvubU+0vYnt\nb0l6T579zcJyS9owT95SU9StebxRx/zW9G0RsahL3mJdkjQ5z6usO5d5e5e6e2n35F56/AcAAAAA\nYLD61Vu+JMn2WpLuL1k0X9KXIuLkwrzllDqgk6SHaoptLVujY/4aHcvr8nbmX6MizXDUPTEPc2rS\nyvZ1FYs2rJgPAAAAAICk/p+5Xyjp0Tw8l+c9r9Qp3bc60i5beD2vpsxn8nhix/xW/l7yduYfzboB\nAAAAAOirvp65j4iHJa0uSbbHSdpA0pFKj7J7v+2prV7p8WIRsWXZ/HxGf4sRbg4AAACAl5F1j5o+\n2k0YNvcev+toN2FEDNs99xGxKCLuiIj3S/qa0jPvz8hBvyQ9XUg+oaaoZfJ4bsf8Vv5e8nbmH826\nAQAAAADoq5HqLf8bebx5HiRpttpBcucj8opayx7umP9Qx/K6vJ35H6pIMxx1z42I2vvtAQAAAAAY\nipEK7ovPkX+1JEVESLotz9u4Jm+rZ/pbO+YXe6Oveh+tvMW6JOkveV5l3bnM1vPtq+rupd231aQB\nAAAAAGDIRiq4X6/wuniJ+q/zeOeyTLZfIemNefKKjsWtvMtL2rqi3l3y+A8R8cKl+PlM+p/q6pb0\nulx2Xd1vzG0s0yq3My8AAAAAAH015ODe9hI9PMf9iDx+XtI1hfnn5PGGtncryfcBpQB7nqTzigsi\n4lZJN3aUX2zXmpL2zpNnlZR9dh7va7vzUXeSdHgeXxcRt3cs+7mkZyWtIOmgkrqnKZ31D7XfIwAA\nAAAAw6IfZ+5fJelPtg/Mz7mXlC5rt72Z7bPUDoC/EREzW2ki4gZJ5+bJ79uemvMuYXt/SV/Oy/47\nIh4rqfvoPN7L9gm2J+X8G0m6SNIkSXdLOq0k7ymS/pbTXJzzyPYk2ydI2rOjjhdExCOSTsyTJ9h+\nr+0lcv6pkk7Py86JiJtK6gYAAAAAoG/69Si8LSR9V5Jsz1e69H6SpPGFNN+X9MmSvB9Qug9/S0nT\nbT8jaYlC3osl/WdZpRFxie3PSvqi0tn7j9t+WtJyOcnjkvaIiGdL8s6zvYfSZfNbSLrF9mylZ9KP\nUzrrfnREXFbxnj8jaYqkqZJ+KOk02wvV7iX/j5I+WJEXAAAAAIC+6ceZ+4ckvUfSqZJmSJqldLn6\nAqWO574rafuIeF9EPN+ZOSJmS9pO0lFKl9mH0iXv10o6RNLuZfkK+Y9Rur99uqSZSgcF7pZ0kqQp\nEXFzTd4blQL0k3Ke8ZKeyGXtHBHH1+RdIGmaUgB/bW5z5HVwZH7P9JIPAAAAABh2Qz5zHxHPKV1a\nf263tF3K+LLal+EPNP/lki4fZN5HJB2Wh4HmXaR0ef8pg6kbAAAAAIB+GKne8gEAAAAAwDAhuAcA\nAAAAoOEI7gEAAAAAaDiCewAAAAAAGo7gHgAAAACAhiO4BwAAAACg4QjuAQAAAABoOIJ7AAAAAAAa\njuAeAAAAAICGI7gHAAAAAKDhCO4BAAAAAGg4gnsAAAAAABqO4B4AAAAAgIYjuAcAAAAAoOEI7gEA\nAAAAaDiCewAAAAAAGo7gHgAAAACAhiO4BwAAAACg4QjuAQAAAABoOIJ7AAAAAAAajuAeAAAAAICG\nI7gHAAAAAKDhCO4BAAAAAGg4gnsAAAAAABqO4B4AAAAAgIYjuAcAAAAAoOEI7gEAAAAAaDiCewAA\nAAAAGo7gHgAAAACAhiO4BwAAAACg4QjuAQAAAABoOIJ7AAAAAAAajuAeAAAAAICGI7gHAAAAAKDh\nCO4BAAAAAGi4JUe7AcBYte5R00e7CcPm3uN3He0mAAAAACjgzD0AAAAAAA1HcA8AAAAAQMMR3AMA\nAAAA0HAE9wAAAAAANBzBPQAAAAAADUdwDwAAAABAwxHcAwAAAADQcAT3AAAAAAA0HME9AAAAAAAN\nR3APAAAAAEDDEdwDAAAAANBwBPcAAAAAADQcwT0AAAAAAA1HcA8AAAAAQMMR3AMAAAAA0HAE9wAA\nAAAANBzBPQAAAAAADUdwDwAAAABAwxHcAwAAAADQcAT3AAAAAAA0HME9AAAAAAANR3APAAAAAEDD\nEdwDAAAAANBwBPcAAAAAADQcwT0AAAAAAA1HcA8AAAAAQMMR3AMAAAAA0HB9Ce5tr237o7Yvsn2f\n7Wdtz7F9o+3jba9RkW9d29HDsFWX+nfKdT9me77tu2yfaHu1Htq+ek57V877aC5rxx7yjrN9sO1r\nbD+V3/MNto+wvXS3/AAAAAAA9MOSQy3A9qsk3SvJhdmzJS0raZM8HGx7r4j4dU1Rj9YsW1BT/6cl\nHZMnF0maK2l9SYdK2tv2DhFxc0XeTSRdKWnlQrtXkbSbpF1tHx0Rx1fkXUrS+ZKm5lnPSVooabM8\nvCvXPbfmfQEAAAAAMGT9OHO/RB5Pl/QuSStFxPKSllEKfO+RtKKk822vXlVIRKxeM9xYlsf2VLUD\n+69KWiHXPUXSDEmrSrrA9viSvBMkXagU2N8gaUrOu2Iuy5KOtb1LRZOPye9vvqQD8vtdVtI0SU9K\n2lrSKVXvFwAAAACAfulHcD9T0uYRsVtE/DQiZkpSRDwXEb9QOwBeTtIhfaiv6Ng8Pi8iDo+IObnu\nW5SC7NZZ/INL8h4iaZ2cZlrOo4iYHRGHK52Vt6TjOjPmgxSH5ckjI+IHEbEwkoslHZiX7Z2vDgAA\nAAAAYNgMObiPiFlVZ9bz8r9IujZPbjnU+lpsbyxp0zz5lZJ6H5B0Tp7ct6SI1ryzI+LBkuWtMrew\n/dqOZXtJGi9plqRTS+q+QNIdSgcH9ql5GwAAAAAADNlI9Zb/RB4vUZtqYN6ax7Mk/aEizaV5vI3t\nia2ZtiepfaDh0sVyJdfmsiWps3O9Vt1XR8T8ivyX5fEOFcsBAAAAAOiLYQ/ubS8p6Q15srRju5zu\nGtuzbc+zfY/tM21vX1P0Rnl8W0Qsqkhza6t4SRsW5k9WuwPAW8oy5jJv76irs+7SvB11T7btmnQA\nAAAAAAzJkHvL78GHJK2u1JP9D2rSbav2mfJ187Cv7RMlfSwioiN96/F6D9WUWVy2RsXrXvJ3Pspv\nIHVPzMOcmrSyfV3Fog0r5gMAAAAAIGmYz9znzuRaHdJ9MyJu7UgyX9LJkt4kaVJErKDU6/yWki7K\naQ6T9KmS4pfN43k1TXim8Hpi4fWyhde95J/YMX8odQMAAAAA0FfDdube9hpKPc5PkHSdpCM700TE\nI0pn9ovzQtL1kna3fa7S4/WOtn1yRDw1XO0dbRFR2tlgPqO/xQg3BwAAAADQIMNy5t72Skodyq0n\n6a+Sdq3peK5O64DAslq8U7un83hCTf5lCq/nluTtNf/cjvlDqRsAAAAAgL7qe3Bve3mlHuinSLpP\n0k4R8ehgyoqIeyT9PU+u37G4dU/7mjVFFJc9XJK31/wPd8wfSN1zI6L2fnsAAAAAAIair8G97WUl\nXSJpK0mPKAX29/WzjoJib/RV76PVq31Iuq0w/y95niRtXJYxl9l6vn1nXwGt6dK8HXXfVpMGAAAA\nAIAh61twb3uCUid42yk9136niPjrEMtcT9KqefKejsW/zuPlJW1dUcQuefyHiHjhUvx8Jv1PeXLn\niryvy2VL0hUVdb/R9isq8rfK7cwLAAAAAEBf9SW4t720pJ9LequkpyTtEhF1z4Bv5ev2/Pdj83ie\npCuLC3LP+zfmySNKyl5T0t558qySss/O431z53+dDs/j6yLi9o5lP5f0rKQVJB1UUvc0pbP+Iemc\nkrIBAAAAAOibIQf3tpdQCpT/WelZ7m+PiOt7zH6V7U/ZnpLLkZPNbZ8n6V9zui9HxJMl+Y/O471s\nn2B7Ui5jI6WrCCZJulvSaSV5T5H0t5zm4pxHtifZPkHSnh11vCD38n9injzB9nsL7Z8q6fS87JyI\nuKm3VQEAAAAAwOD041F4b5C0V369lKTza07I3x8RxUvo11E6O3+spAW2Zyv1Ml/shf4bkr5QVlhE\nXGL7s5K+qHT2/uO2n5a0XE7yuKQ9IuLZkrzzbO+hdNn8FpJuyfVPVDroEZKOjojLKt7LZ5Q6DZwq\n6YeSTrO9UO1e8v8o6YNVKwIAAAAAgH7px2X5xTJeIWm1mmHVjrxHKJ1Vv1HSk0pB+SJJt0v6nqRt\nI+LQiAhViIhjlO5vny5ppqTxSmfrT5I0JSJursl7o1KAflLOM16pv4DpknaOiONr8i6QNE0pgL9W\n6TL9kDRD6RF+29NLPgAAAABgJAz5zH1EXCWp273zVXl/IuknfWjD5ZIuH2TeRyQdloeB5l2kdHn/\nKYOpGwAAAACAfuj7c+4BAAAAAMDIIrgHAAAAAKDhCO4BAAAAAGg4gnsAAAAAABqO4B4AAAAAgIYj\nuAcAAAAAoOEI7gEAAAAAaDiCewAAAAAAGo7gHgAAAACAhiO4BwAAAACg4QjuAQAAAABoOIJ7AAAA\nAAAajuAeAAAAAICGI7gHAAAAAKDhCO4BAAAAAGg4gnsAAAAAABqO4B4AAAAAgIYjuAcAAAAAoOEI\n7gEAAAAAaDiCewAAAAAAGo7gHgAAAACAhiO4BwAAAACg4QjuAQAAAABoOIJ7AAAAAAAajuAeAAAA\nAICGI7gHAAAAAKDhCO4BAAAAAGg4gnsAAAAAABqO4B4AAAAAgIYjuAcAAAAAoOEI7gEAAAAAaDiC\newAAAAAAGo7gHgAAAACAhiO4BwAAAACg4QjuAQAAAABoOIJ7AAAAAAAajuAeAAAAAICGI7gHAAAA\nAKDhCO4BAAAAAGg4gnsAAAAAABqO4B4AAAAAgIYjuAcAAAAAoOEI7gEAAAAAaDiCewAAAAAAGo7g\nHgAAAACAhiO4BwAAAACg4QjuAQAAAABoOIJ7AAAAAAAajuAeAAAAAICGI7gHAAAAAKDhCO4BAAAA\nAGg4gnsAAAAAABqO4B4AAAAAgIYjuAcAAAAAoOEI7gEAAAAAaDiCewAAAAAAGo7gHgAAAACAhiO4\nBwAAAACg4QjuAQAAAABoOIJ7AAAAAAAajuAeAAAAAICGI7gHAAAAAKDhCO4BAAAAAGi4vgT3tte2\n/VHbF9m+z/aztufYvtH28bbX6JJ/aduftD3D9lzbT9m+xvbBtt1D/Tvluh+zPd/2XbZPtL1aD3lX\nz2nvynkfzWXt2EPecbmN1+Q2z7F9g+0jbC/dLT8AAAAAAP2w5FALsP0qSfdKKgbhsyUtK2mTPBxs\ne6+I+HVJ/uUkXSlpyzzrGUkTJG2bh2m23xkRz1fU/2lJx+TJRZLmSlpf0qGS9ra9Q0TcXJF3k1z3\nyoV2ryJpN0m72j46Io6vyLuUpPMlTc2znpO0UNJmeXhXrntuWX4AAAAAAPqlH2ful8jj6ZLeJWml\niFhe0jJKge89klaUdL7t1Uvyn6YU2D8paZqkiTnvAZLmKwXany+r2PZUtQP7r0paIdc9RdIMSatK\nusD2+JK8EyRdqBTY3yBpSs67Yi7Lko61vUvF+z4mv7/5ua3LKB3QmJbfy9aSTqnICwAAAABA3/Qj\nuJ8pafOI2C0ifhoRMyUpIp6LiF+oHQAvJ+mQYkbbm0t6d558X0RcHMnCiPiBpKPyso/ZfmVJ3cfm\n8XkRcXhEzMl136IUZLfO4h9ckvcQSevkNNNyHkXE7Ig4XOmsvCUd15kxH6Q4LE8eGRE/+P/t3Xm0\nLXdZJ/zvw5SE5CZRpgQEAW0hCS8gswLdTRiaDgReCKABaVAMgt0tCCIoukCkmRURuxmbSSE2Cogg\nNshg8yqDgAyLJKA2hinMmuFmIiTP+0fV5u7su8+5N/eee/apez6ftfaqXfWrp+p3ap1z7/7uqvrV\n2Ofu7ncm+bmx7dTx6gAAAAA4YPY73Hf3ed396XXaP5fkI+Ps7ReaHz5OP9/df76k/JVJzstwmf6D\n5xuq6oQktxlnX7hkv19Jcvo4+4gl254te1N3f3VJ+2ybt6uqWyy0nZLkkLFvr1yy77cn+YcMXw48\nfLEdAAAANtJmjZb/nXF69YXl9xin71lW1N0XJ/n/xtkT16g9L8lH19jvu8fpnarqiNnCqtqRXV80\nvHu3qsFHxm0nyeLgerN9f7C7L1mjfvYzLfYbAAAANtQBD/dVdY0kdx1nPzu3vJLccpw9Y51NnDlO\nj19YPps/q7uv2EPt/L6S5LjsGgBw6b7HbX5+D/vem34ftzcj/gMAAMC+2u/R8vfCf05yTIaR7F8/\nt/zIDAPQJck569TP2hYfp3fsQvt6tYv1x66xzoHY9xHj64J11k1VfWKNpluusRwAAACSHOAz9+Ng\ncrMB6f6gu8+caz587v3F62zmonF6xMLyWf3e1C7Wr3LfAAAAsKEO2Jn7qjo2w4jzhyX5RJKnHqh9\nHQy6e3GwwSTfP6N/u03uDgAAABNyQM7cV9UPZhhQ7mZJ/jHJ/ZYMPHfh3PvD1tnctcfpzjXq96Z2\nsX6V+wYAAIANteHhvqqOyjAC/a2SfCnJvbr7G0tWPT+7QvIN19nkrO1rC8vPWWhfr3ax/pw11jkQ\n+97Z3evebw8AAAD7Y0PDfVUdnuRdSe6Q5OsZgv2Xlq3b3Z3krHH2hHU2OxuZ/syF5fOj0a/1c8xq\n5/eVJJ8bl62573Gbs+fbr7Xvven3WeusAwAAAPttw8J9VR2W5B1JfjLDc+3v1d3/uIeyD4zTe6+x\nzUOT3H2cfd8atUclueMa27/POP1od3//UvzxTPrH19t3kjuP215v33cf+7jMbLuLtQAAALChNiTc\nV9W1krw1yT2SnJvkPt293jPgZ04fp7esqvsvaT8tQ8C+OMnb5hvGkfc/Pc4+ZUmfbpjk1HH2jUu2\n/aZx+ohx8L9FvzJOP9Hdn19oe2uSS5McneTnl+z75Axn/Tu7fkYAAAA4IPY73FfV1TME5ftmeJb7\nf+zuv9+b2u7+ZJI3j7Ovq6qTZtusqv+U5Plj24u7+5tLNvHr4/SUqnpBVe0Y64/PcBXBjiRfSPKq\nJbWvSPLFcZ13jjWpqqhq7UYAACAASURBVB1V9YIkD17Yx3y/v57kJePsC6rqkeNxyPgzvHZsO727\nP7On4wAAAAD7YyMehXfXJKeM76+Z5M+qaq11v9zdi5fQn5bkR5LcPslfVNVFSa6e5JCx/Z1JnrFs\nY939rqr6zSS/neHs/ZOq6sIkR46rfDvJA7v70iW1F1fVAzNcNn+7JGdU1fkZnkl/tQxn3X+9u9+z\nxs/yGxkGDTwpyRuSvKqqLs+uUfI/luRxax0IAAAA2CgbcVn+/DYOTXKDdV7XWyzu7vMz3Kf/tAyX\n2XeGS94/kuQXkjygu7+31s67+9kZ7m//iyT/muFLgS8k+f0kt+ruz65T++kMAf33x5pDMowX8BdJ\n7t3dz1un9rIkJ2cI8B8Z+9xJPpXkqUnuZpR8AAAANsN+n7nv7r9Osuap+r3cxnczXIL//D2tu0b9\ne5O8dx9rv57kCePrqtZekeHy/lfsy74BAABgI2z4c+4BAACAzSXcAwAAwMQJ9wAAADBxwj0AAABM\nnHAPAAAAEyfcAwAAwMQJ9wAAADBxwj0AAABMnHAPAAAAEyfcAwAAwMQJ9wAAADBxwj0AAABMnHAP\nAAAAEyfcAwAAwMQJ9wAAADBxwj0AAABMnHAPAAAAEyfcAwAAwMQJ9wAAADBxwj0AAABMnHAPAAAA\nEyfcAwAAwMQJ9wAAADBxwj0AAABMnHAPAAAAEyfcAwAAwMQJ9wAAADBxwj0AAABMnHAPAAAAEyfc\nAwAAwMQJ9wAAADBxwj0AAABMnHAPAAAAEyfcAwAAwMQJ9wAAADBxwj0AAABMnHAPAAAAEyfcAwAA\nwMQJ9wAAADBxwj0AAABMnHAPAAAAEyfcAwAAwMQJ9wAAADBxwj0AAABMnHAPAAAAEyfcAwAAwMQJ\n9wAAADBxwj0AAABMnHAPAAAAEyfcAwAAwMQJ9wAAADBxwj0AAABMnHAPAAAAEyfcAwAAwMQJ9wAA\nADBxwj0AAABMnHAPAAAAEyfcAwAAwMQJ9wAAADBxwj0AAABMnHAPAAAAEyfcAwAAwMQJ9wAAADBx\nGxLuq2pHVT2gqn67qv6yqr5dVT2+brmH2t6L10P2sI07VNUfV9U5VXVJVX2pql5dVT+6F30/sqqe\nXVVnVdVFVfWdqnrfnvY5V//Qqnr/WHfRuJ1nV9WOvakHAACA/XWNDdrOPZO8bT+38e0kl6/Rdsla\nRVX1qCSvzvCzdJLzk9w4yWOS/HRVPaC7379G7Q8l+WCSm42LdiY5MsmJSU6sqpd19y+us+9XJjlt\nnP3e2M9bJnl6klOr6u7dfc5a9QAAALARNvKy/G8meVeS30ry2H2ov2N3H7PG653LCqrq1klelSHY\nvzHJDbr76CQ3TfJXSQ5P8paqut6S2krypxmC/dlJ7trdO5LsSPKrSa5I8viqOm2xdqx/fIZgf0WS\npyQ5Yqy/a5IvJrl5kjfvw3EAAACAq2Sjwv07uvsG3X2/7n5mhmC9GZ6V5JpJPp7kUd39rSTp7i8m\neXCSLyc5OsnTltQ+MMmdM4TzB3X3h8baS7r7hUl+f7aPqrrWfGFVHZLkmePsS7r7Rd196Vj/oSQP\nynAVwV2r6uQN+lkBAABgqQ0J99291uX0B0xVHZ3kpHH2dxf70N07k7x8nD11PFM/7xHj9L3d/akl\nu3hRhoB+TIbL9OfdK8n1x/bfWSzs7k8mee/CfgAAAOCAmPJo+XfLcNY+Sd6zxjrvHqfHJjluoe0e\nC+tcSXd/NckZ4+xiuJ/VfnZcb719L9YCAADAhtpK4f7NVfWvVXVpVX2lqt5SVfdbZ/3jx+nXu/s7\na6xz5pL1U1XXT3KdcfaMrG1Wf/zC8tn83tRer6quu856AAAAsF82arT8jXDHJBckuSzJjTLcM//g\nqvqTJD/T3d9dWP/YcbrmaPTdfXFVnZvhvvtj55rm3683mv2s7diF5Xvc90LbsRmeBrCmqvrEGk3r\nPkoQAAAAtsKZ+9cnuW+SH+juI7v7iAyX0L92bH9okj9YUnf4OL14D9u/aJwesaR2T/XLavd23xfN\nvV+sBwAAgA2z8jP33f3oJcs+l+TnqurbGR4z9/NV9Tvd/fnN7t9m6e7bL1s+ntG/3SZ3BwAAgAnZ\nCmfu1/NbGc6OV5L7L7RdOE4P28M2rj1Ody6p3VP9stq93fe1594v1gMAAMCG2dLhvrsvTPLZcfbm\nC82ze9pvuFZ9VR2W4X77JPnaktp16+favrawfI/7XmhbrAcAAIANs6XD/R7MRqM/pqqus8Y686Pc\nf3/k/O7+VnYNcHfCOvuY1Z+5sHw2vze13+rudQfTAwAAgP2xpcN9VR2e5Fbj7D8vNP9NhpH1k+Re\na2ziPuP0nCRnLbR9YJzee4193yi7wvv71qg9oaoWR9Jf3PdiLQAAAGyolYb7qqo9rPKbGe5r7yTv\nmm/o7vPmlj2pqq70s4xfDDxunD29u3th228ap/epqtss2feTMtzr/7XsCvMz70vyzQzH78mLheP2\nZl84vHHpTwYAAAAbZMPCfVVdd/ZK8gNzTUfPty2E8DdX1X+rqjtU1bXmtnWLqnpVkqeOi17f3YuX\nxifJMzKcvb9TkteN+05V3STJW5PcJMm5SZ6/pPbtST6a4Ri8raruMtYeUlVPTvLE2T66+7vzhd19\naZJnjrO/XFVPrqpDxvqfSPK2cbt/293vXOOQAQAAwIbYyEfhfWuN5R9emL9ZkrPH99dL8pAkv57k\n8qo6L8khufJz6P80u87AX0l3f7qqTkvy6iSPTPIzVXV+kqPGVS5Mcsp4j/1ibVfVQ5J8cOzTh6tq\nZ5JDs+u4vLy7X7XGvl9WVT+e5LQkL0ry3Kq6NLueaf+FJA9bVgsAAAAbadX33D8nyUuTfCzDZe6H\nZ+jTPyc5Pcl/6O6HjmfKl+ru1yf5iSRvTvKNDJfxfznJa5Lctrvfv07tV5LcduzH5zKE+gsyXIb/\nsO5+/Hqd7+7HJvmpcf2dY/3nkvy3cd/nrFMOAAAAG2LDztx3957un19W854k79mAfX88Q8jel9rz\nkzx9fO1L/ZszfLEAAAAAK7HqM/cAAADAfhLuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAA\nYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKE\newAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAA\nAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg4\n4R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4A\nAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAm\nTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgH\nAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJi4DQn3VbWjqh5QVb9d\nVX9ZVd+uqh5ft9yL+qtV1WOr6sNVdW5VXVBVn6yqp1TVtfai/g5V9cdVdU5VXVJVX6qqV1fVj+5F\n7ZFV9eyqOquqLqqq71TV+6rqIXv5sz+0qt4/1l00bufZVbVjb+oBAABgf11jg7ZzzyRv25fCqrpm\nkj9LctK46LtJLk9y2/H10Ko6sbt3rlH/qCSvzvCzdJLzk9w4yWOS/HRVPaC7379G7Q8l+WCSm42L\ndiY5MsmJSU6sqpd19y+u0/dXJjltnP1ekkuS3DLJ05OcWlV37+5z9nwUAAAAYN9t5GX530zyriS/\nleSxV6Hu2RmC/SVJHp3k2kkOT3Jykn9Jcsckr1hWWFW3TvKqDMH+jUlu0N1HJ7lpkr8at/OWqrre\nktpK8qcZgv3ZSe7a3TuS7Ejyq0muSPL4qjptsXasf3yGYH9FkqckOWKsv2uSLya5eZI3X4XjAAAA\nAPtko8L9O7r7Bt19v+5+ZoZgvUdVdUySJ4yzT+3u13f35T14Z5KfG9tOHYP8omcluWaSjyd5VHd/\nK0m6+4tJHpzky0mOTvK0JbUPTHLnDOH8Qd39obH2ku5+YZLfn+1j8daAqjokyTPH2Zd094u6+9Kx\n/kNJHpThKoK7VtXJe3MsAAAAYF9tSLjv7sv3sfSUJIckOS/JK5ds9+1J/iFJJXn4fFtVHZ1dl/L/\n7mIfxsv4Xz7OnjqeqZ/3iHH63u7+1JK+vShDQD8mw2X68+6V5Ppj++8s6fcnk7x3YT8AAABwQKx6\ntPx7jNMPdvcla6zznnG6GLDvluGs/fw6i949To9Nctwa+353lujuryY5Y419z2o/O6633r4XawEA\nAGBDrTrcHz9Oz1hnnTPH6XELZ99ntV/v7u/soXZ+/VTV9ZNc5yrs+/iF5Vel39erquuusx4AAADs\nl40aLX9fHTtO1xtRftZ2xPi6YG9ru/viqjo3w333x841zb/fm30fu7D8qvR7tv6311k3VfWJNZr2\n+ChBAAAAtrdVn7k/fJxevM46F829P+Iq1s7XL6vd230fsbB8f/oNAAAAG2rVZ+4Zdfftly0fz+jf\nbpO7AwAAwISs+sz9heP0sHXWufbc+51XsXa+flnt3u5758Ly/ek3AAAAbKhVh/vZfek3XGedWdvO\n7r5gbvkea6vqsAz32yfJ15bU7u2+v7aw/Kr0e1k9AAAAbJhVh/vZiPInrLPObGT6s9aoPaaqrpPl\n5ke5//7I+d39rewa4G5v9n3mwvKr0u9vdfe6g+kBAADA/lh1uP/AOL17VR26xjr3HqfvW1j+N0ku\nG9/fa43a+4zTc7L7lwOzfd87S1TVjbIrvC/ue1Z7QlUtjqS/uO/FWgAAANhQqw73b01yaYZL539+\nsbGqTk5yiySd5PT5tu4+L8m7xtknVdXVFmoPT/K4cfb07u6Fzb9pnN6nqm6zpG9PSlIZLqn/wELb\n+5J8M8Pxe/KSft8mu75weOOSbQMAAMCG2bBwX1XXnb2S/MBc09HzbfMhvLu/nuQl4+wLquqRVXX1\ncXsnJXnt2HZ6d39myW6fkeHs/Z2SvG7cd6rqJhm+OLhJknOTPH9J7duTfDTDMXhbVd1lrD2kqp6c\n5ImzfXT3d+cLu/vSJM8cZ3+5qp5cVYeM9T+R5G3jdv+2u9+51jEDAACAjbCRj8L71hrLP7wwf7Mk\nZ8/N/0aSWyU5Kckbkryqqi7PrtHmP5ZdZ+CvpLs/XVWnJXl1kkcm+ZmqOj/JUeMqFyY5ZbzHfrG2\nq+ohST449unDVbUzyaHZdVxe3t2vWmPfL6uqH09yWpIXJXluVV2aXc+0/0KShy2rBQAAgI206svy\n092XJTk5Q4D/SIbL9DvJp5I8NcndFkbJX6x/fZKfSPLmJN/I8Hi6Lyd5TZLbdvf716n9SpLbJnlO\nks9lCPUXZLgM/2Hd/fg99P2xSX5qXH/nWP+5JP9t3Pc565QDAADAhtiwM/fdXftRe0WSV4yvfan/\neIaQvS+15yd5+vjal/o3Z/hiAQAAAFZi5WfuAQAAgP0j3AMAAMDECfcAAAAwccI9AAAATJxwDwAA\nABMn3AMAAMDECfcAAAAwccI9AAAATJxwDwAAABMn3AMAAMDECfcAAAAwccI9AAAATJxwDwAAABMn\n3AMAAMDECfcAAAAwccI9AAAATJxwDwAAABMn3AMAAMDECfcAAAAwccI9AAAATJxwDwAAABMn3AMA\nAMDECfcAAAAwccI9AAAATJxwDwAAABMn3AMAAMDECfcAAAAwccI9AAAATJxwDwAAABMn3AMAAMDE\nCfcAAAAwccI9AAAATJxwDwAAABMn3AMAAMDECfcAAAAwccI9AAAATJxwDwAAABMn3AMAAMDECfcA\nAAAwccI9AAAATJxwDwAAABMn3AMAAMDECfcAAAAwccI9AAAATJxwDwAAABMn3AMAAMDECfcAAAAw\nccI9AAAATJxwDwAAABMn3AMAAMDECfcAAAAwccI9AAAATJxwDwAAABMn3AMAAMDECfcAAAAwccI9\nAAAATJxwDwAAABMn3AMAAMDECfcAAAAwccI9AAAATJxwDwAAABMn3AMAAMDEbYlwX1WPrqrew2vn\nOvVXq6rHVtWHq+rcqrqgqj5ZVU+pqmvtxf7vUFV/XFXnVNUlVfWlqnp1Vf3oXtQeWVXPrqqzquqi\nqvpOVb2vqh5yVY8DAAAA7ItrrLoDCy5L8i9rtF24bGFVXTPJnyU5aVz03SSXJ7nt+HpoVZ3Y3Uu/\nHKiqRyV5dYZj0UnOT3LjJI9J8tNV9YDufv8atT+U5INJbjYu2pnkyCQnJjmxql7W3b+49o8LAAAA\n+29LnLmf86HuPmaN14+sUfPsDMH+kiSPTnLtJIcnOTnDFwV3TPKKZYVVdeskr8oQ7N+Y5AbdfXSS\nmyb5q3E7b6mq6y2prSR/miHYn53krt29I8mOJL+a5Iokj6+q067yUQAAAICrYKuF+6ukqo5J8oRx\n9qnd/fruvrwH70zyc2PbqWOQX/SsJNdM8vEkj+rubyVJd38xyYOTfDnJ0UmetqT2gUnunCHEP6i7\nPzTWXtLdL0zy+7N97M2tAQAAALCvJh3uk5yS5JAk5yV55WJjd789yT8kqSQPn2+rqqOz61L+3+3u\nyxdqdyZ5+Th76nimft4jxul7u/tTS/r2ogyX+R+T4TJ9AAAAOCCmHu7vMU4/2N2XrLHOe8bpYsC+\nW4az9vPrLHr3OD02yXFr7PvdWaK7v5rkjDX2DQAAABtmq4X7E6rqjKq6eBzx/rNV9eKqutka6x8/\nTs9Yoz1Jzhynxy2cfZ/Vfr27v7OH2vn1U1XXT3Kdq7Dv49dZBwAAAPbLVgv3181whvyiJIcmOSHJ\nE5OcUVUPX7L+seP0nHW2OWs7YnztdW13X5zk3IX1F9/vzb6PXWcdAAAA2C9b5VF45yR5RpK3JPnH\n7v5uVR2S5J5JXpjhzPfrq+or3f3BubrDx+nF62z7orn3RyS54CrUzuqPzpW/GDh87v3e7PuIddZJ\nklTVJ9ZouuWeagEAANjetkS47+73ZOG+9+6+NMm7qupvM4xm/6NJnpfkJze/hwAAALB1bYlwv57u\nPq+qnpPkNUnuUlXX7e5vj80XZjirftg6m7j23Pudc+8vHKfr1c7XL6vdU/2y2qW6+/bLlo9n9G+3\np3oAAAC2r612z/1aPjpOK8n84Hqze9pvuE7trG1nd18wt3yPtVV1WIYvD5Lka0tq93bfX1tnHQAA\nANgvUwn3a5mNRn/COuvMRqo/a43aY6rqOllufpT774+c393fSjK7emBv9n3mOusAAADAfplKuL/z\n3Puz595/YJzevaoOXaP23uP0fQvL/ybJZeP7e61Re59xek52/3Jgtu97Z4mqulF2Bf/FfQMAAMCG\nWXm4X3j2/LL2I5M8bZz9u/Gs+cxbk1ya4dL5n19Se3KSWyTpJKfPt3X3eUneNc4+qaqutlB7eJLH\njbOnd3cvbP5N4/Q+VXWbJV1/UobbCL6WXV8EAAAAwIZbebhP8sNV9ZGqekxV3WS2sKquVVX3TfK3\nSX4syRVJfm2+sLu/nuQl4+wLquqRVXX1sf6kJK8d207v7s8s2fczMpy9v1OS11XVdcfam2T44uAm\nGZ5z//wltW/PMBbA1ZK8raruMtYeUlVPTvLE2T66+7t7fzgAAADgqtkqo+XfeXylqi7JMBr9kUmu\nObZflORx3f3+JbW/keRWSU5K8oYkr6qqy7NrpPqPZdcZ+Cvp7k9X1WlJXp3kkUl+pqrOT3LUuMqF\nSU5ZuFpgVttV9ZAkH8wwyN+Hq2pnkkOz67i+vLtftXeHAAAAAPbNVjhz/40kv5TkzUk+nyHIHzVO\nP57hrPnx3f2Hy4q7+7IkJ2cI8B/JcJl+J/lUkqcmudvCKPmL9a9P8hPj/r+R4dF2X87w6L3brvGF\nwqz2K0lum+Q5ST6XIdRfkOEy/Id19+P36ggAAADAflj5mfvuvjjJS8fXvm7jiiSvGF/7Uv/xJD+1\nj7XnJ3n6+AIAAIBNtxXO3AMAAAD7QbgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPu\nAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAA\nYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKE\newAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAA\nAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg4\n4R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4A\nAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAm\nTrgHAACAiRPuAQAAYOKEewAAAJg44R4AAAAmTrgHAACAiRPuAQAAYOKE+w1QVcdU1Uuq6v9W1SVV\n9Y2qekdV3XPVfQMAAODgJ9zvp6q6dZLPJvmlJDdPcmmS6ya5f5K/qqqnrbB7AAAAbAPC/X6oqsOS\n/HmS6yT5ZJJbdfdRSX4gye8kqSTPqar7rK6XAAAAHOyE+/3zC0l+OMnOJCd39xlJ0t3nd/evJPmz\nDAH/uavrIgAAAAc74X7/PGKcvqm7v7qk/YXj9HZVdYtN6hMAAADbjHC/j6pqR5Lbj7PvXmO1jyQ5\nb3xvcD0AAAAOCOF+3x2X4ZL7JDlj2QrdfUWSz4+zx29GpwAAANh+hPt9d+zc+3PWWW/Wduw66wAA\nAMA+q+5edR8mqaoenuSN4+w1u/t7a6z3xiQPT/Ke7v4P62zvE2s03eawww67+nHHHbdf/T3QPvvV\n8/a80kTd6kZH7VOdY3JljsfuHJPdOSa7c0x255hcmeOxO8dkd47J7hyTK3M8Vuess87KxRdf/C/d\nfZ392Y5wv482MdzfKsNo/Gfve28PKrccp59baS+2Fsdkd47J7hyT3TkmV+Z47M4x2Z1jsjvH5Moc\nj905JrtzTK7spknO7+6b7c9GrrExfdmWLpx7f1iSC9ZY79rjdOd6G+vu26/XzmD2JYjjtYtjsjvH\nZHeOye4ckytzPHbnmOzOMdmdY3JljsfuHJPdOSYHhnvu9938ffY3XGe9WdvXDmBfAAAA2MaE+333\nuSSzexpOWLZCVV0tyez59mduRqcAAADYfoT7fdTdFyT5+Dh77zVWu3OS2egN7zvgnQIAAGBbEu73\nz5vG6SOqatmj7n5lnH6iuz+/pB0AAAD2m3C/f16R5ItJdiR5Z1UdnyRVtaOqXpDkweN6v76i/gEA\nALANeBTefqqq22S45H72TMLzkxyR4YuTTvLr3f28FXUPAACAbUC43wBVdUySX0ty/yQ3yhDw/y7J\ni7vbvfYAAAAcUMI9AAAATJx77gEAAGDihHsAAACYOOEeAAAAJk64BwAAgIkT7gEAAGDihHu2tKra\nUVUPqKrfrqq/rKpvV1WPr1uuun+rUFU3qaonVtU7qupLVXVpVV1QVZ+uqudV1bGr7uNmq6o7jL8j\n/7uq/qmqzhuPy1er6u1V9f+uuo+rVlVHVNWX5/5+Hr3qPm22qnr03M+/1mvnqvu5ClV1i6p6aVV9\nvqouHP+Gzqqq11TVv1t1/zbDXvxuzL+2xTGZqaqrVdXPVtV7q+pbVXVZVZ1bVR+tqqdX1Y5V93Ez\n1eDUqvqrqvrO+P/N2VX16qr6sVX370DY389j4+/QY6vqw+PvzgVV9cmqekpVXWszfoaNtj/HpKru\nUlVPqKo/qqrPVdUVY93zNqv/G21fj0dVHVpVp4x/P5+pqp3j39SXqup/VdW/38QfY/KuseoOwB7c\nM8nbVt2JraKqbpzk7CQ1t/j8JIcnufX4emxVndLdH9j8Hq7Mzyf5hbn5nUmuSHLDJA9I8oCqekuS\nU7v7shX0byt4dpIfWnUntojLkvzLGm0XbmZHtoKq+qUkL0wy+4C9c3x/y/F1RZL/s5rebapv7KH9\nyCSHJfluks8e+O5sDVV17STvSHLi3OLzMhyPO42v06rqxO7+wgq6uKnGIPq/ksy+NP5ekguS/HCS\nxyR5eFX9dHf/+Yq6eKDs8+exqrpmkj9LctK46LtJLk9y2/H10PH3Z2pfru7PZ9T/neSoDezLVrCv\nx+MdSe41N39phv+nbzy+HlZVL+nuJ+5/Fw9+ztwzBd9M8q4kv5XksSvuy6pdfZz+RZKHJvnB7j4q\nybUz/Kf5z0l+IMmfVdUxq+niSnw4yS8nuX2SHd29o7sPS3KTDKElSU5J8rQV9W+lqup2Sf5Lko+u\nui9bxIe6+5g1Xj+y6s5tpqr6hSQvyfBl//OT/PDc38+xSf5Tkg+tsIubZp3fiWO6+5gk/zCu+s7u\n/s4q+7rJfjNDsO8kv5bk6O4+OsmhSU5Ncm6GYPvqlfVwcz0vQ7D/XpInJDmyu38wQwh5S4YvgP64\nqg7Gf0v29fPYszN8RrkkyaMzfGY5PMnJGb5ovWOSV2xkRzfRvh6Ti5P8XZL/nuRnk3xq47u2Evty\nPK6Z5B+T/GqS47r70O4+IsmPJvmTcZ0nVNUvbnRnD0bV3avuA6ypqq7e3ZfPzd80Q4BNhn8APreK\nfq1KVR2V5Kbd/ek12m+Z5JMZPnQ9s7t/azP7t1VV1R8m+ZkkX9iG4e1qGUL9j2f4APX3Y9PPdvfr\nVtWvVRhvRXhtUXvwdQAAC3JJREFUkv/T3f9+tb1ZvfHf0zMyfNB+bHe/aqUd2sKq6rYZ/m1Nkgce\nhGdl11RVX8zwRelruvsxS9ofneHvKhm+cP7XTezepqqq6yf5coYrW57b3b++0H6NDH9TP5bkTd39\niM3v5YGxr5/HxhMNZyc5JMkTuvv3F9ofmOGsfie5bXd/ZsM7f4Dsz2fUJbV/neTfJXl+d0/yRMR+\n/I78ZJKPztfOtVWS92b4gvGfu/vmG93vg40z92xpy/7Qt7PuPm+tYD+2fy7JR8bZ229OrybhY+P0\nhivtxWr81yR3SPKy7v7knlZmW3lChmD/UcF+jx41TmdnpbaTG4zTtf79+MTc+2sf4L6s2onZdfvK\n7y02dvf3krx0nH1wVR2xWR070Pbj89gpGYL9eUleuWS7b89wVUwlefg+d3AF9ucz6sH4+XZff6bu\n/tBatT2chX7DOHuzqvrBfe3fdiHcw8Fndrno1ddda3v5yXH6z+uudZCpqhsl+e0M9xL/xoq7w9Yz\n+yB9+kp7scWNZ2Nnx+pNY4DbTs4epz++Rvvsi+RvdPdXD3x3VuqHx+m53f3NNdaZnZ08NMndDnyX\ntrx7jNMPdvcla6zznnF64hrtbG/zt0H5bLsHwj0cRMYPoXcdZ7fNgE/L1DA6/K2r6r8n+alx8R+s\nsk8r8NIkO5L8Sneft+rObCEnVNUZVXXxOGLzZ6vqxVV1s1V3bLOM9wNff5z95Dhy8zvGkb8vHkdv\nfuF4GfJ29x+z61i9fpUdWZHZVR0/W1VPG28PS1Vdq6p+KsmLM1xS/Sur6uAmmt3Lul7AmB+s+oQD\n2JepOH6cnrHOOmeO0+PGy7Bh3uzpJN9I8u1VdmQKhHs4uPznJMdkGN16230Iraofmj12JcPoxZ9O\n8osZBvH5ze7+Hyvt4CaqqpOTPCjJX3f3H626P1vMdZMcl+SiDGfXTkjyxCRnVNWkLgvdD/9m7v2/\nT/I3Se6fYWCjTnKLDGHtU1W13QPKo8fpp7v7YBn06qr4vQyDflWS5yY5t6rOzTAg2B9nOFP9gG3y\n78wXx+mOqlrr6SPHz73fdo+mXWJ2DM5ZZ51Z2xHjC5J8/wrEx42zr2uDxe2RcA8Hiaq6dYYPXkny\nB9195nrrH6Quz/DN7jcyPGonGUY0fm6GD6fbQlUdnuEqhcsyfOHD4Jwkz0hyqySHdvd1MnyQvF+G\nM0eHJXl9Vf3b1XVx0xw99/4ZGe55vUt3H5nhmJyU4f7yY5O8ZbwqaNsZ7++8/zi77b4wTb5/H+0T\nkzw5w7+nyfAIr9lnyB1JrreCrq3CX2f4dzVJnrLYWFWHJvmluUU7NqFPW93h4/Tidda5aO69cE+S\n71+N+sYMvxNfyq7PuKxDuIeDQFUdm2G02cMyDG701NX2aDW6+2tzj606LMPZxzdkeCTLdjoD+awM\no1u/eJt+ybNUd7+nu5/V3Wd093fHZZd297syjMvwTxkuqX3eKvu5Seb//+8kD+rujyZJd1/R3X+Z\n5OfG9lskefAm92+rODXDAGrfy/Ahc9sZRzv/2yS/k+EY3CbDh+1/k+HReDdP8pqqOug/eHf3N7Lr\nkW3/tap+u6puVFXXHB85+hcZ7suffQlyxSr6CQeJl2a4JP+7SR7u9sK9I9zDxI1nlt6T5GYZnhN6\nv3UGrdk2xoDyD+Ojm343Q9j9w/HRcAet8ZFdT8jwuKZnrbg7kzF+aHjOOHuXqrruKvuzCXbOvf/f\n3f35xRW6+y+y69nu99yUXm09s1Hy/3KdAdQOdm9Icqck/7O7H93dn+nuC7v7n7r7eUl+YVzvV7fJ\nF6hPSfKXGW5T+I0kX8kQPj6RYUC430wyexzguavo4BZz4Tg9bJ115p+ysHPNtdg2quo5GS7HvzzJ\nI7r7b1fcpck4qD/kwsFuHNjo3RkuM/5SknuNZxa4stmjiX48a4/4fLB4SYbBnp6e4RGxR8y/5tY7\nZFx2sD+66qr46DitDF+WHczm73/dLdgvabvxAezLllRVxyW54zi7LS/Jr6rjk9x7nH3xsnW6+w8z\njGZ9tSQnb1LXVmb88vz+GZ6g8M4k/3d8/XmS/5Dk+dl128s/rqKPW8zs35r1HkU7a9vZ3Rcc4P6w\nxVXV0zNcFdRJTuvuP11xlyZlW95DBweD8b7qd2V4hvnXMwT7L622V1vW/OOZfiRXfi7zwWb2qKY3\nrLtW8vLx9cUkNz2QHWJLOjPDJcN7+yX/dhzE6NHj9F+SvGOF/Vil4+ber/co0S8kuU62yb8l3X1F\nhkdI7vYYyfHy/GuOsx/ezH5tUWdmGGRwvas6ZoMQnnXgu8NWVlW/nOTZ4+wTuvu1q+zPFDlzDxNU\nVYdl+LD5kxnOmNyru50hWNv8WViX/LGWO8+9P3tVndgM3X1RdgWPW6yz6qzt7APaoS2mqq6e5GfG\n2dNnYzRsQ/P3jN9knfVmXyo66zqM05Akn+puYTX5wDi9+zjg4DKzq0Petwn9YYuqqsdnuI0ySZ7W\n3S9db32WE+5hYqrqWknemuQeGe7nu093r/f82INaVV19L56LOxvV+Hs5yM+kdPdNu7vWes2t+rPj\nspuuqq+baU+/I1V1ZJKnjbN/193fOvC9WrnZ1R33rardAn5V3S/Jj42z79q0Xm0N98quS4W35SX5\no0/PvT9t2QrjYzevP85+dNk620VV3SbJfxlnD/oBBvfSW5NcmuFWhZ9fbBx/f26R4eqg3a6EYHuo\nqkdl11ONntXdz19lf6ZMuGfLq6rrzl5JfmCu6ej5toN9oLTk+2eT3pTkvhnOkPzH7v771fZq5W6c\n5ONV9XPzzx2uqqtV1W2r6o3Z9YHipd39r0u3wsHuh6vqI1X1mKr6/hnIqrpWVd03w2jgP5bhTOWv\nraqTm+w1GS6ZvXqSt1bVnZLv/+3cN8n/HNf7SLZfuJ8NpHdmd39spT1Zoe7+QoYBW5PkiVX13Kq6\nfpKMY3Y8OsnrxvazM9x3flCrqntU1ZOr6kfH/5NTVUdV1S8keX+SQ5O8ubvfvNKOHgD78nmsu7+e\nYSyYJHlBVT1y7ridlGR22fXp3f2Zzfg5NtK+fkYd/37ma2e3chy2UDepcXH25XhU1SkZ/r+pJC/s\n7mdscrcPKtW9HW+jY0qqam9/SW/W3WcfyL6s2vj87f8zzl6SZL3Hgny5u++4TvtBoapumivfC3pJ\nhkvvdyQ5ZG756zIMzPK9bGNzf08/292vW2VfNtMavycXJjkyuz5UXZTkceMAYdtCVd08w7O7ZwPm\nXZAh7M8+UJ6Z4eqgr+5efXAar+L4eobRvZ/a3S9YcZdWanzU6vty5fvvL8iVn+H+jQxfNn9yM/u2\nCuMXGrNA+r0Mx+LoDMEkGc4+P6q7L9v83h1Y+/p5rKqumeFxvSeNiy7NMAr67N+ZjyW55xQH09uP\nY/K67PoScT2/1d3PvOo9W419OR5V9YXsun1yT4NCP7i7P7SP3dsWDKgH0zL/ze+h42st2+VxeOck\n+akMj+q6U5JjMwzsdEmGEYw/nOS1HqOy7X0jyS8luVuG53RfL8lRGQL+P2YILy/r7i+urIcr0N1f\nqKr/J8OtKw/K8AHriiR/n+RPMlztcuE6mzgYPSxDsL8iyR+tuC8r191fq6rbJ3lskgdneDrLUUnO\nT/JPGZ7t/tJtcitLkvxNkt9L8m8zjDWwI8OgrR/O8LjAd6+wb1tSd182Xn5/WoaBKo/P8CXipzJ8\nGfJ723hcC6782fYGe1j3WgeyIwcDZ+4BAABg4g76e5QBAADgYCfcAwAAwMQJ9wAAADBxwj0AAABM\nnHAPAAAAEyfcAwAAwMQJ9wAAADBxwj0AAABMnHAPAAAAEyfcAwAAwMQJ9wAAADBxwj0AAABMnHAP\nAAAAEyfcAwAAwMQJ9wAAADBxwj0AAABMnHAPAAAAEyfcAwAAwMT9/y/pIZ4V/wKvAAAAAElFTkSu\nQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x1083a9358>"
      ]
     },
     "metadata": {
      "image/png": {
       "height": 376,
       "width": 507
      }
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.rcParams['figure.figsize'] = 8, 6\n",
    "plt.rcParams['font.size'] = 12\n",
    "\n",
    "\n",
    "plt.bar(avg_month['Month'].astype('float64'), avg_month['Average'])\n",
    "plt.title('Average Crime Count Throughout the Year')\n",
    "plt.xticks(range(1, 13))\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Judging from the plot below, we can see that the average crime count tends to be higher during the summer. The reason behind this phenomenon might be:\n",
    "\n",
    "- Elevated temperature makes people more irritable, which leads to a rise in aggressive behavior and crime.\n",
    "- During the summer, people tend to leave their windows open more often to cool down or spend more time outside to enjoy outdoor activities. Both of them gives burglars more opportunity to break into people's home."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Question 2\n",
    "\n",
    "- Find the top 10 blocks in crime events in the last 3 years\n",
    "- Find the two beats that are adjacent with the highest correlation in the number of crime events (this will require you looking at the map to determine if the correlated beats are adjacent to each other) over the last 5 years\n",
    "- Determine if the number of crime events is different between Mayors Daly and Emanuel at a granularity of your choice (not only at the city level). Find an explanation of results. Side information: Rahm Emanuel is the current mayor of Chicago, and Richard Daley was his predecessor (left the office in 2011)\n",
    "\n",
    "The following section also contains a small example of how to use the `mapPartition` function."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('1901', '52'), ('1902', '70')]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# suppose our task if to find the maximum temperature of the year 1901, 1902\n",
    "temp = sc.parallelize(['1901,52', '1901,45', '1902,50', '1902,70', '2000,100'])\n",
    "\n",
    "# since map works with a single record, for each input there has to be an output\n",
    "# hence we have to write a separate map and filter function to filter out the record\n",
    "# that does not belong to 1901, 1902\n",
    "(temp.\n",
    " map(lambda x: x.split(',')).\n",
    " filter(lambda x: x[0] in {'1901', '1902'}).\n",
    " reduceByKey(lambda x, y: max(x, y)).\n",
    " collect())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('1901', '52'), ('1902', '70')]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def filter_year(iterable):\n",
    "    # loop through each data point in\n",
    "    # the chunk of data and do whatever\n",
    "    for line in iterable:\n",
    "        year, temperature = line.split(',')\n",
    "        if year in {'1901', '1902'}:\n",
    "            yield year, temperature\n",
    "\n",
    "# by using mapPartition we can use a function to work \n",
    "# with each partition (i.e. chunk of data), thus we \n",
    "# can combine the map and filter logic within each partition\n",
    "(temp.\n",
    " mapPartitions(filter_year).\n",
    " reduceByKey(lambda x, y: max(x, y)).\n",
    " collect())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "2013"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# extract the year from the date column\n",
    "data_path = 'Crimes_-_2001_to_present.csv'\n",
    "df = spark.read.csv(data_path, sep = ',', header = True)\n",
    "\n",
    "split_col = F.split(df['Date'], ' ')\n",
    "df = df.withColumn('Day', split_col.getItem(0))\n",
    "split_col = F.split(df['Day'], '/')\n",
    "df = df.withColumn('Month', split_col.getItem(0).cast('int'))\n",
    "df = df.withColumn('Year', split_col.getItem(2).cast('int'))\n",
    "\n",
    "# obtain the starting year for the notion of last three year\n",
    "unique_years = (df.\n",
    "                select(df['Year']).\n",
    "                distinct().\n",
    "                orderBy('Year').\n",
    "                rdd.map(lambda x: x.Year).\n",
    "                collect())\n",
    "n_years = 3\n",
    "year_threshold = unique_years[-n_years]\n",
    "year_threshold"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Block</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>001XX N STATE ST</td>\n",
       "      <td>1745</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0000X W TERMINAL ST</td>\n",
       "      <td>1340</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>008XX N MICHIGAN AVE</td>\n",
       "      <td>1083</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>076XX S CICERO AVE</td>\n",
       "      <td>1037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0000X N STATE ST</td>\n",
       "      <td>794</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>051XX W MADISON ST</td>\n",
       "      <td>661</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>064XX S DR MARTIN LUTHER KING JR DR</td>\n",
       "      <td>628</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>083XX S STEWART AVE</td>\n",
       "      <td>604</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>046XX W NORTH AVE</td>\n",
       "      <td>571</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>009XX W BELMONT AVE</td>\n",
       "      <td>550</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                 Block  count\n",
       "0                     001XX N STATE ST   1745\n",
       "1                  0000X W TERMINAL ST   1340\n",
       "2                 008XX N MICHIGAN AVE   1083\n",
       "3                   076XX S CICERO AVE   1037\n",
       "4                     0000X N STATE ST    794\n",
       "5                   051XX W MADISON ST    661\n",
       "6  064XX S DR MARTIN LUTHER KING JR DR    628\n",
       "7                  083XX S STEWART AVE    604\n",
       "8                    046XX W NORTH AVE    571\n",
       "9                  009XX W BELMONT AVE    550"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# top 10 blocks in terms of crime events\n",
    "n_top = 10\n",
    "crime_per_block = (df.\n",
    "                   filter(df['Year'] >= year_threshold).\n",
    "                   groupBy('Block').\n",
    "                   count().\n",
    "                   orderBy('count', ascending = False).\n",
    "                   limit(n_top).\n",
    "                   toPandas())\n",
    "crime_per_block"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For part 1, the top 10 blocks in terms of number of crime events are places either in downtown Chicago or the South part of Chicago. This is probably due to the fact that those areas are densely populated, hence increases the chance of crime occurence.\n",
    "\n",
    "---\n",
    "\n",
    "For the problem of finding the two beats that are adjacent with the highest correlation in the number of crime events, I decided to use the [rdd `corr`](https://spark.apache.org/docs/2.1.0/mllib-statistics.html#correlations) function from the mllib package to perform the operation. Even thought there's a [DataFrame `corr`](http://spark.apache.org/docs/2.1.0/api/python/pyspark.sql.html#pyspark.sql.DataFrame.corr) function, I couldn't find an easy way to utilize that API."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(('0111', 2011), 1040),\n",
       " (('0111', 2012), 1445),\n",
       " (('0111', 2013), 1644),\n",
       " (('0111', 2014), 1632),\n",
       " (('0111', 2015), 530),\n",
       " (('0112', 2011), 736),\n",
       " (('0112', 2012), 1471),\n",
       " (('0112', 2013), 1499),\n",
       " (('0112', 2014), 1343),\n",
       " (('0112', 2015), 449)]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# compare correlation across years\n",
    "n_years = 5\n",
    "year_threshold = unique_years[-n_years]\n",
    "\n",
    "def filter_year(iterable):\n",
    "    for beat, year in iterable:\n",
    "        if year >= year_threshold:\n",
    "            yield (beat, year), 1\n",
    "\n",
    "# the sort by key is just to make the output\n",
    "# cleaner, we don't actually need it\n",
    "crime_per_group = (df.\n",
    "                   select(df['Beat'], df['Year']).\n",
    "                   rdd.mapPartitions(filter_year).\n",
    "                   reduceByKey(lambda x, y: x + y).\n",
    "                   sortByKey())\n",
    "crime_per_group.take(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('1613', '2011,517;2012,505;2013,431;2014,470;2015,166;'),\n",
       " ('1422', '2011,1535;2012,1465;2013,1249;2014,945;2015,302;'),\n",
       " ('1411', '2011,958;2012,936;2013,772;2014,721;2015,214;')]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def beat2key(x):\n",
    "    (beat, year), count = x\n",
    "    # choose a random delimiter to separate the\n",
    "    # two values and the next value that will be\n",
    "    # concatenated to it\n",
    "    value = str(year) + ',' + str(count) + ';'\n",
    "    return beat, value\n",
    "\n",
    "\n",
    "# combine the count for each beat together\n",
    "# into one string value\n",
    "crime_per_beat = (crime_per_group.\n",
    "                  map(beat2key).\n",
    "                  reduceByKey(lambda x, y: x + y))\n",
    "crime_per_beat.take(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('2011',\n",
       "  '1613,517;1422,1535;1411,958;0934,1340;1021,1518;0234,1040;0232,1022;2431,684;0114,18;1522,2559;1531,1798;1922,634;0431,1595;1614,680;0411,1534;0612,2272;2023,645;2011,1027;1733,1132;0631,1942;1831,1630;1222,786;2515,1609;1914,14;0533,1442;0123,1266;0225,8;0935,1221;1933,1050;0222,925;1713,824;2422,1487;1112,2095;0333,1125;2433,1195;1233,851;1813,542;0614,1346;2535,1709;2212,1429;1223,1057;1913,793;0922,948;0824,1760;2412,1232;1113,1599;0911,1335;1915,16;0311,882;1022,1366;1224,917;0914,1571;2211,933;1634,1096;0433,1163;0531,1268;2031,554;1623,1055;2512,1681;1013,1268;0322,1504;0122,1949;1232,409;2012,552;0715,1121;1653,286;1214,5;2522,1219;0622,889;1711,850;1421,1114;1722,1178;1732,1258;0213,737;2513,678;0422,1386;0613,1548;1023,895;0725,1677;2525,922;1431,1011;0511,2594;1433,1365;0512,2071;0214,722;1131,913;1132,1714;0231,310;0814,881;1213,859;0132,2183;0221,505;1133,1689;0825,2499;1612,494;0313,1502;0925,652;1125,473;1414,784;1811,915;1215,2;1211,1470;2013,623;1225,6;0811,1158;1014,1268;1934,15;1121,1797;0734,1115;0815,1546;1231,795;1921,15;0623,1730;1011,2029;1423,1067;2234,1587;1911,523;1123,1236;0835,1669;0722,1059;1834,2213;0424,1814;2532,1783;1513,1175;0423,2834;0735,1753;1115,1344;2523,1113;0931,1051;1731,1303;2524,1095;0312,1840;2222,1371;0235,8;1511,1484;0334,1281;1832,1335;1434,1204;1723,1611;1114,998;0223,897;1512,1083;1822,870;2534,1762;1655,101;1412,1012;1935,13;0733,1398;0713,2036;2022,572;1821,1499;0314,1056;0813,1767;0133,587;1134,1529;0212,446;0834,1676;0421,3004;0913,1310;0321,2162;1532,1873;1432,1146;2024,752;0632,1835;0532,1319;1924,1270;0633,1000;1633,1316;1651,430;1632,857;2531,1480;0833,1818;2514,1375;0711,1453;0524,1600;1221,9;0624,2520;1712,1281;1652,181;1523,1794;1654,383;0233,1426;0921,656;1925,24;1124,1114;0111,1040;0723,1536;1135,1716;0611,1856;0732,1152;0121,12;1012,1007;0414,2383;1833,1798;0523,1881;2424,1068;0113,850;0923,1040;2423,810;1024,1238;2223,1470;0522,1987;1424,1377;0821,1203;1611,616;2233,1430;1923,1358;1235,11;1413,876;0324,1859;0513,1772;0224,722;2533,2286;1624,1022;2411,1141;0731,1209;1932,1157;2032,548;1631,749;1031,1449;0124,717;0924,1405;0634,1452;0832,2179;0432,1523;1912,694;0215,14;1122,1792;2521,1526;0131,1331;0323,1758;0932,1204;0812,847;1622,1238;0112,736;1812,959;1212,1250;2432,1066;2221,1104;2033,486;0831,2136;0434,828;0912,1787;1621,342;0714,1266;0621,2243;1034,1078;1033,1038;0211,919;1533,2309;0412,1709;0915,1281;1724,1007;2511,1025;0332,1747;0822,1599;1234,12;1111,1545;0726,1429;0933,990;1823,664;2232,1422;0413,1618;0331,1636;1931,937;0712,1904;1814,861;2413,1514;0823,2574;2213,911;1032,786;1824,1234;1524,1412;0724,1209;')]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_count_per_year(iterable):\n",
    "    for beat, value in iterable:\n",
    "        # filter out the last ';'\n",
    "        year_and_count = value.split(';')[:-1]\n",
    "        if len(year_and_count) == n_years:\n",
    "            year_and_count = [tuple(e.split(',')) for e in year_and_count]\n",
    "            for year, count in year_and_count:\n",
    "                value = beat + ',' + count + ';'\n",
    "                yield year, value\n",
    "\n",
    "# convert the single string value into numeric value and\n",
    "# filter out any beat that does not have all the values,\n",
    "# i.e. does not have records for all the year;\n",
    "# also convert year as key since the correlation function\n",
    "# assumes that each column is a record as oppose to each row\n",
    "count_per_year = (crime_per_beat.\n",
    "                  mapPartitions(get_count_per_year).\n",
    "                  reduceByKey(lambda x, y: x + y))\n",
    "count_per_year.take(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "274"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_unique_beats(count_per_year):\n",
    "    unique_beats = []\n",
    "    _, value = count_per_year\n",
    "    beat_and_count = value.split(';')[:-1]\n",
    "    beat_and_count = [tuple(e.split(',')) for e in beat_and_count]\n",
    "    beat_and_count = sorted(beat_and_count)\n",
    "    for beat, _ in beat_and_count:\n",
    "        unique_beats.append(beat)\n",
    "    \n",
    "    return unique_beats\n",
    "\n",
    "\n",
    "# load all the distinct beat into memory,\n",
    "# feasible since there's only around 300 of them\n",
    "unique_beats = get_unique_beats(count_per_year.first())\n",
    "len(unique_beats)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 1.        ,  0.95677052,  0.897641  , ...,  0.7133157 ,\n",
       "         0.71141596,  0.54571851],\n",
       "       [ 0.95677052,  1.        ,  0.87074554, ...,  0.64021917,\n",
       "         0.6539807 ,  0.48406046],\n",
       "       [ 0.897641  ,  0.87074554,  1.        , ...,  0.93041864,\n",
       "         0.94033636,  0.84316419],\n",
       "       ..., \n",
       "       [ 0.7133157 ,  0.64021917,  0.93041864, ...,  1.        ,\n",
       "         0.99396707,  0.97564078],\n",
       "       [ 0.71141596,  0.6539807 ,  0.94033636, ...,  0.99396707,\n",
       "         1.        ,  0.97296516],\n",
       "       [ 0.54571851,  0.48406046,  0.84316419, ...,  0.97564078,\n",
       "         0.97296516,  1.        ]])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pyspark.mllib.stat import Statistics\n",
    "\n",
    "\n",
    "def get_beat_count(x):\n",
    "    _, value = x\n",
    "    beat_and_count = value.split(';')[:-1]\n",
    "    beat_and_count = [tuple(e.split(',')) for e in beat_and_count]\n",
    "    beat_and_count = sorted(beat_and_count)\n",
    "    count = [int(count) for _, count in beat_and_count]\n",
    "    return count      \n",
    "\n",
    "\n",
    "raw_count = count_per_year.map(get_beat_count)\n",
    "corr_m = Statistics.corr(raw_count, method = 'pearson')\n",
    "corr_m.flags['WRITEABLE'] = True\n",
    "corr_m"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def most_similar_pair(pairwise, topn = 3, ascending = False):\n",
    "    \"\"\"\n",
    "    Obtain the topn most/least similar pair of the input\n",
    "    similarity matrix.\n",
    "\n",
    "    Paramters\n",
    "    ---------\n",
    "    pairwise : 2d ndarray\n",
    "        Pairwise similarity matrix.\n",
    "\n",
    "    topn : int, default 3\n",
    "        Top N most similar/dissimilar pair's N.\n",
    "\n",
    "    ascending : bool, default False\n",
    "        Whether to find the most similar of dissimilar pair.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    rows, cols : 1d ndarray\n",
    "        Indices of the pair.\n",
    "    \"\"\"\n",
    "    pairwise = pairwise.copy()\n",
    "    if not ascending:\n",
    "        # the item is always most similar to itself\n",
    "        # exclude that from the calculation if\n",
    "        # we're computing the most similiar\n",
    "        np.fill_diagonal(pairwise, 0.)\n",
    "\n",
    "    # flatten the 2d correlation matrix and\n",
    "    # sort it by decreasing order\n",
    "    corr_result = np.ravel(pairwise)\n",
    "    indices = np.argsort(corr_result)\n",
    "    if not ascending:\n",
    "        indices = indices[::-1]\n",
    "\n",
    "    # we then grab the top n elements;\n",
    "    # since correlation matrix is symmetric,\n",
    "    # we take the largest n * 2 and with a stride of 2\n",
    "    largest_indices = indices[:(topn * 2):2]\n",
    "\n",
    "    # obtain the corresponding index in the\n",
    "    # original correlation matrix (e.g. element (3, 2)\n",
    "    # contains the largest value thus it comes first)\n",
    "    rows, cols = np.unravel_index(largest_indices, pairwise.shape)\n",
    "    return rows, cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([ 41, 242, 162, 264,  80,  44, 122, 208,  51, 247, 252, 187, 159,\n",
       "        180, 169, 169, 204, 175,  37,  42,  45, 253,  41,  98, 199,  89,\n",
       "        226,  57, 212, 271, 164,  18,  60,  70, 173, 254,  40,  98, 254,\n",
       "        158,  65, 264,  42, 242, 262,  33, 133,  60, 205,  70,  99,  53,\n",
       "        190, 271,  87,  37, 207, 257,  53, 202, 240,  22, 260,  71,  36,\n",
       "        201,  35, 176, 249, 164,  44,  55,  41,  88, 273,  32, 254,  51,\n",
       "        267, 125,  56,  37, 128, 258, 264, 229, 164, 244,  88, 235, 232,\n",
       "        171, 256, 190,  27,  43, 207,  35, 139, 197]),\n",
       " array([182, 233,  86, 216, 142,  49,  35,  29,  30,  72, 176, 182, 210,\n",
       "        128, 215,  36, 201, 246, 105, 167,  88,  73, 187,  48, 233, 216,\n",
       "        231, 170,  52, 252, 234, 238,  92,  92, 205, 197,  47, 249, 118,\n",
       "         54,  68,  89, 262, 199, 202, 122, 134,  93,  94,  60, 189,  76,\n",
       "        179, 182,  27, 128, 124,  66, 125, 200,  87, 216, 256, 142, 215,\n",
       "        239,  33, 271,  48,  71,  32,  36, 271,  30, 204, 255,  63,  88,\n",
       "        170, 215, 180, 123,  56,  80,  22, 264,  51,  26, 164, 214,  22,\n",
       "         39, 253,  33,  53, 111, 250,  98,   6, 118]))"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "n_largest = 100\n",
    "rows, cols = most_similar_pair(corr_m, topn = n_largest)\n",
    "rows, cols"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "After computing the year-level crime number correlation between all the pairwise beats combination and outputting the top 100 combination, the two adjacent beats that have the highest correlation is listed below (this is done by manually inspecting the top 100 pairs)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>beat1</th>\n",
       "      <th>beat2</th>\n",
       "      <th>corr</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>1121</td>\n",
       "      <td>1122</td>\n",
       "      <td>0.999749</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   beat1 beat2      corr\n",
       "46  1121  1122  0.999749"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results = []\n",
    "for i, j in zip(rows, cols):\n",
    "    result = [unique_beats[i], unique_beats[j], corr_m[i, j]]\n",
    "    results.append(result)\n",
    "\n",
    "df_corr = pd.DataFrame(results, columns = ['beat1', 'beat2', 'corr'])\n",
    "df_corr[ (df_corr['beat1'] == '1121') & (df_corr['beat2'] == '1122') ]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "For part 3, determining if the number of crime events is different between Mayors Daly and Emanuel, we will compute the monthly crime occurences per beat and perform a pair t-test to evaluate whether the difference is significant or not."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----+-----+------------+----------+-----+\n",
      "|Beat|Month|EmanuelCount|DaleyCount| Diff|\n",
      "+----+-----+------------+----------+-----+\n",
      "|0332|    4|         465|      1672|-1207|\n",
      "|0412|    4|         478|      1789|-1311|\n",
      "|0413|    8|         434|      1877|-1443|\n",
      "|0434|    7|         253|       859| -606|\n",
      "|0624|    6|         540|      2863|-2323|\n",
      "|1113|    1|         503|      1518|-1015|\n",
      "|1331|    8|          91|      1358|-1267|\n",
      "|1532|    8|         472|      2437|-1965|\n",
      "|1613|    9|         138|       611| -473|\n",
      "|1655|    5|          33|        16|   17|\n",
      "|1814|    7|         326|      1186| -860|\n",
      "|1834|    8|         582|      2654|-2072|\n",
      "|2031|    3|         144|       697| -553|\n",
      "|2211|    7|         240|      1255|-1015|\n",
      "|2423|   11|         144|      1005| -861|\n",
      "|2522|    4|         327|      1346|-1019|\n",
      "|2534|    8|         424|      2220|-1796|\n",
      "|0123|    2|         339|      1154| -815|\n",
      "|0212|    7|         312|      1355|-1043|\n",
      "|0214|    7|         222|       934| -712|\n",
      "+----+-----+------------+----------+-----+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df = spark.read.csv(data_path, sep = ',', header = True)\n",
    "split_col = F.split(df['Date'], ' ')\n",
    "df = df.withColumn('Day', split_col.getItem(0))\n",
    "split_col = F.split(df['Day'], '/')\n",
    "df = df.withColumn('Month', split_col.getItem(0).cast('int'))\n",
    "df = df.withColumn('Year', split_col.getItem(2).cast('int'))\n",
    "\n",
    "emanuel = (df.\n",
    "           filter(df['Year'] > 2011).\n",
    "           groupBy('Beat', 'Month').\n",
    "           count().\n",
    "           withColumnRenamed('count', 'EmanuelCount'))\n",
    "\n",
    "daley = (df.\n",
    "         filter(df['Year'] <= 2011).\n",
    "         groupBy('Beat', 'Month').\n",
    "         count().\n",
    "         withColumnRenamed('count', 'DaleyCount'))\n",
    "\n",
    "joined = (emanuel.\n",
    "          join(daley, on = ['Beat', 'Month']).\n",
    "          withColumn('Diff', (F.col('EmanuelCount') - F.col('DaleyCount')).alias('double')))\n",
    "joined.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(-1061.9955960070463, 512.3930046068413, 3406)]"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# compute the pair t-test\n",
    "test_info = (joined.\n",
    "             select(F.mean(F.col('Diff')).alias('Mean'),\n",
    "                    F.stddev(F.col('Diff')).alias('Std'),\n",
    "                    F.count(F.col('Diff')).alias('Count')).\n",
    "             rdd.map(lambda x: (x.Mean, x.Std, x.Count)).\n",
    "             collect())\n",
    "test_info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "-120.96001879348958"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# the t value is extremely large,\n",
    "# indicating that there is a difference in crime\n",
    "mean, std, count = test_info[0]\n",
    "standard_error = std / np.sqrt(count)\n",
    "tvalue = mean / standard_error\n",
    "tvalue"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We observe a large pair t-statistic when comparing monthly crime numbers between mayor Daley and Emanuel. This indicates the data provides strong evidence against the null hypothesis in which there is no difference in the number of crimes between the two mayors. And based on the way the number is calculated, a large negative number means there's significantly less number of crimes when Rahm Emanuel (the current mayor of Chicago) is mayor."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Question 3\n",
    "\n",
    "Predict the number of crime events in the next week at the beat level.\n",
    "\n",
    "- Framed the problem as a supervised machine learning problem, thus lagged features were created. i.e. we use week 1, 2, 3's number to predict the number for week 4. We could have framed it as a time series problem, but here we're simply using it as a chance to familarize ourselves with Spark ML's API\n",
    "- Trained a RandomForest model for every beat level. This decision is due to the fact that if we were to only train one model, it would require us to one-hot encode around 300 different beat values, which is often times not ideal as a categorical variable with too many distinct levels often leads to overfitting\n",
    "- For the RandomForest model, 3-fold cross validation and a grid search on the `maxDepth` parameter was performed\n",
    "\n",
    "---\n",
    "\n",
    "A quick intro into the SQL window function:\n",
    "\n",
    "Window functions operate on a set of rows and return a single value for each row from the underlying query. When we use a window function in a query, we define the window using the `OVER()` clause, which has the folloiwing capabilities:\n",
    "\n",
    "- Defines window partitions to form groups of rows. (PARTITION BY clause)\n",
    "- Orders rows within a partition. (ORDER BY clause)\n",
    "\n",
    "e.g.\n",
    "\n",
    "The following query uses the `AVG()` window function with the `PARTITION BY` clause to determine the average car sales for each dealer in Q1:\n",
    "\n",
    "```sql\n",
    "SELECT \n",
    "    emp_name, dealer_id, sales, \n",
    "    AVG(sales) OVER (PARTITION BY dealer_id) AS avgsales \n",
    "FROM \n",
    "    q1_sales;\n",
    "```\n",
    "\n",
    "<img src=\"img/window_query.png\" width=\"40%\" height=\"40%\">\n",
    "\n",
    "For more information, including different types of window function the following link gives a very nice overview. [Drill Documentation: SQL Window Functions Introduction](https://drill.apache.org/docs/sql-window-functions-introduction/)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+---+---+\n",
      "| id|num|\n",
      "+---+---+\n",
      "|  4|9.0|\n",
      "|  3|7.0|\n",
      "|  2|3.0|\n",
      "|  1|5.0|\n",
      "+---+---+\n",
      "\n",
      "+---+---+-------+\n",
      "| id|num|new_col|\n",
      "+---+---+-------+\n",
      "|  1|5.0|    3.0|\n",
      "|  2|3.0|    7.0|\n",
      "|  3|7.0|    9.0|\n",
      "+---+---+-------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from pyspark.sql.window import Window\n",
    "\n",
    "# Reference\n",
    "# ---------\n",
    "# https://stackoverflow.com/questions/34295642/spark-add-new-column-to-dataframe-with-value-from-previous-row\n",
    "d = sc.parallelize([(4, 9.0), (3, 7.0), (2, 3.0), (1, 5.0)]).toDF(['id', 'num'])\n",
    "d.show()\n",
    "\n",
    "# here we define a window to create a lagged feature,\n",
    "# so every row gets shifted up by 1 (determined by the\n",
    "# count parameter of the lag function); remember to drop\n",
    "# the na value since there's no record before id1\n",
    "w = Window().partitionBy().orderBy('id')\n",
    "d = (d.\n",
    "     withColumn('new_col', F.lag(F.col('num'), count = -1).over(w)).\n",
    "     na.drop())\n",
    "d.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Row(ID='10078659', Case Number='HY267429', Date='05/19/2015 11:57:00 PM', Block='010XX E 79TH ST', IUCR='143A', Primary Type='WEAPONS VIOLATION', Description='UNLAWFUL POSS OF HANDGUN', Location Description='STREET', Arrest='true', Domestic='false', Beat='0624', District='006', Ward='8', Community Area='44', FBI Code='15', X Coordinate='1184626', Y Coordinate='1852799', Year='2015', Updated On='05/26/2015 12:42:06 PM', Latitude='41.751242944', Longitude='-87.599004724', Location='(41.751242944, -87.599004724)', DateTime=datetime.datetime(2015, 5, 19, 0, 0), WeekOfYear='21', Time='201521')]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from pyspark.sql.window import Window\n",
    "from pyspark.sql.types import StringType\n",
    "\n",
    "\n",
    "data_path = 'Crimes_-_2001_to_present.csv'\n",
    "df = spark.read.csv(data_path, sep = ',', header = True)\n",
    "\n",
    "\n",
    "def date2weeknumber(date):\n",
    "    \"\"\"\n",
    "    append a 0 in front of single digit week number,\n",
    "    this is used when sorting the week number so that\n",
    "    week 02 will still come before week 10\n",
    "    \"\"\"\n",
    "    if len(date) == 2:\n",
    "        return date\n",
    "    else:\n",
    "        return '0' + date\n",
    "\n",
    "udf_date2weeknumber = F.udf(date2weeknumber, StringType())\n",
    "\n",
    "# convert the Date column to spark timestamp format to retrieve the time,\n",
    "# date formats follow the formats at java.text.SimpleDateFormat\n",
    "#\n",
    "# Reference\n",
    "# ---------\n",
    "# http://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html\n",
    "# https://stackoverflow.com/questions/25006607/how-to-get-day-of-week-in-sparksql\n",
    "timestamp_format = 'MM/dd/yyyy'\n",
    "crime = (df.\n",
    "         withColumn('DateTime', \n",
    "                    F.unix_timestamp(F.col('Date'), timestamp_format).cast('timestamp')).\n",
    "         withColumn('WeekOfYear', F.weekofyear(F.col('DateTime')).cast('string')).\n",
    "         withColumn('WeekOfYear', udf_date2weeknumber(F.col('WeekOfYear'))).\n",
    "         withColumn('Year', F.year(F.col('DateTime')).cast('string')).\n",
    "         withColumn('Time', F.concat(F.col('Year'), F.col('WeekOfYear'))))\n",
    "crime.take(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----+------+-----+\n",
      "|Beat|  Time|label|\n",
      "+----+------+-----+\n",
      "|0111|200101|   35|\n",
      "|0111|200102|   36|\n",
      "|0111|200103|   43|\n",
      "|0111|200104|   29|\n",
      "|0111|200105|   23|\n",
      "|0111|200106|   36|\n",
      "|0111|200107|   21|\n",
      "|0111|200108|   26|\n",
      "|0111|200109|   29|\n",
      "|0111|200110|   32|\n",
      "|0111|200111|   36|\n",
      "|0111|200112|   29|\n",
      "|0111|200113|   34|\n",
      "|0111|200114|   33|\n",
      "|0111|200115|   22|\n",
      "|0111|200116|   28|\n",
      "|0111|200117|   44|\n",
      "|0111|200118|   40|\n",
      "|0111|200119|   32|\n",
      "|0111|200120|   33|\n",
      "+----+------+-----+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# 1. don't really need orderBy, it's \n",
    "# just to make the output cleaner;\n",
    "# 2. rename the count column to label to\n",
    "# make it the label column for the modeling part;\n",
    "# 3. we will be using the crime count dataframe a lot\n",
    "# to perform the modeling, thus cache it to store it\n",
    "# in memory\n",
    "crime_count = (crime.\n",
    "               select('Beat', 'Time').\n",
    "               groupby('Beat', 'Time').\n",
    "               count().\n",
    "               withColumnRenamed('count', 'label').\n",
    "               orderBy('Beat', 'Time').\n",
    "               cache())\n",
    "\n",
    "crime_count.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# find all the distinct beat and train the model on each\n",
    "# beat to generate the prediction for next week\n",
    "beats = (crime_count.\n",
    "         select('Beat').\n",
    "         distinct().\n",
    "         rdd.map(lambda x: x.Beat).\n",
    "         collect())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+------+-----+\n",
      "|  Time|label|\n",
      "+------+-----+\n",
      "|200101|   33|\n",
      "|200102|   24|\n",
      "|200103|   29|\n",
      "|200104|   22|\n",
      "|200105|   32|\n",
      "|200106|   35|\n",
      "|200107|   32|\n",
      "|200108|   38|\n",
      "|200109|   34|\n",
      "|200110|   40|\n",
      "|200111|   28|\n",
      "|200112|   41|\n",
      "|200113|   29|\n",
      "|200114|   27|\n",
      "|200115|   34|\n",
      "|200116|   34|\n",
      "|200117|   33|\n",
      "|200118|   45|\n",
      "|200119|   36|\n",
      "|200120|   38|\n",
      "+------+-----+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# we will use one of the beat to demonstrate the modeling for now\n",
    "beat = '1011'\n",
    "subset = (crime_count.\n",
    "          filter(F.col('Beat') == beat).\n",
    "          drop('Beat'))\n",
    "subset.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def create_lagged_features(spark_df, window_col, feature_col, n_lags):\n",
    "    \"\"\"\n",
    "    Reference\n",
    "    ---------\n",
    "    https://stackoverflow.com/questions/34295642/spark-add-new-column-to-dataframe-with-value-from-previous-row\n",
    "    \"\"\"\n",
    "    w = Window().partitionBy().orderBy(window_col)\n",
    "    lagged = (subset.\n",
    "              withColumn('Lag1', F.lag(F.col(feature_col), count = 1).over(w)).\n",
    "              na.drop())\n",
    "    \n",
    "    for lag in range(1, n_lags):\n",
    "        previous_col = 'Lag' + str(lag)\n",
    "        current_col = 'Lag' + str(lag + 1)\n",
    "        lagged = (lagged.\n",
    "                  withColumn(current_col, F.lag(F.col(previous_col), count = 1).over(w)).\n",
    "                  na.drop())\n",
    "    \n",
    "    # after creating the lag sort the time by descending order\n",
    "    # so it will be easier to project to the future\n",
    "    lagged = lagged.orderBy(window_col, ascending = False)\n",
    "    return lagged"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+------+-----+----+----+----+----+\n",
      "|  Time|label|Lag1|Lag2|Lag3|Lag4|\n",
      "+------+-----+----+----+----+----+\n",
      "|201521|    8|  28|  43|  26|  26|\n",
      "|201520|   28|  43|  26|  26|  24|\n",
      "|201519|   43|  26|  26|  24|  34|\n",
      "|201518|   26|  26|  24|  34|  42|\n",
      "|201517|   26|  24|  34|  42|  25|\n",
      "|201516|   24|  34|  42|  25|  24|\n",
      "|201515|   34|  42|  25|  24|  33|\n",
      "|201514|   42|  25|  24|  33|  28|\n",
      "|201513|   25|  24|  33|  28|  29|\n",
      "|201512|   24|  33|  28|  29|  26|\n",
      "|201511|   33|  28|  29|  26|  26|\n",
      "|201510|   28|  29|  26|  26|  29|\n",
      "|201509|   29|  26|  26|  29|  36|\n",
      "|201508|   26|  26|  29|  36|  31|\n",
      "|201507|   26|  29|  36|  31|  33|\n",
      "|201506|   29|  36|  31|  33|  18|\n",
      "|201505|   36|  31|  33|  18|   8|\n",
      "|201504|   31|  33|  18|   8|  30|\n",
      "|201503|   33|  18|   8|  30|  26|\n",
      "|201502|   18|   8|  30|  26|  18|\n",
      "+------+-----+----+----+----+----+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# after creating the lagged features\n",
    "n_lags = 4\n",
    "window_col = 'Time'\n",
    "feature_col = 'label'\n",
    "lagged = create_lagged_features(subset, window_col, feature_col, n_lags)\n",
    "lagged.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from pyspark.ml import Pipeline\n",
    "from pyspark.ml.feature import VectorAssembler\n",
    "from pyspark.ml.evaluation import RegressionEvaluator\n",
    "from pyspark.ml.regression import RandomForestRegressor\n",
    "from pyspark.ml.tuning import CrossValidator, ParamGridBuilder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+------+-----+----+----+----+----+--------------------+------------------+\n",
      "|  Time|label|Lag1|Lag2|Lag3|Lag4|            features|        prediction|\n",
      "+------+-----+----+----+----+----+--------------------+------------------+\n",
      "|201521|    8|  28|  43|  26|  26|[28.0,43.0,26.0,2...|29.834430178389503|\n",
      "|201520|   28|  43|  26|  26|  24|[43.0,26.0,26.0,2...| 34.39803203817977|\n",
      "|201519|   43|  26|  26|  24|  34|[26.0,26.0,24.0,3...|31.521093155626154|\n",
      "|201518|   26|  26|  24|  34|  42|[26.0,24.0,34.0,4...| 34.41316196440313|\n",
      "|201517|   26|  24|  34|  42|  25|[24.0,34.0,42.0,2...|34.674297044281694|\n",
      "|201516|   24|  34|  42|  25|  24|[34.0,42.0,25.0,2...| 32.26878850183798|\n",
      "|201515|   34|  42|  25|  24|  33|[42.0,25.0,24.0,3...| 35.18136116275889|\n",
      "|201514|   42|  25|  24|  33|  28|[25.0,24.0,33.0,2...| 32.24527325803077|\n",
      "|201513|   25|  24|  33|  28|  29|[24.0,33.0,28.0,2...|31.935435215560865|\n",
      "|201512|   24|  33|  28|  29|  26|[33.0,28.0,29.0,2...|32.834217465625365|\n",
      "|201511|   33|  28|  29|  26|  26|[28.0,29.0,26.0,2...| 31.57906156600725|\n",
      "|201510|   28|  29|  26|  26|  29|[29.0,26.0,26.0,2...|31.375338669773843|\n",
      "|201509|   29|  26|  26|  29|  36|[26.0,26.0,29.0,3...| 31.47505117394048|\n",
      "|201508|   26|  26|  29|  36|  31|[26.0,29.0,36.0,3...| 32.32712515844954|\n",
      "|201507|   26|  29|  36|  31|  33|[29.0,36.0,31.0,3...|32.472301862647015|\n",
      "|201506|   29|  36|  31|  33|  18|[36.0,31.0,33.0,1...| 34.41050384619531|\n",
      "|201505|   36|  31|  33|  18|   8|[31.0,33.0,18.0,8.0]| 32.49777146755035|\n",
      "|201504|   31|  33|  18|   8|  30|[33.0,18.0,8.0,30.0]| 33.98415110131278|\n",
      "|201503|   33|  18|   8|  30|  26|[18.0,8.0,30.0,26.0]| 32.91346389481189|\n",
      "|201502|   18|   8|  30|  26|  18|[8.0,30.0,26.0,18.0]|30.391942175504155|\n",
      "+------+-----+----+----+----+----+--------------------+------------------+\n",
      "only showing top 20 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# assemble all the columns into one single 'features' column and train\n",
    "# a randomforest model (includes cross validation and grid search on the\n",
    "# maxDepth parameter); we will use r squared to pick to best model; when\n",
    "# having a multi-step process, the recommended way is to define every\n",
    "# stage inside a pipeline\n",
    "input_cols = ['Lag' + str(j) for j in range(1, n_lags + 1)]\n",
    "assembler = VectorAssembler(inputCols = input_cols, outputCol = 'features')\n",
    "rf = RandomForestRegressor(numTrees = 30)\n",
    "stages = [assembler, rf]\n",
    "pipeline = Pipeline(stages = stages)\n",
    "param_grid = ParamGridBuilder().addGrid(rf.maxDepth, [5, 6, 7]).build()\n",
    "evaluator = RegressionEvaluator(labelCol = 'label', \n",
    "                                predictionCol = 'prediction', \n",
    "                                metricName = 'r2')\n",
    "rf_grid = CrossValidator(\n",
    "    estimator = pipeline,\n",
    "    estimatorParamMaps = param_grid,\n",
    "    evaluator = evaluator,\n",
    "    numFolds = 3\n",
    ").fit(lagged)\n",
    "lagged_fitted = rf_grid.transform(lagged)\n",
    "lagged_fitted.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.3799936891891459"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "eval_metric = evaluator.evaluate(lagged_fitted)\n",
    "eval_metric"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+------+----+----+----+------------------+--------------------+-----------------+\n",
      "|  Time|Lag2|Lag3|Lag4|              Lag1|            features|       prediction|\n",
      "+------+----+----+----+------------------+--------------------+-----------------+\n",
      "|201521|  28|  43|  26|29.834430178389503|[29.8344301783895...|35.18939227938604|\n",
      "+------+----+----+----+------------------+--------------------+-----------------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# to predict for next week, the prediction column will now become one of\n",
    "# the latestest lagged feature and everything will also get shifted down\n",
    "# by 1 time slot (i.e. prediction -> Lag1, Lag1 -> Lag2 and so on)\n",
    "next_week = lagged_fitted.limit(1).drop('features', 'label', 'Lag' + str(n_lags))\n",
    "\n",
    "for lag in reversed(range(1, n_lags)):\n",
    "    current_col = 'Lag' + str(lag)\n",
    "    next_col = 'Lag' + str(lag + 1)\n",
    "    next_week = next_week.withColumnRenamed(current_col, next_col)\n",
    "    \n",
    "next_week = next_week.withColumnRenamed('prediction', 'Lag1')\n",
    "\n",
    "next_week_pred = rf_grid.transform(next_week)\n",
    "prediction = (next_week_pred.\n",
    "              rdd.map(lambda x: x.prediction).\n",
    "              collect()[0])\n",
    "next_week_pred.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The following section ties everything together: loop through all the beats, create the lagged feature, make the prediction for the next week, report the prediction and the model's performance."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 2/2 [00:16<00:00,  8.46s/it]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>beat</th>\n",
       "      <th>prediction</th>\n",
       "      <th>r2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1011</td>\n",
       "      <td>35.189392</td>\n",
       "      <td>0.379994</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2033</td>\n",
       "      <td>7.075241</td>\n",
       "      <td>0.596205</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   beat  prediction        r2\n",
       "0  1011   35.189392  0.379994\n",
       "1  2033    7.075241  0.596205"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "\n",
    "# switch to loop over the unqiue_beats variable to predict on all the beats\n",
    "results = []\n",
    "for beat in tqdm(['1011', '2033']):\n",
    "    subset = (crime_count.\n",
    "              filter(F.col('Beat') == beat).\n",
    "              drop('Beat'))\n",
    "    \n",
    "    # some beats might not have enough records,\n",
    "    # here we simply leave them out\n",
    "    if subset.count() > 30:\n",
    "\n",
    "        # create lagged column for 4 time period\n",
    "        # e.g. use week 1, 2, 3's number to predict the\n",
    "        # number for week 4\n",
    "        n_lags = 4\n",
    "        window_col = 'Time'\n",
    "        feature_col = 'label'\n",
    "        lagged = create_lagged_features(subset, window_col, feature_col, n_lags)\n",
    "\n",
    "        # tune the max depth of a random forest with a 5 fold cross validation\n",
    "        input_cols = ['Lag' + str(j) for j in range(1, n_lags + 1)]\n",
    "        assembler = VectorAssembler(inputCols = input_cols, outputCol = 'features')\n",
    "        rf = RandomForestRegressor(numTrees = 30)\n",
    "        stages = [assembler, rf]\n",
    "        pipeline = Pipeline(stages = stages)\n",
    "        param_grid = ParamGridBuilder().addGrid(rf.maxDepth, [5, 6, 7]).build()\n",
    "        evaluator = RegressionEvaluator(labelCol = 'label', \n",
    "                                        predictionCol = 'prediction', \n",
    "                                        metricName = 'r2')\n",
    "        rf_grid = CrossValidator(\n",
    "            estimator = pipeline,\n",
    "            estimatorParamMaps = param_grid,\n",
    "            evaluator = evaluator,\n",
    "            numFolds = 3\n",
    "        ).fit(lagged)\n",
    "        lagged_fitted = rf_grid.transform(lagged)\n",
    "\n",
    "        # transform the data to perform the prediction for the next week\n",
    "        next_week = (lagged_fitted.\n",
    "                     limit(1).\n",
    "                     drop('features', 'label', 'Lag' + str(n_lags)))\n",
    "\n",
    "        for lag in reversed(range(1, n_lags)):\n",
    "            current_col = 'Lag' + str(lag)\n",
    "            next_col = 'Lag' + str(lag + 1)\n",
    "            next_week = next_week.withColumnRenamed(current_col, next_col)\n",
    "\n",
    "        next_week = next_week.withColumnRenamed('prediction', 'Lag1')\n",
    "\n",
    "        next_week_pred = rf_grid.transform(next_week)\n",
    "        prediction = (next_week_pred.\n",
    "                      rdd.map(lambda x: x.prediction).\n",
    "                      collect()[0])\n",
    "\n",
    "        eval_metric = evaluator.evaluate(lagged_fitted)\n",
    "\n",
    "        result = beat, prediction, eval_metric\n",
    "        results.append(result)\n",
    "    \n",
    "df_results = pd.DataFrame(results, columns = ['beat', 'prediction', 'r2'])\n",
    "df_results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Question 4\n",
    "\n",
    "Find patterns of crimes with arrest with respect to time of the day, day of the week, and month.\n",
    "\n",
    "Base on the results below, we see crimes mostly happen during the evening, the summer and on weekdays. The rationale behind why summer time has the most crimes was already mentioned in question 1 and the fact that most crime happens during the evening is not that surprising as well as that's when there are less people on the street to help you and the lack of sunlight also gives the criminial an advantage (i.e. you might not notice them approaching or remember how their face looks like).\n",
    "\n",
    "As for why there is a huge drop in the number of crimes during Sunday, one possible reason is people tend to spend the evening at home to get ready for Monday. Thus there's less chance of being a subject of criminal activities."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[Row(ID='10078659', Case Number='HY267429', Date='05/19/2015 11:57:00 PM', Block='010XX E 79TH ST', IUCR='143A', Primary Type='WEAPONS VIOLATION', Description='UNLAWFUL POSS OF HANDGUN', Location Description='STREET', Arrest=True, Domestic='false', Beat='0624', District='006', Ward='8', Community Area='44', FBI Code='15', X Coordinate='1184626', Y Coordinate='1852799', Year='2015', Updated On='05/26/2015 12:42:06 PM', Latitude='41.751242944', Longitude='-87.599004724', Location='(41.751242944, -87.599004724)', DateTime=None)]"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_path = 'Crimes_-_2001_to_present.csv'\n",
    "df = spark.read.csv(data_path, sep = ',', header = True)\n",
    "\n",
    "# convert the Date column to spark timestamp format to retrieve the time,\n",
    "# date formats follow the formats at java.text.SimpleDateFormat\n",
    "#\n",
    "# Reference\n",
    "# ---------\n",
    "# http://docs.oracle.com/javase/6/docs/api/java/text/SimpleDateFormat.html\n",
    "# https://stackoverflow.com/questions/25006607/how-to-get-day-of-week-in-sparksql\n",
    "timestamp_format = 'MM/dd/yyyy HH:mm:ss aa'\n",
    "crime = (df.\n",
    "         withColumn('Arrest', df['Arrest'].cast('boolean')).\n",
    "         filter(F.col('Arrest')).\n",
    "         withColumn('DateTime', \n",
    "                    F.unix_timestamp(F.col('Date'), timestamp_format).cast('timestamp')))\n",
    "crime.take(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Hour</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1133689</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>51649</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2.0</td>\n",
       "      <td>41354</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3.0</td>\n",
       "      <td>31338</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4.0</td>\n",
       "      <td>21620</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5.0</td>\n",
       "      <td>14717</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6.0</td>\n",
       "      <td>18029</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7.0</td>\n",
       "      <td>23888</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8.0</td>\n",
       "      <td>37567</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9.0</td>\n",
       "      <td>48687</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10.0</td>\n",
       "      <td>67762</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>11.0</td>\n",
       "      <td>89522</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>12.0</td>\n",
       "      <td>94380</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Hour    count\n",
       "0    NaN  1133689\n",
       "1    1.0    51649\n",
       "2    2.0    41354\n",
       "3    3.0    31338\n",
       "4    4.0    21620\n",
       "5    5.0    14717\n",
       "6    6.0    18029\n",
       "7    7.0    23888\n",
       "8    8.0    37567\n",
       "9    9.0    48687\n",
       "10  10.0    67762\n",
       "11  11.0    89522\n",
       "12  12.0    94380"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "crime_per_hour = (crime.\n",
    "                  select(F.hour(F.col('DateTime')).alias('Hour')).\n",
    "                  groupby('Hour').\n",
    "                  count().\n",
    "                  orderBy('Hour').\n",
    "                  toPandas())\n",
    "crime_per_hour.to_csv('crime_per_hour.csv', index = False)\n",
    "crime_per_hour"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Day</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Wednesday</td>\n",
       "      <td>78440</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Tuesday</td>\n",
       "      <td>75405</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>None</td>\n",
       "      <td>1133689</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Friday</td>\n",
       "      <td>80250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Thursday</td>\n",
       "      <td>78759</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Saturday</td>\n",
       "      <td>82115</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Monday</td>\n",
       "      <td>69794</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Sunday</td>\n",
       "      <td>75750</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Day    count\n",
       "0  Wednesday    78440\n",
       "1    Tuesday    75405\n",
       "2       None  1133689\n",
       "3     Friday    80250\n",
       "4   Thursday    78759\n",
       "5   Saturday    82115\n",
       "6     Monday    69794\n",
       "7     Sunday    75750"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 'EEEE' stands for the full name of the weekday\n",
    "crime_per_day = (crime.\n",
    "                 select(F.date_format(F.col('DateTime'), 'EEEE').alias('Day')).\n",
    "                 groupby('Day').\n",
    "                 count().\n",
    "                 toPandas())\n",
    "crime_per_day.to_csv('crime_per_day.csv', index = False)\n",
    "crime_per_day"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Month</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NaN</td>\n",
       "      <td>1133689</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.0</td>\n",
       "      <td>45640</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2.0</td>\n",
       "      <td>42593</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3.0</td>\n",
       "      <td>48676</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4.0</td>\n",
       "      <td>45137</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5.0</td>\n",
       "      <td>47924</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6.0</td>\n",
       "      <td>44360</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7.0</td>\n",
       "      <td>46000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8.0</td>\n",
       "      <td>46633</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9.0</td>\n",
       "      <td>45210</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10.0</td>\n",
       "      <td>46118</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>11.0</td>\n",
       "      <td>43085</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>12.0</td>\n",
       "      <td>39137</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    Month    count\n",
       "0     NaN  1133689\n",
       "1     1.0    45640\n",
       "2     2.0    42593\n",
       "3     3.0    48676\n",
       "4     4.0    45137\n",
       "5     5.0    47924\n",
       "6     6.0    44360\n",
       "7     7.0    46000\n",
       "8     8.0    46633\n",
       "9     9.0    45210\n",
       "10   10.0    46118\n",
       "11   11.0    43085\n",
       "12   12.0    39137"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "crime_per_month = (crime.\n",
    "                   select(F.month(F.col('DateTime')).alias('Month')).\n",
    "                   groupby('Month').\n",
    "                   count().\n",
    "                   orderBy('Month').\n",
    "                   toPandas())\n",
    "crime_per_month.to_csv('crime_per_month.csv', index = False)\n",
    "crime_per_month"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# terminate the spark session\n",
    "spark.stop()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Reference\n",
    "\n",
    "- [Drill Documentation: SQL Window Functions Introduction](https://drill.apache.org/docs/sql-window-functions-introduction/)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  },
  "toc": {
   "nav_menu": {
    "height": "92px",
    "width": "275px"
   },
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": true,
   "toc_position": {},
   "toc_section_display": "block",
   "toc_window_display": true
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}