{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder\n",
    "from sklearn.compose import ColumnTransformer\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.model_selection import  cross_validate\n",
    "from sklearn.base import BaseEstimator\n",
    "from sklearn.metrics import (make_scorer, mean_absolute_error)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'0.21.2'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn import __version__\n",
    "__version__\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from _compute_median import _read_all_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = _read_all_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>address_type</th>\n",
       "      <th>agency</th>\n",
       "      <th>agency_name</th>\n",
       "      <th>bbl</th>\n",
       "      <th>borough</th>\n",
       "      <th>bridge_highway_direction</th>\n",
       "      <th>bridge_highway_name</th>\n",
       "      <th>bridge_highway_segment</th>\n",
       "      <th>city</th>\n",
       "      <th>closed_date</th>\n",
       "      <th>...</th>\n",
       "      <th>resolution_description</th>\n",
       "      <th>road_ramp</th>\n",
       "      <th>status</th>\n",
       "      <th>street_name</th>\n",
       "      <th>taxi_company_borough</th>\n",
       "      <th>taxi_pick_up_location</th>\n",
       "      <th>unique_key</th>\n",
       "      <th>x_coordinate_state_plane</th>\n",
       "      <th>y_coordinate_state_plane</th>\n",
       "      <th>vehicle_type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ADDRESS</td>\n",
       "      <td>DOHMH</td>\n",
       "      <td>Department of Health and Mental Hygiene</td>\n",
       "      <td>5.080220e+09</td>\n",
       "      <td>STATEN ISLAND</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>STATEN ISLAND</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>The Department of Health and Mental Hygiene wi...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Open</td>\n",
       "      <td>WOOD AVENUE</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43058507</td>\n",
       "      <td>916296.0</td>\n",
       "      <td>126389.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ADDRESS</td>\n",
       "      <td>DOHMH</td>\n",
       "      <td>Department of Health and Mental Hygiene</td>\n",
       "      <td>4.097000e+09</td>\n",
       "      <td>QUEENS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Jamaica</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>The Department of Health and Mental Hygiene wi...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Open</td>\n",
       "      <td>87 AVENUE</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43058506</td>\n",
       "      <td>1035684.0</td>\n",
       "      <td>196858.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>INTERSECTION</td>\n",
       "      <td>DOHMH</td>\n",
       "      <td>Department of Health and Mental Hygiene</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BROOKLYN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>BROOKLYN</td>\n",
       "      <td>NaT</td>\n",
       "      <td>...</td>\n",
       "      <td>The Department of Health and Mental Hygiene wi...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Open</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43060680</td>\n",
       "      <td>1023962.0</td>\n",
       "      <td>182899.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ADDRESS</td>\n",
       "      <td>DOHMH</td>\n",
       "      <td>Department of Health and Mental Hygiene</td>\n",
       "      <td>4.104670e+09</td>\n",
       "      <td>QUEENS</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Hollis</td>\n",
       "      <td>2019-06-25</td>\n",
       "      <td>...</td>\n",
       "      <td>The Department of Health and Mental Hygiene wi...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Closed</td>\n",
       "      <td>196 STREET</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>43056246</td>\n",
       "      <td>1049383.0</td>\n",
       "      <td>200048.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4 rows × 41 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   address_type agency                              agency_name           bbl  \\\n",
       "0       ADDRESS  DOHMH  Department of Health and Mental Hygiene  5.080220e+09   \n",
       "1       ADDRESS  DOHMH  Department of Health and Mental Hygiene  4.097000e+09   \n",
       "2  INTERSECTION  DOHMH  Department of Health and Mental Hygiene           NaN   \n",
       "3       ADDRESS  DOHMH  Department of Health and Mental Hygiene  4.104670e+09   \n",
       "\n",
       "         borough bridge_highway_direction bridge_highway_name  \\\n",
       "0  STATEN ISLAND                      NaN                 NaN   \n",
       "1         QUEENS                      NaN                 NaN   \n",
       "2       BROOKLYN                      NaN                 NaN   \n",
       "3         QUEENS                      NaN                 NaN   \n",
       "\n",
       "  bridge_highway_segment           city closed_date  ...  \\\n",
       "0                    NaN  STATEN ISLAND         NaT  ...   \n",
       "1                    NaN        Jamaica         NaT  ...   \n",
       "2                    NaN       BROOKLYN         NaT  ...   \n",
       "3                    NaN         Hollis  2019-06-25  ...   \n",
       "\n",
       "                              resolution_description road_ramp  status  \\\n",
       "0  The Department of Health and Mental Hygiene wi...       NaN    Open   \n",
       "1  The Department of Health and Mental Hygiene wi...       NaN    Open   \n",
       "2  The Department of Health and Mental Hygiene wi...       NaN    Open   \n",
       "3  The Department of Health and Mental Hygiene wi...       NaN  Closed   \n",
       "\n",
       "   street_name taxi_company_borough taxi_pick_up_location unique_key  \\\n",
       "0  WOOD AVENUE                  NaN                   NaN   43058507   \n",
       "1    87 AVENUE                  NaN                   NaN   43058506   \n",
       "2          NaN                  NaN                   NaN   43060680   \n",
       "3   196 STREET                  NaN                   NaN   43056246   \n",
       "\n",
       "  x_coordinate_state_plane y_coordinate_state_plane vehicle_type  \n",
       "0                 916296.0                 126389.0          NaN  \n",
       "1                1035684.0                 196858.0          NaN  \n",
       "2                1023962.0                 182899.0          NaN  \n",
       "3                1049383.0                 200048.0          NaN  \n",
       "\n",
       "[4 rows x 41 columns]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head(4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['address_type', 'agency', 'agency_name', 'bbl', 'borough',\n",
       "       'bridge_highway_direction', 'bridge_highway_name',\n",
       "       'bridge_highway_segment', 'city', 'closed_date', 'community_board',\n",
       "       'complaint_type', 'created_date', 'cross_street_1', 'cross_street_2',\n",
       "       'descriptor', 'due_date', 'facility_type', 'incident_address',\n",
       "       'incident_zip', 'intersection_street_1', 'intersection_street_2',\n",
       "       'landmark', 'latitude', 'location', 'location_type', 'longitude',\n",
       "       'open_data_channel_type', 'park_borough', 'park_facility_name',\n",
       "       'resolution_action_updated_date', 'resolution_description', 'road_ramp',\n",
       "       'status', 'street_name', 'taxi_company_borough',\n",
       "       'taxi_pick_up_location', 'unique_key', 'x_coordinate_state_plane',\n",
       "       'y_coordinate_state_plane', 'vehicle_type'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "features = ['complaint_type', 'latitude','longitude', 'created_date']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "data['time_to_action'] = (data['resolution_action_updated_date'] - data['created_date']) # / pd.np.timedelta64(1, 'M')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_ = data.loc[data.complaint_type.str.contains('Noise'), features + ['time_to_action']]\n",
    "data_ = data_[data_.notnull().all(1)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "data_['time_to_action'] = (data_['time_to_action'].dt.seconds / 3600).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = data_['time_to_action']\n",
    "X = data_.drop('time_to_action', axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "40698"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(X)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Little cleaning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Noise - Commercial', 'Noise - Street/Sidewalk', 'Noise - Vehicle',\n",
       "       'Noise - Residential', 'Noise', 'Noise - Park',\n",
       "       'Noise - House of Worship', 'Collection Truck Noise'], dtype=object)"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X['complaint_type'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "proper_names = {\n",
    "    'Noise - Commercial':'commercial', \n",
    "    'Noise - Residential':'residential',\n",
    "    'Noise - Street/Sidewalk':'street',\n",
    "    'Noise - Vehicle':'vehicle', \n",
    "    'Noise - Park':'park',\n",
    "    'Noise':'other', \n",
    "    'Noise - House of Worship':'worship', \n",
    "    'Collection Truck Noise':'truck'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "X['complaint_type'] = X['complaint_type'].map(proper_names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>complaint_type</th>\n",
       "      <th>latitude</th>\n",
       "      <th>longitude</th>\n",
       "      <th>created_date</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>commercial</td>\n",
       "      <td>40.717302</td>\n",
       "      <td>-73.949248</td>\n",
       "      <td>2019-06-23 00:00:00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>street</td>\n",
       "      <td>40.837576</td>\n",
       "      <td>-73.889396</td>\n",
       "      <td>2019-06-23 00:00:08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>vehicle</td>\n",
       "      <td>40.833693</td>\n",
       "      <td>-73.913846</td>\n",
       "      <td>2019-06-23 00:00:16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>residential</td>\n",
       "      <td>40.823469</td>\n",
       "      <td>-73.924460</td>\n",
       "      <td>2019-06-23 00:00:25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>street</td>\n",
       "      <td>40.848693</td>\n",
       "      <td>-73.903279</td>\n",
       "      <td>2019-06-23 00:00:28</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   complaint_type   latitude  longitude        created_date\n",
       "7      commercial  40.717302 -73.949248 2019-06-23 00:00:00\n",
       "10         street  40.837576 -73.889396 2019-06-23 00:00:08\n",
       "11        vehicle  40.833693 -73.913846 2019-06-23 00:00:16\n",
       "12    residential  40.823469 -73.924460 2019-06-23 00:00:25\n",
       "13         street  40.848693 -73.903279 2019-06-23 00:00:28"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.head(5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Feature Generation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# class TimeTransformer(BaseEstimator):\n",
    "#     cols = None\n",
    "    \n",
    "#     def __init__(self, cols=None):\n",
    "#         self.cols = cols\n",
    "    \n",
    "#     def fit(self, X=None, y=None, groups=None):\n",
    "        \n",
    "#         if self.cols is None:\n",
    "#             self.cols = X.select_dtypes(include=pd.np.datetime64).columns\n",
    "#         return self\n",
    "    \n",
    "#     def transform(self, X, y=None, groups=None, cols=None):\n",
    "        \n",
    "#         for col in self.cols:\n",
    "#             dates = X[col]\n",
    "#             X = X.drop(col, axis=1)\n",
    "#             X[f'{col}_dow'] = dates.dt.dayofweek\n",
    "#             X[f'{col}_doy'] = dates.dt.dayofyear\n",
    "#             X[f'{col}_tod'] = dates.dt.second\n",
    "\n",
    "#         return X\n",
    "\n",
    "from ml import TimeTransformer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "t = TimeTransformer(cols=['created_date'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "# X.select_dtypes(include=pd.np.datetime64)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>complaint_type</th>\n",
       "      <th>latitude</th>\n",
       "      <th>longitude</th>\n",
       "      <th>created_date_dow</th>\n",
       "      <th>created_date_doy</th>\n",
       "      <th>created_date_tod</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>commercial</td>\n",
       "      <td>40.717302</td>\n",
       "      <td>-73.949248</td>\n",
       "      <td>6</td>\n",
       "      <td>174</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>street</td>\n",
       "      <td>40.837576</td>\n",
       "      <td>-73.889396</td>\n",
       "      <td>6</td>\n",
       "      <td>174</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>vehicle</td>\n",
       "      <td>40.833693</td>\n",
       "      <td>-73.913846</td>\n",
       "      <td>6</td>\n",
       "      <td>174</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   complaint_type   latitude  longitude  created_date_dow  created_date_doy  \\\n",
       "7      commercial  40.717302 -73.949248                 6               174   \n",
       "10         street  40.837576 -73.889396                 6               174   \n",
       "11        vehicle  40.833693 -73.913846                 6               174   \n",
       "\n",
       "    created_date_tod  \n",
       "7                  0  \n",
       "10                 8  \n",
       "11                16  "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "t.fit(X).transform(X).head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "cats = X['complaint_type'].unique().tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "ct = ColumnTransformer(\n",
    "        transformers=[\n",
    "            ('ordinal', OrdinalEncoder(categories=[cats,]), [0]),\n",
    "            ('time', TimeTransformer(cols=['created_date']), [3])\n",
    "        ], remainder='passthrough')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = RandomForestRegressor(n_estimators=100, random_state=2019)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "pipe = Pipeline(steps=[('preprocessor', ct),\n",
    "                        ('model', model)])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cross-validate"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.\n",
      "[Parallel(n_jobs=3)]: Done   5 out of   5 | elapsed:   27.7s finished\n"
     ]
    }
   ],
   "source": [
    "cv = cross_validate(pipe, X, y, cv=5, scoring=make_scorer(mean_absolute_error),\n",
    "                    verbose=1, n_jobs=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>fit_time</th>\n",
       "      <th>score_time</th>\n",
       "      <th>test_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>13.226047</td>\n",
       "      <td>0.170526</td>\n",
       "      <td>2.843741</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>13.476237</td>\n",
       "      <td>0.211020</td>\n",
       "      <td>3.919784</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>13.026504</td>\n",
       "      <td>0.177489</td>\n",
       "      <td>3.015327</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>12.111729</td>\n",
       "      <td>0.160368</td>\n",
       "      <td>3.072551</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>12.182463</td>\n",
       "      <td>0.100503</td>\n",
       "      <td>2.752961</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    fit_time  score_time  test_score\n",
       "0  13.226047    0.170526    2.843741\n",
       "1  13.476237    0.211020    3.919784\n",
       "2  13.026504    0.177489    3.015327\n",
       "3  12.111729    0.160368    3.072551\n",
       "4  12.182463    0.100503    2.752961"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(cv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "3.1208729127942547"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(cv)['test_score'].mean()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train and store Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Pipeline(memory=None,\n",
       "         steps=[('preprocessor',\n",
       "                 ColumnTransformer(n_jobs=None, remainder='passthrough',\n",
       "                                   sparse_threshold=0.3,\n",
       "                                   transformer_weights=None,\n",
       "                                   transformers=[('ordinal',\n",
       "                                                  OrdinalEncoder(categories=[['commercial',\n",
       "                                                                              'street',\n",
       "                                                                              'vehicle',\n",
       "                                                                              'residential',\n",
       "                                                                              'other',\n",
       "                                                                              'park',\n",
       "                                                                              'worship',\n",
       "                                                                              'truck']],\n",
       "                                                                 dtype=<class 'numpy.float64'>),\n",
       "                                                  [0]),\n",
       "                                                 ('time',\n",
       "                                                  TimeTransformer(cols=['create...\n",
       "                                   verbose=False)),\n",
       "                ('model',\n",
       "                 RandomForestRegressor(bootstrap=True, criterion='mse',\n",
       "                                       max_depth=None, max_features='auto',\n",
       "                                       max_leaf_nodes=None,\n",
       "                                       min_impurity_decrease=0.0,\n",
       "                                       min_impurity_split=None,\n",
       "                                       min_samples_leaf=1, min_samples_split=2,\n",
       "                                       min_weight_fraction_leaf=0.0,\n",
       "                                       n_estimators=100, n_jobs=None,\n",
       "                                       oob_score=False, random_state=2019,\n",
       "                                       verbose=0, warm_start=False))],\n",
       "         verbose=False)"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pipe.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "11.37"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pipe.predict(X.head(1))[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# import joblib\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('./model.pkl', 'wb') as f:\n",
    "    joblib.dump(pipe, f)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Testing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [],
   "source": [
    "singleton = pd.DataFrame([{'complaint_type':'dummy', \n",
    "                           'latitude':1.1111, \n",
    "                           'longitude':1.1111,\n",
    "                           'created_date':pd.to_datetime('2019-01-01')}])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [],
   "source": [
    "BODY = {\n",
    "    'complaint_type': 'residential',\n",
    "    'lat': \"40.636626\",\n",
    "    'lon': \"-73.951694\",\n",
    "    \"date\": \"2019-06-08 00:00:09\"\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "metadata": {},
   "outputs": [],
   "source": [
    "mapping = {\n",
    "    'lon': 'longitude',\n",
    "    'lat': 'latitude',\n",
    "    'date': 'created_date'\n",
    "}\n",
    "\n",
    "dtypes = {\n",
    "    'lon': float,\n",
    "    'lat': float,\n",
    "    'date': pd.to_datetime\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "metadata": {},
   "outputs": [],
   "source": [
    "singleton.loc[0, 'complaint_type'] = BODY['complaint_type']\n",
    "\n",
    "for k, col in mapping.items():\n",
    "    singleton.loc[0, col] = dtypes[k](BODY.get(k, pd.np.nan))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>complaint_type</th>\n",
       "      <th>created_date</th>\n",
       "      <th>latitude</th>\n",
       "      <th>longitude</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>residential</td>\n",
       "      <td>2019-06-08 00:00:09</td>\n",
       "      <td>40.636626</td>\n",
       "      <td>-73.951694</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  complaint_type        created_date   latitude  longitude\n",
       "0    residential 2019-06-08 00:00:09  40.636626 -73.951694"
      ]
     },
     "execution_count": 71,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "singleton"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "complaint_type            object\n",
       "created_date      datetime64[ns]\n",
       "latitude                 float64\n",
       "longitude                float64\n",
       "dtype: object"
      ]
     },
     "execution_count": 72,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "singleton.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 73,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "complaint_type            object\n",
       "latitude                 float64\n",
       "longitude                float64\n",
       "created_date      datetime64[ns]\n",
       "dtype: object"
      ]
     },
     "execution_count": 73,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "X.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.89"
      ]
     },
     "execution_count": 74,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pipe.predict(singleton[['complaint_type', 'latitude', 'longitude','created_date']])[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 75,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "complaint_type            object\n",
       "latitude                 float64\n",
       "longitude                float64\n",
       "created_date      datetime64[ns]\n",
       "dtype: object"
      ]
     },
     "execution_count": 75,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "singleton[['complaint_type', 'latitude', 'longitude','created_date']].dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}