{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from scipy.sparse import hstack\n",
    "\n",
    "import eli5\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.model_selection import TimeSeriesSplit, cross_val_score, GridSearchCV\n",
    "from sklearn.metrics import roc_auc_score\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from matplotlib import pyplot as plt\n",
    "import seaborn as sns\n",
    "from IPython.display import display_html\n",
    "import re"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "PATH_TO_DATA = '/Users/user/Dropbox/ods/alice/'\n",
    "SEED = 17"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "def prepare_sparse_features(path_to_train, path_to_test, path_to_site_dict,\n",
    "                           vectorizer_params):\n",
    "    times = ['time%s' % i for i in range(1, 11)]\n",
    "    train_df = pd.read_csv(path_to_train,\n",
    "                       index_col='session_id', parse_dates=times)\n",
    "    test_df = pd.read_csv(path_to_test,\n",
    "                      index_col='session_id', parse_dates=times)\n",
    "\n",
    "    # Sort the data by time\n",
    "    train_df = train_df.sort_values(by='time1')\n",
    "    train_df = train_df.loc[(train_df.time1 < '2014-04-16') & (train_df.time1 > '2013-02-12')]\n",
    "\n",
    "    \n",
    "    # Reading site -> id mapping provided by competition organizers \n",
    "    with open(path_to_site_dict, 'rb') as f:\n",
    "        site2id = pickle.load(f)\n",
    "    # Create an inverse id _> site mapping\n",
    "    id2site = {v:k.replace('www.', '') for (k, v) in site2id.items()}\n",
    "    # We treat site with id 0 as \"unknown\"\n",
    "    id2site[0] = 'unknown'\n",
    "    \n",
    "    # Transform data into format which can be fed into TfidfVectorizer\n",
    "    # This time we prefer to represent sessions with site names, not site ids. \n",
    "    # It's less efficient but thus it'll be more convenient to interpret model weights.\n",
    "    sites = ['site%s' % i for i in range(1, 11)]\n",
    "    train_sessions = train_df[sites].fillna(0).astype('int').apply(lambda row: \n",
    "                                                     ' '.join([id2site[i] for i in row]), axis=1).tolist()\n",
    "    test_sessions = test_df[sites].fillna(0).astype('int').apply(lambda row: \n",
    "                                                     ' '.join([id2site[i] for i in row]), axis=1).tolist()\n",
    "        \n",
    "    # We'll tell TfidfVectorizer that we'd like to split data by whitespaces only \n",
    "    # So that it doesn't split by dots (we wouldn't like to have 'mail.google.com' \n",
    "    # To be split into 'mail', 'google' and 'com')\n",
    "    vectorizer = TfidfVectorizer(**vectorizer_params)\n",
    "    X_train = vectorizer.fit_transform(train_sessions)\n",
    "    X_test = vectorizer.transform(test_sessions)\n",
    "    y_train = train_df['target'].astype('int').values\n",
    "    \n",
    "    # We'll need site visit times for further feature engineering\n",
    "    train_times, test_times = train_df[times], test_df[times]\n",
    "    \n",
    "    return X_train, X_test, y_train, vectorizer, train_times, test_times"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 48.1 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "X_train_sites, X_test_sites, y_train, vectorizer, train_times, test_times = prepare_sparse_features(\n",
    "    path_to_train=os.path.join(PATH_TO_DATA, 'train_sessions.csv'),\n",
    "    path_to_test=os.path.join(PATH_TO_DATA, 'test_sessions.csv'),\n",
    "    path_to_site_dict=os.path.join(PATH_TO_DATA, 'site_dic.pkl'),\n",
    "    vectorizer_params={'ngram_range': (1, 5), \n",
    "                       'max_features': 25000,\n",
    "                       'sublinear_tf': True,\n",
    "                       'tokenizer': lambda s: s.split()}\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.axes._subplots.AxesSubplot at 0x1ce8977eeb8>"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAD4CAYAAAAtrdtxAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAX/UlEQVR4nO3de7CkdX3n8fdHEO8IyKAI4w7ljpZoraizSMXEKLg4UFFAxMB6mShbY1xw1U12xWgFolIlSdRSV7GwmDgYw2VFwmiNgVmCuqmVy6DIVWVElBEcRkFl11oM+t0/+jdrZ+hz6Pn16TPnOO9XVVc//e3n+fbvgfPMp59Ld6eqkCSpxyN29gAkSYuXISJJ6maISJK6GSKSpG6GiCSp2+47ewDzbd99961ly5bt7GFI0qJy3XXX/biqlmxf3+VCZNmyZWzcuHFnD0OSFpUk3x9V93CWJKmbISJJ6maISJK6GSKSpG6GiCSpmyEiSepmiEiSuhkikqRuhogkqdsu94n1XdH6c4+eaPmjT14/RyOR9NvGPRFJUjdDRJLUzRCRJHUzRCRJ3QwRSVI3Q0SS1M0QkSR1M0QkSd2mFiJJHp3kmiTfTHJzkr9o9YOSXJ3ktiQXJtmj1R/VHm9qzy8b6vWuVv92kpcP1Ve22qYkp01rXSRJo01zT+QB4PCqei5wCLAyyWHAWcCHq2o5cB9wcpv/ZOC+qvrXwIfbfCQ5GDgReDawEvhEkt2S7AZ8HDgKOBg4qc0rSZonUwuRGvjf7eEj262Aw4HPtfpa4Ng2fUx7THv+iCRp9Quq6oGq+h6wCTi03TZV1e1V9UvggjavJGmeTPWcSNtjuB64B9gAfBf4aVU92GbZDBzQpg8A7gRoz/8MeNJwfbtlZqpLkubJVL+Asap+BRySZC/gEuBZo2Zr95nhuZnqowKwRtRIshpYDfC0pz3tYUa9833jk6+YaPnn/fEX5mgkkjS7ebk6q6p+CnwZOAzYK8m28DoQuKtNbwaWArTnnwjcO1zfbpmZ6qNe/5yqWlFVK5YsWTIXqyRJYrpXZy1peyAkeQzwMuBW4Erg1W22VcClbXpde0x7/h+rqlr9xHb11kHAcuAa4Fpgebvaaw8GJ9/XTWt9JEkPNc3DWfsDa9tVVI8ALqqqLya5BbggyfuBbwDntvnPBT6TZBODPZATAarq5iQXAbcADwKntMNkJDkVuAzYDVhTVTdPcX0ErFl75ETLv2nV5XM0EkkLwdRCpKpuAJ43on47gyurtq//X+CEGXqdCZw5or4e8BeTJGkn8RPrkqRuhogkqZshIknqZohIkroZIpKkboaIJKmbISJJ6maISJK6GSKSpG6GiCSpmyEiSepmiEiSuhkikqRuhogkqZshIknqZohIkroZIpKkboaIJKmbISJJ6maISJK6GSKSpG6GiCSpmyEiSepmiEiSuk0tRJIsTXJlkluT3Jzkba1+RpIfJrm+3Y4eWuZdSTYl+XaSlw/VV7bapiSnDdUPSnJ1ktuSXJhkj2mtjyTpoaa5J/Ig8CdV9SzgMOCUJAe35z5cVYe023qA9tyJwLOBlcAnkuyWZDfg48BRwMHASUN9zmq9lgP3ASdPcX0kSdvZfVqNq+pu4O42fX+SW4EDZlnkGOCCqnoA+F6STcCh7blNVXU7QJILgGNav8OBf9/mWQucAZw91+uiXdfRl5zVvez64945hyORFqZ5OSeSZBnwPODqVjo1yQ1J1iTZu9UOAO4cWmxzq81UfxLw06p6cLv6qNdfnWRjko1bt26dgzWSJME8hEiSxwMXA2+vqp8z2FN4OnAIgz2VD26bdcTi1VF/aLHqnKpaUVUrlixZsoNrIEmaydQOZwEkeSSDAPlsVX0eoKq2DD3/KeCL7eFmYOnQ4gcCd7XpUfUfA3sl2b3tjQzPL0maB9O8OivAucCtVfWhofr+Q7MdB9zUptcBJyZ5
VJKDgOXANcC1wPJ2JdYeDE6+r6uqAq4EXt2WXwVcOq31kSQ91DT3RF4EvB64Mcn1rfZnDK6uOoTBoac7gDcDVNXNSS4CbmFwZdcpVfUrgCSnApcBuwFrqurm1u+dwAVJ3g98g0FoSZLmyTSvzvonRp+3WD/LMmcCZ46orx+1XLti69Dt65Kk+eEn1iVJ3QwRSVI3Q0SS1G2ql/juKu7+xLsnWn7///iQ00CStCi4JyJJ6maISJK6GSKSpG6GiCSpmyEiSepmiEiSuhkikqRuhogkqZshIknqZohIkroZIpKkboaIJKmbISJJ6maISJK6GSKSpG6GiCSpmyEiSepmiEiSuhkikqRuU/uN9SRLgfOApwC/Bs6pqo8k2Qe4EFgG3AG8pqruSxLgI8DRwC+AP6qqr7deq4D3tNbvr6q1rf4C4NPAY4D1wNuqqqa1Tlr4jrr0pImW/9Ix58/RSKRdwzT3RB4E/qSqngUcBpyS5GDgNOCKqloOXNEeAxwFLG+31cDZAC10TgdeCBwKnJ5k77bM2W3ebcutnOL6SJK2M7U9kaq6G7i7Td+f5FbgAOAY4CVttrXAl4F3tvp5bU/iqiR7Jdm/zbuhqu4FSLIBWJnky8CeVfW1Vj8POBb40rTWSZrEH1x87kTLf/H4k+doJNLcmZdzIkmWAc8Drgae3AJmW9Ds12Y7ALhzaLHNrTZbffOI+qjXX51kY5KNW7dunXR1JEnN1EMkyeOBi4G3V9XPZ5t1RK066g8tVp1TVSuqasWSJUsebsiSpDFNNUSSPJJBgHy2qj7fylvaYSra/T2tvhlYOrT4gcBdD1M/cERdkjRPphYi7Wqrc4Fbq+pDQ0+tA1a16VXApUP1N2TgMOBn7XDXZcCRSfZuJ9SPBC5rz92f5LD2Wm8Y6iVJmgdTO7EOvAh4PXBjkutb7c+ADwAXJTkZ+AFwQntuPYPLezcxuMT3jQBVdW+S9wHXtvneu+0kO/AWfnOJ75fwpLokzatpXp31T4w+bwFwxIj5Czhlhl5rgDUj6huB50wwTEnSBPzEuiSpmyEiSepmiEiSuhkikqRuY4VIkivGqUmSdi2zXp2V5NHAY4F922c0tl1ttSfw1CmPTZK0wD3cJb5vBt7OIDCu4zch8nPg41MclyRpEZg1RKrqI8BHkry1qj42T2OSJC0SY33YsKo+luR3GPyQ1O5D9fOmNC5J0iIwVogk+QzwdOB64FetXAx+uVCStIsa92tPVgAH+9OzkqRh44bITQx+K/3uKY5F0g54xecunmj5L7z6+DkaiXZl44bIvsAtSa4BHthWrKpXTmVUkqRFYdwQOWOag5AkLU7jXp31lWkPRJK0+Ix7ddb9/Ob3y/cAHgn8n6rac1oDkyQtfOPuiTxh+HGSY4FDpzIiSdKi0fXLhlX190lOm+vBaNfz/gtfPtHy7/nDy+ZoJJJ6jHs461VDDx/B4HMjfmZEknZx4+6JvGJo+kHgDuCYOR+NJGlRGfecyBunPRBJ0uIz7o9SHZjkkiT3JNmS5OIkB057cJKkhW3cn8f9G2Adg98VOQD4QqtJknZh44bIkqr6m6p6sN0+DSyZ4rgkSYvAuCHy4ySvS7Jbu70O+MlsCyRZ0w5/3TRUOyPJD5Nc325HDz33riSbknw7ycuH6itbbdPwZcVJDkpydZLbklyYZI/xV1uSNBfGDZE3Aa8BfsTgm3xfDTzcyfZPAytH1D9cVYe023qAJAcDJwLPbst8YltgMfgZ3qOAg4GT2rwAZ7Vey4H7gJPHXBdJ0hwZN0TeB6yqqiVVtR+DUDljtgWq6qvAvWP2Pwa4oKoeqKrvAZsYfCL+UGBTVd1eVb8ELgCOSRLgcOBzbfm1wLFjvpYkaY6MGyL/pqru2/agqu4Fntf5mqcmuaEd7tq71Q4A7hyaZ3OrzVR/EvDTqnpwu/pISVYn2Zhk49atWzuHLUna3rgh8oihf/BJsg99X5lyNoOf2T2EwWGxD25rOWLe6qiP
VFXnVNWKqlqxZInXA0jSXBk3CD4I/K8kn2Pwj/VrgDN39MWqasu26SSfAr7YHm4Glg7NeiBwV5seVf8xsFeS3dveyPD8kqR5MtaeSFWdBxwPbAG2Aq+qqs/s6Isl2X/o4XEMfnYXBp9BOTHJo5IcBCwHrgGuBZa3K7H2YHDyfV37rfcrGZzgB1gFXLqj45EkTWbsQ1JVdQtwy7jzJzkfeAmwb5LNwOnAS5IcwmBv5g7gza33zUkuav0fBE6pql+1PqcClwG7AWuq6ub2Eu8ELkjyfuAbwLnjjk2SNDe6vgp+HFV10ojyjP/QV9WZjDhE1i4DXj+ifjv+pokk7VTjnliXJOkhDBFJUjdDRJLUzRCRJHUzRCRJ3QwRSVI3Q0SS1M0QkSR1M0QkSd0MEUlSN0NEktTNEJEkdTNEJEndpvYtvpIWl+MuvrJ72UuOf+kcjkSLiXsikqRuhogkqZshIknqZohIkroZIpKkboaIJKmbISJJ6maISJK6GSKSpG5TC5Eka5Lck+Smodo+STYkua3d793qSfLRJJuS3JDk+UPLrGrz35Zk1VD9BUlubMt8NEmmtS6SpNGmuSfyaWDldrXTgCuqajlwRXsMcBSwvN1WA2fDIHSA04EXAocCp28LnjbP6qHltn8tSdKUTS1EquqrwL3blY8B1rbptcCxQ/XzauAqYK8k+wMvBzZU1b1VdR+wAVjZntuzqr5WVQWcN9RLkjRP5vucyJOr6m6Adr9fqx8A3Dk03+ZWm62+eUR9pCSrk2xMsnHr1q0Tr4QkaWChnFgfdT6jOuojVdU5VbWiqlYsWbKkc4iSpO3Nd4hsaYeiaPf3tPpmYOnQfAcCdz1M/cARdUnSPJrvEFkHbLvCahVw6VD9De0qrcOAn7XDXZcBRybZu51QPxK4rD13f5LD2lVZbxjqJUmaJ1P7Uaok5wMvAfZNspnBVVYfAC5KcjLwA+CENvt64GhgE/AL4I0AVXVvkvcB17b53ltV207Wv4XBFWCPAb7UbpKkeTS1EKmqk2Z46ogR8xZwygx91gBrRtQ3As+ZZIySpMkslBPrkqRFyBCRJHUzRCRJ3QwRSVI3Q0SS1M0QkSR1M0QkSd0MEUlSN0NEktTNEJEkdTNEJEndDBFJUjdDRJLUzRCRJHUzRCRJ3QwRSVI3Q0SS1M0QkSR1M0QkSd0MEUlSN0NEktTNEJEkdTNEJEnddkqIJLkjyY1Jrk+ysdX2SbIhyW3tfu9WT5KPJtmU5IYkzx/qs6rNf1uSVTtjXSRpV7Yz90ReWlWHVNWK9vg04IqqWg5c0R4DHAUsb7fVwNkwCB3gdOCFwKHA6duCR5I0PxbS4axjgLVtei1w7FD9vBq4Ctgryf7Ay4ENVXVvVd0HbABWzvegJWlXtrNCpIDLk1yXZHWrPbmq7gZo9/u1+gHAnUPLbm61meoPkWR1ko1JNm7dunUOV0OSdm2776TXfVFV3ZVkP2BDkm/NMm9G1GqW+kOLVecA5wCsWLFi5DySpB23U/ZEququdn8PcAmDcxpb2mEq2v09bfbNwNKhxQ8E7pqlLkmaJ/MeIkkel+QJ26aBI4GbgHXAtiusVgGXtul1wBvaVVqHAT9rh7suA45Msnc7oX5kq0mS5snOOJz1ZOCSJNte/++q6h+SXAtclORk4AfACW3+9cDRwCbgF8AbAarq3iTvA65t8723qu6dv9WQJM17iFTV7cBzR9R/Ahwxol7AKTP0WgOsmesxSpLGs5Au8ZUkLTKGiCSpmyEiSepmiEiSuhkikqRuO+sT65J+i/3hxd+ZaPkLj3/GHI1E0+aeiCSpmyEiSepmiEiSuhkikqRuhogkqZshIknqZohIkroZIpKkboaIJKmbISJJ6ubXnkjSBG77b1smWn75qU+eo5HsHO6JSJK6uSciacE75/P3TLT86lftN0cjmb4ffejmiZZ/yn9+9r94fM/Hrpio335vfcivlv8LhoikXc4/fnZr97KHv3bJHI5k
8fNwliSpmyEiSepmiEiSuhkikqRuiz5EkqxM8u0km5KctrPHI0m7kkV9dVaS3YCPA/8O2Axcm2RdVd3ycMtuPftvu193yVte172sJP02Wex7IocCm6rq9qr6JXABcMxOHpMk7TJSVTt7DN2SvBpYWVX/oT1+PfDCqjp1u/lWA6vbw2cC3x6j/b7Aj+doqHPZa6H3W8hjm+t+C3lsc91vIY9tofdbyGPbkX7/qqoe8iGZRX04C8iI2kNSsarOAc7ZocbJxqpa0TuwafVa6P0W8tjmut9CHttc91vIY1vo/Rby2Oai32I/nLUZWDr0+EDgrp00Fkna5Sz2ELkWWJ7koCR7ACcC63bymCRpl7GoD2dV1YNJTgUuA3YD1lTVZN9e9hs7dPhrHnst9H4LeWxz3W8hj22u+y3ksS30fgt5bBP3W9Qn1iVJO9diP5wlSdqJDBFJUjdDZEiSZya5fuj28yRvn7DnO5LcnOSmJOcnefQEvd7W+tzcM64ka5Lck+Smodo+STYkua3d7z1hvxPa+H6dZIcuG5yh318l+VaSG5JckmSvCfu9r/W6PsnlSZ7a22vouT9NUkn2nXBsZyT54dDf39GT9Gv1t7avBbo5yV9OMLYLh8Z1R5LrJxlbkkOSXNX6bUxy6IT9npvka0luTPKFJHuO2WtpkiuT3Nr+G72t1bu2i1n67fB2MUuvrm1iln5d28T/V1XeRtwYnKj/EYMP2PT2OAD4HvCY9vgi4I86ez0HuAl4LIMLIv4HsHwHe7wYeD5w01DtL4HT2vRpwFkT9nsWgw90fhlYMQfjOxLYvU2fNQfj23No+j8Bn+zt1epLGVzY8X1g3wnHdgbwp51/H6P6vbT9nTyqPd5vknUdev6DwJ9POLbLgaPa9NHAlyfsdy3w+236TcD7xuy1P/D8Nv0E4DvAwb3bxSz9dni7mKVX1zYxS7+ubWLbzT2RmR0BfLeqvj9hn92BxyTZnUEA9H6O5VnAVVX1i6p6EPgKcNyONKiqrwL3blc+BljbptcCx07Sr6purapxvhFg3H6Xt/UFuIrBZ4Em6ffzoYePY8SHU8ft1XwY+K/j9hmjX5cZ+r0F+EBVPdDmGes3ZmcbW5IArwHOn3BsBWzbW3giO7BdzNDvmcBX2/QG4Pgxe91dVV9v0/cDtzJ489e1XczUr2e7mKVX1zYxS7+ubWIbQ2RmJ7IDG8ooVfVD4K+BHwB3Az+rqss7290EvDjJk5I8lsG7t6UPs8w4nlxVd7fx3g0s5B+jfhPwpUmbJDkzyZ3Aa4E/n6DPK4EfVtU3Jx3TkFPboYU1O3JocQbPAH4vydVJvpLk387B+H4P2FJVt03Y5+3AX7X/D38NvGvCfjcBr2zTJ9CxbSRZBjwPuJo52C626zeRWXp1bRPb95tkmzBERsjgg4uvBP77hH32ZvCO5iDgqcDjknR9BXBV3cpg13UD8A/AN4EHZ13ot0iSdzNY389O2quq3l1VS1uvUx9u/hnG81jg3UwQQiOcDTwdOITBm44PTthvd2Bv4DDgvwAXtT2JSZzEhG+umrcA72j/H94BnDthvzcBpyS5jsGhml/uyMJJHg9cDLx9u3fmXeay30y9ereJUf0m2SYMkdGOAr5eVVsm7PMy4HtVtbWq/hn4PPA7vc2q6tyqen5VvZjB7vyk7wYBtiTZH6Ddj3XIYz4lWQX8AfDaagdu58jfMeZhjxGezuDNwTeT3MHgkMLXkzyldzBVtaWqflVVvwY+xeBbqiexGfh8DVwD/JrBl+11aYdkXwVcOOG4AFYx2B5g8GZtonWtqm9V1ZFV9QIGIffdcZdN8kgG/6h+tqq2jal7u5ihX5eZevVuE2OMbYe3CUNktLl6t/UD4LAkj23vAI9gcByyS5L92v3TGGzMczHGdQw2aNr9pXPQc84kWQm8E3hlVf1iDvotH3r4SuBbPX2q6saq2q+qllXVMgb/YD+/qn40wdj2H3p4HINDNJP4e+Dw1vsZwB5M
9u2vLwO+VVWbJxwXDM6B/H6bPpwJ3xANbRuPAN4DfHLM5cJgL+jWqvrQ0FNd28Us/XbYTL16t4lZ+k22TezIWfhd4cbg5PdPgCfOUb+/aP9TbgI+Q7tSprPX/wRuYXAo64iO5c9ncJjknxn8o3cy8CTgCgYb8RXAPhP2O65NPwBsAS6bsN8m4E7g+nYb+8qRGfpd3P5f3AB8gcGJxa5e2z1/Bzt2ddaosX0GuLGNbR2w/4T99gD+tq3v14HDJ1lX4NPAH8/R393vAte1v+WrgRdM2O9tDK42+g7wAdq3cYzR63cZnEi+Yehv7Oje7WKWfju8XczSq2ubmKVf1zax7ebXnkiSunk4S5LUzRCRJHUzRCRJ3QwRSVI3Q0SS1M0QkSR1M0QkSd3+H+JXSLslz06hAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Lets draw the distribution of all session start hours\n",
    "session_start_hour = train_times['time1'].apply(lambda ts: ts.hour).values\n",
    "sns.countplot(session_start_hour)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now the same separately for Alice and everybody else."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAtMAAAGDCAYAAADpkpxbAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3dfZxdZX3v/c9XIqKCBiRQhGg4Nj6gPSLmRk7VimAROJXgA4q3SlQ8aT1gfa5Ye1eqcm5tfTgVLb6wIKBUQMBDtFFIEXxqQQIiT1FJUSESIQjiA7fY4O/+Y19TN2FmMrOy98xk5vN+vfZr9r7WWtf67Zmda3+z9rXXSlUhSZIkafIeNN0FSJIkSVsrw7QkSZLUkWFakiRJ6sgwLUmSJHVkmJYkSZI6MkxLkiRJHRmmNeclOS3J+9r9Zyf53nTXJEkaW5JLk7xuuuuQwDCtOaYNwHclechoy6vq61X1hKmuS5LmsiSvTnJtknuS/CTJSUnmt2XHJ/nMdNcojcUwrTkjySLg2UABh01rMZIkAJK8FfgA8HbgkcB+wGOBVUm2HfK+k8QspC3iC0hzyVHAZcBpwLLRVkiyf5J1fY8XJjk/yYYkP03ysb5lr02yph3pvjDJY4f9BCRpNknyCOBvgDdU1Zer6j+q6ofAS+kF6tcBfwm8LMkvk3ynb/PHJvlmkl8kuSjJzn397pfkX5P8LMl3kuzft+zSJCck+SZwD/Bf2pHxm1pfP0jyiuE/e80WhmnNJUcBZ7bb85PsOt7KSbYBvgj8CFgE7A6c1ZYdTm+AfxGwAPg68NlhFS5Js9QfAtsB5/c3VtUvgS/R+zTxfwFnV9X2VfXUvtX+b+A1wC7AtsDbAJLsDvwz8D5gp9Z+XpIFfdu+ClgO7ABsAD4KHFJVO7Sarh7s09RsZpjWnJDkWfSOcpxTVVcC/05vIB7PvsCjgbdX1a+q6tdV9Y227E+B/7eq1lTVRnqD/d4enZakSdkZuKONo5ta35aP5VNV9f2q+v+Ac4C9W/srgZVVtbKqfltVq4DVwKF9255WVde3/W4Efgs8JclDq2p9VV2/pU9Mc4dhWnPFMuCiqrqjPf4nxpjq0Wch8KMxBvnHAn/fPkL8GXAnEHpHryVJE3MHsHOSeaMs260tH8tP+u7fA2zf7j8WOGJkfG5j9LNafyNuGblTVb8CXgb8GbA+yT8neeLkn4rmqtFevNKskuSh9ObfbZNkZPB9CDA/yVPH3pJbgMckmTdKoL4FOKGqzhx8xZI0Z/wbcC+9KXPnjDQmeThwCL3pdJM9SHEL8Omq+h/jrFP3e1B1IXBhe794H/BJelNMpM3yyLTmgsOB+4C96H0MuDfwJHrznI8aZ7tv0fuY8f1JHp5kuyTPbMs+AbwzyZMBkjwyyRHDegKSNBtV1d30voB4YpKDkzy4nXnpc8A64NPAbcCiSZx14zPAC5I8P8k2bezeP8keo62cZNckh7UAfy/wS3rvGdKEGKY1FyyjN7fu5qr6ycgN+BjwCsb4hKaq7gNeAPw+cDO9gf1lbdnn6Z3K6awkPweuo3cURZI0CVX1t/SOQH8Q+DlwOb2jywdW1b30gjXAT5NcNYH+bgGWtj43tL7eztiZ50HAW4Fb6U3Zew7wP7s+H809qarNryVJkiTpATwyLUmSJHVkmJYkSZI6MkxLkiRJHRmmJUmSpI4M05IkSVJHW/VFW3beeedatGjRdJchSZN25ZVX3lFVC6a7jqnkmC1pazbWuL1Vh+lFixaxevXq6S5DkiYtyY+mu4ap5pgtaWs21rjtNA9JkiSpI8O0JEmS1JFhWpIkSerIMC1JkiR1ZJiWJEmSOjJMS5IkSR0ZpiVJkqSODNOSJElSR4ZpSZIkqSPDtCRJktSRYVqSJEnqyDAtSZIkdWSYliRJkjqaN90FSJpaa074ypTt60nvOmDK9iVNt5WnHNppu0OPXjng
SiRNJY9MS5IkSR0ZpiVJkqSODNOSJElSR4ZpSZIkqSPDtCRJktSRYVqSJEnqyDAtSZIkdWSYliRJkjoyTEuSJEkdGaYlSZKkjgzTkiRJUkeGaUmSJKkjw7QkSZLUkWFakiRJ6sgwLUmSJHVkmJakWSjJdkm+leQ7Sa5P8jetfc8klye5McnZSbZt7Q9pj9e25Yv6+npna/9ekuf3tR/c2tYmOW6qn6MkzQSGaUmane4FDqiqpwJ7Awcn2Q/4APCRqloM3AUc3dY/Grirqn4f+EhbjyR7AUcCTwYOBv4hyTZJtgE+DhwC7AW8vK0rSXOKYVqSZqHq+WV7+OB2K+AA4NzWfjpweLu/tD2mLT8wSVr7WVV1b1X9AFgL7Ntua6vqpqr6DXBWW1eS5hTDtCTNUu0I8tXA7cAq4N+Bn1XVxrbKOmD3dn934BaAtvxu4FH97ZtsM1b7pjUsT7I6yeoNGzYM6qlJ0owxb7oLkCQNR1XdB+ydZD7weeBJo63WfmaMZWO1j3Ywph7QUHUycDLAkiVLHrB8Jvj2J17Qabun/dkXBlyJpK2RR6YlaZarqp8BlwL7AfOTjBxI2QO4td1fBywEaMsfCdzZ377JNmO1S9KcYpiWpFkoyYJ2RJokDwWeB6wBLgFe0lZbBlzQ7q9oj2nLv1JV1dqPbGf72BNYDHwLuAJY3M4Osi29LymuGP4zk6SZxWkekjQ77Qac3s668SDgnKr6YpIbgLOSvA/4NnBKW/8U4NNJ1tI7In0kQFVdn+Qc4AZgI3BMmz5CkmOBC4FtgFOr6vqpe3qSNDMYpiVpFqqqa4CnjdJ+E70zcWza/mvgiDH6OgE4YZT2lcDKLS5WkrZiQw3TSd4MvI7el1KuBV5D72jJWcBOwFXAq6rqN0keApwBPB34KfCyqvrhMOuTJGmmOfX0gzpt99plFw24EkkTMbQ500l2B/4cWFJVT6H3MeCRTPKCAZIkSdJMNewvIM4DHtq+Gf4wYD2Tv2CAJEmSNCMNLUxX1Y+BDwI30wvRdwNXMvkLBtyPFwCQJEnSTDHMaR470jvavCfwaODhwCGjrLq5Cwbcv6Hq5KpaUlVLFixYMKhyJUmSpEkb5jSP5wE/qKoNVfUfwPnAHzL5CwZIkiRJM9Iww/TNwH5JHtbmPh9I7zylk71ggCRJkjQjDXPO9OX0vkh4Fb3T4j0IOBl4B/CWdmGAR3H/CwY8qrW/BThuWLVJkiRJgzDU80xX1buBd2/SPOkLBkiSJEkz0bBPjSdJkiTNWoZpSZIkqSPDtCRJktSRYVqSJEnqyDAtSZIkdWSYliRJkjoyTEuSJEkdGaYlSZKkjgzTkiRJUkeGaUmSJKkjw7QkSZLUkWFakiRJ6sgwLUmSJHVkmJYkSZI6MkxLkiRJHRmmJUmSpI4M05IkSVJHhmlJkiSpI8O0JEmS1JFhWpIkSerIMC1JkiR1ZJiWJEmSOjJMS5IkSR0ZpiVJkqSODNOSJElSR4ZpSZIkqSPDtCRJktSRYVqSJEnqyDAtSZIkdWSYliRJkjoyTEuSJEkdGaYlaRZKsjDJJUnWJLk+yRtb+/FJfpzk6nY7tG+bdyZZm+R7SZ7f135wa1ub5Li+9j2TXJ7kxiRnJ9l2ap+lJE2/edNdgCRpKDYCb62qq5LsAFyZZFVb9pGq+mD/ykn2Ao4Engw8GviXJI9viz8O/DGwDrgiyYqqugH4QOvrrCSfAI4GThr6M9NW59DPf6Dztitf+I4BViINnkemJWkWqqr1VXVVu/8LYA2w+zibLAXOqqp7q+oHwFpg33ZbW1U3VdVvgLOApUkCHACc27Y/HTh8OM9GkmYuw7QkzXJJFgFPAy5vTccmuSbJqUl2bG27A7f0bbautY3V/ijgZ1W1cZP2Tfe9PMnqJKs3bNgwoGckSTOHYVqSZrEk2wPnAW+qqp/Tm4bxOGBvYD3woZFVR9m8OrTfv6Hq5KpaUlVLFixY0OEZSNLM5pxpSZqlkjyYXpA+
s6rOB6iq2/qWfxL4Ynu4DljYt/kewK3t/mjtdwDzk8xrR6f715ekOcMj05I0C7U5zacAa6rqw33tu/Wt9kLgunZ/BXBkkock2RNYDHwLuAJY3M7csS29LymuqKoCLgFe0rZfBlwwzOckSTORR6YlaXZ6JvAq4NokV7e2vwRenmRvelMyfgj8KUBVXZ/kHOAGemcCOaaq7gNIcixwIbANcGpVXd/6ewdwVpL3Ad+mF94laU4xTEvSLFRV32D0ec0rx9nmBOCEUdpXjrZdVd1E72wfkjRnOc1DkiRJ6sgwLUmSJHVkmJYkSZI6MkxLkiRJHRmmJUmSpI4M05IkSVJHhmlJkiSpI88zLUnaKq3/h3d12m63//mAU2lLUmcemZYkSZI6MkxLkiRJHRmmJUmSpI4M05IkSVJHhmlJkiSpI8O0JEmS1JFhWpIkSerIMC1JkiR1ZJiWJEmSOjJMS5IkSR15OXFJ0+L444+flfuSJM0tHpmWJEmSOjJMS5IkSR0NNUwnmZ/k3CTfTbImyX9LslOSVUlubD93bOsmyUeTrE1yTZJ9hlmbJEmStKWGfWT674EvV9UTgacCa4DjgIurajFwcXsMcAiwuN2WAycNuTZJkiRpiwwtTCd5BPBHwCkAVfWbqvoZsBQ4va12OnB4u78UOKN6LgPmJ9ltWPVJkiRJW2qYR6b/C7AB+FSSbyf5xyQPB3atqvUA7ecubf3dgVv6tl/X2iRJkqQZaZhheh6wD3BSVT0N+BW/m9IxmozSVg9YKVmeZHWS1Rs2bBhMpZIkSVIHwwzT64B1VXV5e3wuvXB928j0jfbz9r71F/Ztvwdw66adVtXJVbWkqpYsWLBgaMVLkiRJmzO0MF1VPwFuSfKE1nQgcAOwAljW2pYBF7T7K4Cj2lk99gPuHpkOIkmSJM1Ew74C4huAM5NsC9wEvIZegD8nydHAzcARbd2VwKHAWuCetq4kSZI0Yw01TFfV1cCSURYdOMq6BRwzzHokSZKkQfIKiJIkSVJHhmlJkiSpI8O0JEmS1JFhWpIkSerIMC1JkiR1ZJiWJEmSOjJMS5IkSR0ZpiVJkqSODNOSJElSR4ZpSZqFkixMckmSNUmuT/LG1r5TklVJbmw/d2ztSfLRJGuTXJNkn76+lrX1b0yyrK/96Umubdt8NEmm/plK0vQa6uXEJUnTZiPw1qq6KskOwJVJVgGvBi6uqvcnOQ44DngHcAiwuN2eAZwEPCPJTsC7gSVAtX5WVNVdbZ3lwGXASuBg4EtT+Bw1BQ654OWdtvvS0s8OuBJpZjJMS9IsVFXrgfXt/i+SrAF2B5YC+7fVTgcupRemlwJnVFUBlyWZn2S3tu6qqroToAXyg5NcCjyiqv6ttZ8BHI5hWkP0J+ed0nnbL7746AFWIv2O0zwkaZZLsgh4GnA5sGsL2iOBe5e22u7ALX2brWtt47WvG6VdkuYUw7QkzWJJtgfOA95UVT8fb9VR2qpD+6b7X55kdZLVGzZsmEjJkrRVMUxL0iyV5MH0gvSZVXV+a76tTd+g/by9ta8DFvZtvgdw62ba9xil/X6q6uSqWlJVSxYsWLDlT0qSZhjDtCTNQu3MGqcAa6rqw32LVgAjZ+RYBlzQ135UO6vHfsDdbRrIhcBBSXZsZ/44CLiwLftFkv3avo7q60uS5gy/gChJs9MzgVcB1ya5urX9JfB+4JwkRwM3A0e0ZSuBQ4G1wD3AawCq6s4k7wWuaOu9Z+TLiMDrgdOAh9L74qFfPpQ05ximJWkWqqpvMPq8ZoADR1m/gGPG6OtU4NRR2lcDT9mCMiVpq+c0D0mSJKkjw7QkSZLUkWFakiRJ6sgwLUmSJHVkmJYkSZI6MkxLkiRJHRmmJUmSpI4M05IkSVJHhmlJkiSpI8O0JEmS1JFhWpIkSerIMC1JkiR1ZJiWJEmSOjJMS5IkSR1NKEwnuXgibZKkwXL8laSZbd54C5NsBzwM2DnJjkDaokcAjx5ybZI0Zzn+
StLWYdwwDfwp8CZ6A/eV/G4w/znw8SHWJUlzneOvJG0Fxg3TVfX3wN8neUNVnThFNUnSnOf4K0lbh80dmQagqk5M8ofAov5tquqMIdUlScLxV5JmugmF6SSfBh4HXA3c15oLcDCXpCFy/JWkmW1CYRpYAuxVVTXMYiRJD+D4K0kz2ETPM30d8HvDLESSNCrHX0mawSZ6ZHpn4IYk3wLuHWmsqsOGUpUkaYTjryTNYBMN08cPswhJ0piOn+4CJEljm+jZPL467EIkSQ/k+CtJM9tEz+bxC3rfHgfYFngw8KuqesSwCpMkOf5K0kw30SPTO/Q/TnI4sO9QKpIk/SfHX0ma2SZ6No/7qar/Axww4FokSZvh+CtJM8tEp3m8qO/hg+id99RznkrSkDn+SoP3gnPP67ztF17y4gFWotlgomfzeEHf/Y3AD4GlA69GkrQpx19JmsEmOmf6NcMuRJL0QI6/kjSzTXSaxx7AicAz6X28+A3gjVW1boi1SdKc5/irrt539vM7bfdXL7twwJVIs9tEv4D4KWAF8Ghgd+ALrU2SNFyOv5I0g000TC+oqk9V1cZ2Ow1YMMS6JEk9jr+SNINNNEzfkeSVSbZpt1cCPx1mYZIkwPFXkma0iYbp1wIvBX4CrAdeAvilGEkaPsdfSZrBJnpqvPcCy6rqLoAkOwEfpDfIS5KGx/FXkmawiR6Z/q8jAzlAVd0JPG04JUmS+jj+StIMNtEw/aAkO448aEdGJnpUW5LUneOvJM1gEx2QPwT8a5Jz6Z3n9KXACUOrSpI0wvFXkmawCR2ZrqozgBcDtwEbgBdV1aeHWZgkqfv4m+TUJLcnua6v7fgkP05ydbsd2rfsnUnWJvlekuf3tR/c2tYmOa6vfc8klye5McnZSbYd1HOWpK3JhD8qrKobgBuGWIskaRQdx9/TgI8BZ2zS/pGq+mB/Q5K9gCOBJ9O7OMy/JHl8W/xx4I+BdcAVSVa0ej7Q+jorySeAo4GTJlmjJG31JjpnWpK0FamqrwF3TnD1pcBZVXVvVf0AWAvs225rq+qmqvoNcBawNEmAA4Bz2/anA4cP9AlI0lbCMC1Jc8uxSa5p00BGvti4O3BL3zrrWttY7Y8CflZVGzdpf4Aky5OsTrJ6w4YNg3wekjQjDD1Mtyt2fTvJF9vjUefZJXlIe7y2LV807NokaY45CXgcsDe9C8B8qLVnlHWrQ/sDG6tOrqolVbVkwQKvgi5p9pmKI9NvBNb0PR6ZZ7cYuIvePDvaz7uq6veBj7T1JEkDUlW3VdV9VfVb4JP0pnFA78jywr5V9wBuHaf9DmB+knmbtEvSnDPUMJ1kD+C/A//YHo83z25pe0xbfmBbX5I0AEl263v4QmDkTB8rgCPbJ4R7AouBbwFXAIvbJ4rb0vuS4oqqKuASepc2B1gGXDAVz0GSZpphn/j/fwN/AezQHo83z+4/5+ZV1cYkd7f17+jvMMlyYDnAYx7zmKEWL0lbqySfBfYHdk6yDng3sH+SvelNyfgh8KcAVXV9knPonTFkI3BMVd3X+jkWuBDYBji1qq5vu3gHcFaS9wHfBk6ZoqcmSTPK0MJ0kj8Bbq+qK5PsP9I8yqo1gWW/a6g6GTgZYMmSJaPO0ZOkua6qXj5K85iBt6pOYJSLwVTVSmDlKO038btpIpI0Zw3zyPQzgcPaRQG2Ax5B70j1/CTz2tHp/nl2I3Pz1rV5eI9k4qd1kiRJkqbc0OZMV9U7q2qPqlpEb57dV6rqFYw9z25Fe0xb/pU2L0+SJEmakabjPNPvAN6SZC29OdEjHzueAjyqtb8FOG6M7SVJkqQZYdhfQASgqi4FLm33R51nV1W/Bo6YinokSZKkQfAKiJIkSVJHhmlJkiSpI8O0JEmS1JFhWpIkSerIMC1JkiR1ZJiWJEmSOjJMS5IkSR0ZpiVJkqSODNOSJElSR4ZpSZIkqSPDtCRJktSRYVqSJEnqyDAtSZIkdWSYliRJkjoyTEuSJEkd
GaYlSZKkjuZNdwEajmee+Mwp29c33/DNKduXJEnSTOKRaUmSJKkjw7QkSZLUkWFakiRJ6sgwLUmSJHVkmJYkSZI6MkxLkiRJHRmmJUmSpI4M05IkSVJHXrRFkiSpgxeed0mn7T7/4ucOuBJNJ49MS5IkSR0ZpiVJkqSODNOSJElSR4ZpSZIkqSPDtCRJktSRYVqSJEnqyDAtSZIkdWSYliRJkjoyTEuSJEkdGaYlaRZKcmqS25Nc19e2U5JVSW5sP3ds7Uny0SRrk1yTZJ++bZa19W9Msqyv/elJrm3bfDRJpvYZStLMYJiWpNnpNODgTdqOAy6uqsXAxe0xwCHA4nZbDpwEvfANvBt4BrAv8O6RAN7WWd633ab7kqQ5wTAtSbNQVX0NuHOT5qXA6e3+6cDhfe1nVM9lwPwkuwHPB1ZV1Z1VdRewCji4LXtEVf1bVRVwRl9fkjSnGKYlae7YtarWA7Sfu7T23YFb+tZb19rGa183SrskzTmGaUnSaPOdq0P7AztOlidZnWT1hg0btqBESZqZDNOSNHfc1qZo0H7e3trXAQv71tsDuHUz7XuM0v4AVXVyVS2pqiULFiwYyJOQpJnEMC1Jc8cKYOSMHMuAC/raj2pn9dgPuLtNA7kQOCjJju2LhwcBF7Zlv0iyXzuLx1F9fUnSnDJvuguQJA1eks8C+wM7J1lH76wc7wfOSXI0cDNwRFt9JXAosBa4B3gNQFXdmeS9wBVtvfdU1ciXGl9P74whDwW+1G6SNOcYpiVpFqqql4+x6MBR1i3gmDH6ORU4dZT21cBTtqRGSZoNnOYhSZIkdWSYliRJkjoyTEuSJEkdGaYlSZKkjgzTkiRJUkeGaUmSJKkjw7QkSZLUkWFakiRJ6sgwLUmSJHVkmJYkSZI6MkxLkiRJHRmmJUmSpI4M05IkSVJHhmlJkiSpI8O0JEmS1JFhWpIkSerIMC1JkiR1ZJiWJEmSOhpamE6yMMklSdYkuT7JG1v7TklWJbmx/dyxtSfJR5OsTXJNkn2GVZskSZI0CMM8Mr0ReGtVPQnYDzgmyV7AccDFVbUYuLg9BjgEWNxuy4GThlibJEmStMWGFqaran1VXdXu/wJYA+wOLAVOb6udDhze7i8Fzqiey4D5SXYbVn2SJEnSlpqSOdNJFgFPAy4Hdq2q9dAL3MAubbXdgVv6NlvX2jbta3mS1UlWb9iwYZhlS5IkSeMaephOsj1wHvCmqvr5eKuO0lYPaKg6uaqWVNWSBQsWDKpMSZIkadLmDbPzJA+mF6TPrKrzW/NtSXarqvVtGsftrX0dsLBv8z2AW4dZn4bvq3/0nCnb13O+9tUp25ckSRIM92weAU4B1lTVh/sWrQCWtfvLgAv62o9qZ/XYD7h7ZDqIJEmSNBMN88j0M4FXAdcmubq1/SXwfuCcJEcDNwNHtGUrgUOBtcA9wGuGWJskSZK0xYYWpqvqG4w+DxrgwFHWL+CYYdUjSZIkDZpXQJQkSZI6MkxLkiRJHQ31bB6SNNOd87l9p2xfLz3iW1O2L0nS1PDItCRJktSRYVqSJEnqyDAtSZIkdWSYliRJkjoyTEuSJEkdGaYlSZKkjgzTkiRJUkeGaUmSJKkjw7QkSZLUkWFakiRJ6sgwLUlzTJIfJrk2ydVJVre2nZKsSnJj+7lja0+SjyZZm+SaJPv09bOsrX9jkmXT9XwkaToZpiVpbnpuVe1dVUva4+OAi6tqMXBxewxwCLC43ZYDJ0EvfAPvBp4B7Au8eySAS9JcYpiWJAEsBU5v908HDu9rP6N6LgPmJ9kNeD6wqqrurKq7gFXAwVNdtCRNN8O0JM09BVyU5Moky1vbrlW1HqD93KW17w7c0rftutY2Vvv9JFmeZHWS1Rs2bBjw05Ck6TdvuguQJE25Z1bVrUl2AVYl+e4462aUthqn/f4NVScDJwMsWbLkAcslaWvnkWlJmmOq6tb283bg8/TmPN/Wpm/Qft7eVl8HLOzbfA/g1nHa
JWlOMUxL0hyS5OFJdhi5DxwEXAesAEbOyLEMuKDdXwEc1c7qsR9wd5sGciFwUJId2xcPD2ptkjSnOM1DkuaWXYHPJ4Hee8A/VdWXk1wBnJPkaOBm4Ii2/krgUGAtcA/wGoCqujPJe4Er2nrvqao7p+5pSNLMYJiWpDmkqm4CnjpK+0+BA0dpL+CYMfo6FTh10DVK0tbEaR6SJElSR4ZpSZIkqSPDtCRJktSRYVqSJEnqyDAtSZIkdWSYliRJkjoyTEuSJEkdGaYlSZKkjgzTkiRJUkeGaUmSJKkjw7QkSZLU0bzpLkCSJGkue9l53++03dkvfvyAK1EXHpmWJEmSOjJMS5IkSR0ZpiVJkqSODNOSJElSR4ZpSZIkqSPDtCRJktSRYVqSJEnqyDAtSZIkdWSYliRJkjoyTEuSJEkdGaYlSZKkjgzTkiRJUkeGaUmSJKkjw7QkSZLUkWFakiRJ6mjedBcgSZKk2efGj93WabvFx+464EqGyyPTkiRJUkez5sj0099+xpTu78q/O2pK96fZ4YRXvmTK9vWuz5w7ZfuSJGlYfvLh6ztt93tvefKAKxndrAnTkiRJc9nJ59/eabvlL9plwJXMTLefeHGn7XZ5w4HjLneahyRJktSRR6YlSZL0n75y5oZO2x3wigUDrmTr4JFpSZIkqSPDtCRJktSRYVqSJEnqyDAtSZIkdWSYliRJkjoyTEuSJEkdzagwneTgJN9LsjbJcdNdjyRpfI7bkua6GXOe6STbAB8H/hhYB1yRZEVV3TC9lUmSRtN13N5w0mc67W/B61/ZaTtJGqYZE6aBfYG1VXUTQJKzgKWAYVpb7GNv/cKU7evYD71gyvYlTTPHbUlz3kwK07sDt/Q9Xgc8Y5pq6ezm9/zBlO3rMX997ZTtS5JGMSvGbUnaEqmq6a4BgCRHAM+vqte1x68C9q2qN2yy3nJgeXv4BOB7W7jrnYE7trCPQbCOmVUDWMemrOP+trSOx1bVVn3t3YmM2x3G7EH9fe1n7vUzk2qxn9nZz6jj9kw6Mr0OWNj3eA/g1k1XqqqTgZMHtdMkq6tqyaD6s47ZUYN1WMfWUsc02+y4Pdkxe1C/V/uZe/3MpFrsZ271M5PO5nEFsDjJnkm2BY4EVt9/S8kAAAusSURBVExzTZKksTluS5rzZsyR6aramORY4EJgG+DUqrp+msuSJI3BcVuSZlCYBqiqlcDKKd7twKaMbCHr+J2ZUANYx6as4/5mSh3Tagjj9qB+r/Yz9/qZSbXYzxzqZ8Z8AVGSJEna2sykOdOSJEnSVmXOhukkb0xyXZLrk7xpivd9apLbk1zX13ZEq+W3SYZ+hoAxavi7JN9Nck2SzyeZP011vLfVcHWSi5I8ejrq6Fv2tiSVZOfpqCPJ8Ul+3H4fVyc5dDrqaO1vaJeOvj7J3051DUnO7vs9/DDJ1cOsYZw69k5yWatjdZJ9h13HbJfkCX1/26uT/Lzr2Jzkze01el2SzybZrmM/nd4nxnjN7JRkVZIb288dO/Yz6feKQYz3gxqrBzXWDmqsHNRYN6jxalDjzRj9PDXJvyW5NskXkjxiM30sTHJJkjXt9/DG1j6p1/I4/UzqtTxOP5N9LY/VT/fsUVVz7gY8BbgOeBi9eeP/Aiyewv3/EbAPcF1f25PonYP1UmDJNNVwEDCv3f8A8IFpquMRfff/HPjEdNTR2hfS+3LVj4Cdp+n3cTzwtmHvewJ1PLf9W3lIe7zLdPxN+pZ/CPjrafpdXAQc0u4fClw6lX+f2X6j92XGn9A7p+tkt90d+AHw0Pb4HODVHfrp/D4xxmvmb4Hj2v3jJjK+Duq9YhDj/aDG6kGNtYMaKwc11g1qvBrUeDNGP1cAz2n3Xwu8dzN97Abs0+7vAHwf2Guyr+Vx+pnUa3mcfib7Wh6rn87ZY64emX4ScFlV3VNVG4GvAi+cqp1X1deAOzdpW1NVW3oBmi2t4aL2+wC4jN45Y6ejjp/3PXw4MPSJ/aPV
0XwE+IupqGEzdUypMep4PfD+qrq3rXP7NNQAQJIALwU+O8waxqmjgJGjOo9klHPia4scCPx7Vf2o4/bzgIcmmUcvDHf5+3R+nxjjNbMUOL3dPx04vEs/Xd4rBjHeD2qsHtRYO6ixclBj3aDGq0GNN2P08wTga+3+KuDFm+ljfVVd1e7/AlhD7z+rk3otj9XPZF/L4/Qz2dfyWP10zh5zNUxfB/xRkkcleRi9/+kt3Mw2c81rgS9N186TnJDkFuAVwF9PUw2HAT+uqu9Mx/43cWz7+OnUiXw8PCSPB56d5PIkX03yf01THQDPBm6rqhunaf9vAv6uvUY/CLxzmuqYrY6k43+UqurH9P4mNwPrgbur6qIOXQ36fWLXqlrfalwP7LIFfQ1a5/F+EGP1gMfaQYyVgx7rtnS8GtR4cx1wWLt/BJN4PSdZBDwNuJwteC1v0k9n4/Qzqdfypv10fT3PyTBdVWvofRSwCvgy8B1g47gbzSFJ3kXv93HmdNVQVe+qqoWthmOnev/tzfNdTFOQ38RJwOOAvemFgw9NUx3zgB2B/YC3A+e0Iy7T4eVMwVHpcbweeHN7jb4ZOGUaa5lV0rv4y2HA5zpuvyO9I2d7Ao8GHp7klZPtZ668T2zpeL+lY/WAx9pBjZWDHuu2dLwa1HjzWuCYJFfSm97wm4lslGR74DzgTZscvZ2UYfcz2dfyaP10fT3PyTANUFWnVNU+VfVH9D4Kma4jXDNKkmXAnwCvqDZxaJr9E5v5KGpIHkfvzfg7SX5I72Ojq5L83lQXUlW3VdV9VfVb4JPAdH3ZbR1wfvV8C/gtMPQvZW6qfXT/IuDsqd53n2XA+e3+55i+v8lsdAhwVVXd1nH75wE/qKoNVfUf9P5Of9ilowG/T9yWZDeA9nOo06QmYsDjfdexemBj7QDHyoGNdQMarwYy3lTVd6vqoKp6Or1w/++b2ybJg+kFzjOraqSGSb+Wx+hn0sbqZ7Kv5QnUM6nX85wN00l2aT8fQ++FPp1HuWaEJAcD7wAOq6p7prGOxX0PDwO+O9U1VNW1VbVLVS2qqkX0Btd9quonU13LyKDVvJDeR3XT4f8ABwAkeTywLXDHNNTxPOC7VbVuGvY94lbgOe3+Afif8UHa0qN4NwP7JXlYO5p4IL05kZM24PeJFfRCEe3nBVvQ1xYbxHg/iLF6kGPtAMfKQY51gxivBjLe9L2eHwT8FfCJzawfekfB11TVh/sWTeq1PE4/kzJWP5N9LY/TT/fXc03iW6+z6QZ8HbiB3kd3B07xvj9L7yOo/6A3cBxN7x/+OuBe4DbgwmmoYS1wC3B1u03FWTRGq+M8eoPgNcAX6H0xYMrr2GT5D5mas3mM9vv4NHBt+32sAHabpjq2BT7T/jZXAQdMx98EOA34s2H/Djbzu3gWcGUbPy4Hnj5V9czmG70vC/4UeOQW9vM39N4Ir2v/fh7SsZ9O7xNjvGYeBVxMLwhdDOzUsZ9Jv1cMYrwf1Fg9qLF2UGPloMa6QY1XgxpvxujnjfTOXPF94P20C/eN08ez6H0J75q+18mhk30tj9PPpF7L4/Qz2dfyWP10zh5eAVGSJEnqaM5O85AkSZK2lGFakiRJ6sgwLUmSJHVkmJYkSZI6MkxLkiRJHRmmNWWSvCvJ9e1Sr1cnecaA+n10knMH0dcY/R+eZK8O2+2fZNSLRSQ5Psnbtrw6SRoOx+z7LXPM1pjmTXcBmhuS/Dd6Vyfap6ruTbIzvXN5brGquhV4ySD6GsPhwBfpnW92QtpVr/YHfgn863DKGn2/VTXrLnksaWo5Zk8Nx+zZwSPTmiq7AXdU1b0AVXVHG1BJ8vQkX01yZZIL+y5T+udJbmhHRc5qbc9pR0iuTvLtJDskWZTkurZ8uySfSnJtW/7c1v7qJOcn+XKSG5P87WhFJnl/3z4/2I5SHAb8Xdvn45L8jyRXJPlOkvOSPKxte1qSDye5hN6lY/8MeHPb7tmj7G6vJJcmuSnJn/fV
8JYk17Xbm1rbfz7H9vhtSY5v9y9N8r+SfJXeSfklaUs5Zj+QY7ZG5ZFpTZWLgL9O8n3gX4Czq+qrSR4MnAgsraoNSV4GnAC8FjgO2LMdFZnf+nkbcExVfTPJ9sCvN9nPMQBV9QdJnghclN7lYAH2Bp5G72pL30tyYlXdMrJhkp3oXZHpiVVVSeZX1c+SrAC+WFXntvV+VlWfbPffR+/KUie2bh4PPK+q7msD5y+r6oNj/E6eCDwX2KHVcxLwX4HXAM8AAlzeBty7NvP7nV9Vz9nMOpI0UY7ZD+SYrVF5ZFpToqp+CTwdWA5sAM5O8mrgCcBTgFVJrgb+CtijbXYNcGaSVwIjH4N9E/hwOyowf5SPx55F75KyVNV3gR/RGywBLq6qu6vq1/Q+/nvsJtv+nN5A/49JXgTcM8bTeUqSrye5FngF8OS+ZZ+rqvs2+wvp+eequreq7gBuB3Zt9X++qn7VfmfnA6MdIdnU2RPcpyRtlmP2qByzNSrDtKZMVd1XVZdW1buBY4EX0/uf/PVVtXe7/UFVHdQ2+e/Ax+kN6FemN7fs/cDrgIcCl7UjGf0yTgn39t2/j00+mWmD/L7AefTm3H15jH5OA46tqj8A/gbYrm/Zr8bZ/0TqGav+jdz/3+t2myyfzH4labMcsydUj2O2DNOaGkmekGRxX9Pe9I5AfA9YkN6XXUjy4CRPTvIgYGFVXQL8BTAf2D7J46rq2qr6ALCa3sdu/b5G78gD7aPCx7R9TKTG7YFHVtVK4E2tRoBf0PtYb8QOwPr2cecrxuly0+0m4mvA4UkeluTh9D7C/DpwG7BLkkcleQi9LwZJ0lA4Zk+YY7acM60psz1wYptHtxFYCyyvqt8keQnw0SSPpPea/N/A94HPtLYAH2lz4d7bvqByH72P/b5E74syI/4B+ET7OG8j8Oo2f28iNe4AXJBku7bPN7f2s4BPto8pXwL8P8Dl9N5YrmXswfcLwLlJlgJvqKqvb66AqroqyWnAt1rTP1bVtwGSvKft9wfAdyfyhCSpI8dsx2xNUKpqumuQJEmStkpO85AkSZI6MkxLkiRJHRmmJUmSpI4M05IkSVJHhmlJkiSpI8O0JEmS1JFhWpIkSerIMC1JkiR19P8D6lE8fTQb+i0AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 864x432 with 2 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.subplots(1, 2, figsize = (12, 6)) \n",
    "\n",
    "plt.subplot(1, 2, 1)\n",
    "sns.countplot(session_start_hour[y_train == 1])\n",
    "plt.title(\"Alice\")\n",
    "plt.xlabel('Session start hour')\n",
    "          \n",
    "plt.subplot(1, 2, 2)\n",
    "sns.countplot(session_start_hour[y_train == 0])\n",
    "plt.title('Others')\n",
    "plt.xlabel('Session start hour');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now we definitely see that Alice mostly prefers 4-5 pm for browsing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "time_split = TimeSeriesSplit(n_splits=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [],
   "source": [
    "logit = LogisticRegression(C=1, random_state=SEED, solver='liblinear')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A helper function for writing predictions to a file\n",
    "def write_to_submission_file(predicted_labels, out_file,\n",
    "                             target='target', index_label=\"session_id\"):\n",
    "    predicted_df = pd.DataFrame(predicted_labels,\n",
    "                                index = np.arange(1, predicted_labels.shape[0] + 1),\n",
    "                                columns=[target])\n",
    "    predicted_df.to_csv(out_file, index_label=index_label)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "def train_and_predict(model, X_train, y_train, X_test, site_feature_names=vectorizer.get_feature_names(), \n",
    "                      new_feature_names=None, cv=time_split, scoring='roc_auc',\n",
    "                      top_n_features_to_show=30, submission_file_name='submission.csv'):\n",
    "    \"\"\"Cross-validate ``model``, refit it, show its weights and write a submission file.\n",
    "\n",
    "    Steps, in order:\n",
    "      1. score ``model`` with ``cross_val_score`` (4 jobs) and print fold scores, mean and std;\n",
    "      2. fit ``model`` on the full ``X_train`` / ``y_train``;\n",
    "      3. render the ``top_n_features_to_show`` largest weights with eli5, naming the columns\n",
    "         ``site_feature_names`` followed by ``new_feature_names`` (when given);\n",
    "      4. when ``new_feature_names`` is given, print the trailing coefficients under those names;\n",
    "      5. write the class-1 probabilities for ``X_test`` to ``submission_file_name``.\n",
    "\n",
    "    NOTE: the defaults of ``site_feature_names`` and ``cv`` are evaluated once, when this\n",
    "    cell is run, from the notebook-level ``vectorizer`` and ``time_split``.\n",
    "\n",
    "    Returns the array of cross-validation scores.\n",
    "    \"\"\"\n",
    "    fold_scores = cross_val_score(model, X_train, y_train, cv=cv,\n",
    "                                  scoring=scoring, n_jobs=4)\n",
    "    print('CV scores', fold_scores)\n",
    "    print('CV mean: {}, CV std: {}'.format(fold_scores.mean(), fold_scores.std()))\n",
    "    model.fit(X_train, y_train)\n",
    "\n",
    "    # Extra names are concatenated after the site names\n",
    "    has_extra = bool(new_feature_names)\n",
    "    column_names = site_feature_names + new_feature_names if has_extra else site_feature_names\n",
    "\n",
    "    weights_view = eli5.show_weights(estimator=model, feature_names=column_names,\n",
    "                                     top=top_n_features_to_show)\n",
    "    display_html(weights_view)\n",
    "\n",
    "    if has_extra:\n",
    "        print('New feature weights:')\n",
    "\n",
    "        # The last len(new_feature_names) coefficients are reported under the extra names\n",
    "        n_extra = len(new_feature_names)\n",
    "        extra_coefs = model.coef_.flatten()[-n_extra:]\n",
    "        print(pd.DataFrame({'feature': new_feature_names, 'coef': extra_coefs}))\n",
    "\n",
    "    positive_proba = model.predict_proba(X_test)[:, 1]\n",
    "    write_to_submission_file(positive_proba, submission_file_name)\n",
    "\n",
    "    return fold_scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Adding new features\n",
    "def add_time_features(times, X_sparse, add_hour=True):\n",
    "    \"\"\"Append session-level time features to a sparse feature matrix.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    times : DataFrame of session timestamps. 'time1' supplies the start hour and\n",
    "        month; all columns are used for the session span and the site count\n",
    "        (the count assumes 10 timestamp columns).\n",
    "    X_sparse : matrix with one row per session; the new columns go to its right.\n",
    "    add_hour : when True, also append the start hour divided by 24 as 'hour'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    X : X_sparse with the new columns stacked on the right.\n",
    "    feature_names : names of the new columns, in the order they were stacked.\n",
    "    \"\"\"\n",
    "    start = times['time1']\n",
    "    hour = start.dt.hour\n",
    "    month = start.dt.month\n",
    "\n",
    "    def as_flag(mask):\n",
    "        # Boolean Series -> 0/1 column vector\n",
    "        return mask.astype('int').values.reshape(-1, 1)\n",
    "\n",
    "    # Hand-picked hours flagged by 'alice_hours' (presumably the target user's active hours -- confirm)\n",
    "    alice_hour = [12, 13, 16, 17, 18]\n",
    "\n",
    "    # Session span (latest minus earliest timestamp) in milliseconds, compressed with a 0.2 power\n",
    "    session_duration = ((times.max(axis=1) - times.min(axis=1)).astype('timedelta64[ms]').astype(int) ** 0.2).values.reshape(-1, 1)\n",
    "    # Number of non-missing timestamps per session\n",
    "    number_of_sites = (10 - times.isnull().sum(axis=1)).astype('int').values.reshape(-1, 1)\n",
    "    # NOTE(review): session_duration is already raised to 0.2, so the power is applied a second\n",
    "    # time here and the result is truncated to an integer; kept unchanged to preserve the feature.\n",
    "    time_per_site = ((session_duration / number_of_sites) ** 0.2).astype('int')\n",
    "\n",
    "    # Every column is paired with its name so the returned names always follow the stacking\n",
    "    # order (the name list used to start with 'summer' although it is the fourth stacked column).\n",
    "    named_columns = [\n",
    "        ('morning', as_flag((hour >= 7) & (hour <= 11))),\n",
    "        ('day', as_flag((hour >= 12) & (hour <= 18))),\n",
    "        ('evening', as_flag((hour >= 19) & (hour <= 23))),\n",
    "        ('summer', as_flag((month >= 6) & (month <= 8))),\n",
    "        ('number_of_sites', number_of_sites),\n",
    "        ('time_per_site', time_per_site),\n",
    "        ('alice_hours', as_flag(hour.isin(alice_hour))),\n",
    "    ]\n",
    "\n",
    "    if add_hour:\n",
    "        # We'll do it right and scale hour dividing by 24\n",
    "        named_columns.append(('hour', hour.values.reshape(-1, 1) / 24))\n",
    "\n",
    "    feature_names = [name for name, _ in named_columns]\n",
    "    X = hstack([X_sparse] + [column for _, column in named_columns])\n",
    "    return X, feature_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Time-of-day / season / session-size features; the raw 'hour' column is left out (add_hour=False)\n",
    "X_train_with_times2, new_feat_names = add_time_features(\n",
    "    times=train_times, X_sparse=X_train_sites, add_hour=False)\n",
    "# The test matrix gets the same columns; its returned name list is discarded\n",
    "X_test_with_times2, _ = add_time_features(\n",
    "    times=test_times, X_sparse=X_test_sites, add_hour=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Raw session length in milliseconds: latest minus earliest timestamp of each session\n",
    "train_durations, test_durations = (\n",
    "    (session_times.max(axis=1) - session_times.min(axis=1)).astype('timedelta64[ms]').astype(int)\n",
    "    for session_times in (train_times, test_times)\n",
    ")\n",
    "\n",
    "# Standardise with statistics learned from the training sessions only, then reuse them for test\n",
    "scaler = StandardScaler()\n",
    "train_dur_scaled = scaler.fit_transform(train_durations.values.reshape(-1, 1))\n",
    "test_dur_scaled = scaler.transform(test_durations.values.reshape(-1, 1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Append the standardised session duration as one more column on the right\n",
    "X_train_with_time_correct = hstack([X_train_with_times2, train_dur_scaled])\n",
    "X_test_with_time_correct = hstack([X_test_with_times2, test_dur_scaled])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "def add_day_month(times, X_sparse):\n",
    "    \"\"\"Append the weekday and a year-month trend feature to a sparse matrix.\n",
    "\n",
    "    Despite the function's name, no separate month column is added: the month\n",
    "    only enters through 'year_month'.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    times : DataFrame whose 'time1' column holds the session start timestamps.\n",
    "    X_sparse : matrix with one row per session; the new columns go to its right.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    X : X_sparse followed by the two new columns.\n",
    "    feature_names : ['day_of_week', 'year_month'], matching the stacking order.\n",
    "    \"\"\"\n",
    "    start = times['time1']\n",
    "\n",
    "    # Monday = 0 ... Sunday = 6\n",
    "    day_of_week = start.dt.weekday.values.reshape(-1, 1)\n",
    "\n",
    "    # Linear trend: time in a form YYYYMM, we'll divide by 1e5 to scale this feature\n",
    "    year_month = (100 * start.dt.year + start.dt.month).values.reshape(-1, 1) / 1e5\n",
    "\n",
    "    feature_names = ['day_of_week', 'year_month']\n",
    "    X = hstack([X_sparse, day_of_week, year_month])\n",
    "    return X, feature_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Final design matrices: the previous features plus weekday and the year-month trend\n",
    "X_train_final, more_feat_names = add_day_month(train_times, X_train_with_time_correct)\n",
    "# Only the matrix is needed for test; the returned names are discarded\n",
    "X_test_final, _ = add_day_month(test_times, X_test_with_time_correct)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CV scores [0.87797528 0.84368018 0.91704513 0.95379435 0.95888487 0.95544836\n",
      " 0.94341706 0.9313181  0.97352958 0.97330229]\n",
      "CV mean: 0.9328395194045406, CV std: 0.040304685025825134\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <style>\n",
       "    table.eli5-weights tr:hover {\n",
       "        filter: brightness(85%);\n",
       "    }\n",
       "</style>\n",
       "\n",
       "\n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "        \n",
       "\n",
       "    \n",
       "\n",
       "        \n",
       "            \n",
       "                \n",
       "                \n",
       "    \n",
       "        <p style=\"margin-bottom: 0.5em; margin-top: 0em\">\n",
       "            <b>\n",
       "    \n",
       "        y=1\n",
       "    \n",
       "</b>\n",
       "\n",
       "top features\n",
       "        </p>\n",
       "    \n",
       "    <table class=\"eli5-weights\"\n",
       "           style=\"border-collapse: collapse; border: none; margin-top: 0em; table-layout: auto; margin-bottom: 2em;\">\n",
       "        <thead>\n",
       "        <tr style=\"border: none;\">\n",
       "            \n",
       "                <th style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\" title=\"Feature weights. Note that weights do not account for feature value scales, so if feature values have different scales, features with highest weights might not be the most important.\">\n",
       "                    Weight<sup>?</sup>\n",
       "                </th>\n",
       "            \n",
       "            <th style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">Feature</th>\n",
       "            \n",
       "        </tr>\n",
       "        </thead>\n",
       "        <tbody>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 80.00%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +5.226\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        cid-ed6c3e6a5c6608a4.users.storage.live.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 80.79%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.933\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        melty.fr\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 81.66%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.619\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        audienceinsights.net\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 82.59%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.288\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        info-jeunes.net\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 82.61%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.281\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        r4---sn-gxo5uxg-jqbe.googlevideo.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 82.86%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.192\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        vk.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 83.06%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.124\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        youwatch.org\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 83.28%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.045\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        banque-chalus.fr\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 83.60%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.936\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        video.tt\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 84.56%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.610\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        fr.glee.wikia.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 84.72%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.559\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        i1.ytimg.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 84.75%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.548\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        s.videostep.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 84.83%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.521\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        api.bing.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.26%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.379\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        www35.glam.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.27%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.376\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        r1---sn-gxo5uxg-jqbe.googlevideo.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.36%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.346\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        alice_hours\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.38%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.340\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        reviewer.lavoixdunord.fr\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.48%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.309\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        dub119.mail.live.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.59%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.272\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        browser-update.org\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.64%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.255\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        demotivateur.disqus.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.74%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.224\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        r3---sn-gxo5uxg-jqbe.googlevideo.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.80%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.203\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        demotivateur.fr\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.81%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.201\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        media.melty.fr\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.93%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.164\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        media-1.melty.fr\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 86.08%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.115\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        kelbillet.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 86.09%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.110\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        jeux.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 86.46%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +2.995\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        regarder-film-gratuit.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 86.46%); border: none;\">\n",
       "                <td colspan=\"2\" style=\"padding: 0 0.5em 0 0.5em; text-align: center; border: none; white-space: nowrap;\">\n",
       "                    <i>&hellip; 3200 more positive &hellip;</i>\n",
       "                </td>\n",
       "            </tr>\n",
       "        \n",
       "\n",
       "        \n",
       "            <tr style=\"background-color: hsl(0, 100.00%, 85.27%); border: none;\">\n",
       "                <td colspan=\"2\" style=\"padding: 0 0.5em 0 0.5em; text-align: center; border: none; white-space: nowrap;\">\n",
       "                    <i>&hellip; 21781 more negative &hellip;</i>\n",
       "                </td>\n",
       "            </tr>\n",
       "        \n",
       "        \n",
       "            <tr style=\"background-color: hsl(0, 100.00%, 85.27%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        -3.378\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        year_month\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(0, 100.00%, 82.92%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        -4.172\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        mail.google.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(0, 100.00%, 80.28%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        -5.122\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        plus.google.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "\n",
       "        </tbody>\n",
       "    </table>\n",
       "\n",
       "            \n",
       "        \n",
       "\n",
       "        \n",
       "\n",
       "\n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "\n",
       "\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "New feature weights:\n",
      "           feature      coef\n",
      "0           summer -0.773439\n",
      "1          morning  0.228250\n",
      "2              day -1.009819\n",
      "3          evening -2.221441\n",
      "4  number_of_sites  0.098524\n",
      "5    time_per_site -0.232480\n",
      "6      alice_hours  3.346274\n",
      "7    sess_duration -0.145808\n",
      "8      day_of_week -0.313351\n",
      "9       year_month -3.377683\n"
     ]
    }
   ],
   "source": [
    "# Names are concatenated in the order the columns were stacked:\n",
    "# time features, then the scaled session duration, then weekday / year-month.\n",
    "cv_scores6 = train_and_predict(model=logit, X_train=X_train_final, y_train=y_train, \n",
    "                               X_test=X_test_final, \n",
    "                               site_feature_names=vectorizer.get_feature_names(),\n",
    "                               new_feature_names=new_feat_names + ['sess_duration'] + more_feat_names,\n",
    "                               cv=time_split, submission_file_name='alice_subm.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Candidate values of C: 20 log-spaced points from 1e-3 to 10\n",
    "# (the range was already narrowed down beforehand)\n",
    "c_values = np.logspace(-3, 1, 20)\n",
    "\n",
    "# Tune C for the baseline model by ROC AUC on the same CV splits, using all available cores\n",
    "logit_grid_searcher = GridSearchCV(estimator=logit, param_grid={'C': c_values},\n",
    "                                  scoring='roc_auc', n_jobs=-1, cv=time_split, verbose=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 10 folds for each of 20 candidates, totalling 200 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 6 concurrent workers.\n",
      "[Parallel(n_jobs=-1)]: Done  38 tasks      | elapsed:   15.7s\n",
      "[Parallel(n_jobs=-1)]: Done 188 tasks      | elapsed:  2.7min\n",
      "[Parallel(n_jobs=-1)]: Done 200 out of 200 | elapsed:  3.2min finished\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 3min 22s\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=TimeSeriesSplit(max_train_size=None, n_splits=10),\n",
       "             error_score='raise-deprecating',\n",
       "             estimator=LogisticRegression(C=1, class_weight=None, dual=False,\n",
       "                                          fit_intercept=True,\n",
       "                                          intercept_scaling=1, l1_ratio=None,\n",
       "                                          max_iter=100, multi_class='warn',\n",
       "                                          n_jobs=None, penalty='l2',\n",
       "                                          random_state=17, solver='liblinear',\n",
       "                                          tol=0.0001, verbose=0,\n",
       "                                          warm_start=False),\n",
       "             iid='wa...\n",
       "             param_grid={'C': array([1.00000000e-03, 1.62377674e-03, 2.63665090e-03, 4.28133240e-03,\n",
       "       6.95192796e-03, 1.12883789e-02, 1.83298071e-02, 2.97635144e-02,\n",
       "       4.83293024e-02, 7.84759970e-02, 1.27427499e-01, 2.06913808e-01,\n",
       "       3.35981829e-01, 5.45559478e-01, 8.85866790e-01, 1.43844989e+00,\n",
       "       2.33572147e+00, 3.79269019e+00, 6.15848211e+00, 1.00000000e+01])},\n",
       "             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,\n",
       "             scoring='roc_auc', verbose=1)"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "%%time\n",
    "# Run the whole grid search (20 candidates x 10 folds); with refit=True the best model is refit afterwards\n",
    "logit_grid_searcher.fit(X_train_final, y_train); "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.9350360716974566, {'C': 2.3357214690901213})"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Best mean cross-validated ROC AUC and the value of C that achieved it\n",
    "logit_grid_searcher.best_score_, logit_grid_searcher.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Estimator refit by the grid search with the best C (the searcher was created with refit=True)\n",
    "final_model = logit_grid_searcher.best_estimator_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CV scores [0.89322821 0.82687777 0.92224881 0.9597258  0.95912693 0.95437731\n",
      " 0.94961893 0.93985813 0.97042058 0.97487824]\n",
      "CV mean: 0.9350360716974567, CV std: 0.04271889387982625\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "\n",
       "    <style>\n",
       "    table.eli5-weights tr:hover {\n",
       "        filter: brightness(85%);\n",
       "    }\n",
       "</style>\n",
       "\n",
       "\n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "        \n",
       "\n",
       "    \n",
       "\n",
       "        \n",
       "            \n",
       "                \n",
       "                \n",
       "    \n",
       "        <p style=\"margin-bottom: 0.5em; margin-top: 0em\">\n",
       "            <b>\n",
       "    \n",
       "        y=1\n",
       "    \n",
       "</b>\n",
       "\n",
       "top features\n",
       "        </p>\n",
       "    \n",
       "    <table class=\"eli5-weights\"\n",
       "           style=\"border-collapse: collapse; border: none; margin-top: 0em; table-layout: auto; margin-bottom: 2em;\">\n",
       "        <thead>\n",
       "        <tr style=\"border: none;\">\n",
       "            \n",
       "                <th style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\" title=\"Feature weights. Note that weights do not account for feature value scales, so if feature values have different scales, features with highest weights might not be the most important.\">\n",
       "                    Weight<sup>?</sup>\n",
       "                </th>\n",
       "            \n",
       "            <th style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">Feature</th>\n",
       "            \n",
       "        </tr>\n",
       "        </thead>\n",
       "        <tbody>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 80.00%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +8.240\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        cid-ed6c3e6a5c6608a4.users.storage.live.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 84.55%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +5.699\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        banque-chalus.fr\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 84.55%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +5.698\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        melty.fr\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.32%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +5.298\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        video.tt\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.54%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +5.182\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        audienceinsights.net\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.58%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +5.166\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        r4---sn-gxo5uxg-jqbe.googlevideo.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.87%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +5.015\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        browser-update.org\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 85.94%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.978\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        info-jeunes.net\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 86.41%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.744\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        demotivateur.disqus.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 86.68%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.611\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        fr.glee.wikia.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 86.83%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.537\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        s.videostep.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 86.87%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.515\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        vk.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 86.87%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.515\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        reviewer.lavoixdunord.fr\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 86.90%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.501\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        jeux.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 87.06%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.423\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        youwatch.org\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 87.12%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.396\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        api.bing.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 87.19%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.359\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        dub119.mail.live.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 87.40%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.258\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        clermont-filmfest.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 87.43%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.244\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        kelbillet.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 87.61%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.159\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        wwwstats.brgm.fr\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 87.66%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.133\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        www35.glam.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 87.68%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.125\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        regarder-film-gratuit.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 87.72%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.107\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        rhonealpesjob.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 87.90%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +4.019\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        r3---sn-gxo5uxg-jqbe.googlevideo.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 88.04%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.952\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        media-1.melty.fr\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 88.13%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.911\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        static.programme-tv.net\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 88.30%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        +3.829\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        bbc.co.uk\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "        \n",
       "            <tr style=\"background-color: hsl(120, 100.00%, 88.30%); border: none;\">\n",
       "                <td colspan=\"2\" style=\"padding: 0 0.5em 0 0.5em; text-align: center; border: none; white-space: nowrap;\">\n",
       "                    <i>&hellip; 3182 more positive &hellip;</i>\n",
       "                </td>\n",
       "            </tr>\n",
       "        \n",
       "\n",
       "        \n",
       "            <tr style=\"background-color: hsl(0, 100.00%, 88.03%); border: none;\">\n",
       "                <td colspan=\"2\" style=\"padding: 0 0.5em 0 0.5em; text-align: center; border: none; white-space: nowrap;\">\n",
       "                    <i>&hellip; 21799 more negative &hellip;</i>\n",
       "                </td>\n",
       "            </tr>\n",
       "        \n",
       "        \n",
       "            <tr style=\"background-color: hsl(0, 100.00%, 88.03%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        -3.958\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        year_month\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(0, 100.00%, 84.26%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        -5.853\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        mail.google.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "            <tr style=\"background-color: hsl(0, 100.00%, 81.36%); border: none;\">\n",
       "    <td style=\"padding: 0 1em 0 0.5em; text-align: right; border: none;\">\n",
       "        -7.453\n",
       "    </td>\n",
       "    <td style=\"padding: 0 0.5em 0 0.5em; text-align: left; border: none;\">\n",
       "        plus.google.com\n",
       "    </td>\n",
       "    \n",
       "</tr>\n",
       "        \n",
       "\n",
       "        </tbody>\n",
       "    </table>\n",
       "\n",
       "            \n",
       "        \n",
       "\n",
       "        \n",
       "\n",
       "\n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "    \n",
       "\n",
       "\n",
       "\n"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "New feature weights:\n",
      "           feature      coef\n",
      "0           summer -0.643810\n",
      "1          morning  0.287778\n",
      "2              day -1.342091\n",
      "3          evening -2.822577\n",
      "4  number_of_sites  0.173273\n",
      "5    time_per_site -0.205600\n",
      "6      alice_hours  3.471187\n",
      "7    sess_duration -0.127744\n",
      "8      day_of_week -0.315660\n",
      "9       year_month -3.957884\n"
     ]
    }
   ],
   "source": [
    "# Final run: score `final_model` on X_train_final with the `time_split` CV scheme.\n",
    "# `train_and_predict` is defined earlier in the notebook; judging by the file-name\n",
    "# argument it presumably also writes the test-set submission -- confirm there.\n",
    "cv_scores7 = train_and_predict(\n",
    "    model=final_model,\n",
    "    X_train=X_train_final,\n",
    "    y_train=y_train,\n",
    "    X_test=X_test_final,\n",
    "    # NOTE(review): get_feature_names() is deprecated in newer scikit-learn\n",
    "    # (replaced by get_feature_names_out()); kept for this notebook's environment.\n",
    "    site_feature_names=vectorizer.get_feature_names(),\n",
    "    # Engineered-feature names, in the order shown in the printed weights table.\n",
    "    new_feature_names=new_feat_names + ['sess_duration'] + more_feat_names,\n",
    "    cv=time_split,\n",
    "    submission_file_name='alice_subm_final.csv',\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}