{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-26T10:26:50.542194Z",
     "iopub.status.busy": "2021-07-26T10:26:50.541490Z",
     "iopub.status.idle": "2021-07-26T10:32:22.198836Z",
     "shell.execute_reply": "2021-07-26T10:32:22.199761Z",
     "shell.execute_reply.started": "2021-07-26T10:20:24.368850Z"
    },
    "papermill": {
     "duration": 331.690361,
     "end_time": "2021-07-26T10:32:22.200320",
     "exception": false,
     "start_time": "2021-07-26T10:26:50.509959",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style type='text/css'>\n",
       ".datatable table.frame { margin-bottom: 0; }\n",
       ".datatable table.frame thead { border-bottom: none; }\n",
       ".datatable table.frame tr.coltypes td {  color: #FFFFFF;  line-height: 6px;  padding: 0 0.5em;}\n",
       ".datatable .bool    { background: #DDDD99; }\n",
       ".datatable .object  { background: #565656; }\n",
       ".datatable .int     { background: #5D9E5D; }\n",
       ".datatable .float   { background: #4040CC; }\n",
       ".datatable .str     { background: #CC4040; }\n",
       ".datatable .time    { background: #40CC40; }\n",
       ".datatable .row_index {  background: var(--jp-border-color3);  border-right: 1px solid var(--jp-border-color0);  color: var(--jp-ui-font-color3);  font-size: 9px;}\n",
       ".datatable .frame tbody td { text-align: left; }\n",
       ".datatable .frame tr.coltypes .row_index {  background: var(--jp-border-color0);}\n",
       ".datatable th:nth-child(2) { padding-left: 12px; }\n",
       ".datatable .hellipsis {  color: var(--jp-cell-editor-border-color);}\n",
       ".datatable .vellipsis {  background: var(--jp-layout-color0);  color: var(--jp-cell-editor-border-color);}\n",
       ".datatable .na {  color: var(--jp-cell-editor-border-color);  font-size: 80%;}\n",
       ".datatable .sp {  opacity: 0.25;}\n",
       ".datatable .footer { font-size: 9px; }\n",
       ".datatable .frame_dimensions {  background: var(--jp-border-color3);  border-top: 1px solid var(--jp-border-color0);  color: var(--jp-ui-font-color3);  display: inline-block;  opacity: 0.6;  padding: 1px 10px 1px 5px;}\n",
       "</style>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "========== train_updated.csv load ==========\n",
      "********** this is rosters **********\n",
      "********** this is nextDayPlayerEngagement **********\n",
      "********** this is playerBoxScores **********\n",
      "CPU times: user 4min 38s, sys: 14.7 s, total: 4min 52s\n",
      "Wall time: 5min 31s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pathlib import Path\n",
    "from sklearn.metrics import mean_absolute_error\n",
    "from datetime import timedelta\n",
    "from functools import reduce\n",
    "from tqdm import tqdm\n",
    "import lightgbm as lgbm\n",
    "import mlb\n",
    "import os\n",
    "\n",
    "import gc\n",
    "\n",
    "\n",
    "BASE_DIR = Path('../input/mlb-player-digital-engagement-forecasting')\n",
    "#train = pd.read_csv(BASE_DIR / 'train_updated.csv')\n",
    "if os.path.isfile(BASE_DIR / 'train_updated.csv'):\n",
    "    train = pd.read_csv(BASE_DIR / 'train_updated.csv')\n",
    "    print(10*'=','train_updated.csv','load',10*'=')\n",
    "else:\n",
    "    train = pd.read_csv(BASE_DIR / 'train.csv')\n",
    "    print(10*'=','train.csv','load',10*'=')\n",
    "            \n",
    "null = np.nan\n",
    "true = True\n",
    "false = False\n",
    "\n",
    "for col in ['rosters','nextDayPlayerEngagement','playerBoxScores']:\n",
    "    print(10*'*','this is',col,10*'*')\n",
    "    if col == 'date': continue\n",
    "\n",
    "    _index = train[col].notnull()\n",
    "    train.loc[_index, col] = train.loc[_index, col].apply(lambda x: eval(x))\n",
    "\n",
    "    outputs = []\n",
    "    for index, date, record in train.loc[_index, ['date', col]].itertuples():\n",
    "        _df = pd.DataFrame(record)\n",
    "        _df['index'] = index\n",
    "        _df['date'] = date\n",
    "        outputs.append(_df)\n",
    "\n",
    "    outputs = pd.concat(outputs).reset_index(drop=True)\n",
    "\n",
    "    outputs.to_csv(f'{col}_train.csv', index=False)\n",
    "    outputs.to_pickle(f'{col}_train.pkl')\n",
    "\n",
    "    del outputs\n",
    "    del train[col]\n",
    "    gc.collect()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-26T10:32:22.246845Z",
     "iopub.status.busy": "2021-07-26T10:32:22.246101Z",
     "iopub.status.idle": "2021-07-26T10:32:23.960190Z",
     "shell.execute_reply": "2021-07-26T10:32:23.959646Z",
     "shell.execute_reply.started": "2021-07-26T10:26:02.194504Z"
    },
    "papermill": {
     "duration": 1.737149,
     "end_time": "2021-07-26T10:32:23.960338",
     "exception": false,
     "start_time": "2021-07-26T10:32:22.223189",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "BASE_DIR = Path('../input/mlb-player-digital-engagement-forecasting')\n",
    "TRAIN_DIR = Path('./')\n",
    "\n",
    "players = pd.read_csv(BASE_DIR / 'players.csv')\n",
    "\n",
    "rosters = pd.read_pickle(TRAIN_DIR / 'rosters_train.pkl')\n",
    "targets = pd.read_pickle(TRAIN_DIR / 'nextDayPlayerEngagement_train.pkl')\n",
    "scores = pd.read_pickle(TRAIN_DIR / 'playerBoxScores_train.pkl')\n",
    "scores = scores.groupby(['playerId', 'date']).sum().reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-26T10:32:24.013823Z",
     "iopub.status.busy": "2021-07-26T10:32:24.007884Z",
     "iopub.status.idle": "2021-07-26T10:32:24.017126Z",
     "shell.execute_reply": "2021-07-26T10:32:24.016396Z"
    },
    "papermill": {
     "duration": 0.040202,
     "end_time": "2021-07-26T10:32:24.017280",
     "exception": false,
     "start_time": "2021-07-26T10:32:23.977078",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "targets_cols = ['playerId', 'target1', 'target2', 'target3', 'target4', 'date']\n",
    "players_cols = ['playerId', 'primaryPositionName','heightInches','weight']\n",
    "rosters_cols = ['playerId', 'teamId', 'status', 'date']\n",
    "scores_cols = ['playerId', 'battingOrder', 'gamesPlayedBatting', 'flyOuts',\n",
    "       'groundOuts', 'runsScored', 'doubles', 'triples', 'homeRuns',\n",
    "       'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch',\n",
    "       'atBats', 'caughtStealing', 'stolenBases', 'groundIntoDoublePlay',\n",
    "       'groundIntoTriplePlay', 'plateAppearances', 'totalBases', 'rbi',\n",
    "       'leftOnBase', 'sacBunts', 'sacFlies', 'catchersInterference',\n",
    "       'pickoffs', 'gamesPlayedPitching', 'gamesStartedPitching',\n",
    "       'completeGamesPitching', 'shutoutsPitching', 'winsPitching',\n",
    "       'lossesPitching', 'flyOutsPitching', 'airOutsPitching',\n",
    "       'groundOutsPitching', 'runsPitching', 'doublesPitching',\n",
    "       'triplesPitching', 'homeRunsPitching', 'strikeOutsPitching',\n",
    "       'baseOnBallsPitching', 'intentionalWalksPitching', 'hitsPitching',\n",
    "       'hitByPitchPitching', 'atBatsPitching', 'caughtStealingPitching',\n",
    "       'stolenBasesPitching', 'inningsPitched', 'saveOpportunities',\n",
    "       'earnedRuns', 'battersFaced', 'outsPitching', 'pitchesThrown', 'balls',\n",
    "       'strikes', 'hitBatsmen', 'balks', 'wildPitches', 'pickoffsPitching',\n",
    "       'rbiPitching', 'gamesFinishedPitching', 'inheritedRunners',\n",
    "       'inheritedRunnersScored', 'catchersInterferencePitching',\n",
    "       'sacBuntsPitching', 'sacFliesPitching', 'saves', 'holds', 'blownSaves',\n",
    "       'assists', 'putOuts', 'errors', 'chances', 'date']\n",
    "\n",
    "feature_cols = ['label_playerId', 'label_primaryPositionName', 'label_teamId',\n",
    "       'label_status', 'battingOrder', 'gamesPlayedBatting', 'flyOuts',\n",
    "       'groundOuts', 'runsScored', 'doubles', 'triples', 'homeRuns',\n",
    "       'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch',\n",
    "       'atBats', 'caughtStealing', 'stolenBases', 'groundIntoDoublePlay',\n",
    "       'groundIntoTriplePlay', 'plateAppearances', 'totalBases', 'rbi',\n",
    "       'leftOnBase', 'sacBunts', 'sacFlies', 'catchersInterference',\n",
    "       'pickoffs', 'gamesPlayedPitching', 'gamesStartedPitching',\n",
    "       'completeGamesPitching', 'shutoutsPitching', 'winsPitching',\n",
    "       'lossesPitching', 'flyOutsPitching', 'airOutsPitching',\n",
    "       'groundOutsPitching', 'runsPitching', 'doublesPitching',\n",
    "       'triplesPitching', 'homeRunsPitching', 'strikeOutsPitching',\n",
    "       'baseOnBallsPitching', 'intentionalWalksPitching', 'hitsPitching',\n",
    "       'hitByPitchPitching', 'atBatsPitching', 'caughtStealingPitching',\n",
    "       'stolenBasesPitching', 'inningsPitched', 'saveOpportunities',\n",
    "       'earnedRuns', 'battersFaced', 'outsPitching', 'pitchesThrown', 'balls',\n",
    "       'strikes', 'hitBatsmen', 'balks', 'wildPitches', 'pickoffsPitching',\n",
    "       'rbiPitching', 'gamesFinishedPitching', 'inheritedRunners',\n",
    "       'inheritedRunnersScored', 'catchersInterferencePitching',\n",
    "       'sacBuntsPitching', 'sacFliesPitching', 'saves', 'holds', 'blownSaves',\n",
    "       'assists', 'putOuts', 'errors', 'chances','target1_mean',\n",
    " 'target1_median',\n",
    " 'target1_std',\n",
    " 'target1_min',\n",
    " 'target1_max',\n",
    " 'target1_prob',\n",
    " 'target2_mean',\n",
    " 'target2_median',\n",
    " 'target2_std',\n",
    " 'target2_min',\n",
    " 'target2_max',\n",
    " 'target2_prob',\n",
    " 'target3_mean',\n",
    " 'target3_median',\n",
    " 'target3_std',\n",
    " 'target3_min',\n",
    " 'target3_max',\n",
    " 'target3_prob',\n",
    " 'target4_mean',\n",
    " 'target4_median',\n",
    " 'target4_std',\n",
    " 'target4_min',\n",
    " 'target4_max',\n",
    " 'target4_prob']\n",
    "feature_cols2 = ['label_playerId', 'label_primaryPositionName', 'label_teamId',\n",
    "       'label_status', 'battingOrder', 'gamesPlayedBatting', 'flyOuts',\n",
    "       'groundOuts', 'runsScored', 'doubles', 'triples', 'homeRuns',\n",
    "       'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch',\n",
    "       'atBats', 'caughtStealing', 'stolenBases', 'groundIntoDoublePlay',\n",
    "       'groundIntoTriplePlay', 'plateAppearances', 'totalBases', 'rbi',\n",
    "       'leftOnBase', 'sacBunts', 'sacFlies', 'catchersInterference',\n",
    "       'pickoffs', 'gamesPlayedPitching', 'gamesStartedPitching',\n",
    "       'completeGamesPitching', 'shutoutsPitching', 'winsPitching',\n",
    "       'lossesPitching', 'flyOutsPitching', 'airOutsPitching',\n",
    "       'groundOutsPitching', 'runsPitching', 'doublesPitching',\n",
    "       'triplesPitching', 'homeRunsPitching', 'strikeOutsPitching',\n",
    "       'baseOnBallsPitching', 'intentionalWalksPitching', 'hitsPitching',\n",
    "       'hitByPitchPitching', 'atBatsPitching', 'caughtStealingPitching',\n",
    "       'stolenBasesPitching', 'inningsPitched', 'saveOpportunities',\n",
    "       'earnedRuns', 'battersFaced', 'outsPitching', 'pitchesThrown', 'balls',\n",
    "       'strikes', 'hitBatsmen', 'balks', 'wildPitches', 'pickoffsPitching',\n",
    "       'rbiPitching', 'gamesFinishedPitching', 'inheritedRunners',\n",
    "       'inheritedRunnersScored', 'catchersInterferencePitching',\n",
    "       'sacBuntsPitching', 'sacFliesPitching', 'saves', 'holds', 'blownSaves',\n",
    "       'assists', 'putOuts', 'errors', 'chances','target1_mean',\n",
    " 'target1_median',\n",
    " 'target1_std',\n",
    " 'target1_min',\n",
    " 'target1_max',\n",
    " 'target1_prob',\n",
    " 'target2_mean',\n",
    " 'target2_median',\n",
    " 'target2_std',\n",
    " 'target2_min',\n",
    " 'target2_max',\n",
    " 'target2_prob',\n",
    " 'target3_mean',\n",
    " 'target3_median',\n",
    " 'target3_std',\n",
    " 'target3_min',\n",
    " 'target3_max',\n",
    " 'target3_prob',\n",
    " 'target4_mean',\n",
    " 'target4_median',\n",
    " 'target4_std',\n",
    " 'target4_min',\n",
    " 'target4_max',\n",
    " 'target4_prob',\n",
    "    'target1']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-26T10:32:24.057009Z",
     "iopub.status.busy": "2021-07-26T10:32:24.056180Z",
     "iopub.status.idle": "2021-07-26T10:32:24.136934Z",
     "shell.execute_reply": "2021-07-26T10:32:24.136353Z"
    },
    "papermill": {
     "duration": 0.10236,
     "end_time": "2021-07-26T10:32:24.137088",
     "exception": false,
     "start_time": "2021-07-26T10:32:24.034728",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['playerId',\n",
       " 'target1_mean',\n",
       " 'target1_median',\n",
       " 'target1_std',\n",
       " 'target1_min',\n",
       " 'target1_max',\n",
       " 'target1_prob',\n",
       " 'target2_mean',\n",
       " 'target2_median',\n",
       " 'target2_std',\n",
       " 'target2_min',\n",
       " 'target2_max',\n",
       " 'target2_prob',\n",
       " 'target3_mean',\n",
       " 'target3_median',\n",
       " 'target3_std',\n",
       " 'target3_min',\n",
       " 'target3_max',\n",
       " 'target3_prob',\n",
       " 'target4_mean',\n",
       " 'target4_median',\n",
       " 'target4_std',\n",
       " 'target4_min',\n",
       " 'target4_max',\n",
       " 'target4_prob']"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "player_target_stats = pd.read_csv(\"../input/my-player-target-stat/player_target_stats.csv\")\n",
    "data_names=player_target_stats.columns.values.tolist()\n",
    "data_names"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-26T10:32:24.183998Z",
     "iopub.status.busy": "2021-07-26T10:32:24.182305Z",
     "iopub.status.idle": "2021-07-26T10:32:33.730454Z",
     "shell.execute_reply": "2021-07-26T10:32:33.729892Z"
    },
    "papermill": {
     "duration": 9.576279,
     "end_time": "2021-07-26T10:32:33.730615",
     "exception": false,
     "start_time": "2021-07-26T10:32:24.154336",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "# creat dataset\n",
    "train = targets[targets_cols].merge(players[players_cols], on=['playerId'], how='left')\n",
    "train = train.merge(rosters[rosters_cols], on=['playerId', 'date'], how='left')\n",
    "train = train.merge(scores[scores_cols], on=['playerId', 'date'], how='left')\n",
    "train = train.merge(player_target_stats, how='inner', left_on=[\"playerId\"],right_on=[\"playerId\"])\n",
    "\n",
    "\n",
    "# label encoding\n",
    "player2num = {c: i for i, c in enumerate(train['playerId'].unique())}\n",
    "position2num = {c: i for i, c in enumerate(train['primaryPositionName'].unique())}\n",
    "teamid2num = {c: i for i, c in enumerate(train['teamId'].unique())}\n",
    "status2num = {c: i for i, c in enumerate(train['status'].unique())}\n",
    "train['label_playerId'] = train['playerId'].map(player2num)\n",
    "train['label_primaryPositionName'] = train['primaryPositionName'].map(position2num)\n",
    "train['label_teamId'] = train['teamId'].map(teamid2num)\n",
    "train['label_status'] = train['status'].map(status2num)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-26T10:32:33.774093Z",
     "iopub.status.busy": "2021-07-26T10:32:33.773362Z",
     "iopub.status.idle": "2021-07-26T10:32:39.909449Z",
     "shell.execute_reply": "2021-07-26T10:32:39.908813Z"
    },
    "papermill": {
     "duration": 6.161649,
     "end_time": "2021-07-26T10:32:39.909604",
     "exception": false,
     "start_time": "2021-07-26T10:32:33.747955",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "train_X = train[feature_cols]\n",
    "train_y = train[['target1', 'target2', 'target3', 'target4']]\n",
    "\n",
    "#_index = (train['date'] < 20210401)\n",
    "_index = ((train['date'] > 20200529) & (train['date'] <= 20200831)) | ((train['date'] > 20190529) & (train['date'] <= 20190831)) | ((train['date'] > 20180529) & (train['date'] <= 20180831))\n",
    "x_train1 = train_X.loc[~_index].reset_index(drop=True)\n",
    "y_train1 = train_y.loc[~_index].reset_index(drop=True)\n",
    "x_valid1 = train_X.loc[_index].reset_index(drop=True)\n",
    "y_valid1 = train_y.loc[_index].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-26T10:32:41.209923Z",
     "iopub.status.busy": "2021-07-26T10:32:41.208914Z",
     "iopub.status.idle": "2021-07-26T10:32:43.873607Z",
     "shell.execute_reply": "2021-07-26T10:32:43.873047Z"
    },
    "papermill": {
     "duration": 3.945406,
     "end_time": "2021-07-26T10:32:43.873756",
     "exception": false,
     "start_time": "2021-07-26T10:32:39.928350",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "train_X = train[feature_cols2]\n",
    "train_y = train[['target1', 'target2', 'target3', 'target4']]\n",
    "\n",
    "#_index = (train['date'] < 20210401)\n",
    "x_train2 = train_X.loc[~_index].reset_index(drop=True)\n",
    "y_train2 = train_y.loc[~_index].reset_index(drop=True)\n",
    "x_valid2 = train_X.loc[_index].reset_index(drop=True)\n",
    "y_valid2 = train_y.loc[_index].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
     "duration": 0.016809,
     "end_time": "2021-07-26T10:32:43.907896",
     "exception": false,
     "start_time": "2021-07-26T10:32:43.891087",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# LGB"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-26T10:32:43.951312Z",
     "iopub.status.busy": "2021-07-26T10:32:43.950568Z",
     "iopub.status.idle": "2021-07-26T11:14:16.906091Z",
     "shell.execute_reply": "2021-07-26T11:14:16.906631Z"
    },
    "papermill": {
     "duration": 2492.981885,
     "end_time": "2021-07-26T11:14:16.906885",
     "exception": false,
     "start_time": "2021-07-26T10:32:43.925000",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[LightGBM] [Warning] feature_fraction is set=0.8101240539122566, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.8101240539122566\n",
      "[LightGBM] [Warning] bagging_freq is set=8, subsample_freq=0 will be ignored. Current value: bagging_freq=8\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.8884451442950513, subsample=1.0 will be ignored. Current value: bagging_fraction=0.8884451442950513\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "[100]\tvalid_0's l1: 0.603681\n",
      "[200]\tvalid_0's l1: 0.602494\n",
      "[300]\tvalid_0's l1: 0.602064\n",
      "[400]\tvalid_0's l1: 0.602072\n",
      "[500]\tvalid_0's l1: 0.601976\n",
      "[600]\tvalid_0's l1: 0.601959\n",
      "[700]\tvalid_0's l1: 0.601676\n",
      "Early stopping, best iteration is:\n",
      "[673]\tvalid_0's l1: 0.601666\n",
      "mae: 0.6016661242531858\n",
      "[LightGBM] [Warning] feature_fraction is set=0.9101240539122566, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.9101240539122566\n",
      "[LightGBM] [Warning] bagging_freq is set=3, subsample_freq=0 will be ignored. Current value: bagging_freq=3\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.9884451442950513, subsample=1.0 will be ignored. Current value: bagging_fraction=0.9884451442950513\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "[100]\tvalid_0's l1: 1.72755\n",
      "[200]\tvalid_0's l1: 1.71505\n",
      "[300]\tvalid_0's l1: 1.71076\n",
      "[400]\tvalid_0's l1: 1.70847\n",
      "[500]\tvalid_0's l1: 1.70694\n",
      "[600]\tvalid_0's l1: 1.70668\n",
      "[700]\tvalid_0's l1: 1.7052\n",
      "[800]\tvalid_0's l1: 1.70479\n",
      "[900]\tvalid_0's l1: 1.70432\n",
      "[1000]\tvalid_0's l1: 1.7037\n",
      "[1100]\tvalid_0's l1: 1.703\n",
      "[1200]\tvalid_0's l1: 1.70253\n",
      "[1300]\tvalid_0's l1: 1.70207\n",
      "[1400]\tvalid_0's l1: 1.702\n",
      "[1500]\tvalid_0's l1: 1.70185\n",
      "[1600]\tvalid_0's l1: 1.7011\n",
      "[1700]\tvalid_0's l1: 1.70071\n",
      "[1800]\tvalid_0's l1: 1.70073\n",
      "[1900]\tvalid_0's l1: 1.70063\n",
      "[2000]\tvalid_0's l1: 1.7004\n",
      "[2100]\tvalid_0's l1: 1.70023\n",
      "[2200]\tvalid_0's l1: 1.69993\n",
      "Early stopping, best iteration is:\n",
      "[2192]\tvalid_0's l1: 1.69993\n",
      "mae: 1.699927207171509\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.5637405128936662, subsample=1.0 will be ignored. Current value: bagging_fraction=0.5637405128936662\n",
      "[LightGBM] [Warning] feature_fraction is set=0.5419185713426886, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.5419185713426886\n",
      "[LightGBM] [Warning] bagging_freq is set=15, subsample_freq=0 will be ignored. Current value: bagging_freq=15\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "[100]\tvalid_0's l1: 0.715969\n",
      "[200]\tvalid_0's l1: 0.714891\n",
      "[300]\tvalid_0's l1: 0.714887\n",
      "[400]\tvalid_0's l1: 0.714884\n",
      "[500]\tvalid_0's l1: 0.714883\n",
      "[600]\tvalid_0's l1: 0.714882\n",
      "[700]\tvalid_0's l1: 0.714879\n",
      "[800]\tvalid_0's l1: 0.714878\n",
      "[900]\tvalid_0's l1: 0.714714\n",
      "[1000]\tvalid_0's l1: 0.714713\n",
      "[1100]\tvalid_0's l1: 0.714712\n",
      "[1200]\tvalid_0's l1: 0.714711\n",
      "[1300]\tvalid_0's l1: 0.714711\n",
      "[1400]\tvalid_0's l1: 0.71471\n",
      "[1500]\tvalid_0's l1: 0.714709\n",
      "[1600]\tvalid_0's l1: 0.714709\n",
      "[1700]\tvalid_0's l1: 0.71455\n",
      "[1800]\tvalid_0's l1: 0.71455\n",
      "[1900]\tvalid_0's l1: 0.714549\n",
      "[2000]\tvalid_0's l1: 0.714549\n",
      "[2100]\tvalid_0's l1: 0.714548\n",
      "[2200]\tvalid_0's l1: 0.714548\n",
      "[2300]\tvalid_0's l1: 0.714547\n",
      "[2400]\tvalid_0's l1: 0.714547\n",
      "[2500]\tvalid_0's l1: 0.714546\n",
      "[2600]\tvalid_0's l1: 0.714546\n",
      "[2700]\tvalid_0's l1: 0.714545\n",
      "[2800]\tvalid_0's l1: 0.714545\n",
      "[2900]\tvalid_0's l1: 0.714544\n",
      "[3000]\tvalid_0's l1: 0.714543\n",
      "[3100]\tvalid_0's l1: 0.714543\n",
      "[3200]\tvalid_0's l1: 0.714543\n",
      "[3300]\tvalid_0's l1: 0.714542\n",
      "[3400]\tvalid_0's l1: 0.714542\n",
      "[3500]\tvalid_0's l1: 0.714543\n",
      "Early stopping, best iteration is:\n",
      "[3473]\tvalid_0's l1: 0.714542\n",
      "mae: 0.7145421805738932\n",
      "[LightGBM] [Warning] feature_fraction is set=0.5419185713426886, colsample_bytree=1.0 will be ignored. Current value: feature_fraction=0.5419185713426886\n",
      "[LightGBM] [Warning] bagging_freq is set=19, subsample_freq=0 will be ignored. Current value: bagging_freq=19\n",
      "[LightGBM] [Warning] bagging_fraction is set=0.2637405128936662, subsample=1.0 will be ignored. Current value: bagging_fraction=0.2637405128936662\n",
      "Training until validation scores don't improve for 100 rounds\n",
      "[100]\tvalid_0's l1: 0.82029\n",
      "[200]\tvalid_0's l1: 0.817858\n",
      "[300]\tvalid_0's l1: 0.816503\n",
      "[400]\tvalid_0's l1: 0.815647\n",
      "[500]\tvalid_0's l1: 0.815143\n",
      "[600]\tvalid_0's l1: 0.814731\n",
      "[700]\tvalid_0's l1: 0.814472\n",
      "[800]\tvalid_0's l1: 0.814144\n",
      "[900]\tvalid_0's l1: 0.813819\n",
      "[1000]\tvalid_0's l1: 0.813664\n",
      "[1100]\tvalid_0's l1: 0.813584\n",
      "Early stopping, best iteration is:\n",
      "[1053]\tvalid_0's l1: 0.813545\n",
      "mae: 0.8135446889692893\n",
      "score: 0.9574200502419693\n"
     ]
    }
   ],
   "source": [
    "def fit_lgbm(x_train, y_train, x_valid, y_valid, params: dict=None, verbose=100):\n",
    "    oof_pred = np.zeros(len(y_valid), dtype=np.float32)\n",
    "    model = lgbm.LGBMRegressor(**params)\n",
    "    model.fit(x_train, y_train, \n",
    "        eval_set=[(x_valid, y_valid)],  \n",
    "        early_stopping_rounds=verbose, \n",
    "        verbose=verbose)\n",
    "    oof_pred = model.predict(x_valid)\n",
    "    score = mean_absolute_error(oof_pred, y_valid)\n",
    "    print('mae:', score)\n",
    "    return oof_pred, model, score\n",
    "\n",
    "\n",
    "params1 = {'objective':'mae',\n",
    "           'reg_alpha': 0.14547461820098767, \n",
    "           'reg_lambda': 0.10185644384043743, \n",
    "           'n_estimators': 3333, \n",
    "           'learning_rate': 0.1046301304430488, \n",
    "           'num_leaves': 674, \n",
    "           'feature_fraction': 0.8101240539122566, \n",
    "           'bagging_fraction': 0.8884451442950513, \n",
    "           'bagging_freq': 8, \n",
    "           'min_child_samples': 51}\n",
    "\n",
    "params2 = {\n",
    " 'objective':'mae',\n",
    "           'reg_alpha': 0.14947461820098767, \n",
    "           'reg_lambda': 0.10185644384043743, \n",
    "           'n_estimators': 3633, \n",
    "           'learning_rate': 0.08046301304430488, \n",
    "           'num_leaves': 64, \n",
    "           'feature_fraction': 0.9101240539122566, \n",
    "           'bagging_fraction': 0.9884451442950513, \n",
    "           'bagging_freq': 3, \n",
    "           'min_child_samples': 15\n",
    "}\n",
    "\n",
    "params4 = {'objective':'mae',\n",
    "           'reg_alpha': 0.016468100279441976, \n",
    "           'reg_lambda': 0.09128335764019105, \n",
    "           'n_estimators': 9868, \n",
    "           'learning_rate': 0.10528150510326864, \n",
    "           'num_leaves': 157, \n",
    "           'feature_fraction': 0.5419185713426886, \n",
    "           'bagging_fraction': 0.2637405128936662, \n",
    "           'bagging_freq': 19, \n",
    "           'min_child_samples': 71}\n",
    "\n",
    "\n",
    "params = {\n",
    " 'objective':'mae',\n",
    "#  'reg_alpha': 0.1,\n",
    "#  'reg_lambda': 0.1, \n",
    " 'n_estimators': 10000,\n",
    " 'learning_rate': 0.1,\n",
    " 'random_state': 2021,\n",
    " \"num_leaves\": 127,\n",
    " 'feature_fraction': 0.5419185713426886, \n",
    " 'bagging_fraction': 0.5637405128936662, \n",
    " 'bagging_freq': 15, \n",
    "}\n",
    "\n",
    "\n",
    "\n",
    "oof1, model1, score1 = fit_lgbm(\n",
    "    x_train1, y_train1['target1'],\n",
    "    x_valid1, y_valid1['target1'],\n",
    "    params1\n",
    " )\n",
    "\n",
    "oof2, model2, score2 = fit_lgbm(\n",
    "    x_train2, y_train2['target2'],\n",
    "    x_valid2, y_valid2['target2'],\n",
    "    params2\n",
    ")\n",
    "\n",
    "oof3, model3, score3 = fit_lgbm(\n",
    "    x_train2, y_train2['target3'],\n",
    "    x_valid2, y_valid2['target3'],\n",
    "   params\n",
    ")\n",
    "\n",
    "oof4, model4, score4 = fit_lgbm(\n",
    "    x_train2, y_train2['target4'],\n",
    "    x_valid2, y_valid2['target4'],\n",
    "    params4\n",
    ")\n",
    "\n",
    "score = (score1+score2+score3+score4) / 4\n",
    "print(f'score: {score}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
     "duration": 0.044368,
     "end_time": "2021-07-26T11:14:16.995082",
     "exception": false,
     "start_time": "2021-07-26T11:14:16.950714",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Cat"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-26T11:14:17.101601Z",
     "iopub.status.busy": "2021-07-26T11:14:17.100913Z",
     "iopub.status.idle": "2021-07-26T11:29:55.386184Z",
     "shell.execute_reply": "2021-07-26T11:29:55.385613Z"
    },
    "papermill": {
     "duration": 938.347381,
     "end_time": "2021-07-26T11:29:55.386423",
     "exception": false,
     "start_time": "2021-07-26T11:14:17.039042",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "********** <_io.BufferedReader name='../input/mlb-lightgbm-training/mymodel_lgb_2.pkl'> **********\n",
      "mae: 1.7495870137280762\n",
      "********** <_io.BufferedReader name='../input/mlb-catboost-training/mymodel_cb_2.pkl'> **********\n",
      "mae: 1.8092167805205939\n",
      "********** <_io.BufferedReader name='../input/mlb-lightgbm-training/mymodel_lgb_1.pkl'> **********\n",
      "mae: 0.613446416791378\n",
      "********** <_io.BufferedReader name='../input/mlb-catboost-training/mymodel_cb_1.pkl'> **********\n",
      "mae: 0.6265251914020156\n",
      "********** <_io.BufferedReader name='../input/mlb-lightgbm-training/mymodel_lgb_3.pkl'> **********\n",
      "mae: 0.7371395237381603\n",
      "********** <_io.BufferedReader name='../input/mlb-catboost-training/mymodel_cb_3.pkl'> **********\n",
      "mae: 0.7406471371942968\n",
      "********** <_io.BufferedReader name='../input/mlb-lightgbm-training/mymodel_lgb_4.pkl'> **********\n",
      "mae: 0.8178008821616221\n",
      "********** <_io.BufferedReader name='../input/mlb-catboost-training/mymodel_cb_4.pkl'> **********\n",
      "mae: 0.846635209540955\n",
      "LightGBM score: 0.9794934591048092\n",
      "Catboost score: 1.0057560796644653\n"
     ]
    }
   ],
   "source": [
    "import pickle\n",
    "from catboost import CatBoostRegressor\n",
    "\n",
    "def fit_lgbm(x_train, y_train, x_valid, y_valid, target, params: dict=None, verbose=100):\n",
    "    oof_pred_lgb = np.zeros(len(y_valid), dtype=np.float32)\n",
    "    oof_pred_cat = np.zeros(len(y_valid), dtype=np.float32)\n",
    "    \n",
    "    if os.path.isfile(f'../input/mlb-lightgbm-training/mymodel_lgb_{target}.pkl'):\n",
    "        with open(f'../input/mlb-lightgbm-training/mymodel_lgb_{target}.pkl', 'rb') as fin:\n",
    "            model = pickle.load(fin)\n",
    "            oof_pred_lgb = model.predict(x_valid)\n",
    "            score_lgb = mean_absolute_error(oof_pred_lgb, y_valid)\n",
    "            print('*'*10,fin,'*'*10)\n",
    "            print('mae:', score_lgb)\n",
    "    else:\n",
    "        with open(f'mymodel_lgb_{target}.pkl', 'wb') as handle:\n",
    "            pickle.dump(model, handle, protocol=pickle.HIGHEST_PROTOCOL)\n",
    "    \n",
    "\n",
    "    \n",
    "    if os.path.isfile(f'../input/mlb-catboost-training/mymodel_cb_{target}.pkl'):\n",
    "        with open(f'../input/mlb-catboost-training/mymodel_cb_{target}.pkl', 'rb') as fin:\n",
    "            model_cb = pickle.load(fin)\n",
    "            oof_pred_cat = model_cb.predict(x_valid)\n",
    "            score_cat = mean_absolute_error(oof_pred_cat, y_valid)\n",
    "            print('*'*10,fin,'*'*10)\n",
    "            print('mae:', score_cat)\n",
    "    \n",
    "    else:\n",
    "\n",
    "        with open(f'model_cb_{target}.pkl', 'wb') as handle:\n",
    "            pickle.dump(model_cb, handle, protocol=pickle.HIGHEST_PROTOCOL)\n",
    "    \n",
    "\n",
    "    return oof_pred_lgb, model, oof_pred_cat, model_cb, score_lgb, score_cat\n",
    "\n",
    "\n",
    "params = {\n",
    "'boosting_type': 'gbdt',\n",
    "'objective':'mae',\n",
    "'subsample': 0.6,\n",
    "'subsample_freq': 1,\n",
    "'learning_rate': 0.03,\n",
    "'num_leaves': 2**11-1,\n",
    "'min_data_in_leaf': 2**12-1,\n",
    "'feature_fraction': 0.6,\n",
    "'max_bin': 100,\n",
    "'n_estimators': 2500,\n",
    "'boost_from_average': False,\n",
    "\"random_seed\":2021,\n",
    "}\n",
    "\n",
    "oof_pred_lgb2, model_lgb2, oof_pred_cat2, model_cb2, score_lgb2, score_cat2 = fit_lgbm(\n",
    "    x_train1, y_train1['target2'],\n",
    "    x_valid1, y_valid1['target2'],\n",
    "    2, params\n",
    ")\n",
    "\n",
    "oof_pred_lgb1, model_lgb1, oof_pred_cat1, model_cb1, score_lgb1, score_cat1 = fit_lgbm(\n",
    "    x_train1, y_train1['target1'],\n",
    "    x_valid1, y_valid1['target1'],\n",
    "    1, params\n",
    ")\n",
    "\n",
    "oof_pred_lgb3, model_lgb3, oof_pred_cat3, model_cb3, score_lgb3, score_cat3 = fit_lgbm(\n",
    "    x_train1, y_train1['target3'],\n",
    "    x_valid1, y_valid1['target3'],\n",
    "    3, params\n",
    ")\n",
    "oof_pred_lgb4, model_lgb4, oof_pred_cat4, model_cb4, score_lgb4, score_cat4= fit_lgbm(\n",
    "    x_train1, y_train1['target4'],\n",
    "    x_valid1, y_valid1['target4'],\n",
    "    4, params\n",
    ")\n",
    "\n",
    "score = (score_lgb1+score_lgb2+score_lgb3+score_lgb4) / 4\n",
    "print(f'LightGBM score: {score}')\n",
    "\n",
    "score = (score_cat1+score_cat2+score_cat3+score_cat4) / 4\n",
    "print(f'Catboost score: {score}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
     "duration": 0.054535,
     "end_time": "2021-07-26T11:29:55.501236",
     "exception": false,
     "start_time": "2021-07-26T11:29:55.446701",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# ANN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-26T11:29:55.620861Z",
     "iopub.status.busy": "2021-07-26T11:29:55.620092Z",
     "iopub.status.idle": "2021-07-26T11:29:55.633123Z",
     "shell.execute_reply": "2021-07-26T11:29:55.633643Z"
    },
    "papermill": {
     "duration": 0.066731,
     "end_time": "2021-07-26T11:29:55.633849",
     "exception": false,
     "start_time": "2021-07-26T11:29:55.567118",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "players_cols = ['playerId', 'primaryPositionName']\n",
    "rosters_cols = ['playerId', 'teamId', 'status']\n",
    "scores_cols = ['playerId', 'battingOrder', 'gamesPlayedBatting', 'flyOuts',\n",
    "       'groundOuts', 'runsScored', 'doubles', 'triples', 'homeRuns',\n",
    "       'strikeOuts', 'baseOnBalls', 'intentionalWalks', 'hits', 'hitByPitch',\n",
    "       'atBats', 'caughtStealing', 'stolenBases', 'groundIntoDoublePlay',\n",
    "       'groundIntoTriplePlay', 'plateAppearances', 'totalBases', 'rbi',\n",
    "       'leftOnBase', 'sacBunts', 'sacFlies', 'catchersInterference',\n",
    "       'pickoffs', 'gamesPlayedPitching', 'gamesStartedPitching',\n",
    "       'completeGamesPitching', 'shutoutsPitching', 'winsPitching',\n",
    "       'lossesPitching', 'flyOutsPitching', 'airOutsPitching',\n",
    "       'groundOutsPitching', 'runsPitching', 'doublesPitching',\n",
    "       'triplesPitching', 'homeRunsPitching', 'strikeOutsPitching',\n",
    "       'baseOnBallsPitching', 'intentionalWalksPitching', 'hitsPitching',\n",
    "       'hitByPitchPitching', 'atBatsPitching', 'caughtStealingPitching',\n",
    "       'stolenBasesPitching', 'inningsPitched', 'saveOpportunities',\n",
    "       'earnedRuns', 'battersFaced', 'outsPitching', 'pitchesThrown', 'balls',\n",
    "       'strikes', 'hitBatsmen', 'balks', 'wildPitches', 'pickoffsPitching',\n",
    "       'rbiPitching', 'gamesFinishedPitching', 'inheritedRunners',\n",
    "       'inheritedRunnersScored', 'catchersInterferencePitching',\n",
    "       'sacBuntsPitching', 'sacFliesPitching', 'saves', 'holds', 'blownSaves',\n",
    "       'assists', 'putOuts', 'errors', 'chances']\n",
    "\n",
    "null = np.nan\n",
    "true = True\n",
    "false = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-26T11:29:55.782230Z",
     "iopub.status.busy": "2021-07-26T11:29:55.769001Z",
     "iopub.status.idle": "2021-07-26T11:31:40.775987Z",
     "shell.execute_reply": "2021-07-26T11:31:40.776546Z"
    },
    "papermill": {
     "duration": 105.090934,
     "end_time": "2021-07-26T11:31:40.776753",
     "exception": false,
     "start_time": "2021-07-26T11:29:55.685819",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2506176, 6)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 20/20 [01:08<00:00,  3.43s/it]\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(2506176, 87)\n",
      "(2464956, 87)\n",
      "Model: \"ANN\"\n",
      "_________________________________________________________________\n",
      "Layer (type)                 Output Shape              Param #   \n",
      "=================================================================\n",
      "inputs (InputLayer)          [(None, 84)]              0         \n",
      "_________________________________________________________________\n",
      "d1 (Dense)                   (None, 50)                4250      \n",
      "_________________________________________________________________\n",
      "d2 (Dense)                   (None, 50)                2550      \n",
      "_________________________________________________________________\n",
      "preds (Dense)                (None, 4)                 204       \n",
      "=================================================================\n",
      "Total params: 7,004\n",
      "Trainable params: 7,004\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n",
      "None\n",
      "FOLD: 0\n",
      "10/10 [==============================] - 0s 17ms/step\n",
      "FOLD: 1\n",
      "10/10 [==============================] - 0s 14ms/step\n",
      "FOLD: 2\n",
      "10/10 [==============================] - 0s 15ms/step\n",
      "FOLD: 3\n",
      "10/10 [==============================] - 0s 14ms/step\n",
      "FOLD: 4\n",
      "10/10 [==============================] - 0s 16ms/step\n",
      "mae: 0.7727517316297969\n",
      "mse: 3.9314386784209567\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from datetime import timedelta\n",
    "from tqdm import tqdm\n",
    "import gc\n",
    "from functools import reduce\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "\n",
    "ROOT_DIR = \"../input/mlb-player-digital-engagement-forecasting\"\n",
    "\n",
    "#=======================#\n",
    "def flatten(df, col):\n",
    "    du = (df.pivot(index=\"playerId\", columns=\"EvalDate\", \n",
    "               values=col).add_prefix(f\"{col}_\").\n",
    "      rename_axis(None, axis=1).reset_index())\n",
    "    return du\n",
    "#============================#\n",
    "def reducer(left, right):\n",
    "    return left.merge(right, on=\"playerId\")\n",
    "#========================\n",
    "\n",
    "TGTCOLS = [\"target1\",\"target2\",\"target3\",\"target4\"]\n",
    "def train_lag(df, lag=1):\n",
    "    dp = df[[\"playerId\",\"EvalDate\"]+TGTCOLS].copy()\n",
    "    dp[\"EvalDate\"]  =dp[\"EvalDate\"] + timedelta(days=lag) \n",
    "    df = df.merge(dp, on=[\"playerId\", \"EvalDate\"], suffixes=[\"\",f\"_{lag}\"], how=\"left\")\n",
    "    return df\n",
    "#=================================\n",
    "def test_lag(sub):\n",
    "    sub[\"playerId\"] = sub[\"date_playerId\"].apply(lambda s: int(  s.split(\"_\")[1]  ) )\n",
    "    assert sub.date.nunique() == 1\n",
    "    dte = sub[\"date\"].unique()[0]\n",
    "    \n",
    "    eval_dt = pd.to_datetime(dte, format=\"%Y%m%d\")\n",
    "    dtes = [eval_dt + timedelta(days=-k) for k in LAGS]\n",
    "    mp_dtes = {eval_dt + timedelta(days=-k):k for k in LAGS}\n",
    "    \n",
    "    sl = LAST.loc[LAST.EvalDate.between(dtes[-1], dtes[0]), [\"EvalDate\",\"playerId\"]+TGTCOLS].copy()\n",
    "    sl[\"EvalDate\"] = sl[\"EvalDate\"].map(mp_dtes)\n",
    "    du = [flatten(sl, col) for col in TGTCOLS]\n",
    "    du = reduce(reducer, du)\n",
    "    return du, eval_dt\n",
    "    #\n",
    "#===============\n",
    "\n",
    "tr = pd.read_csv(\"../input/my-mlb-data/target.csv\")\n",
    "print(tr.shape)\n",
    "gc.collect()\n",
    "\n",
    "tr[\"EvalDate\"] = pd.to_datetime(tr[\"EvalDate\"])\n",
    "tr[\"EvalDate\"] = tr[\"EvalDate\"] + timedelta(days=-1)\n",
    "tr[\"EvalYear\"] = tr[\"EvalDate\"].dt.year\n",
    "\n",
    "MED_DF = tr.groupby([\"playerId\",\"EvalYear\"])[TGTCOLS].median().reset_index()\n",
    "MEDCOLS = [\"tgt1_med\",\"tgt2_med\", \"tgt3_med\", \"tgt4_med\"]\n",
    "MED_DF.columns = [\"playerId\",\"EvalYear\"] + MEDCOLS\n",
    "\n",
    "LAGS = list(range(1,21))\n",
    "FECOLS = [f\"{col}_{lag}\" for lag in reversed(LAGS) for col in TGTCOLS]\n",
    "\n",
    "for lag in tqdm(LAGS):\n",
    "    tr = train_lag(tr, lag=lag)\n",
    "    gc.collect()\n",
    "#===========\n",
    "tr = tr.sort_values(by=[\"playerId\", \"EvalDate\"])\n",
    "print(tr.shape)\n",
    "tr = tr.dropna()\n",
    "print(tr.shape)\n",
    "tr = tr.merge(MED_DF, on=[\"playerId\",\"EvalYear\"])\n",
    "gc.collect()\n",
    "\n",
    "X = tr[FECOLS+MEDCOLS].values\n",
    "y = tr[TGTCOLS].values\n",
    "cl = tr[\"playerId\"].values\n",
    "\n",
    "NFOLDS = 5\n",
    "skf = StratifiedKFold(n_splits=NFOLDS)\n",
    "folds = skf.split(X, cl)\n",
    "folds = list(folds)\n",
    "\n",
    "import tensorflow as tf\n",
    "import tensorflow.keras.layers as L\n",
    "import tensorflow.keras.models as M\n",
    "from sklearn.metrics import mean_absolute_error, mean_squared_error\n",
    "from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping\n",
    "\n",
    "tf.random.set_seed(2021)\n",
    "\n",
    "def make_model(n_in):\n",
    "    inp = L.Input(name=\"inputs\", shape=(n_in,))\n",
    "    x = L.Dense(50, activation=\"relu\", name=\"d1\")(inp)\n",
    "    x = L.Dense(50, activation=\"relu\", name=\"d2\")(x)\n",
    "    preds = L.Dense(4, activation=\"linear\", name=\"preds\")(x)\n",
    "    \n",
    "    model = M.Model(inp, preds, name=\"ANN\")\n",
    "    model.compile(loss=\"mean_absolute_error\", optimizer=\"adam\")\n",
    "    return model\n",
    "\n",
    "net = make_model(X.shape[1])\n",
    "print(net.summary())\n",
    "\n",
    "oof = np.zeros(y.shape)\n",
    "nets = []\n",
    "for idx in range(NFOLDS):\n",
    "    print(\"FOLD:\", idx)\n",
    "    tr_idx, val_idx = folds[idx]\n",
    "    ckpt = ModelCheckpoint(f\"../input/mlb-ann-training/w{idx}.h5\", monitor='val_loss', verbose=1, save_best_only=True,mode='min')\n",
    "    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,patience=3, min_lr=0.0001)\n",
    "    es = EarlyStopping(monitor='val_loss', patience=5)\n",
    "    reg = make_model(X.shape[1])\n",
    "#     reg.fit(X[tr_idx], y[tr_idx], epochs=10, batch_size=30_000, validation_data=(X[val_idx], y[val_idx]),\n",
    "#             verbose=1, callbacks=[ckpt, reduce_lr, es])\n",
    "    reg.load_weights(f\"../input/mlb-ann-training/w{idx}.h5\")\n",
    "    oof[val_idx] = reg.predict(X[val_idx], batch_size=50_000, verbose=1)\n",
    "    nets.append(reg)\n",
    "    gc.collect()\n",
    "\n",
    "mae = mean_absolute_error(y, oof)\n",
    "mse = mean_squared_error(y, oof, squared=False)\n",
    "print(\"mae:\", mae)\n",
    "print(\"mse:\", mse)\n",
    "\n",
    "# Historical information to use in prediction time\n",
    "bound_dt = pd.to_datetime(\"2021-01-01\")\n",
    "LAST = tr.loc[tr.EvalDate>bound_dt].copy()\n",
    "\n",
    "LAST_MED_DF = MED_DF.loc[MED_DF.EvalYear==2021].copy()\n",
    "LAST_MED_DF.drop(\"EvalYear\", axis=1, inplace=True)\n",
    "del tr\n",
    "\n",
    "#\"\"\"\n",
    "import mlb\n",
    "FE = []; SUB = [];"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "papermill": {
     "duration": 0.064615,
     "end_time": "2021-07-26T11:31:40.909832",
     "exception": false,
     "start_time": "2021-07-26T11:31:40.845217",
     "status": "completed"
    },
    "tags": []
   },
   "source": [
    "# Predict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-26T11:31:41.068184Z",
     "iopub.status.busy": "2021-07-26T11:31:41.048554Z",
     "iopub.status.idle": "2021-07-26T11:32:04.799825Z",
     "shell.execute_reply": "2021-07-26T11:32:04.800318Z"
    },
    "papermill": {
     "duration": 23.827166,
     "end_time": "2021-07-26T11:32:04.800508",
     "exception": false,
     "start_time": "2021-07-26T11:31:40.973342",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.\n"
     ]
    }
   ],
   "source": [
    "import copy\n",
    "\n",
    "env = mlb.make_env() # initialize the environment\n",
    "iter_test = env.iter_test() # iterator which loops over each date in test set\n",
    "\n",
    "for (test_df, sample_prediction_df) in iter_test: # make predictions here\n",
    "    \n",
    "    sub = copy.deepcopy(sample_prediction_df.reset_index())\n",
    "    sample_prediction_df = copy.deepcopy(sample_prediction_df.reset_index(drop=True))\n",
    "    \n",
    "    # LGBM summit\n",
    "    # creat dataset\n",
    "    sample_prediction_df['playerId'] = sample_prediction_df['date_playerId']\\\n",
    "                                        .map(lambda x: int(x.split('_')[1]))\n",
    "    # Dealing with missing values\n",
    "    if test_df['rosters'].iloc[0] == test_df['rosters'].iloc[0]:\n",
    "        test_rosters = pd.DataFrame(eval(test_df['rosters'].iloc[0]))\n",
    "    else:\n",
    "        test_rosters = pd.DataFrame({'playerId': sample_prediction_df['playerId']})\n",
    "        for col in rosters.columns:\n",
    "            if col == 'playerId': continue\n",
    "            test_rosters[col] = np.nan\n",
    "            \n",
    "    if test_df['playerBoxScores'].iloc[0] == test_df['playerBoxScores'].iloc[0]:\n",
    "        test_scores = pd.DataFrame(eval(test_df['playerBoxScores'].iloc[0]))\n",
    "    else:\n",
    "        test_scores = pd.DataFrame({'playerId': sample_prediction_df['playerId']})\n",
    "        for col in scores.columns:\n",
    "            if col == 'playerId': continue\n",
    "            test_scores[col] = np.nan\n",
    "    test_scores = test_scores.groupby('playerId').sum().reset_index()\n",
    "    test = sample_prediction_df[['playerId']].copy()\n",
    "    test = test.merge(players[players_cols], on='playerId', how='left')\n",
    "    test = test.merge(test_rosters[rosters_cols], on='playerId', how='left')\n",
    "    test = test.merge(test_scores[scores_cols], on='playerId', how='left')\n",
    "    test = test.merge(player_target_stats, how='inner', left_on=[\"playerId\"],right_on=[\"playerId\"])\n",
    "    \n",
    "\n",
    "    test['label_playerId'] = test['playerId'].map(player2num)\n",
    "    test['label_primaryPositionName'] = test['primaryPositionName'].map(position2num)\n",
    "    test['label_teamId'] = test['teamId'].map(teamid2num)\n",
    "    test['label_status'] = test['status'].map(status2num)\n",
    "    \n",
    "    test_X = test[feature_cols]\n",
    "    # predict\n",
    "    pred1 = model1.predict(test_X)\n",
    "    \n",
    "    # predict\n",
    "    pred_lgd1 = model_lgb1.predict(test_X)\n",
    "    pred_lgd2 = model_lgb2.predict(test_X)\n",
    "    pred_lgd3 = model_lgb3.predict(test_X)\n",
    "    pred_lgd4 = model_lgb4.predict(test_X)\n",
    "    \n",
    "    pred_cat1 = model_cb1.predict(test_X)\n",
    "    pred_cat2 = model_cb2.predict(test_X)\n",
    "    pred_cat3 = model_cb3.predict(test_X)\n",
    "    pred_cat4 = model_cb4.predict(test_X)\n",
    "    \n",
    "    test['target1'] = np.clip(pred1,0,100)\n",
    "    test_X = test[feature_cols2]\n",
    "\n",
    "    pred2 = model2.predict(test_X)\n",
    "    pred3 = model3.predict(test_X)\n",
    "    pred4 = model4.predict(test_X)\n",
    "    \n",
    "    # merge submission\n",
    "    sample_prediction_df['target1'] = 1.00*np.clip(pred1, 0, 100)+0.00*np.clip(pred_lgd1, 0, 100)+0.00*np.clip(pred_cat1, 0, 100)\n",
    "    sample_prediction_df['target2'] = 0.05*np.clip(pred2, 0, 100)+0.54*np.clip(pred_lgd2, 0, 100)+0.405*np.clip(pred_cat2, 0, 100)\n",
    "    sample_prediction_df['target3'] = 0.76*np.clip(pred3, 0, 100)+0.14*np.clip(pred_lgd3, 0, 100)+0.10*np.clip(pred_cat3, 0, 100)\n",
    "    sample_prediction_df['target4'] = 0.77*np.clip(pred4, 0, 100)+0.13*np.clip(pred_lgd4, 0, 100)+0.10*np.clip(pred_cat4, 0, 100)\n",
    "    sample_prediction_df = sample_prediction_df.fillna(0.)\n",
    "    del sample_prediction_df['playerId']\n",
    "    # TF summit\n",
    "    # Features computation at Evaluation Date\n",
    "    sub_fe, eval_dt = test_lag(sub)\n",
    "    sub_fe = sub_fe.merge(LAST_MED_DF, on=\"playerId\", how=\"left\")\n",
    "    sub_fe = sub_fe.fillna(0.)\n",
    "    \n",
    "    _preds = 0.\n",
    "    for reg in nets:\n",
    "        _preds += reg.predict(sub_fe[FECOLS + MEDCOLS]) / NFOLDS\n",
    "    sub_fe[TGTCOLS] = np.clip(_preds, 0, 100)\n",
    "    sub.drop([\"date\"]+TGTCOLS, axis=1, inplace=True)\n",
    "    sub = sub.merge(sub_fe[[\"playerId\"]+TGTCOLS], on=\"playerId\", how=\"left\")\n",
    "    sub.drop(\"playerId\", axis=1, inplace=True)\n",
    "    sub = sub.fillna(0.)\n",
    "    # Blending\n",
    "    blend = pd.concat(\n",
    "        [sub[['date_playerId']],\n",
    "        (0.22*sub.drop('date_playerId', axis=1) + 0.78*sample_prediction_df.drop('date_playerId', axis=1))],\n",
    "        axis=1\n",
    "    )\n",
    "    env.predict(blend)\n",
    "    # Update Available information\n",
    "    sub_fe[\"EvalDate\"] = eval_dt\n",
    "    #sub_fe.drop(MEDCOLS, axis=1, inplace=True)\n",
    "    LAST = LAST.append(sub_fe)\n",
    "    LAST = LAST.drop_duplicates(subset=[\"EvalDate\",\"playerId\"], keep=\"last\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-26T11:32:04.944978Z",
     "iopub.status.busy": "2021-07-26T11:32:04.944303Z",
     "iopub.status.idle": "2021-07-26T11:32:04.957672Z",
     "shell.execute_reply": "2021-07-26T11:32:04.958218Z"
    },
    "papermill": {
     "duration": 0.094386,
     "end_time": "2021-07-26T11:32:04.958407",
     "exception": false,
     "start_time": "2021-07-26T11:32:04.864021",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date_playerId</th>\n",
       "      <th>target1</th>\n",
       "      <th>target2</th>\n",
       "      <th>target3</th>\n",
       "      <th>target4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>20210501_488726</td>\n",
       "      <td>1.417833</td>\n",
       "      <td>5.650557</td>\n",
       "      <td>6.859939e-02</td>\n",
       "      <td>1.995048</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20210501_605218</td>\n",
       "      <td>0.003536</td>\n",
       "      <td>0.396355</td>\n",
       "      <td>1.702673e-03</td>\n",
       "      <td>0.868064</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20210501_621563</td>\n",
       "      <td>0.099256</td>\n",
       "      <td>2.386864</td>\n",
       "      <td>7.695223e-02</td>\n",
       "      <td>0.771513</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20210501_670084</td>\n",
       "      <td>0.022301</td>\n",
       "      <td>0.878644</td>\n",
       "      <td>6.095328e-04</td>\n",
       "      <td>0.277289</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>20210501_670970</td>\n",
       "      <td>0.010307</td>\n",
       "      <td>0.251098</td>\n",
       "      <td>2.952593e-02</td>\n",
       "      <td>0.118300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1182</th>\n",
       "      <td>20210501_596049</td>\n",
       "      <td>0.000276</td>\n",
       "      <td>0.009644</td>\n",
       "      <td>2.608818e-12</td>\n",
       "      <td>0.035421</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1183</th>\n",
       "      <td>20210501_642851</td>\n",
       "      <td>0.000176</td>\n",
       "      <td>0.041358</td>\n",
       "      <td>1.818041e-07</td>\n",
       "      <td>0.079906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1184</th>\n",
       "      <td>20210501_596071</td>\n",
       "      <td>0.000451</td>\n",
       "      <td>0.083687</td>\n",
       "      <td>1.183390e-04</td>\n",
       "      <td>0.070381</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1185</th>\n",
       "      <td>20210501_664901</td>\n",
       "      <td>0.003308</td>\n",
       "      <td>0.309083</td>\n",
       "      <td>3.068393e-02</td>\n",
       "      <td>0.199449</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1186</th>\n",
       "      <td>20210501_605525</td>\n",
       "      <td>0.002655</td>\n",
       "      <td>0.555847</td>\n",
       "      <td>4.771943e-04</td>\n",
       "      <td>0.116648</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1187 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        date_playerId   target1   target2       target3   target4\n",
       "0     20210501_488726  1.417833  5.650557  6.859939e-02  1.995048\n",
       "1     20210501_605218  0.003536  0.396355  1.702673e-03  0.868064\n",
       "2     20210501_621563  0.099256  2.386864  7.695223e-02  0.771513\n",
       "3     20210501_670084  0.022301  0.878644  6.095328e-04  0.277289\n",
       "4     20210501_670970  0.010307  0.251098  2.952593e-02  0.118300\n",
       "...               ...       ...       ...           ...       ...\n",
       "1182  20210501_596049  0.000276  0.009644  2.608818e-12  0.035421\n",
       "1183  20210501_642851  0.000176  0.041358  1.818041e-07  0.079906\n",
       "1184  20210501_596071  0.000451  0.083687  1.183390e-04  0.070381\n",
       "1185  20210501_664901  0.003308  0.309083  3.068393e-02  0.199449\n",
       "1186  20210501_605525  0.002655  0.555847  4.771943e-04  0.116648\n",
       "\n",
       "[1187 rows x 5 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.concat(\n",
    "    [sub[['date_playerId']],\n",
    "    (sub.drop('date_playerId', axis=1) + sample_prediction_df.drop('date_playerId', axis=1)) / 2],\n",
    "    axis=1\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-07-26T11:32:05.091936Z",
     "iopub.status.busy": "2021-07-26T11:32:05.090877Z",
     "iopub.status.idle": "2021-07-26T11:32:05.108982Z",
     "shell.execute_reply": "2021-07-26T11:32:05.108454Z"
    },
    "papermill": {
     "duration": 0.086599,
     "end_time": "2021-07-26T11:32:05.109124",
     "exception": false,
     "start_time": "2021-07-26T11:32:05.022525",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>date_playerId</th>\n",
       "      <th>target1</th>\n",
       "      <th>target2</th>\n",
       "      <th>target3</th>\n",
       "      <th>target4</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>20210501_488726</td>\n",
       "      <td>2.728486e+00</td>\n",
       "      <td>8.022509</td>\n",
       "      <td>1.070538e-01</td>\n",
       "      <td>2.425154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>20210501_605218</td>\n",
       "      <td>2.152436e-03</td>\n",
       "      <td>0.566561</td>\n",
       "      <td>3.182109e-03</td>\n",
       "      <td>0.828016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>20210501_621563</td>\n",
       "      <td>1.422361e-01</td>\n",
       "      <td>2.763233</td>\n",
       "      <td>1.348766e-02</td>\n",
       "      <td>0.880154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>20210501_670084</td>\n",
       "      <td>2.031692e-03</td>\n",
       "      <td>0.647363</td>\n",
       "      <td>1.219066e-03</td>\n",
       "      <td>0.119984</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>20210501_670970</td>\n",
       "      <td>5.941349e-04</td>\n",
       "      <td>0.162923</td>\n",
       "      <td>9.765327e-03</td>\n",
       "      <td>0.048072</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1182</th>\n",
       "      <td>20210501_596049</td>\n",
       "      <td>9.109051e-15</td>\n",
       "      <td>0.016895</td>\n",
       "      <td>5.217635e-12</td>\n",
       "      <td>0.032081</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1183</th>\n",
       "      <td>20210501_642851</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.072513</td>\n",
       "      <td>3.636082e-07</td>\n",
       "      <td>0.081339</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1184</th>\n",
       "      <td>20210501_596071</td>\n",
       "      <td>1.820810e-04</td>\n",
       "      <td>0.109463</td>\n",
       "      <td>1.856178e-05</td>\n",
       "      <td>0.085482</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1185</th>\n",
       "      <td>20210501_664901</td>\n",
       "      <td>6.615386e-03</td>\n",
       "      <td>0.358820</td>\n",
       "      <td>3.113339e-03</td>\n",
       "      <td>0.202974</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1186</th>\n",
       "      <td>20210501_605525</td>\n",
       "      <td>8.827960e-04</td>\n",
       "      <td>0.593431</td>\n",
       "      <td>4.494436e-10</td>\n",
       "      <td>0.107011</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1187 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "        date_playerId       target1   target2       target3   target4\n",
       "0     20210501_488726  2.728486e+00  8.022509  1.070538e-01  2.425154\n",
       "1     20210501_605218  2.152436e-03  0.566561  3.182109e-03  0.828016\n",
       "2     20210501_621563  1.422361e-01  2.763233  1.348766e-02  0.880154\n",
       "3     20210501_670084  2.031692e-03  0.647363  1.219066e-03  0.119984\n",
       "4     20210501_670970  5.941349e-04  0.162923  9.765327e-03  0.048072\n",
       "...               ...           ...       ...           ...       ...\n",
       "1182  20210501_596049  9.109051e-15  0.016895  5.217635e-12  0.032081\n",
       "1183  20210501_642851  0.000000e+00  0.072513  3.636082e-07  0.081339\n",
       "1184  20210501_596071  1.820810e-04  0.109463  1.856178e-05  0.085482\n",
       "1185  20210501_664901  6.615386e-03  0.358820  3.113339e-03  0.202974\n",
       "1186  20210501_605525  8.827960e-04  0.593431  4.494436e-10  0.107011\n",
       "\n",
       "[1187 rows x 5 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sample_prediction_df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 3924.98772,
   "end_time": "2021-07-26T11:32:07.079842",
   "environment_variables": {},
   "exception": null,
   "input_path": "__notebook__.ipynb",
   "output_path": "__notebook__.ipynb",
   "parameters": {},
   "start_time": "2021-07-26T10:26:42.092122",
   "version": "2.3.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}