{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "https://github.com/christopherjenness/NBA-prediction\n",
    "https://github.com/fastai/courses/blob/master/deeplearning1/nbs/utils.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Using TensorFlow backend.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPython 3.5.3\n",
      "IPython 5.1.0\n",
      "\n",
      "numpy 1.11.3\n",
      "pandas 0.19.2\n",
      "keras 1.2.0\n",
      "tensorflow 0.10.0rc0\n",
      "\n",
      "compiler   : GCC 4.4.7 20120313 (Red Hat 4.4.7-1)\n",
      "system     : Linux\n",
      "release    : 4.4.0-72-generic\n",
      "machine    : x86_64\n",
      "processor  : x86_64\n",
      "CPU cores  : 4\n",
      "interpreter: 64bit\n",
      "Git hash   : 60c28751d01a6e854bbcdafc490acf97fa1c15da\n"
     ]
    }
   ],
   "source": [
    "%load_ext watermark\n",
    "%watermark -v -m -p numpy,pandas,keras,tensorflow -g"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "df = pd.read_pickle('./2016_scores.pkl')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>away_score</th>\n",
       "      <th>away_team</th>\n",
       "      <th>home_score</th>\n",
       "      <th>home_team</th>\n",
       "      <th>difference</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>5</td>\n",
       "      <td>Toronto Blue Jays</td>\n",
       "      <td>3</td>\n",
       "      <td>Tampa Bay Rays</td>\n",
       "      <td>-2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>St. Louis Cardinals</td>\n",
       "      <td>4</td>\n",
       "      <td>Pittsburgh Pirates</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>New York Mets</td>\n",
       "      <td>4</td>\n",
       "      <td>Kansas City Royals</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>Seattle Mariners</td>\n",
       "      <td>3</td>\n",
       "      <td>Texas Rangers</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Toronto Blue Jays</td>\n",
       "      <td>3</td>\n",
       "      <td>Tampa Bay Rays</td>\n",
       "      <td>-2</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   away_score            away_team  home_score           home_team  difference\n",
       "0           5    Toronto Blue Jays           3      Tampa Bay Rays          -2\n",
       "1           1  St. Louis Cardinals           4  Pittsburgh Pirates           3\n",
       "2           3        New York Mets           4  Kansas City Royals           1\n",
       "3           2     Seattle Mariners           3       Texas Rangers           1\n",
       "4           5    Toronto Blue Jays           3      Tampa Bay Rays          -2"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df['difference'] = df['home_score'] - df['away_score']\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "list_of_teams = list(df['away_team'].unique()) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 101,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "idx2teamid = {teamid2idx[key]:key for key in teamid2idx.keys()}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "list_of_teams = df['away_team'].unique()\n",
    "n_teams = len(list_of_teams)\n",
    "\n",
    "teamid2idx = {o:i for i,o in enumerate(list_of_teams)}\n",
    "idx2teamid = {teamid2idx[key]:key for key in teamid2idx.keys()}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "df.away_team = df.away_team.apply(lambda x: teamid2idx[x])\n",
    "df.home_team = df.home_team.apply(lambda x: teamid2idx[x])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "msk = np.random.rand(len(df)) < 0.8\n",
    "trn = df[msk]\n",
    "val = df[~msk]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [],
   "source": [
    "from keras.layers import Input, Embedding, merge\n",
    "from keras.layers.core import Flatten\n",
    "from keras.models import Model\n",
    "from keras.regularizers import l2\n",
    "from keras.optimizers import Adam"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 126,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "n_factors = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 127,
   "metadata": {},
   "outputs": [],
   "source": [
    "away_in = Input(shape=(1,), dtype='int64', name='away_in')\n",
    "home_in = Input(shape=(1,), dtype='int64', name='home_in')\n",
    "\n",
    "embedding_layer = Embedding(n_teams, n_factors, input_length=1, W_regularizer=l2(1e-4))\n",
    "\n",
    "#a = Embedding(n_teams, n_factors, input_length=1, W_regularizer=l2(1e-4))(away_in)\n",
    "#h = Embedding(n_teams, n_factors, input_length=1, W_regularizer=l2(1e-4))(home_in)\n",
    "\n",
    "a = embedding_layer(away_in)\n",
    "h = embedding_layer(home_in)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 128,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = merge([a, h], mode='dot')\n",
    "x = Flatten()(x)\n",
    "model = Model([away_in, home_in], x)\n",
    "model.compile(Adam(0.001), loss='mse')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 129,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 1980 samples, validate on 483 samples\n",
      "Epoch 1/1\n",
      "1980/1980 [==============================] - 0s - loss: 18.7924 - val_loss: 17.6625\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7f436773c8d0>"
      ]
     },
     "execution_count": 129,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit([trn.away_team, trn.home_team], trn.difference, batch_size=64, nb_epoch=1, \n",
    "          validation_data=([val.away_team, val.home_team], val.difference))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 130,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "model.optimizer.lr=0.01"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 131,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 1980 samples, validate on 483 samples\n",
      "Epoch 1/3\n",
      "1980/1980 [==============================] - 0s - loss: 18.7898 - val_loss: 17.6625\n",
      "Epoch 2/3\n",
      "1980/1980 [==============================] - 0s - loss: 18.7872 - val_loss: 17.6631\n",
      "Epoch 3/3\n",
      "1980/1980 [==============================] - 0s - loss: 18.7837 - val_loss: 17.6641\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7f4367642748>"
      ]
     },
     "execution_count": 131,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit([trn.away_team, trn.home_team], trn.difference, batch_size=64, nb_epoch=3, \n",
    "          validation_data=([val.away_team, val.home_team], val.difference))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 132,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "model.optimizer.lr=0.001"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 1980 samples, validate on 483 samples\n",
      "Epoch 1/6\n",
      "1980/1980 [==============================] - 0s - loss: 18.7789 - val_loss: 17.6653\n",
      "Epoch 2/6\n",
      "1980/1980 [==============================] - 0s - loss: 18.7726 - val_loss: 17.6668\n",
      "Epoch 3/6\n",
      "1980/1980 [==============================] - 0s - loss: 18.7640 - val_loss: 17.6695\n",
      "Epoch 4/6\n",
      "1980/1980 [==============================] - 0s - loss: 18.7535 - val_loss: 17.6725\n",
      "Epoch 5/6\n",
      "1980/1980 [==============================] - 0s - loss: 18.7397 - val_loss: 17.6758\n",
      "Epoch 6/6\n",
      "1980/1980 [==============================] - 0s - loss: 18.7234 - val_loss: 17.6792\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7f43676b6518>"
      ]
     },
     "execution_count": 133,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit([trn.away_team, trn.home_team], trn.difference, batch_size=64, nb_epoch=6, \n",
    "          validation_data=([val.away_team, val.home_team], val.difference))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 134,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def embedding_input(name, n_in, n_out, reg):\n",
    "    inp = Input(shape=(1,), dtype='int64', name=name)\n",
    "    return inp, Embedding(n_in, n_out, input_length=1, W_regularizer=l2(reg))(inp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def create_bias(inp, n_in):\n",
    "    x = Embedding(n_in, 1, input_length=1)(inp)\n",
    "    return Flatten()(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [],
   "source": [
    "#away_in, a = embedding_input('away_in', n_teams, n_factors, 1e-4)\n",
    "#home_in, h = embedding_input('home_in', n_teams, n_factors, 1e-4)\n",
    "\n",
    "away_in = Input(shape=(1,), dtype='int64', name='away_in')\n",
    "home_in = Input(shape=(1,), dtype='int64', name='home_in')\n",
    "\n",
    "embedding_layer = Embedding(n_teams, n_factors, input_length=1, W_regularizer=l2(1e-4))\n",
    "\n",
    "a = embedding_layer(away_in)\n",
    "h = embedding_layer(home_in)\n",
    "\n",
    "ab = create_bias(away_in, n_teams)\n",
    "hb = create_bias(home_in, n_teams)\n",
    "\n",
    "x = merge([a, h], mode='dot')\n",
    "x = Flatten()(x)\n",
    "x = merge([x, ab], mode='sum')\n",
    "x = merge([x, hb], mode='sum')\n",
    "model = Model([away_in, home_in], x)\n",
    "model.compile(Adam(0.001), loss='mse')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 137,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 1980 samples, validate on 483 samples\n",
      "Epoch 1/1\n",
      "1980/1980 [==============================] - 0s - loss: 18.7880 - val_loss: 17.6421\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7f4367491518>"
      ]
     },
     "execution_count": 137,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit([trn.away_team, trn.home_team], trn.difference, batch_size=64, nb_epoch=1, \n",
    "          validation_data=([val.away_team, val.home_team], val.difference))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 1980 samples, validate on 483 samples\n",
      "Epoch 1/6\n",
      "1980/1980 [==============================] - 0s - loss: 17.9710 - val_loss: 17.7098\n",
      "Epoch 2/6\n",
      "1980/1980 [==============================] - 0s - loss: 17.9262 - val_loss: 17.7259\n",
      "Epoch 3/6\n",
      "1980/1980 [==============================] - 0s - loss: 17.8813 - val_loss: 17.7452\n",
      "Epoch 4/6\n",
      "1980/1980 [==============================] - 0s - loss: 17.8392 - val_loss: 17.7703\n",
      "Epoch 5/6\n",
      "1980/1980 [==============================] - 0s - loss: 17.7955 - val_loss: 17.7886\n",
      "Epoch 6/6\n",
      "1980/1980 [==============================] - 0s - loss: 17.7522 - val_loss: 17.8073: 17.65\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7f4367867c18>"
      ]
     },
     "execution_count": 140,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.optimizer.lr=0.01\n",
    "model.fit([trn.away_team, trn.home_team], trn.difference, batch_size=64, nb_epoch=6, \n",
    "          validation_data=([val.away_team, val.home_team], val.difference))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 139,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train on 1980 samples, validate on 483 samples\n",
      "Epoch 1/15\n",
      "1980/1980 [==============================] - 0s - loss: 18.6135 - val_loss: 17.6012\n",
      "Epoch 2/15\n",
      "1980/1980 [==============================] - 0s - loss: 18.5813 - val_loss: 17.5995\n",
      "Epoch 3/15\n",
      "1980/1980 [==============================] - 0s - loss: 18.5480 - val_loss: 17.5970\n",
      "Epoch 4/15\n",
      "1980/1980 [==============================] - 0s - loss: 18.5118 - val_loss: 17.5975\n",
      "Epoch 5/15\n",
      "1980/1980 [==============================] - 0s - loss: 18.4739 - val_loss: 17.5984\n",
      "Epoch 6/15\n",
      "1980/1980 [==============================] - 0s - loss: 18.4330 - val_loss: 17.5997\n",
      "Epoch 7/15\n",
      "1980/1980 [==============================] - 0s - loss: 18.3895 - val_loss: 17.6087\n",
      "Epoch 8/15\n",
      "1980/1980 [==============================] - 0s - loss: 18.3459 - val_loss: 17.6110\n",
      "Epoch 9/15\n",
      "1980/1980 [==============================] - 0s - loss: 18.3017 - val_loss: 17.6212\n",
      "Epoch 10/15\n",
      "1980/1980 [==============================] - 0s - loss: 18.2525 - val_loss: 17.6283\n",
      "Epoch 11/15\n",
      "1980/1980 [==============================] - 0s - loss: 18.2077 - val_loss: 17.6425\n",
      "Epoch 12/15\n",
      "1980/1980 [==============================] - 0s - loss: 18.1599 - val_loss: 17.6507\n",
      "Epoch 13/15\n",
      "1980/1980 [==============================] - 0s - loss: 18.1129 - val_loss: 17.6628\n",
      "Epoch 14/15\n",
      "1980/1980 [==============================] - 0s - loss: 18.0640 - val_loss: 17.6769: 1\n",
      "Epoch 15/15\n",
      "1980/1980 [==============================] - 0s - loss: 18.0180 - val_loss: 17.6948\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "<keras.callbacks.History at 0x7f43673a8f60>"
      ]
     },
     "execution_count": 139,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.optimizer.lr=0.001\n",
    "model.fit([trn.away_team, trn.home_team], trn.difference, batch_size=64, nb_epoch=15, \n",
    "          validation_data=([val.away_team, val.home_team], val.difference))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "https://github.com/fastai/courses/blob/master/deeplearning1/nbs/lesson4.ipynb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 143,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(0.3362835, 'Philadelphia Phillies'),\n",
       " (0.22890224, 'Cincinnati Reds'),\n",
       " (0.22622988, 'Milwaukee Brewers'),\n",
       " (0.20452867, 'Atlanta Braves'),\n",
       " (0.16434038, 'Oakland Athletics'),\n",
       " (0.15163806, \"Arizona D'Backs\"),\n",
       " (0.14097856, 'San Diego Padres'),\n",
       " (0.1341555, 'Kansas City Royals'),\n",
       " (0.11964823, 'Colorado Rockies'),\n",
       " (0.11323795, 'New York Yankees'),\n",
       " (0.094945267, 'Chicago White Sox'),\n",
       " (0.081414096, 'Miami Marlins'),\n",
       " (0.071450919, 'Houston Astros'),\n",
       " (0.062424611, 'Detroit Tigers'),\n",
       " (0.054559465, 'Baltimore Orioles')]"
      ]
     },
     "execution_count": 143,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "get_away_bias = Model(away_in, ab)\n",
    "away_bias = get_away_bias.predict(df.away_team.unique())\n",
    "away_rating = [(b[0], idx2teamid[i]) for i,b in zip(df.away_team.unique(),away_bias)]              \n",
    "                \n",
    "sorted(away_rating,  key=lambda x: x[0], reverse=True)[:15]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 144,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[(-0.33137923, 'St. Louis Cardinals'),\n",
       " (-0.28681648, 'Washington Nationals'),\n",
       " (-0.27189431, 'Boston Red Sox'),\n",
       " (-0.27137929, 'Chicago Cubs'),\n",
       " (-0.26645699, 'Toronto Blue Jays'),\n",
       " (-0.24045761, 'Seattle Mariners'),\n",
       " (-0.14827146, 'San Francisco Giants'),\n",
       " (-0.095634982, 'Pittsburgh Pirates'),\n",
       " (-0.04807644, 'New York Mets'),\n",
       " (-0.023228975, 'LA Angels of Anaheim'),\n",
       " (-0.0085854754, 'Tampa Bay Rays'),\n",
       " (0.013221953, 'Texas Rangers'),\n",
       " (0.013871406, 'Los Angeles Dodgers'),\n",
       " (0.040301573, 'Cleveland Indians'),\n",
       " (0.049724065, 'Minnesota Twins')]"
      ]
     },
     "execution_count": 144,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sorted(away_rating,  key=lambda x: x[0])[:15]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda root]",
   "language": "python",
   "name": "conda-root-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}