{ "metadata": { "name": "", "signature": "sha256:0d2a6e52ed1890166418a78f69258d1e429d6926c23cfb6a5bbf8f5e4b4e6856" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "#College Football Baseline 1" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Importing necessary libraries." ] }, { "cell_type": "code", "collapsed": false, "input": [ "%pylab inline\n", "\n", "from collections import defaultdict\n", "\n", "import cPickle as pickle\n", "import math\n", "import numpy as np\n", "import pandas as pd\n", "import pylab\n", "import glob\n", "import seaborn" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Populating the interactive namespace from numpy and matplotlib\n" ] } ], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "colors = seaborn.color_palette()\n", "\n", "from matplotlib import rc\n", "\n", "rc(\"figure\", facecolor=\"white\")\n", "rc(\"axes\", facecolor=\"white\")\n", "rc(\"axes\", edgecolor=\"grey\")\n", "\n", "\n", "rc(\"grid\", alpha=0.9)\n", "rc(\"grid\", linewidth=0.2)\n", "rc(\"grid\", linestyle=\":\")\n", "\n", "\n", "rc('font',**{'family':'serif','serif':['Palatino']})" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "# A utility function to compare the results side-by-side\n", "def side_by_side(*objs, **kwds):\n", " from pandas.core.common import adjoin\n", " space = kwds.get('space', 4)\n", " reprs = [repr(obj).split('\\n') for obj in objs]\n", " print adjoin(space, *reprs)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 3 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Creating Train and Test Set" ] }, { "cell_type": "code", "collapsed": false, "input": [ "trainDF = pd.read_csv('trainCOL.csv', index_col=0)\n", "testDF = pd.read_csv('testCOL.csv', index_col=0)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "trainDF = trainDF.sort(columns='game_id').reset_index(drop=True)\n", "testDF = testDF.sort(columns='game_id').reset_index(drop=True)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 5 }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Train Data operations" ] }, { "cell_type": "code", "collapsed": false, "input": [ "trdf = trainDF[['game_id','year','home_team','away_team','home_score','away_score','winner']]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "def get_historic_games(team, gameId, trdf):\n", " df = trdf[trdf.game_id < gameId]\n", " hdf = df[df.home_team == team]\n", " adf = df[df.away_team == team]\n", " newdf = pd.concat([hdf, adf])\n", " newdf = newdf.sort_index()\n", " try:\n", " return pd.concat([hdf, adf])\n", " except:\n", " return False" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "def get_points(team, df):\n", " pFor = []\n", " pAgainst = []\n", " for r, v in df.iterrows():\n", " hTeam = v.home_team\n", " aTeam = v.away_team\n", " hScore = v.home_score\n", " aScore = v.away_score\n", " if (team == hTeam):\n", " pFor.append(hScore)\n", " pAgainst.append(aScore)\n", " elif (team == aTeam):\n", " pFor.append(aScore)\n", " pAgainst.append(hScore)\n", " return (sum(pFor)/len(df), sum(pAgainst)/len(df))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "for i in xrange(1, 50):\n", " lst = []\n", " for r, v in trdf.iterrows():\n", " gameId = v.game_id\n", " home = v.home_team\n", " away = v.away_team\n", " homehGames = get_historic_games(home, gameId, trdf)\n", " awayhGames = get_historic_games(away, gameId, trdf)\n", " if (isinstance(homehGames, pd.DataFrame) and isinstance(awayhGames, pd.DataFrame)):\n", " if (len(homehGames.index) >= i and len(awayhGames.index) >= i):\n", " homehGames = homehGames[-i:]\n", " awayhGames = awayhGames[-i:]\n", " hFor, hAgainst = get_points(home, homehGames)\n", " aFor, aAgainst = get_points(away, awayhGames)\n", " else:\n", " hFor, hAgainst, aFor, aAgainst = 0, 0, 0, 0\n", " else:\n", " hFor, hAgainst, aFor, aAgainst = 0, 0, 0, 0\n", " v = v.append(pd.Series([hFor, hAgainst, aFor, aAgainst], index=['hFor', 'hAgainst', 'aFor', 'aAgainst'], name=r))\n", " lst.append(pd.DataFrame(v).T)\n", " trdfn = pd.concat(lst)\n", " csv_name = \"baseline1/data_iter_\"+str(i)+\".csv\"\n", " trdfn.to_csv(csv_name)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 36 }, { "cell_type": "code", "collapsed": false, "input": [ "def get_diff(hFor, hAgainst, aFor, aAgainst):\n", " if sum([hFor, hAgainst, aFor, aAgainst]) > 0:\n", " return (hFor + aAgainst)/2 - (aFor + hAgainst)/2\n", " else:\n", " return 0" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "def get_predicted_winner(diff_xy, home_team, away_team):\n", " if diff_xy >= 0:\n", " return home_team\n", " else:\n", " return away_team" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "def is_prediction_correct(winner, predicted_winner):\n", " if winner == predicted_winner:\n", " return True\n", " else:\n", " return False" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 11 }, { "cell_type": "code", "collapsed": false, "input": [ "dfs = []\n", "for i in xrange(1, 50):\n", " csv_name = \"baseline1/data_iter_\"+str(i)+\".csv\"\n", " trdfn = pd.read_csv(csv_name, index_col=0)\n", " years = set(trdfn.year)\n", " lst = []\n", " for year in years:\n", " trdfny = trdfn[trdfn.year == year]\n", " trdfny['diff_xy'] = trdfny.apply(lambda x: get_diff(x['hFor'], x['hAgainst'], x['aFor'], x['aAgainst']), axis=1)\n", " trdfny['predicted_winner'] = trdfny.apply(lambda x: get_predicted_winner(x['diff_xy'], x['home_team'], x['away_team']), axis=1)\n", " trdfny['correct_prediction'] = trdfny.apply(lambda x: is_prediction_correct(x['winner'], x['predicted_winner']), axis=1)\n", " lst.append(float(sum(trdfny.correct_prediction))*100/len(trdfny.index))\n", " dfs.append(pd.DataFrame(lst, columns=[i], index=years).T)\n", "ansB1 = pd.concat(dfs) " ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 40 }, { "cell_type": "code", "collapsed": false, "input": [ "tmpB1 = ansB1.T.mean()" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 41 }, { "cell_type": "code", "collapsed": false, "input": [ "tmpB1" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 42, "text": [ "1 58.679652\n", "2 62.370012\n", "3 63.960777\n", "4 64.882653\n", "5 65.617572\n", "6 65.579237\n", "7 65.843909\n", "8 66.775720\n", "9 66.865880\n", "10 67.329603\n", "11 67.123369\n", "12 67.328187\n", "13 67.439274\n", "14 67.169722\n", "15 67.578752\n", "16 67.620286\n", "17 67.446574\n", "18 67.743059\n", "19 67.651776\n", "20 67.725993\n", "21 67.773268\n", "22 67.797476\n", "23 67.791740\n", "24 67.383125\n", "25 67.435805\n", "26 67.412654\n", "27 67.730099\n", "28 67.711859\n", "29 67.205141\n", "30 67.172648\n", "31 67.321478\n", "32 67.328854\n", "33 67.320027\n", "34 67.153167\n", "35 67.389960\n", "36 67.068088\n", "37 66.746560\n", "38 66.537536\n", "39 66.470723\n", "40 66.521872\n", "41 66.614384\n", "42 66.602449\n", "43 66.398296\n", "44 66.325003\n", "45 66.336004\n", "46 66.121544\n", "47 66.256656\n", "48 66.173984\n", "49 66.148152\n", "dtype: float64" ] } ], "prompt_number": 42 }, { "cell_type": "code", "collapsed": false, "input": [ "max(tmpB1)" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 43, "text": [ "67.797475992742307" ] } ], "prompt_number": 43 }, { "cell_type": "code", "collapsed": false, "input": [ "tmpB1.plot(label=\"Baseline 1\")\n", "plt.grid(b=True, which='major', color='gray', linestyle=':')\n", "plt.xlabel('Number of games considered')\n", "plt.ylabel('Prediction accurcay')\n", "plt.ylim([0, 70])\n", "plt.legend()\n", "plt.show()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "display_data", "png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAESCAYAAAD67L7dAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJzt3XtcVHX6B/DPmQv3i8igCWoqkqmttanBzyuilaWoeEkt\nNe1CuT8vQGaa+spN07Qy0rYtyi5ettbWwM1MS0nst4rGtm5llpZXRAGH63CZ6/f3x8AkzoHBywxj\n5/N+vXjBmWHOPOdheOZ7nvM9ZyQhhAARESmGqqUDICIiz2LhJyJSGBZ+IiKFYeEnIlIYFn4iIoVh\n4SciUhiNO1d+4sQJpKWlOZbPnj2LuXPnYtSoUUhNTUVBQQGioqKQnp6OkJAQd4ZCRER1JE/N47fZ\nbBg0aBA+/vhjbNy4EWFhYXj88ceRkZGBiooKzJs3zxNhEBEpnsdaPfv370fHjh3Rrl07ZGdnIykp\nCQCQlJSE3bt3eyoMIiLF81jh/+yzzzBixAgAgF6vh06nAwDodDro9XpPhUFEpHgeKfwmkwlfffUV\n7rvvPqf7JEmCJEmeCIOIiOChwr9v3z707NkTrVu3BgCEh4ejuLgYAFBUVOS4vTEGg4HLXOYyl7l8\nFctyPHJwNzU1FYMGDXL09VevXo1WrVohOTmZB3eJiDzM7YW/uroaQ4YMwZ49exAUFAQAKCsrQ0pK\nCs6fP8/pnE2of+euzxsxJ41hXuQxL/I8Np2TiIi8A8/cJSJSGLeeuUvUGJtNoLrWDK1WDR+Nqtkz\nu4QQMFtssFhtsFhF3fe6L4sNVpuA1SYghIDNJmCzAba6nwXsO7cS6p5LAiTYZ5b5+ajRoW0wfLRq\nN21xw20wmq1QSRJUKvusNpUEzm4jj2Hh92LXsz9ptdpQY7TAbLEhJMgXapXrIlNrsuD0+QqcKKhA\nQbHBUXCtVgGLre671QYACAn0ueTL1/Gzr1aNwtJqXLhYhfP6KlzQV+OCvgqFJdUwW+yPlSTAV6uG\nn48Gvj5q+PqoodWoYDLbYDJbYbZYYXT8bLvmXDRGpZLQoU0QOkeFokuk/atTZAhCg3wdv2O1Cceb\njNlig00IqCQJkgSo64u4yv5VXWNGwcUqFBQb7N8vGlBQbM+D0WSVj0ECQoJ80b1Ta3Tv1Bo9OrdG\nl6hW0Gqa3jlnL1se8yKPPX4vIoTAeX0Vjp0pw/EzpThzoRL+fhqEBfsiLMSvwffgAB8Yqs0oraxF\naaURpZW1KKsworTSiDKDEdW1ZtQYLfavWgtMlxRMtUqCrpU/2oQFoE3ruu9hAQgK0OJsYSVOFlTg\nZEE5CooNsLnh1RHkr8VNukC0DvarK+pW1JqsMJosdd+tsFht0GrU8NGq4KNVw1drfzPwqdtD0GhU\n0KjrvyTHz2qVBJVaso+mLynCqrpRtQBQ/4oXEKjbCUBFtQmn6ra79rKi7O+rdrzJXWs+/HzUaKcL\nRFiwH4QQEMK+R1L/3WYTKCqthr681vEYH40KMR3D0KNza7RvEwy1SoJaLdm/q1SObbRYbTBUm2Co\nNsNQY0ZltQmGGjMM1WaYzPZtEsK+3XL/9ZJk3xuq3/GwvyFrEBLog+C6N/LggN/e4E1mK/TltdCX\n1+Bi3Xd9WS0ultfA10eNSF0gIiOCEFX3FakLRJvWAdCo2WFuaSz8HiCEgMlig9FkRa3JAmNdcas1\nWWCoMePX/HIcO1uK42dKUVltvubnkyTAz0cDf9+6Lz8NAup+1qhV0JfXoKi0GiUVxkbXEeCnQefI\nUHSJCkWXyBB0aBsMXx8N1Cp7kVXXFdv6PYfKahMqqkwoN9i/V1QZUVFlgtFkRUSYP24KD0S78EDc\nFB6AoACfa95Gd7HZBC7oq3CioBwnzpXjZEEF9OU10KhV0Na/2WhU0Na90ahUkr2lVN9aqi/iNuEo\nfu10QYiMCESkLhCtQ/xctnSEECgurcGPp0rw40k9jp4swekLFbLF+lpJEq7ren191NCF+qHWZG3w\n5lVPrZJwU3gA2unq3xACEakLQmREEMJD/aBqxp4oXTsWfjc6ca4c2//vBPYdPtforv2lbgoPwC0d\nwhDTMQy3dGyFLpGhqDVZHaP6sspalNaN6iurTQgK0CIsuG5PINgPYSG+aBXsi5DA5rVyTGYrLpbZ\n3wQKS2pQUWVE+zbB6BIVijZh/uw5e5GqGjN+Pl2K4rJq2OqOY1htAlargNVmbzlpVCoEBWgRFOCD\nIH8tguu+BwVo4atVA5JUd0xD/nhC/R6IsC9AAKg1WVFRZURllf0Nvf4NvqLKBK1ahfBW/tCF+iM8\n1A/hrfwR6KdxrLvGaMH5uhbXuWJ7m6v+e2W1yen5fbRqRLTyq9urtb+eHa/vED8E+Gngo/ltz0+r\nUTn2/iAAi03AYvntmE99a9JotsJotA+0Hh6fgMgOnWETAhIk3P/AE+gU3R0AHC07QIJKBQT5+9j/\np4Lszx8a6AO1i72VBQsWYMiQIbj33nuxePFizJgxA9HR0df0ty8rK8Ps2bPxww8/YOzYsViyZMk1\nrQ9g4b/uLFYbcn84j+3/dxJHTtivQdQmzB/t2wQ7+tf1/Ww/HzX8fDXo1C4EMR1aNeglA+xPymFO\n5N1oeamsNqGg2IBzxb8d+zhXbIC+vAblBuc3hevl+OeLEXPfcgBAVfExlBzPRod+TzbrsZIEhAba\nB1f+vhr4aFXQauz/z/UtyezMvyK6Rx907RkL1LfxAEdLMShAi9Yh9je38BA/R+u2flKB2WKFocaM\n6loLqmrsLbuycgPyz/yCC+dOo+DsKcxKeRq+2t9qifrSduZlPzeGB3evk3KDETtzT+Hz/accu7h/\nvCUCIwd2Qe9b2zZrBH65G+Wf2JOYE3k3Wl6CA3zQ7ebW6Haz8+VaLFYbyg1GlFYYUeLYy61FrdF+\nrKr+IH/9d7PFBkmCUxtOo1FBo5IcBdLXR4Olu9VInfxH+PpocPhQDQ5I7ZH2+P+guroKa1YuQpWh\nElarBWMnPYqYnn1ReLEcmzJWorzkIixWKzR/uBcmy20oKzqNwh8+hc1qhNonEDfdPhEav2BcKDSg\nBIX4qfQkzu5/ExE9R8IvtD2Of74YYZ0HoKroKCSVFpF9p0PjGwSL0YCi7zNhM5ZBCEDXIxH+rTvJ\n5qz87BkYywvw66s5zcqxr48a/1g5UvY+Fv5LmC1WHD1VApPZBpVKgkZtP3hWfyBNkiSUVtSiqLQG\nxaXVKCq1t0mKS6tRWmmEEPYDgSP7d8aIAZ3Rvk1wS28SkVd699Mj+Nd/z13Xdfa/PQqPJPZs8ncW\nm01Yu3wOjEYjiouL8cEHH6DnrW1gtVqx4b0MBAUFoaSkBJMmTcIXX4zHrl270LdXVyxb9iEA+56V\nr68vpk6dis3bNyEgMASffbYDBw78C2nzl+DFF/YhNu42DBgcj/n5f8cTU/siptutuPczM9IeS0T3\nXsvw9l/XQahP4c6BY/Dxey+j76CR0IbeDEtNKfJ2rMOwmasR5K9FoJ8Wgf5a+PtqYLUJ5H59DmdP\nGhA/qAtMZhuMJguMZius1kuOMTmON6HJmWCKL/xmiw3/PV6Mrw+fw8EfzqOq1nJFj6+fIfOHaB3i\nbmuHoX07IMBP66Zoieha+Pr6IisrCwBw+PBhPPPMM9i+fTtsNhvWrFmDvLw8qFQqFBUVQa/Xo1u3\nbli1ahVefvllxMfHo0+fPjh27BiOHz+Oxx59FABgtVrRpk0btG8TjOAAH7QND0DnyFDHDK6bbwqB\nVqvFhKT7AQD3JsRi//79mD6yJ9564SdoraWO+AJ8bJg74Tb4+/s7xS5K2iFQ6PH46D9ccx4UWfgt\nVnux/7/DBTjww3lU1dhn0uha+WNo344IC/GDtW6euv0Amv3EIJtNoFWwLyLCAtAmzD4NMizE76ra\nOM1xo/VtPYE5kXej5eWRxJ4uR+fXQ1N5ueOOO1BaWoqSkhLs3bsXpaWlyMzMhFqtRkJCAoxGIzp1\n6oSsrCzs3bsXr732GuLi4nD33XcjJiYGH330UbPj0Gh+K7UqlQpWa/30WoEtW7bAx8ezM90UVfgL\nS6rx+f6T+OLgGcesgvBQPwzr2xEDbo/ELR3DvGo62Y3yT+xJzIk85kVeU3n59ddfYbPZ0KpVKxgM\nBoSHh0OtViM3NxcFBQUA7JeNDw0NxahRoxAcHIytW7ciOTkZJSUlOHz4MO644w6YzWacPn0aXbt2\nveL4+vfvj40bN+LRur2Ho0ePonv37rK/ez3n4fzuC7/NJvDf48X47F8ncejHCxDCfpbpqIFdMOD2\nKHS72buKPRG5j9FoxJgxYwDYC+mLL74IlUqFxMREzJw5E4mJibjtttscUzCPHTuG1atXQ6VSQaPR\n4M9//jO0Wi3Wrl2L5cuXo7KyElarFQ8//HCThf/S6bOX/rx48WI8//zzGDVqFKxWK/r27YulS5c6\nPT4hIQFVVVUwmUzYs2cP1q9ff03TRH+30zmraszYk3cGO/51CueK7bt7t3RshRH9u2DA7ZEeuSYL\nEZE3+l0W/pxv8/HXrf9FVa0FGrUKg/4YhRH9O+OWjmEtHdoVudH6tp7AnMhjXuQxL/J+V4W/xmhB\nRub32P3NGfj5qDF+aAyGx3VyOjGKiEjJfjc9/l/zy/DSpjycK65CdPtQzJ/SB5ERfJcnIrrcDV/4\nhRD459cn8P72H2Gx2jBmcDSm3d/D5WVsiYiU6oYu/OUGI9I/+g/yjhaiVZAvUib/Eb1vbdvSYV03\n7E86Y07kMS/ymBd5N2yPv7SyFqmv5kBfXos7bolA2uQ7ERbi19JhERF5vRt2xP/BZz9CX16LCUNj\nMGV4d87FJyJqphuyEf7TqRLs+eYsOkeG4CEWfSKiK3LDFX6rTeCtzO8AAE8k9XLbdXK8gcFgcPQo\nyY45kce8yGNe5Lm91VNRUYHFixfj+PHjkCQJK1euxM0334zU1FQUFBQgKioK6enpCAkJadb6vjx4\nGr/klyO+d3v07BLu5uhbFg9IOWNO5DEv8pgXeW4f8b/wwgsYNGgQPv/8c/zzn/9Ely5dkJGRgX79\n+mHXrl2Ii4tDRkZGs9ZVUWXChh0/wt9XjRkj3X9lPyKi3yO3Fv7Kykrk5eVh/PjxAOyXJg0ODkZ2\ndjaSkpIAAElJSdi9e3ez1rdp51FUVpsx+Z5b0ZozeIiIropbC39+fj5at26NhQsXIikpCYsXL0Z1\ndTX0ej10Oh0AQKfTQa/XN7keg8GAX/PLsOvAKbRvE4Qhd7Rxuv/3uFzfn/SWeLxh2WAwoLCw0Gvi\n8ZblS18n3hCPtyzz9SLPrYXfYrHgxx9/xOTJk5GZmQl/f3+nto4kSQ0uUypHCIG3Mr+HTQDJY/4A\njULOyg0KCmKP8jJBQUEIDAxs6TC8Dl8r8vh6kefWE7iKi4sxceJEZGdnAwDy8vKQkZGBs2fPYsOG\nDYiIiEBRURGmTZuGnTt3Nrqe7LyzePXDb9GvVzssfPgud4VLRKQIbh06R0REoF27djh58iQA4MCB\nA+jatSuGDBmCzMxMAEBWVhaGDRvW5Hre334EPlo1Hk28zZ3hEhEpgtuncy5ZsgTz5s2D2WxGx44d\nsXLlSlitVqSkpGDr1q2O6ZxNKa004qHht6JN6wB3h+tVeJ0RZ8yJPOZFHvMi74a4Vs/jK77EX55O\n4KdmERFdBzdE4T9ZUI7OkaEtHQYR0e/CDVH4iYjo+lHGvMgbFK8z4ow5kce8yGNe5HHET0SkMBzx\nExEpDAs/EZHCsPB7MfYnnTEn8pgXecyLPPb4iYgUhiN+IiKFYeEnIlIYFn4vxv6kM+ZEHvMij3mR\nxx4/EZHCcMRPRKQwLPxERArDwu/F2J90xpzIY17kMS/y2OMnIlIYjviJiBSGhZ+ISGFY+L0Y+5PO\nmBN5zIs85kUee/xERArDET8RkcKw8BMRKQwLvxdjf9IZcyKPeZHHvMhz2eN/9NFH8dBDD2HIkCGQ\nJOmKnyAhIQGBgYFQq9XQaDT4xz/+gbKyMqSmpqKgoABRUVFIT09HSEjIVW8EERE1n8vC/8UXX2Dz\n5s04e/YsJk2ahAkTJiAsLKzZT5CQkIBPPvkErVq1cty2evVqhIWF4fHHH0dGRgYqKiowb968q98K\nIiJqNpetnnvuuQcffPAB3n77bRQVFWHkyJGYP38+fvjhh2Y/yeXvLdnZ2UhKSgIAJCUlYffu3VcY\nNhERXa0r6vELIaDRaODr64tnnnkGK1eudPkYSZIwY8YMjB07Flu2bAEA6PV66HQ6AIBOp4Ner29y\nHZf36JSyXN+f9JZ4vGHZYDCgsLDQa+LxluVLXyfeEI+3LPP1Ik/j6hd27tyJv/3tbyguLsaUKVOw\nY8cOBAYGwmKx4J577sHChQubfPyHH36INm3aoKSkBDNmzECXLl0a3C9J0lUdO1CCoKAgAM37QypF\nfU6oIeZFHvMiz2WP/4knnsCUKVMwYMAApwK9Z88eDB06tNlP9vrrryMgIABbtmzBxo0bERERgaKi\nIkybNg07d+68ui0gIqIr4tYzd2tqamC1WhEUFITq6mo88sgjmDVrFvbv349WrVohOTmZB3eJiDzM\nZeGvqKjA22+/jaNHj8JoNNofJEnYsGGDy5WfPXsWs2bNAgBYrVYkJibiiSeeQFlZGVJSUnD+/HlO\n52xCfYuHu6u/YU7kMS/ymBd5Lgv/rFmzEB0djc8++wxz587F1q1b0bNnTzz99NOeipGIiK4jl7N6\nTp8+jdTUVPj7+yMxMREZGRnIy8vzRGxEROQGLgu/j48PAECr1aK0tNTxnYiIbkwup3N27twZpaWl\nSExMxKRJkxAUFISePXt6IjbFY3/SGXMij3mRx7zIu6JZPXl5eaisrMTAgQOh0bh8zyAiIi/ksvAf\nO3YMUVFRCAwMBABUVVWhoKAAMTExHgmQiIiuL5c9/meeecbR5wfsvf5nnnnGrUEREZH7uCz8NpsN\nWq3Wsezj4wOr1erWoMiO1xJ3xpzIY17kMS/yXDbqNRoNzpw5g44dOwKwT+9Uq9VuD4x4QEoOcyKP\neZHHvMhzWfhnzZqFBx98EIMHD4YQAvv27cOyZcs8ERsREblBsy7ZoNfrsX//fgDAgAEDcPPNN3sk\nOCIiuv6aLPw2mw0jRozA559/7smYqA7nIDtjTuQxL/KYF3lNtnpUKhUiIyNRVlbW4KMTyTP4YnXG\nnMhjXuQxL/Jc9vgDAwORlJSEwYMHw9/fH4D96pzz5893e3BERHT9uSz8MTExjpO1JEmCEIKfmEVE\ndANz6wex0LVhf9IZcyKPeZHHvMhzWfhXrVolO9Jnq4eI6Mbk8szdgIAABAQEIDAwECqVCvv27eNl\nmYmIbmBX3OoxGAyYM2cO3n33XXfFREREbuRyxH+5wMBAFBQUuCMWugyvM+KMOZHHvMhjXuS5nNWz\natUqx89CCBw5cgRdu3Z1a1BkxwNSzpgTecyLPOZFnsvCHxAQ4Dioq1arMXnyZNx9991uD4yIiNyD\n0zmJiBTGZY9/9uzZKCsrcyyXlpZi7ty5bg2K7NifdMacyGNe5DEv8ly2es6cOdPgOj1hYWE4ffp0\ns5/AarVi3LhxuOmmm/Dmm2+irKwMqampKCgoQFRUFNLT0xESEnJ10f/OsT/pjDmRx7zIY17kNesT\nuCwWi2PZbDbDbDY3+wk2bNiA6Ohox3JGRgb69euHXbt2IS4uDhkZGVcYMhERXQuXhX/AgAFIS0tD\nXl4e8vLykJaWhoEDBzZr5RcuXEBOTg4mTJjguC07OxtJSUkAgKSkJOzevfsqQycioqvhsvCnpqbi\nlltuwYsvvogXX3wR3bp1Q1paWrNWvmLFCsyfPx8q1W9Po9frodPpAAA6nQ56vd7lei7v0Sllub4/\n6S3xeMOywWBAYWGh18TjLcuXvk68IR5vWebrRZ7LHr+Pjw9mzZqFWbNmuVzZpb766iuEh4ejR48e\nOHjwoOzvSJLEK302ob4/yYNTv2HPVh7zIo95kedyOueyZcswe/ZsxwHe0tJSvPHGG1i0aFGTK16z\nZg22bdsGtVoNk8kEg8GAu+++G99//z02btyIiIgIFBUVYdq0adi5c+f12yIiImqSy1ZPXl6e06ye\nQ4cOuVxxWloacnJykJ2djTVr1iAuLg4vvfQSEhISkJmZCQDIysrCsGHDriF8IiK6Us2a1XM5q9V6\n1U+YnJyM/fv3495770Vubi6Sk5Ovel2/d5yD7Iw5kce8yGNe5Lls9SxcuBCBgYF47LHHIITA+vXr\nUVVVhZUrV3oqRiIiuo5cFv7KykqsWLECe/fuBQDEx8fj2WefRXBwsCfiIyKi64zX6iEiUhiX0zkB\n4MSJE/jpp59gMpkct40ZM8ZtQZEdPy/UGXMij3mRx7zIczni/+CDD7BlyxYUFRWhV69eyMvLQ9++\nffHOO+94KkYiIrqOXM7q2bJlC7Zs2YLIyEisX78eH3/8MQICAjwRGxERuYHLwu/r64vAwEDYbDbY\nbDbccsstOHXqlAdCIyIid3DZ4/f394fJZEK3bt3w8ssv46abbgKPB3sG+5POmBN5zIs85kWeyx7/\nzz//jPbt26OmpgZr1qyBwWDAzJkz0b17d0/FSERE1xGncxIRKYzLHj8REf2+sPB7MV5nxBlzIo95\nkce8yGOrh4hIYZp15m5NTQ0uXLjQ4KqcXbt2dVtQRETkPi4L/+bNm/Hyyy8jNDS0wUcoZmdnuzUw\nIiJyD5etnoSEBGzcuBFRUVGeionqcA6yM+ZEHvMij3mR53LE36ZNGxb9FsIXqzPmRB7zIo95kedy\nxL927VrU1tZixIgR8PX1ddzOHj8R0Y2pWa0eOezxExHdmDid04uxP+mMOZHHvMhjXuQ1q/D/8ssv\nyM3NhSRJiIuLQ3R0tCdiIyIiN3B55m5WVhZmzJiBn376CT/++COmT5+Obdu2eSI2IiJyA5cj/sTE\nRLz77ruIiIgAABQXF+ORRx7Bp59+6pEAiYjo+nI54pckyVH0ASAiIgKSJLk1KLLjdUacMSfymBd5\nzIs8l/P4O3TogLVr12LixIkA7B/F2KFDB5crNhqNmDJlCkwmE8xmM4YOHYqnnnoKZWVlSE1NRUFB\nAaKiopCeno6QkJBr35LfIR6QcsacyGNe5DEv8ly2ei5evIjly5fjwIEDAIB+/fph8eLFCA8Pd7ny\nmpoa+Pv7w2Kx4MEHH8T8+fORnZ2NsLAwPP7448jIyEBFRQXmzZt3fbaGiIhccjni1+l0SE9Pv6qV\n+/v7AwDMZjOsVitCQ0ORnZ2NTZs2AQCSkpIwdepUFn4iIg9qtMf/73//GwCwd+9e5OTkOH01h81m\nw+jRo9GvXz/ExsYiJiYGer0eOp0OgP1NRa/Xu1zP5T06pSzX9ye9JR5vWDYYDCgsLPSaeLxl+dLX\niTfE4y3LfL3Ia3TEn5mZid69e2P9+vWy9w8ePNjlylUqFbZt24bKyko8+uijyM3NbXC/JEk8UNyE\n+v4kD079hj1becyLPOZFnsfO3P3LX/4CPz8/fPzxx9i4cSMiIiJQVFSEadOmYefOnZ4IgYiI0Izp\nnJMnT27WbZcrKSlBRUUFAKC2thb79+9Hjx49kJCQgMzMTAD2k8OGDRt2pTETEdE1cHlwt6ampsGy\n1WpFeXm5yxUXFxdjwYIFsNlsjl7///zP/6B79+5ISUnB1q1bHdM5SR6vM+KMOZHHvMhjXuQ12up5\n++23sX79elRWViI4ONhxe21tLRITE7Fs2TKPBUlERNdPo4W/srIS5eXleP755/Hcc8+h/teCgoLQ\nqlUrjwZJRETXj8uDu3q9HsHBwfDx8QEAmEwmGAwGtG7d2iMBEhHR9eXy4O6TTz4Jq9XqWDabzXji\niSfcGhTZ8TojzpgTecyLPOZFnsuDuyaTyXEGLgAEBgbCZDK5NSiy4wEpZ8yJPOZFHvMiz+WIH0CD\ns2v1ej1sNpvbAiIiIvdyOeKfOnUqJk+ejDFjxkAIgW3btiE5OdkTsRERkRs068zdgwcPYu/evZAk\nCfHx8bjrrrs8EZvicQ6yM+ZEHvMij3mRxw9bJyJSmEZbPatXr8b8+fMxZ84cp/skScJrr73m1sCI\niMg9Gi38ffr0AQDEx8c73ccrahIR3bjY6vFi7E86Y07kMS/ymBd5jRb+S1s8kiQ5LtlQP9pnq4eI\n6MbU6Dz++Ph4xMfHIzw8HPn5+ejduzfuvPNOnDt3rlmft0tERN7JZatn0qRJeP/99+Hn5wfAfnXO\n6dOn46OPPvJIgEREdH25PHO3tLQUWq3WsazValFaWurWoMiO1xlxxpzIY17kMS/yXJ65Gxsbi+Tk\nZCQlJTnO3I2NjfVEbIrHA1LOmBN5zIs85kWey1aPyWTC3//+dxw8eBCSJCEuLg4PPPBAg70AIiK6\ncXA6JxGRwrjs8Z88eRKTJ09GQkICAODIkSNYt26d2wMj9iflMCfymBd5zIs8l4V/6dKlePLJJx2f\nu3vrrbfi888/d3tgZO9PskfZEHMij3mRx7zIc1n4KysrMXjwYMeJW2q1mv19IqIbmMvCr9FoGnzi\nVmFhIdRqtVuDIiIi93FZ+CdPnozZs2ejtLQUa9euxeTJkzFjxgxPxKZ47E86Y07kMS/ymBd5zZrV\nk5eXh+zsbABAQkKC48qdrpw/fx7z589HSUkJJEnCAw88gGnTpqGsrAypqakoKChAVFQU0tPTERIS\ncm1bQkREzdJk4bdYLJgwYQIyMzOvauXFxcW4ePEiunfvjqqqKowdOxZvvPEGtm7dirCwMDz++OPI\nyMhARUUF5s2bd9UbQUREzddkq0ej0SAgIAC1tbVXtfKIiAh0794dABAYGIjo6GgUFhYiOzsbSUlJ\nAICkpCTs3r37qtZPRERXzmWPv1OnTpgyZQrefvttbN682fF1pfLz83H06FH06tULer0eOp0OAKDT\n6aDX65t87OU9OqUs1/cnvSUeb1g2GAwoLCz0mni8ZfnS14k3xOMty3y9yHN5rR6r1YquXbvixIkT\nLlfWmKqqKsyZMweLFi1ymlMrSRI/0asR9bniwanfcE62POZFHvMir8kef1lZGc6ePYtOnTo5TuC6\nUmazGU9F9ZAVAAAQmklEQVQ++SQGDhyI6dOnAwCGDx+OjRs3IiIiAkVFRZg2bRp27tx5VesnIqIr\n02irZ8eOHRg8eDCeeOIJDBkyBAcOHLjilQshsGjRIkRHRzuKPmCfGVR/wDgrKwvDhg278siJiOiq\nNDriT0xMxOrVq9G9e3fk5ubi9ddfx6ZNm65o5Xl5eZgyZQq6devmaOekpaWhV69eSElJwfnz5zmd\nswn8vFBnzIk85kUe8yKv0cI/evRobNu2zbE8ZswYZGVleSwwIiJyj0YP7ppMJvzyyy8A7C0bo9Ho\nWAaArl27uj86IiK67hod8ddfhrkx9WfyEhHRjYUfxOLF2J90xpzIY17kMS/yWPiJiBTG5Zm7RET0\n+8LCT0SkMCz8XozXEnfGnMhjXuQxL/LY4yciUhiO+ImIFIaFn4hIYVj4vRj7k86YE3nMizzmRR57\n/ERECsMRPxGRwrDwExEpDAu/F2N/0hlzIo95kce8yGOPn4hIYTjiJyJSGBZ+IiKFYeH3YuxPOmNO\n5DEv8pgXeezxExEpDEf8REQKw8JPRKQwLPxejP1JZ8yJPOZFHvMiz609/oULFyInJwfh4eH49NNP\nAQBlZWVITU1FQUEBoqKikJ6ejpCQEHeFQEREl3HriH/cuHF45513GtyWkZGBfv36YdeuXYiLi0NG\nRoY7QyAiosu4tfD36dPHaTSfnZ2NpKQkAEBSUhJ2797tzhCIiOgyHu/x6/V66HQ6AIBOp4Ner3f5\nmMt7dEpZru9Peks83rBsMBhQWFjoNfF4y/KlrxNviMdblvl6kadx+RtuJEkSJElqyRC8WlBQEIDm\n/SGVoj4n1BDzIo95kef2E7jy8/Mxc+ZMx8Hd4cOHY+PGjYiIiEBRURGmTZuGnTt3ujMEIiK6hMdb\nPQkJCcjMzAQAZGVlYdiwYZ4OgYhI0dw64k9LS8OhQ4dQVlaG8PBwzJkzB0OHDkVKSgrOnz/P6Zwu\n1Ld4uLv6G+ZEHvMij3mRx2v1EBEpDM/cJSJSGBZ+IiKFYeH3YrzOiDPmRB7zIo95kccePxGRwnDE\nT0SkMCz8REQKw8LvxdifdMacyGNe5DEv8tjjJyJSGI74iYgUhoWfiEhhWPi9GPuTzpgTecyLPOZF\nHnv8REQKwxE/EZHCsPATESkMC78XY3/SGXMij3mRx7zIY4+fiEhhOOInIlIYFn4iIoVh4fdi7E86\nY07kMS/ymBd57PETESkMR/xERArDwk9EpDAtVvj37duH4cOH45577kFGRkZLheHV2J90xpzIY17k\nMS/yWqTHb7VaMXz4cLz33nto27Ytxo8fjzVr1iA6OtrToRARKU6LjPi/++47dOzYEe3bt4dWq8WI\nESOwZ8+elgiFiEhxWqTwFxYWol27do7ltm3borCwsCVCISJSnBYp/JIkXdHvX96jU8pyfX/SW+Lx\nhmWDweA0SPCm+Fpq+dLXiTfE4y3LfL3Ia5Ee/+HDh7Fu3TqsX78eAPDWW29BkiQkJyd7OhQiIsVp\nkRH/bbfdhtOnTyM/Px8mkwk7duzA0KFDWyIUIiLF0bTIk2o0WLJkCR599FHYbDaMHz+eM3qIiDyE\nl2wgIlIYnrlLRKQwLPxERArDwk9EpDAs/F5i4cKF6NevHxITEx23lZWVYcaMGbj33nvxyCOPoKKi\nogUjbBnnz5/H1KlTMWLECIwcORIbNmwAoOzcGI1GTJgwAaNHj8b999+PV155BYCyc3Ipq9WKMWPG\n4MknnwTAvMhh4fcS48aNwzvvvNPgtoyMDPTr1w+7du1CXFycIi9mp9Fo8Oyzz+Kzzz7D3//+d2ze\nvBm//vqronPj6+uLDRs2YNu2bfjnP/+JgwcPIi8vT9E5udSGDRsazBJkXpyx8HuJPn36ICQkpMFt\n2dnZSEpKAgAkJSVh9+7dLRFai4qIiED37t0BAIGBgYiOjkZhYaHic+Pv7w8AMJvNsFqtCA0NVXxO\nAODChQvIycnBhAkTHLcxL85Y+L2YXq+HTqcDAOh0Ouj1+haOqGXl5+fj6NGj6NWrl+JzY7PZMHr0\naPTr1w+xsbGIiYlRfE4AYMWKFZg/fz5Uqt9KG/PijIX/BiFJ0hVf4+j3pKqqCnPmzMGiRYsQFBTU\n4D4l5kalUmHbtm3Yt28f8vLykJub2+B+Jebkq6++Qnh4OHr06IHGTk9SYl7ktMiZu9Q84eHhKC4u\nRkREBIqKitC6deuWDqlFmM1mzJkzB6NGjcKwYcMAMDf1goODMXjwYBw5ckTxOfnPf/6D7Oxs5OTk\nwGQywWAw4Omnn1Z8XuRwxO/FEhISkJmZCQDIyspyFD0lEUJg0aJFiI6OxvTp0x23Kzk3JSUljpkp\ntbW12L9/P3r06KHonABAWloacnJykJ2djTVr1iAuLg4vvfSS4vMih5ds8BJpaWk4dOgQysrKEB4e\njjlz5mDo0KFISUnB+fPnERUVhfT0dKcDwL93eXl5mDJlCrp16+bYRU9LS0OvXr0Um5uff/4ZCxYs\ngM1mc/T6H3vsMZSVlSk2J5c7dOgQ3n33Xbz55pvMiwwWfiIihWGrh4hIYVj4iYgUhoWfiEhhWPiJ\niBSGhZ+ISGFY+ImIFIaFX+ESEhKQmJjY4BT3hIQE/PLLL9ftOfLz8xEXF3fd1tdcCxcuxMiRI5GW\nlubx575RrF27Fjt27JC9b926dVi1apVbn/+TTz7BnDlz3Poc5IyXbCBUV1dj27ZtGDNmTEuH0iSb\nzdbg4ltNuXjxIr744gv8+9//dnNUN7amiu7VXNPGYrFAo2FZ8Xb8CxFmzZqF119/HSNHjnT6p01I\nSEBGRga6du3qtJyQkIBRo0YhNzcXFy5cQFpaGi5evIgdO3agoqICK1asQJ8+fRzrWrVqFf71r39B\nCIHnnnvOcV9OTg7efPNNGI1G+Pj4YOHChbj99ttx8OBBLF++HLfddhuOHj2K1NRUDB48uEF8WVlZ\nWL9+PSRJQseOHfH888/Dx8cH06ZNQ21tLcaMGYMxY8Y0uNwDYD8j+M9//jMkSUJsbCz27Nnj2K5V\nq1bhm2++gdlsRlhYGFasWIHIyEjk5+dj3LhxmDhxIr7++mvU1tZi9erV+PDDD/H999/D398fb7zx\nhuNKkBkZGfjyyy9htVrRpk0bLF++HDqdDrt378Zrr70GtVoNq9WKJUuW4K677nL6u/zjH//Axo0b\nAQBarRYZGRlo3bq17Da3bt0an3zyCbZv347Q0FAcP34cwcHBWLduHXQ6Hb799lssX74cNpsNFosF\nf/rTn3D//fdjwYIF+MMf/oCHHnoIlZWVWLRoEY4fPw6dTod27dohPDwcAGAymfDqq68iLy8PJpMJ\n3bp1w9KlSxEQEIAFCxZArVbj1KlTqK6uRmZmJjIzM/Hhhx/CYrEgODgYS5cuRefOnWEymbB8+XIc\nPHgQYWFhjktuk4cJUrQhQ4aIY8eOiTlz5ogPPvjAcdvx48edfpa7b/Xq1UIIIb777jvRq1cvsXnz\nZiGEEDt27BCTJ08WQghx9uxZ0a1bN5GVlSWEEOLgwYNi0KBBwmQyidOnT4uJEyeKyspKIYQQx44d\nE/Hx8UIIIXJzc0X37t3F4cOHZWP/+eefxYABA0RxcbEQQoj09HSRkpIihBAiPz9fxMbGyj7OaDSK\ngQMHiry8PCGEEF9++aXo1q2bY7tKSkocv7tlyxaRmpraYDv27t0rhBDinXfeEb179xZHjx4VQgix\ndOlS8eqrrwohhMjKyhJLliwRNptNCCHE5s2bxVNPPSWEEGLUqFGObbLZbI5tv1Rubq64++67xcWL\nF4UQQlRXVwuj0djkNm/dulX07dtXXLhwQQghxOLFi8WaNWuEEELMnDlTbN++3bH+iooKIYQQCxYs\nEJs2bRJCCLFy5Urx7LPPOnIQHx8vVq1aJYQQ4i9/+Yt44403HI9fvXq1Y93PPPOMGDdunKipqRFC\nCPHNN9+I5ORkYTQahRBC7N27V0yaNEkIIcSGDRvEI488IiwWi6ipqRFjx44Vc+bMkf07kftwxE+Q\nJAkpKSmYNm0axo8ff0WPvf/++wEAPXr0gNFodCz37NkTZ86ccfyeVqvF6NGjAQB33XUX/Pz8cOLE\nCeTl5eHMmTOYMmWK43etVitKSkoAADfffDNuv/122ec+ePAg4uPjHSPsSZMmYdSoUQDQ6GV5AeDE\niRPw9/dH7969AQDDhg1rcO2WnJwcfPjhh6iurobFYmnw2ICAAMdeR48ePdCuXTvceuutjm3ev38/\nAPuHfxw5csTxASBWqxXBwcEAgLi4OKxYsQL33HMPBg0ahJiYGKcY9+7dizFjxjhG3PUfvNLUNgPA\nnXfeibZt2wIAbr/9dkc8sbGx+Otf/4ozZ86gf//+6NWrl9NzHjp0CEuWLAEAhIWF4e6773bcl52d\njaqqKuzatQuAfQ+gfrQuSRLuvfde+Pn5OX73p59+wgMPPADA/reorKx0xJ+UlAS1Wg21Wo1Ro0ax\nHdcCWPgJANC5c2cMHjwY7777boPbNRoNbDabY9lkMjW439fXFwCgVqsbLKtUKqeiKYRo0Deu/3ng\nwIGNHkQMCAhoNGZJkhoU+KaKfXOdO3cOL774IrZu3YqoqCh8++23mDdvnuN+Hx8fx88qlcpp+dJt\n/tOf/oSxY8c6PcfChQtx/PhxHDhwAHPnzsWMGTMafGKU3LY1dvvlv9NYPA8//DASEhKwf/9+LFu2\nDP3790dKSorT+pta99KlSxEbG+v0GMD57zRu3DjZ4wfu+JvRleOsHnKYPXs2/va3v6GqqspxW8eO\nHfHdd98BAA4cOICLFy9e1brNZjM+/fRTAPb+utFoRJcuXdC/f398/fXXDWYR1T+fK7GxscjJyXHE\ntGXLFgwYMMDl47p06YKamhp8++23AIDdu3c7LnNsMBig1Wqh0+lgs9nw0UcfXdF21ktISMDmzZsd\n6zWZTPjpp58A2Pc4YmJiMG3aNIwaNQrff/+90+Pj4+Oxbds2x6dFVVVVwWQyXfU2nzx5Eh06dMDE\niRMxdepUx3NeWnhjY2PxySefAABKS0sbfERhQkIC3nvvPRiNRgD2PP3666+NbntWVhYKCwsB2Pd2\njhw5AsC+t7Nt2zZYrVbU1tZi+/btLmOn648jfnJo27YtRo8ejffff99x29y5c7FgwQJs2rQJcXFx\niIyMbPTxl88CuXS5VatWOHr0qOMD5V955RVoNBp06tQJL730EhYtWoTa2lqYzWb07t3b0YpoamZJ\nTEwMnnrqKcyYMaPBgc7G4qnn4+ODV155Bc899xxUKhX69u2L8PBwBAcHo23bthg+fDjuv/9+hIWF\nYfDgwQ1aEZfvsTS2PHr0aJSWljpaWEIIPPjgg7j11luxZs0anD59Gmq1GiEhIXjhhRecYrzrrruQ\nnJyM6dOnO/Ys3nrrrSa3ual4Nm3ahIMHD0Kr1cLX1xeLFy922p7//d//xbPPPov77rsPOp2uwQHn\n5ORkrFu3DuPHj3esd/bs2Q0+1Lxenz59kJqaipkzZ8JqtcJsNuO+++5Dz5498cADD+Dnn3925Lf+\nYzTJs3hZZlKkqqoqBAYGAgByc3Px7LPPIjs7u4WjIvIMjvhJkb744gu8//77sNls8PX1xSuvvNLS\nIRF5DEf8REQKw4O7REQKw8JPRKQwLPxERArDwk9EpDAs/ERECsPCT0SkMP8PhcW+FUQAFr0AAAAA\nSUVORK5CYII=\n", "text": [ "" ] } ], "prompt_number": 53 }, { "cell_type": "markdown", "metadata": {}, "source": [ "##Test Data Operations\n" ] }, { "cell_type": "code", "collapsed": false, "input": [ "trdf_alpha = trainDF" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "def get_historic_games(team, gameId, trdf):\n", " df = trdf[trdf.game_id < gameId]\n", " hdf = df[df.home_team == team]\n", " adf = df[df.away_team == team]\n", " newdf = pd.concat([hdf, adf])\n", " newdf = newdf.sort_index()\n", " try:\n", " return pd.concat([hdf, adf])\n", " except:\n", " return False" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "def get_points(team, df, alpha):\n", " pFor = []\n", " pAgainst = []\n", " for r, v in df.iterrows():\n", " hTeam = v.home_team\n", " aTeam = v.away_team\n", " hScore = v.home_score\n", " aScore = v.away_score\n", " if (team == hTeam):\n", " pFor.append(hScore)\n", " pAgainst.append(aScore)\n", " elif (team == aTeam):\n", " pFor.append(aScore)\n", " pAgainst.append(hScore)\n", " numGames = len(pFor)\n", " weights = []\n", " for i in xrange(1, numGames+1):\n", " weights.append(alpha**i)\n", " return (int(round(np.dot(pFor, weights)/float(sum(weights)))), int(round(np.dot(pAgainst, weights)/float(sum(weights)))))" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "for alpha in np.arange(0.5, 1.01, 0.1):\n", " for i in xrange(17, 18):\n", " lst = []\n", " for r, v in trdf_alpha.iterrows():\n", " gameId = v.game_id\n", " home = v.home_team\n", " away = v.away_team\n", " homehGames = get_historic_games(home, gameId, trdf_alpha)\n", " awayhGames = get_historic_games(away, gameId, trdf_alpha)\n", " if (isinstance(homehGames, pd.DataFrame) and isinstance(awayhGames, pd.DataFrame)):\n", " if (len(homehGames.index) >= i and len(awayhGames.index) >= i):\n", " homehGames = homehGames[-i:]\n", " awayhGames = awayhGames[-i:]\n", " hFor, hAgainst = get_points(home, homehGames, alpha)\n", " aFor, aAgainst = get_points(away, awayhGames, alpha)\n", " else:\n", " hFor, hAgainst, aFor, aAgainst = 0, 0, 0, 0\n", " else:\n", " hFor, hAgainst, aFor, aAgainst = 0, 0, 0, 0\n", " v = v.append(pd.Series([hFor, hAgainst, aFor, aAgainst], index=['hFor', 'hAgainst', 'aFor', 'aAgainst'], name=r))\n", " lst.append(pd.DataFrame(v).T)\n", " trdf_alpha_n = pd.concat(lst)\n", " csv_name = \"baseline2/alpha\" + str(alpha) + \"/train_data_iter_\"+str(i)+\".csv\"\n", " trdf_alpha_n.to_csv(csv_name)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "def get_diff(hFor, hAgainst, aFor, aAgainst):\n", " if sum([hFor, hAgainst, aFor, aAgainst]) > 0:\n", " return (hFor + aAgainst)/2 - (aFor + hAgainst)/2\n", " else:\n", " return 0" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "def get_predicted_winner(diff_xy, home_team, away_team):\n", " if diff_xy >= 0:\n", " return home_team\n", " else:\n", " return away_team" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "def is_prediction_correct(winner, predicted_winner):\n", " if winner == predicted_winner:\n", " return True\n", " else:\n", " return False" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 18 }, { "cell_type": "code", "collapsed": false, "input": [ "adfs = []\n", "for alpha in np.arange(0.5, 1.01, 0.1):\n", " dfs = []\n", " for i in xrange(17, 18):\n", " csv_name = \"baseline2/alpha\" + str(alpha) + \"/train_data_iter_\"+str(i)+\".csv\"\n", " trdfn = pd.read_csv(csv_name, index_col=0)\n", " years = set(trdfn.year)\n", " lst = []\n", " for year in years:\n", " trdfny = trdfn[trdfn.year == year]\n", " trdfny['diff_xy'] = trdfny.apply(lambda x: get_diff(x['hFor'], x['hAgainst'], x['aFor'], x['aAgainst']), axis=1)\n", " trdfny['predicted_winner'] = trdfny.apply(lambda x: get_predicted_winner(x['diff_xy'], x['home_team'], x['away_team']), axis=1)\n", " trdfny['correct_prediction'] = trdfny.apply(lambda x: is_prediction_correct(x['winner'], x['predicted_winner']), axis=1)\n", " lst.append(float(sum(trdfny.correct_prediction))*100/len(trdfny.index))\n", " dfs.append(pd.DataFrame(lst, columns=[i], index=years).T)\n", " tmp = pd.concat(dfs)\n", " tmp = tmp.T.mean()\n", " adfs.append(pd.DataFrame(tmp, columns=[alpha]).T)\n", "ans = pd.concat(adfs).T" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 19 }, { "cell_type": "code", "collapsed": false, "input": [ "ans.T" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
17
0.5 63.453913
0.6 64.496069
0.7 65.155919
0.8 66.047140
0.9 67.005708
1.0 67.595777
\n", "

6 rows \u00d7 1 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 20, "text": [ " 17\n", "0.5 63.453913\n", "0.6 64.496069\n", "0.7 65.155919\n", "0.8 66.047140\n", "0.9 67.005708\n", "1.0 67.595777\n", "\n", "[6 rows x 1 columns]" ] } ], "prompt_number": 20 }, { "cell_type": "markdown", "metadata": {}, "source": [ "---" ] } ], "metadata": {} } ] }