{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import scrape_functions\n", "from datetime import date, timedelta\n", "import json_pbp\n", "import html_pbp\n", "import seaborn as sbs\n", "import espn_pbp\n", "import json_shifts\n", "import html_shifts\n", "import playing_roster\n", "import json_schedule\n", "import pandas as pd\n", "import time\n", "import numpy as np\n", "import datetime\n", "import warnings\n", "import shared\n", "import pickle\n", "#pip install mysql-connector-python-rf\n", "import mysql.connector\n", "from mysql.connector import Error\n", "from sqlalchemy import create_engine\n", "\n", "pd.set_option('display.max_columns', None)\n", "pd.set_option('display.max_colwidth', 999)\n", "pd.set_option('display.max_rows', None)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Create Rink Adjust Object" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "\n", "import thinkbayes2 as tb\n", "import thinkbayes as tb0\n", "\n", "\n", "class RinkAdjust(object):\n", " \n", " def __init__( self ):\n", " self.teamxcdf, self.teamycdf, self.otherxcdf, self.otherycdf = {}, {}, {}, {}\n", "\n", " \n", " def addCDFs( self, team, this_x_cdf, this_y_cdf, other_x_cdf, other_y_cdf ):\n", " self.teamxcdf[team] = this_x_cdf \n", " self.teamycdf[team] = this_y_cdf \n", " self.otherxcdf[team] = other_x_cdf \n", " self.otherycdf[team] = other_y_cdf\n", "\n", "\n", " def addTeam( self, team, this_team, rest_of_league ):\n", " this_x_cdf = tb.MakeCdfFromPmf( tb.MakePmfFromList( this_team.X_unadj ) )\n", " this_y_cdf = tb.MakeCdfFromPmf( tb.MakePmfFromList( this_team.Y_unadj ) )\n", " other_x_cdf = tb.MakeCdfFromPmf( tb.MakePmfFromList( rest_of_league.X_unadj ) )\n", " other_y_cdf = tb.MakeCdfFromPmf( tb.MakePmfFromList( rest_of_league.Y_unadj ) )\n", " self.addCDFs( team, this_x_cdf, this_y_cdf, other_x_cdf, other_y_cdf )\n", "\n", "\n", " def PlotTeamCDFs( self, team, savefig=False ):\n", " this_x_cdf = self.teamxcdf[team]\n", " this_y_cdf = self.teamycdf[team]\n", " other_x_cdf = self.otherxcdf[team] \n", " other_y_cdf = self.otherycdf[team]\n", "\n", " f, axx = plt.subplots( 1, 2, sharey='col' )\n", " f.set_size_inches( 14, 8 )\n", " \n", " xx1, yx1 = this_x_cdf.Render()\n", " xx2, yx2 = other_x_cdf.Render()\n", "\n", " axx[0].plot( xx1, yx1, color='blue', label='@%s' % team )\n", " axx[0].plot( xx2, yx2, color='brown', label='@Rest of League' )\n", " axx[0].set_xlabel( 'CDF of X' )\n", " axx[0].legend()\n", " \n", " xy1, yy1 = this_y_cdf.Render()\n", " xy2, yy2 = other_y_cdf.Render()\n", " \n", " axx[1].plot( xy1, yy1, color='blue', label='@%s' % team )\n", " axx[1].plot( xy2, yy2, color='brown', label='@Rest of League' )\n", " axx[1].set_xlabel( 'CDF of Y' )\n", " axx[1].legend()\n", " \n", " f.suptitle( 'Cumulative Density Function for Shot Location Rink Bias Adjustment' )\n", " \n", " plt.show()\n", " \n", " if savefig:\n", " #f.set_tight_layout( True )\n", " plt.savefig( 'Rink bias CDF chart %s.png' % team )\n", "\n", "\n", " def rink_bias_adjust( self, x, y, team ):\n", " \"\"\" this method implements the actual location conversion from biased to \"unbiased\" shot location\n", " \n", " the way it works for rink bias adjustment is that for a given shot location in a specific rink,\n", " you find the cumulative probabilities for that x and y in that rink. Then you calculate the league \n", " equivalent x and y that have the same probabilities as the one measured in the specific rink\n", " \n", " The equivalency CDFs are calculated using only visiting teams, which ensures that both single rink and\n", " league wide rinks have as wide a sample of teams as possible but avoid any possible home team bias.\n", " All of which lets us assume that they are then unbiased enough to be representative (at least enough \n", " for standardization purposes)\n", " \n", " This is (my adaption of my understanding of) Shuckers' method for rink bias adjustment as described in Appendix A here:\n", " http://www.sloansportsconference.com/wp-content/uploads/2013/Total%20Hockey%20Rating%20(THoR)%20A%20comprehensive%20statistical%20rating%20of%20National%20Hockey%20League%20forwards%20and%20defensemen%20based%20upon%20all%20on-ice%20events.pdf\n", " \n", " for example, if a shot x coordinate is measured as xmeas in a rink\n", " \n", " xprob = this_x_cdf.Prob( xmeas ) # cum prob of seeing xmeas in this rink\n", " xadj = other_x_cdf.Value( xprob ) # value associated with same prob in rest of league \n", " \n", " analogous process for y\n", " \n", " The code for Cdf/Pmf creation and manipulation is taken directly from Allan Downey's code for \"Think Bayes\"\n", " \"\"\"\n", " \n", " xprob = self.teamxcdf[team].Prob( x )\n", " newx = self.otherxcdf[team].Value( xprob )\n", " \n", " yprob = self.teamycdf[team].Prob( y )\n", " newy = self.otherycdf[team].Value( yprob )\n", " \n", " return newx, newy" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Create Function to Transform Raw NHL PBP Data\n", "\n", "NHL PBP Data from scraper: https://github.com/HarryShomer/Hockey-Scraper" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "def transform_data(data):\n", "\n", " import warnings\n", " warnings.simplefilter(\"ignore\")\n", " \n", " from sqlalchemy import create_engine\n", " \n", " pbp_df = data\n", "\n", " print(\"All events and columns: \" + str(pbp_df.shape))\n", " \n", " ## Remove shootouts\n", " pbp_df['season'] = pbp_df.apply( lambda x: str(pd.to_datetime(x.Date).year-1) + str(pd.to_datetime(x.Date).year) if pd.to_datetime(x.Date).month < 9 else str(pd.to_datetime(x.Date).year) + str(pd.to_datetime(x.Date).year + 1), axis=1 )\n", " \n", " \n", "\n", " pbp_df['season2'] = pbp_df.apply( lambda x: x.season if x.Game_Id < 30000 else str(x.season) + \"p\", axis=1 )\n", "\n", " pbp_df['Season_Type'] = pbp_df.apply( lambda x: 'RS' if x.Game_Id < 30000 else 'PO', axis=1 )\n", "\n", " pbp_df['season_model'] = pbp_df.apply(lambda x: '2011_2012' if x.season in ['20102011','20112012'] else\n", " '2013_2014' if x.season in ['20122013','20132014'] else\n", " '2015_2016' if x.season in ['20142015','20152016'] else\n", " '2017_2018' if x.season in ['20162017','20172018'] else 0, axis = 1)\n", "\n", " pbp_df = pbp_df.drop_duplicates(['season','Game_Id','Period','Ev_Team','Seconds_Elapsed'])\n", "\n", " pbp_df = pbp_df.sort_values(['season','Game_Id','Period','Seconds_Elapsed'], ascending=True)\n", "\n", " # Remove SOs\n", " pbp_df = pbp_df.loc[((pbp_df.Period == 5) & (pbp_df.Season_Type == \"RS\")) != True,:]\n", "\n", " # Group Give/Take together\n", " pbp_df['Event'] = pbp_df['Event'].apply( lambda x: 'TURN' if x in [\"GIVE\",\"TAKE\"] else x )\n", "\n", " pbp_df['Type'] = pbp_df['Type'].apply( lambda x: 'DEFLECTED' if x in [\"DEFLECTED\",\"TIP-IN\"] else \\\n", " 'WRIST SHOT' if x in [\"WRIST SHOT\",\"SNAP SHOT\"] else x )\n", "\n", " ## Check Lag Time doesn't Cross Periods\n", " pbp_df = pbp_df.sort_values(['season','Game_Id','Period','Seconds_Elapsed'], ascending=True)\n", "\n", " pbp_df['lagged_Event'] = pbp_df.groupby(['Game_Id','Period'])['Event'].shift(1)\n", " pbp_df['lagged_Ev_Zone'] = pbp_df.groupby(['Game_Id','Period'])['Ev_Zone'].shift(1)\n", " pbp_df['lagged_Seconds_Elapsed'] = pbp_df.groupby(['Game_Id','Period'])['Seconds_Elapsed'].shift(1)\n", " \n", " #############################################\n", " ### Subset to just shots\n", " #############################################\n", " pbp_df = pbp_df.loc[pbp_df.Event.isin([\"SHOT\",\"GOAL\",\"MISS\",\"BLOCK\"]),:]\n", "\n", " print(\"All shots/blocks and columns: \" + str(pbp_df.shape))\n", "\n", " ## Binary\n", " pbp_df['Goal'] = pbp_df.apply( lambda x: 1 if x.Event == \"GOAL\" else 0, axis = 1 )\n", " \n", " pbp_df['EmptyNet_SA'] = pbp_df.apply( lambda x: 1 if ((pd.isnull(x.Home_Goalie)) & (x.Ev_Team == x.Away_Team)) | \\\n", " ((pd.isnull(x.Away_Goalie)) & (x.Ev_Team == x.Home_Team)) else 0, axis = 1)\n", "\n", " pbp_df['is_Rebound'] = pbp_df.apply( lambda x: 1 if (x.lagged_Event in [\"SHOT\"]) & \\\n", " ((x.Seconds_Elapsed - x.lagged_Seconds_Elapsed) <= 2) else 0, axis = 1 ) \n", " \n", " pbp_df['is_Bounce'] = pbp_df.apply( lambda x: 1 if (x.lagged_Event in [\"BLOCK\",\"MISS\"]) & \\\n", " ((x.Seconds_Elapsed - x.lagged_Seconds_Elapsed) <= 2) else 0, axis = 1 ) \n", "\n", " pbp_df['is_Rush'] = pbp_df.apply( lambda x: 1 if (x.Ev_Zone != x.lagged_Ev_Zone) & \\\n", " ((x.Seconds_Elapsed - x.lagged_Seconds_Elapsed) <= 6) else 0, axis = 1 ) \n", "\n", "\n", " # Replace every occurrence of PHX with ARI\n", " pbp_df['Home_Team'] = pbp_df.apply( lambda x: x.Home_Team if x.Home_Team !='PHX' else 'ARI', axis=1 )\n", " pbp_df['Away_Team'] = pbp_df.apply( lambda x: x.Away_Team if x.Away_Team !='PHX' else 'ARI', axis=1 )\n", " pbp_df['Ev_Team'] = pbp_df.apply( lambda x: x.Ev_Team if x.Ev_Team !='PHX' else 'ARI', axis=1 )\n", " # Replace every occurrence of ATL with WPG\n", " pbp_df['Home_Team'] = pbp_df.apply( lambda x: x.Home_Team if x.Home_Team !='ATL' else 'WPG', axis=1 )\n", " pbp_df['Away_Team'] = pbp_df.apply( lambda x: x.Away_Team if x.Away_Team !='ATL' else 'WPG', axis=1 )\n", " pbp_df['Ev_Team'] = pbp_df.apply( lambda x: x.Ev_Team if x.Ev_Team !='ATL' else 'WPG', axis=1 )\n", " \n", " # add a 'Direction' column to indicate the primary direction for shots. The heuristic to determine\n", " # direction is the sign of the median of the X coordinate of shots in each period. This then lets us filter\n", " # out shots that originate from back in the defensive zone when the signs don't match\n", " pbp_df['Home_Shooter'] = pbp_df.apply( lambda x: 1 if x.Ev_Team == x.Home_Team else 0, axis = 1)\n", "\n", " game_period_locations = pbp_df.groupby( by=['season', 'Game_Id', 'Period','Home_Shooter'] )['xC','yC']\n", " \n", " game_period_medians = game_period_locations.transform(np.median)\n", "\n", " pbp_df['Direction'] = np.sign( game_period_medians['xC'] )\n", "\n", " # should actually write this to a CSV as up to here is the performance intensive part\n", " pbp_df['X_unadj'], pbp_df['Y_unadj'] = zip( *pbp_df.apply( lambda x: (x.xC, x.yC) if x.Direction > 0 else (-x.xC,-x.yC), axis = 1 ) )\n", "\n", " pbp_df['LS_Shot'] = pbp_df.apply( lambda x: 1 if x.Y_unadj < 0 else 0, axis = 1)\n", "\n", " ## Logged Last Event Time\n", " pbp_df['LN_Last_Event_Time'] = pbp_df.apply( lambda x: 0 if (x.Seconds_Elapsed - x.lagged_Seconds_Elapsed) <= 0 \\\n", " else np.log(x.Seconds_Elapsed - x.lagged_Seconds_Elapsed + 0.001), axis = 1)\n", "\n", " # Last Event\n", " pbp_df['LastEV_Off_Faceoff'] = pbp_df.apply( lambda x: x.LN_Last_Event_Time if (x.Ev_Zone == 'Off') & (x.lagged_Event == 'FAC') else 0, axis = 1)\n", " pbp_df['LastEV_Def_Faceoff'] = pbp_df.apply( lambda x: x.LN_Last_Event_Time if (x.Ev_Zone == 'Def') & (x.lagged_Event == 'FAC') else 0, axis = 1)\n", " pbp_df['LastEV_Neu_Faceoff'] = pbp_df.apply( lambda x: x.LN_Last_Event_Time if (x.Ev_Zone == 'Neu') & (x.lagged_Event == 'FAC') else 0, axis = 1)\n", " pbp_df['LastEV_Off_Shot'] = pbp_df.apply( lambda x: x.LN_Last_Event_Time if (x.Ev_Zone == 'Off') & (x.lagged_Event in [\"SHOT\",\"MISS\",\"BLOCK\"]) else 0, axis = 1)\n", " pbp_df['LastEV_Def_Shot'] = pbp_df.apply( lambda x: x.LN_Last_Event_Time if (x.Ev_Zone == 'Def') & (x.lagged_Event in [\"SHOT\",\"MISS\",\"BLOCK\"]) else 0, axis = 1)\n", " pbp_df['LastEV_Neu_Shot'] = pbp_df.apply( lambda x: x.LN_Last_Event_Time if (x.Ev_Zone == 'Neu') & (x.lagged_Event in [\"SHOT\",\"MISS\",\"BLOCK\"]) else 0, axis = 1)\n", " pbp_df['LastEV_Off_Give'] = pbp_df.apply( lambda x: x.LN_Last_Event_Time if (x.Ev_Zone == 'Off') & (x.lagged_Event == 'TURN') else 0, axis = 1)\n", " pbp_df['LastEV_Def_Give'] = pbp_df.apply( lambda x: x.LN_Last_Event_Time if (x.Ev_Zone == 'Def') & (x.lagged_Event == 'TURN') else 0, axis = 1)\n", " pbp_df['LastEV_Neu_Give'] = pbp_df.apply( lambda x: x.LN_Last_Event_Time if (x.Ev_Zone == 'Neu') & (x.lagged_Event == 'TURN') else 0, axis = 1)\n", "\n", " ## Adjust X, Y coordinates by Rink, using CDF of shot attempts only (remove blocks since they skew data)\n", " pbp_df = pbp_df.loc[pbp_df.Event.isin([\"SHOT\",\"GOAL\",\"MISS\"]),:]\n", "\n", " ## Call RinkAdjust class\n", " adjuster = RinkAdjust()\n", "\n", " ## New dataframe of adjusted shots for each home rink\n", " pbp_df_adj = pd.DataFrame()\n", "\n", " ## For each home rink\n", " for team in sorted(pbp_df.Home_Team.unique()):\n", "\n", " ## Split shots into team arena and all other rinks\n", " shot_data = pbp_df\n", " rink_shots = shot_data[ shot_data.Home_Team == team ]\n", " rest_of_league = shot_data[ shot_data.Home_Team != team ]\n", "\n", " ## Create teamxcdf and otherxcdf for rink adjustment\n", " adjuster.addTeam( team, rink_shots, rest_of_league )\n", " \n", " ## Adjusted coordinates\n", " Xadj = []\n", " Yadj = []\n", "\n", " ## For each shot in rink adjust coordinates based on other rinks\n", " for row in rink_shots.itertuples():\n", " newx, newy = adjuster.rink_bias_adjust( row.X_unadj, row.Y_unadj, row.Home_Team )\n", "\n", " Xadj.append(newx)\n", " Yadj.append(newy)\n", "\n", " rink_shots['X'] = Xadj\n", " rink_shots['Y'] = Yadj\n", "\n", " pbp_df_adj = pbp_df_adj.append(rink_shots)\n", "\n", " print (\"All shots columns, rink adjusted: \" + str(pbp_df_adj.shape))\n", "\n", " ## Apply only to season level data after x,y CDF adjustment\n", " pbp_df_adj['Shot_Distance_Unadj'] = pbp_df_adj.apply( lambda x: ((89 - x.X_unadj)**2 + (x.Y_unadj ** 2)) ** 0.5, axis = 1 )\n", " pbp_df_adj['Shot_Distance'] = pbp_df_adj.apply( lambda x: ((89 - x.X)**2 + (x.Y ** 2)) ** 0.5, axis = 1 )\n", " pbp_df_adj['Shot_Angle'] = pbp_df_adj.apply( lambda x: np.arctan(abs(89 - x.X) / abs(0 - x.Y)) * (180 / np.pi) if x.Y != 0 \\\n", " else 90, axis = 1 ) \n", "\n", " pbp_df_adj['Last_Shot_Distance'] = pbp_df_adj.groupby(['Game_Id','Period','Home_Shooter'])['Shot_Distance'].shift(1)\n", " pbp_df_adj['Last_Shot_Angle'] = pbp_df_adj.groupby(['Game_Id','Period','Home_Shooter'])['Shot_Angle'].shift(1)\n", " pbp_df_adj['Last_LS_Shot'] = pbp_df_adj.groupby(['Game_Id','Period','Home_Shooter'])['LS_Shot'].shift(1)\n", "\n", " pbp_df_adj['Rebound_Distance_Change'] = pbp_df_adj.apply( lambda x: x.Last_Shot_Distance + x.Shot_Distance if x.is_Rebound == 1 else 0, axis = 1 )\n", " pbp_df_adj['Rebound_Angle_Change'] = pbp_df_adj.apply( lambda x: 0 if x.is_Rebound == 0 \\\n", " else abs(x.Last_Shot_Angle - x.Shot_Angle) \\\n", " if x.is_Rebound == 1 & (x.Last_LS_Shot == x.LS_Shot) else \\\n", " (180 - x.Last_Shot_Angle - x.Shot_Angle), axis = 1 )\n", "\n", " pbp_df_adj['Rebound_Distance_Traveled_byAngle'] = pbp_df_adj. \\\n", " apply( lambda x: x.Rebound_Distance_Change / x.Rebound_Angle_Change \\\n", " if x.Rebound_Angle_Change > 0 else 0, axis = 1)\n", "\n", " pbp_df_adj['LN_Rebound_Distance_Traveled_byAngle'] = pbp_df_adj. \\\n", " apply(lambda x: np.log(x.Rebound_Distance_Traveled_byAngle) \\\n", " if x.Rebound_Distance_Traveled_byAngle > 0 else 0, axis = 1)\n", "\n", " print (\"All shots columns, final calcuations: \" + str(pbp_df_adj.shape))\n", " \n", " return pbp_df_adj\n", "\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Read-in and Stack" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "types = {'xC': np.float64,\n", "'yC': np.float64,\n", "'X': np.float64,\n", "'X_unadj': np.float64,\n", "'Y': np.float64,\n", "'Y_unadj': np.float64,\n", "'Game_Id': int}" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Away_CoachAway_GoalieAway_Goalie_IdAway_PlayersAway_ScoreAway_TeamDateDescriptionEv_TeamEv_ZoneEventGame_IdHome_CoachHome_GoalieHome_Goalie_IdHome_PlayersHome_ScoreHome_TeamHome_ZonePeriodSeconds_ElapsedStrengthTime_ElapsedTypeawayPlayer1awayPlayer1_idawayPlayer2awayPlayer2_idawayPlayer3awayPlayer3_idawayPlayer4awayPlayer4_idawayPlayer5awayPlayer5_idawayPlayer6awayPlayer6_idhomePlayer1homePlayer1_idhomePlayer2homePlayer2_idhomePlayer3homePlayer3_idhomePlayer4homePlayer4_idhomePlayer5homePlayer5_idhomePlayer6homePlayer6_idp1_IDp1_namep2_IDp2_namep3_IDp3_namexCyC
0PAUL MAURICECAM WARD8470320.060CAR2010-10-07Period Start- Local time: 7:10 EETNaNNaNPSTR20003TODD RICHARDSNIKLAS BACKSTROM8473404.060MINNaN10.05x50:00NaNJEFF SKINNER8475784.0TUOMO RUUTU8469462.0JUSSI JOKINEN8469638.0JONI PITKANEN8470137.0JOE CORVO8466215.0CAM WARD8470320.0MIKKO KOIVU8469459.0ANTTI MIETTINEN8468704.0ANDREW BRUNETTE8459596.0GREG ZANON8468636.0CAM BARKER8471216.0NIKLAS BACKSTROM8473404.0NaNNaNNaNNaNNaNNaNNaNNaN
1PAUL MAURICECAM WARD8470320.060CAR2010-10-07MIN won Neu. Zone - CAR #36 JOKINEN vs MIN #9 KOIVUMINNeuFAC20003TODD RICHARDSNIKLAS BACKSTROM8473404.060MINNeu10.05x50:00NaNJEFF SKINNER8475784.0TUOMO RUUTU8469462.0JUSSI JOKINEN8469638.0JONI PITKANEN8470137.0JOE CORVO8466215.0CAM WARD8470320.0MIKKO KOIVU8469459.0ANTTI MIETTINEN8468704.0ANDREW BRUNETTE8459596.0GREG ZANON8468636.0CAM BARKER8471216.0NIKLAS BACKSTROM8473404.08469459.0MIKKO KOIVU8469638.0JUSSI JOKINENNaNNaN0.00.0
2PAUL MAURICECAM WARD8470320.060CAR2010-10-07OFFSIDENaNNaNSTOP20003TODD RICHARDSNIKLAS BACKSTROM8473404.060MINNaN18.05x50:08NaNJEFF SKINNER8475784.0TUOMO RUUTU8469462.0JUSSI JOKINEN8469638.0JONI PITKANEN8470137.0JOE CORVO8466215.0CAM WARD8470320.0MIKKO KOIVU8469459.0ANTTI MIETTINEN8468704.0ANDREW BRUNETTE8459596.0GREG ZANON8468636.0CAM BARKER8471216.0NIKLAS BACKSTROM8473404.0NaNNaNNaNNaNNaNNaNNaNNaN
3PAUL MAURICECAM WARD8470320.060CAR2010-10-07MIN won Neu. Zone - CAR #36 JOKINEN vs MIN #9 KOIVUMINNeuFAC20003TODD RICHARDSNIKLAS BACKSTROM8473404.060MINNeu18.05x50:08NaNJEFF SKINNER8475784.0TUOMO RUUTU8469462.0JUSSI JOKINEN8469638.0JONI PITKANEN8470137.0JOE CORVO8466215.0CAM WARD8470320.0MIKKO KOIVU8469459.0ANTTI MIETTINEN8468704.0ANDREW BRUNETTE8459596.0GREG ZANON8468636.0CAM BARKER8471216.0NIKLAS BACKSTROM8473404.08469459.0MIKKO KOIVU8469638.0JUSSI JOKINENNaNNaN20.0-22.0
4PAUL MAURICECAM WARD8470320.060CAR2010-10-07CAR ONGOAL - #12 STAAL, Snap, Off. Zone, 37 ft.CAROffSHOT20003TODD RICHARDSNIKLAS BACKSTROM8473404.060MINDef165.05x51:05SNAP SHOTERIC STAAL8470595.0CHAD LAROSE8469812.0ERIK COLE8467396.0JONI PITKANEN8470137.0JOE CORVO8466215.0CAM WARD8470320.0MATT CULLEN8464989.0CAL CLUTTERBUCK8473504.0MARTIN HAVLAT8467899.0BRENT BURNS8470613.0NICK SCHULTZ8468513.0NIKLAS BACKSTROM8473404.08470595.0ERIC STAALNaNNaNNaNNaN56.0-15.0
\n", "
" ], "text/plain": [ " Away_Coach Away_Goalie Away_Goalie_Id Away_Players Away_Score \\\n", "0 PAUL MAURICE CAM WARD 8470320.0 6 0 \n", "1 PAUL MAURICE CAM WARD 8470320.0 6 0 \n", "2 PAUL MAURICE CAM WARD 8470320.0 6 0 \n", "3 PAUL MAURICE CAM WARD 8470320.0 6 0 \n", "4 PAUL MAURICE CAM WARD 8470320.0 6 0 \n", "\n", " Away_Team Date Description \\\n", "0 CAR 2010-10-07 Period Start- Local time: 7:10 EET \n", "1 CAR 2010-10-07 MIN won Neu. Zone - CAR #36 JOKINEN vs MIN #9 KOIVU \n", "2 CAR 2010-10-07 OFFSIDE \n", "3 CAR 2010-10-07 MIN won Neu. Zone - CAR #36 JOKINEN vs MIN #9 KOIVU \n", "4 CAR 2010-10-07 CAR ONGOAL - #12 STAAL, Snap, Off. Zone, 37 ft. \n", "\n", " Ev_Team Ev_Zone Event Game_Id Home_Coach Home_Goalie \\\n", "0 NaN NaN PSTR 20003 TODD RICHARDS NIKLAS BACKSTROM \n", "1 MIN Neu FAC 20003 TODD RICHARDS NIKLAS BACKSTROM \n", "2 NaN NaN STOP 20003 TODD RICHARDS NIKLAS BACKSTROM \n", "3 MIN Neu FAC 20003 TODD RICHARDS NIKLAS BACKSTROM \n", "4 CAR Off SHOT 20003 TODD RICHARDS NIKLAS BACKSTROM \n", "\n", " Home_Goalie_Id Home_Players Home_Score Home_Team Home_Zone Period \\\n", "0 8473404.0 6 0 MIN NaN 1 \n", "1 8473404.0 6 0 MIN Neu 1 \n", "2 8473404.0 6 0 MIN NaN 1 \n", "3 8473404.0 6 0 MIN Neu 1 \n", "4 8473404.0 6 0 MIN Def 1 \n", "\n", " Seconds_Elapsed Strength Time_Elapsed Type awayPlayer1 \\\n", "0 0.0 5x5 0:00 NaN JEFF SKINNER \n", "1 0.0 5x5 0:00 NaN JEFF SKINNER \n", "2 8.0 5x5 0:08 NaN JEFF SKINNER \n", "3 8.0 5x5 0:08 NaN JEFF SKINNER \n", "4 65.0 5x5 1:05 SNAP SHOT ERIC STAAL \n", "\n", " awayPlayer1_id awayPlayer2 awayPlayer2_id awayPlayer3 awayPlayer3_id \\\n", "0 8475784.0 TUOMO RUUTU 8469462.0 JUSSI JOKINEN 8469638.0 \n", "1 8475784.0 TUOMO RUUTU 8469462.0 JUSSI JOKINEN 8469638.0 \n", "2 8475784.0 TUOMO RUUTU 8469462.0 JUSSI JOKINEN 8469638.0 \n", "3 8475784.0 TUOMO RUUTU 8469462.0 JUSSI JOKINEN 8469638.0 \n", "4 8470595.0 CHAD LAROSE 8469812.0 ERIK COLE 8467396.0 \n", "\n", " awayPlayer4 awayPlayer4_id awayPlayer5 awayPlayer5_id awayPlayer6 \\\n", "0 JONI PITKANEN 8470137.0 JOE CORVO 8466215.0 CAM WARD \n", "1 JONI PITKANEN 8470137.0 JOE CORVO 8466215.0 CAM WARD \n", "2 JONI PITKANEN 8470137.0 JOE CORVO 8466215.0 CAM WARD \n", "3 JONI PITKANEN 8470137.0 JOE CORVO 8466215.0 CAM WARD \n", "4 JONI PITKANEN 8470137.0 JOE CORVO 8466215.0 CAM WARD \n", "\n", " awayPlayer6_id homePlayer1 homePlayer1_id homePlayer2 \\\n", "0 8470320.0 MIKKO KOIVU 8469459.0 ANTTI MIETTINEN \n", "1 8470320.0 MIKKO KOIVU 8469459.0 ANTTI MIETTINEN \n", "2 8470320.0 MIKKO KOIVU 8469459.0 ANTTI MIETTINEN \n", "3 8470320.0 MIKKO KOIVU 8469459.0 ANTTI MIETTINEN \n", "4 8470320.0 MATT CULLEN 8464989.0 CAL CLUTTERBUCK \n", "\n", " homePlayer2_id homePlayer3 homePlayer3_id homePlayer4 \\\n", "0 8468704.0 ANDREW BRUNETTE 8459596.0 GREG ZANON \n", "1 8468704.0 ANDREW BRUNETTE 8459596.0 GREG ZANON \n", "2 8468704.0 ANDREW BRUNETTE 8459596.0 GREG ZANON \n", "3 8468704.0 ANDREW BRUNETTE 8459596.0 GREG ZANON \n", "4 8473504.0 MARTIN HAVLAT 8467899.0 BRENT BURNS \n", "\n", " homePlayer4_id homePlayer5 homePlayer5_id homePlayer6 \\\n", "0 8468636.0 CAM BARKER 8471216.0 NIKLAS BACKSTROM \n", "1 8468636.0 CAM BARKER 8471216.0 NIKLAS BACKSTROM \n", "2 8468636.0 CAM BARKER 8471216.0 NIKLAS BACKSTROM \n", "3 8468636.0 CAM BARKER 8471216.0 NIKLAS BACKSTROM \n", "4 8470613.0 NICK SCHULTZ 8468513.0 NIKLAS BACKSTROM \n", "\n", " homePlayer6_id p1_ID p1_name p2_ID p2_name p3_ID \\\n", "0 8473404.0 NaN NaN NaN NaN NaN \n", "1 8473404.0 8469459.0 MIKKO KOIVU 8469638.0 JUSSI JOKINEN NaN \n", "2 8473404.0 NaN NaN NaN NaN NaN \n", "3 8473404.0 8469459.0 MIKKO KOIVU 8469638.0 JUSSI JOKINEN NaN \n", "4 8473404.0 8470595.0 ERIC STAAL NaN NaN NaN \n", "\n", " p3_name xC yC \n", "0 NaN NaN NaN \n", "1 NaN 0.0 0.0 \n", "2 NaN NaN NaN \n", "3 NaN 20.0 -22.0 \n", "4 NaN 56.0 -15.0 " ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nhl_pbp20102011 = pd.read_csv('/Users/colander1/Documents/CWA/HockeyScrape/nhl_pbp20102011.csv', dtype=types)\n", "nhl_pbp20112012 = pd.read_csv('/Users/colander1/Documents/CWA/HockeyScrape/nhl_pbp20112012.csv', dtype=types)\n", "nhl_pbp20122013 = pd.read_csv('/Users/colander1/Documents/CWA/HockeyScrape/nhl_pbp20122013.csv', dtype=types)\n", "nhl_pbp20132014 = pd.read_csv('/Users/colander1/Documents/CWA/HockeyScrape/nhl_pbp20132014.csv', dtype=types)\n", "nhl_pbp20142015 = pd.read_csv('/Users/colander1/Documents/CWA/HockeyScrape/nhl_pbp20142015.csv', dtype=types)\n", "nhl_pbp20152016 = pd.read_csv('/Users/colander1/Documents/CWA/HockeyScrape/nhl_pbp20152016.csv', dtype=types)\n", "nhl_pbp20162017 = pd.read_csv('/Users/colander1/Documents/CWA/HockeyScrape/nhl_pbp20162017.csv', dtype=types)\n", "\n", "nhl_pbp20172018 = pd.read_csv('/Users/colander1/Documents/CWA/HockeyScrape/nhl_pbp20172018.csv', dtype=types)\n", "\n", "nhl_pbp = pd.concat([nhl_pbp20102011, nhl_pbp20112012, nhl_pbp20122013, nhl_pbp20132014,\n", " nhl_pbp20142015, nhl_pbp20152016, nhl_pbp20162017, nhl_pbp20172018])\n", "\n", "unwanted = nhl_pbp.columns[nhl_pbp.columns.str.startswith('Unna')]\n", "\n", "nhl_pbp.drop(unwanted, axis=1, inplace=True)\n", "\n", "nhl_pbp.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load Goalie/Skater Roster with Handedness" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": true }, "outputs": [], "source": [ "player_lookup = pd.read_sql(con=engine, sql =\"SELECT * FROM `nhl_all`.`hockey_roster_info` AS B\")\n", "\n", "player_lookup = player_lookup.sort_values('gamesPlayed',ascending=False).groupby(['playerId']).first().reset_index(). \\\n", " loc[:, ['playerBirthDate', 'playerPositionCode', 'playerShootsCatches','playerId']]\n", " \n", "skater_lookup = player_lookup.loc[player_lookup.playerPositionCode != \"G\", :]\n", "skater_lookup.columns = ['shooterDOB','Player_Position','Shoots','p1_ID']\n", "skater_lookup['p1_ID'] = skater_lookup['p1_ID'].astype(str)\n", "\n", "\n", "goalie_lookup = pd.read_sql(con=engine, sql = \"SELECT DISTINCT playerId as SA_Goalie_Id, playerShootsCatches as Catches, playerBirthDate as goalieDOB FROM `nhl_all`.`hockey_goalies_roster` AS A\") \n", "goalie_lookup['SA_Goalie_Id'] = goalie_lookup['SA_Goalie_Id'].astype(str)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Lookup Players, Generate More Features" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def lookups_data_clean(data):\n", " \n", " \n", " for col in ['Game_Id','Away_Goalie_Id','Home_Goalie_Id','p1_ID','p2_ID','p3_ID',\n", " 'awayPlayer1_id','awayPlayer2_id','awayPlayer3_id','awayPlayer4_id','awayPlayer5_id','awayPlayer6_id',\n", " 'homePlayer1_id','homePlayer2_id','homePlayer3_id','homePlayer4_id','homePlayer5_id','homePlayer6_id']:\n", " data[col] = data[col].fillna(0).astype(int).astype(str)\n", "\n", " data['SA_Goalie'] = data.apply( lambda x: x.Away_Goalie if x.Ev_Team == x.Home_Team else x.Home_Goalie, axis=1 )\n", " data['SA_Goalie_Id'] = data.apply( lambda x: x.Away_Goalie_Id if x.Ev_Team == x.Home_Team else x.Home_Goalie_Id, axis=1 )\n", " \n", " data['Away_State'] = data.apply( lambda x: x.Away_Players - 1 if x.Away_Goalie_Id in [x.awayPlayer6_id, x.awayPlayer5_id, x.awayPlayer4_id, x.awayPlayer3_id] else x.Away_Players, axis=1 )\n", " data['Home_State'] = data.apply( lambda x: x.Home_Players - 1 if x.Home_Goalie_Id in [x.homePlayer6_id, x.homePlayer5_id, x.homePlayer4_id, x.homePlayer3_id] else x.Home_Players, axis=1 )\n", " \n", " data['Away_State'] = data.apply( lambda x: x.Away_Players - 1 if x.Away_Goalie_Id in [x.awayPlayer6_id, x.awayPlayer5_id, x.awayPlayer4_id, x.awayPlayer3_id] else x.Away_Players, axis=1 )\n", " data['Home_State'] = data.apply( lambda x: x.Home_Players - 1 if x.Home_Goalie_Id in [x.homePlayer6_id, x.homePlayer5_id, x.homePlayer4_id, x.homePlayer3_id] else x.Home_Players, axis=1 )\n", " \n", " data['Results_inRebound'] = data['is_Rebound'].shift(periods=-1)\n", " \n", " data['Shooter_State'] = data.apply( lambda x: x.Away_State if x.Ev_Team != x.Home_Team else x.Home_State, axis=1 )\n", " data['Goalie_State'] = data.apply( lambda x: x.Away_State if x.Ev_Team == x.Home_Team else x.Home_State, axis=1 )\n", " \n", " data['Game_State'] = data.apply( lambda x: str(x.Away_State) + \"v\" + str(x.Home_State) if x.Ev_Team == x.Home_Team else \\\n", " str(x.Home_State) + \"v\" + str(x.Away_State) , axis=1 )\n", " data['Game_State'] = data.apply( lambda x: \"SH_SA\" if x.Game_State in [\"3v5\",\"3v4\",\"3v6\",\"4v5\",\"4v6\",\"5v6\"] else \\\n", " \"PP_2p_SA\" if x.Game_State in [\"6v3\",\"6v4\",\"5v3\"] else \\\n", " \"5v5\" if x.Game_State in [\"5v5\",\"6v6\"] else x.Game_State, axis=1 )\n", " \n", " data['State_Space'] = data['Goalie_State'] + data['Shooter_State']\n", " data['Shooter_State_Advantage'] = data['Shooter_State'] - data['Goalie_State']\n", " \n", " data = data.merge(skater_lookup, on=['p1_ID'], how = 'left')\n", " data = data.merge(goalie_lookup, on=['SA_Goalie_Id'], how = 'left')\n", " \n", "\n", " data['Shooter_Handedness'] = data.apply( lambda x: \"L\" if x.Shoots == \"L\" else \\\n", " \"R\" if x.Shoots == \"R\" else \"U\", axis=1 )\n", " \n", " data['Handed_Class'] = data['Shoots'].str.cat(data['Catches'], sep='')\n", " \n", " data['Handed_Class2'] = data.apply( lambda x: \"Same\" if x.Handed_Class in [\"LL\",\"RR\"] else \\\n", " \"Opposite\" if x.Handed_Class in [\"LR\",\"RL\"] else \"U\", axis = 1)\n", " \n", " data['Player_Position2'] = data.apply( lambda x: \"D\" if x.Player_Position == \"D\" else \"F\", axis=1 )\n", " \n", " return data\n", " " ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "## Cumulative Shooting Function" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def cumulative_shooting_talent(data):\n", " \n", " shooting_percentage = data.groupby(['Player_Position2'])['Goal'].mean()\n", " \n", " data['Cum_Goal'] = data.groupby(['p1_ID'])['Goal'].cumsum()\n", " data['Cum_Shots'] = data.groupby(['p1_ID']).cumcount()\n", " \n", " data['Cum_Goal'] = data.apply( lambda x: x.Cum_Goal - 1 if x.Event == \"GOAL\" else x.Cum_Goal, axis = 1)\n", " \n", " kr21_stabilizer_F = pd.to_numeric(375.0)\n", " kr21_stabilizer_D = pd.to_numeric(275.0)\n", "\n", " data['Regressed_Shooting_Indexed'] = data.apply( lambda x: ((x.Cum_Goal + (kr21_stabilizer_D * shooting_percentage[0])) /\\\n", " (x.Cum_Shots + kr21_stabilizer_D)) / shooting_percentage[0]\\\n", " if x.Player_Position2 == \"D\" else ((x.Cum_Goal + (kr21_stabilizer_F * shooting_percentage[1])) /\\\n", " (x.Cum_Shots + kr21_stabilizer_F)) / shooting_percentage[1], axis = 1)\n", " \n", " return data\n", " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Create Dummy and Polynomial Variables\n", "\n", "For each category variable, create dummies\n", "For shot distance and angle 3rd degree polynomial" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "def feature_generation(data, \n", " id_vars = [\"season\"],\n", " target_vars = ['Goal','Results_inRebound'],\n", " num_vars = [\"EmptyNet_SA\",\"is_Rebound\",\"is_Rush\",\"LN_Last_Event_Time\",\"LastEV_Off_Faceoff\",\n", " \"LastEV_Def_Faceoff\",\"LastEV_Neu_Faceoff\",\"LastEV_Off_Shot\",\"LastEV_Def_Shot\",\"LastEV_Neu_Shot\",\n", " \"LastEV_Off_Give\",\"LastEV_Def_Give\",\"LastEV_Neu_Give\",\"LN_Rebound_Distance_Traveled_byAngle\",\n", " \"Regressed_Shooting_Indexed\"],\n", " cat_vars = [\"Type\",\"Shooter_State\",\"Goalie_State\",\"Handed_Class2\",\"Player_Position2\"], \n", " poly_vars = [\"Shot_Distance\",\"Shot_Angle\"],\n", " model_vars = ['EmptyNet_SA', 'is_Rebound', 'is_Rush', 'LN_Last_Event_Time',\n", " 'LastEV_Off_Faceoff', 'LastEV_Def_Faceoff', 'LastEV_Neu_Faceoff',\n", " 'LastEV_Off_Shot', 'LastEV_Def_Shot', 'LastEV_Neu_Shot',\n", " 'LastEV_Off_Give', 'LastEV_Def_Give', 'LastEV_Neu_Give',\n", " 'LN_Rebound_Distance_Traveled_byAngle', 'Regressed_Shooting_Indexed',\n", " 'Type_BACKHAND', 'Type_DEFLECTED', 'Type_SLAP SHOT', 'Type_WRAP-AROUND',\n", " 'Type_WRIST SHOT', 'Shooter_State', 'Goalie_State',\n", " 'Handed_Class2_Opposite',\n", " 'Player_Position2_F', 'Shot_Distance',\n", " 'Shot_Distance^2', 'Shot_Distance^3', 'Shot_Angle', 'Shot_Angle^2',\n", " 'Shot_Angle^3']):\n", "\n", " from sklearn.preprocessing import PolynomialFeatures\n", " ## Dummy Variables\n", " model_data = data[num_vars].fillna(0)\n", "\n", " for i in cat_vars:\n", " var_dummies = pd.get_dummies(data.loc[:,[i]])\n", "\n", " model_data = pd.concat([model_data, var_dummies], axis=1)\n", "\n", " ## Polynomial Variables\n", " for i in poly_vars:\n", "\n", " poly_data = data.loc[:,[i]]\n", "\n", " poly = PolynomialFeatures(degree=3,interaction_only=False).fit(poly_data)\n", " poly_names = poly.get_feature_names(poly_data.columns)\n", "\n", "\n", " poly_output = poly.transform(data.loc[:,[i]])\n", "\n", " model_data = pd.DataFrame(pd.concat([ model_data, \n", " pd.DataFrame(poly_output,\n", " columns = poly_names ).iloc[:,1:]], axis=1))\n", "\n", " #model_mat = model_data.loc[:, model_vars].as_matrix()\n", " model_data = pd.concat([data[id_vars],data[target_vars], model_data], axis=1)\n", " \n", " \n", " print(model_data.shape)\n", " \n", " return model_data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data Pipeline" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "All events and columns: (2878182, 56)\n", "All shots/blocks and columns: (1014120, 63)\n", "All shots columns, rink adjusted: (753814, 85)\n", "All shots columns, final calcuations: (753814, 95)\n" ] } ], "source": [ "shot_data_all = transform_data(nhl_pbp)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Shot_DistanceShot_Distance_Unadj
Home_Team
NYR37.96712833.186499
DET34.29184334.282345
CHI35.83716434.322037
STL36.45869734.468851
N.J34.59066834.594601
ANA36.82610635.160489
PIT35.50540335.351800
NSH35.67185935.385298
L.A34.94142835.561865
MTL34.58640335.622235
WSH34.73221135.654820
DAL36.01536035.655394
CAR36.12178535.702802
TOR32.97996835.775225
CGY36.03751936.258758
T.B34.31868636.449372
VGK34.73783936.491532
NYI37.68683636.756656
COL34.98853636.862748
ARI35.58837236.873783
FLA36.39237536.958453
CBJ38.09069837.245836
EDM38.19824737.329866
VAN36.45787637.376298
BOS33.26514937.584670
S.J37.49952337.637475
WPG39.25244739.239246
BUF37.70218739.468667
MIN37.54562639.570156
PHI37.19010239.699116
OTT38.72900140.723540
\n", "
" ], "text/plain": [ " Shot_Distance Shot_Distance_Unadj\n", "Home_Team \n", "NYR 37.967128 33.186499\n", "DET 34.291843 34.282345\n", "CHI 35.837164 34.322037\n", "STL 36.458697 34.468851\n", "N.J 34.590668 34.594601\n", "ANA 36.826106 35.160489\n", "PIT 35.505403 35.351800\n", "NSH 35.671859 35.385298\n", "L.A 34.941428 35.561865\n", "MTL 34.586403 35.622235\n", "WSH 34.732211 35.654820\n", "DAL 36.015360 35.655394\n", "CAR 36.121785 35.702802\n", "TOR 32.979968 35.775225\n", "CGY 36.037519 36.258758\n", "T.B 34.318686 36.449372\n", "VGK 34.737839 36.491532\n", "NYI 37.686836 36.756656\n", "COL 34.988536 36.862748\n", "ARI 35.588372 36.873783\n", "FLA 36.392375 36.958453\n", "CBJ 38.090698 37.245836\n", "EDM 38.198247 37.329866\n", "VAN 36.457876 37.376298\n", "BOS 33.265149 37.584670\n", "S.J 37.499523 37.637475\n", "WPG 39.252447 39.239246\n", "BUF 37.702187 39.468667\n", "MIN 37.545626 39.570156\n", "PHI 37.190102 39.699116\n", "OTT 38.729001 40.723540" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Check shot distance adjustment\n", "shot_data_all.loc[shot_data_all.season == \"20172018\", :].groupby(['Home_Team'])[['Shot_Distance','Shot_Distance_Unadj']]\\\n", " .mean().sort_values(['Shot_Distance_Unadj'])" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": true }, "outputs": [], "source": [ "shot_data_all = lookups_data_clean(shot_data_all)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": true, "scrolled": true }, "outputs": [], "source": [ "shot_data_all2 = cumulative_shooting_talent(shot_data_all)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(753814, 36)\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
seasonGoalResults_inReboundEmptyNet_SAis_Reboundis_RushLN_Last_Event_TimeLastEV_Off_FaceoffLastEV_Def_FaceoffLastEV_Neu_FaceoffLastEV_Off_ShotLastEV_Def_ShotLastEV_Neu_ShotLastEV_Off_GiveLastEV_Def_GiveLastEV_Neu_GiveLN_Rebound_Distance_Traveled_byAngleRegressed_Shooting_IndexedType_BACKHANDType_DEFLECTEDType_SLAP SHOTType_WRAP-AROUNDType_WRIST SHOTShooter_StateGoalie_StateHanded_Class2_OppositeHanded_Class2_SameHanded_Class2_UPlayer_Position2_DPlayer_Position2_FShot_DistanceShot_Distance^2Shot_Distance^3Shot_AngleShot_Angle^2Shot_Angle^3
02010201100.00002.1973362.1973360.00.00.0000000.00.00.0000000.00.00.01.0000000100551000140.3112891625.065505.84420566.6147794437.528774295604.998305
12010201100.00001.0989460.0000000.00.01.0989460.00.00.0000000.00.00.01.0000000100550101058.1377673380.0196505.65386363.4349494023.992732255261.773029
22010201100.00002.3026852.3026850.00.00.0000000.00.00.0000000.00.00.01.0000000001550100131.016125962.029837.5120951.8476103.4136646.307120
32010201100.00003.6376120.0000000.00.00.0000000.00.00.0000000.00.00.00.9973400100541000140.3112891625.065505.84420582.8749846868.262915569207.176806
42010201100.00001.3865440.0000000.00.00.0000000.00.01.3865440.00.00.01.0000000001351000137.4432901402.052495.49321655.8855273123.192134174541.238511
\n", "
" ], "text/plain": [ " season Goal Results_inRebound EmptyNet_SA is_Rebound is_Rush \\\n", "0 20102011 0 0.0 0 0 0 \n", "1 20102011 0 0.0 0 0 0 \n", "2 20102011 0 0.0 0 0 0 \n", "3 20102011 0 0.0 0 0 0 \n", "4 20102011 0 0.0 0 0 0 \n", "\n", " LN_Last_Event_Time LastEV_Off_Faceoff LastEV_Def_Faceoff \\\n", "0 2.197336 2.197336 0.0 \n", "1 1.098946 0.000000 0.0 \n", "2 2.302685 2.302685 0.0 \n", "3 3.637612 0.000000 0.0 \n", "4 1.386544 0.000000 0.0 \n", "\n", " LastEV_Neu_Faceoff LastEV_Off_Shot LastEV_Def_Shot LastEV_Neu_Shot \\\n", "0 0.0 0.000000 0.0 0.0 \n", "1 0.0 1.098946 0.0 0.0 \n", "2 0.0 0.000000 0.0 0.0 \n", "3 0.0 0.000000 0.0 0.0 \n", "4 0.0 0.000000 0.0 0.0 \n", "\n", " LastEV_Off_Give LastEV_Def_Give LastEV_Neu_Give \\\n", "0 0.000000 0.0 0.0 \n", "1 0.000000 0.0 0.0 \n", "2 0.000000 0.0 0.0 \n", "3 0.000000 0.0 0.0 \n", "4 1.386544 0.0 0.0 \n", "\n", " LN_Rebound_Distance_Traveled_byAngle Regressed_Shooting_Indexed \\\n", "0 0.0 1.00000 \n", "1 0.0 1.00000 \n", "2 0.0 1.00000 \n", "3 0.0 0.99734 \n", "4 0.0 1.00000 \n", "\n", " Type_BACKHAND Type_DEFLECTED Type_SLAP SHOT Type_WRAP-AROUND \\\n", "0 0 0 1 0 \n", "1 0 0 1 0 \n", "2 0 0 0 0 \n", "3 0 0 1 0 \n", "4 0 0 0 0 \n", "\n", " Type_WRIST SHOT Shooter_State Goalie_State Handed_Class2_Opposite \\\n", "0 0 5 5 1 \n", "1 0 5 5 0 \n", "2 1 5 5 0 \n", "3 0 5 4 1 \n", "4 1 3 5 1 \n", "\n", " Handed_Class2_Same Handed_Class2_U Player_Position2_D \\\n", "0 0 0 0 \n", "1 1 0 1 \n", "2 1 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " Player_Position2_F Shot_Distance Shot_Distance^2 Shot_Distance^3 \\\n", "0 1 40.311289 1625.0 65505.844205 \n", "1 0 58.137767 3380.0 196505.653863 \n", "2 1 31.016125 962.0 29837.512095 \n", "3 1 40.311289 1625.0 65505.844205 \n", "4 1 37.443290 1402.0 52495.493216 \n", "\n", " Shot_Angle Shot_Angle^2 Shot_Angle^3 \n", "0 66.614779 4437.528774 295604.998305 \n", "1 63.434949 4023.992732 255261.773029 \n", "2 1.847610 3.413664 6.307120 \n", "3 82.874984 6868.262915 569207.176806 \n", "4 55.885527 3123.192134 174541.238511 " ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_vars = ['EmptyNet_SA', 'is_Rebound', 'is_Rush', 'LN_Last_Event_Time',\n", " 'LastEV_Off_Faceoff', 'LastEV_Def_Faceoff', 'LastEV_Neu_Faceoff',\n", " 'LastEV_Off_Shot', 'LastEV_Def_Shot', 'LastEV_Neu_Shot',\n", " 'LastEV_Off_Give', 'LastEV_Def_Give', 'LastEV_Neu_Give',\n", " 'LN_Rebound_Distance_Traveled_byAngle', 'Regressed_Shooting_Indexed',\n", " 'Type_BACKHAND', 'Type_DEFLECTED', 'Type_SLAP SHOT', 'Type_WRAP-AROUND',\n", " 'Type_WRIST SHOT', 'Shooter_State', 'Goalie_State',\n", " 'Handed_Class2_Opposite',\n", " 'Player_Position2_F', 'Shot_Distance',\n", " 'Shot_Distance^2', 'Shot_Distance^3', 'Shot_Angle', 'Shot_Angle^2',\n", " 'Shot_Angle^3']\n", "\n", "model_data = feature_generation(shot_data_all2, model_vars = model_vars)\n", "#(744586, 30)\n", "\n", "model_data.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Break Data into 2 Season Blocks \n", "Modeling two seasons at a time allows model to adjust to changing goalie performance/shot recorder bias as modeling entire period results in fewer goals relative to expected in later seasons. Two seasons are used rather than 1 as to get 2017-18 some stability." ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
seasonGoalResults_inReboundEmptyNet_SAis_Reboundis_RushLN_Last_Event_TimeLastEV_Off_FaceoffLastEV_Def_FaceoffLastEV_Neu_FaceoffLastEV_Off_ShotLastEV_Def_ShotLastEV_Neu_ShotLastEV_Off_GiveLastEV_Def_GiveLastEV_Neu_GiveLN_Rebound_Distance_Traveled_byAngleRegressed_Shooting_IndexedType_BACKHANDType_DEFLECTEDType_SLAP SHOTType_WRAP-AROUNDType_WRIST SHOTShooter_StateGoalie_StateHanded_Class2_OppositeHanded_Class2_SameHanded_Class2_UPlayer_Position2_DPlayer_Position2_FShot_DistanceShot_Distance^2Shot_Distance^3Shot_AngleShot_Angle^2Shot_Angle^3season_model
02010201100.00002.1973362.1973360.00.00.0000000.00.00.0000000.00.00.01.0000000100551000140.3112891625.065505.84420566.6147794437.528774295604.9983052011_2012
12010201100.00001.0989460.0000000.00.01.0989460.00.00.0000000.00.00.01.0000000100550101058.1377673380.0196505.65386363.4349494023.992732255261.7730292011_2012
22010201100.00002.3026852.3026850.00.00.0000000.00.00.0000000.00.00.01.0000000001550100131.016125962.029837.5120951.8476103.4136646.3071202011_2012
32010201100.00003.6376120.0000000.00.00.0000000.00.00.0000000.00.00.00.9973400100541000140.3112891625.065505.84420582.8749846868.262915569207.1768062011_2012
42010201100.00001.3865440.0000000.00.00.0000000.00.01.3865440.00.00.01.0000000001351000137.4432901402.052495.49321655.8855273123.192134174541.2385112011_2012
\n", "
" ], "text/plain": [ " season Goal Results_inRebound EmptyNet_SA is_Rebound is_Rush \\\n", "0 20102011 0 0.0 0 0 0 \n", "1 20102011 0 0.0 0 0 0 \n", "2 20102011 0 0.0 0 0 0 \n", "3 20102011 0 0.0 0 0 0 \n", "4 20102011 0 0.0 0 0 0 \n", "\n", " LN_Last_Event_Time LastEV_Off_Faceoff LastEV_Def_Faceoff \\\n", "0 2.197336 2.197336 0.0 \n", "1 1.098946 0.000000 0.0 \n", "2 2.302685 2.302685 0.0 \n", "3 3.637612 0.000000 0.0 \n", "4 1.386544 0.000000 0.0 \n", "\n", " LastEV_Neu_Faceoff LastEV_Off_Shot LastEV_Def_Shot LastEV_Neu_Shot \\\n", "0 0.0 0.000000 0.0 0.0 \n", "1 0.0 1.098946 0.0 0.0 \n", "2 0.0 0.000000 0.0 0.0 \n", "3 0.0 0.000000 0.0 0.0 \n", "4 0.0 0.000000 0.0 0.0 \n", "\n", " LastEV_Off_Give LastEV_Def_Give LastEV_Neu_Give \\\n", "0 0.000000 0.0 0.0 \n", "1 0.000000 0.0 0.0 \n", "2 0.000000 0.0 0.0 \n", "3 0.000000 0.0 0.0 \n", "4 1.386544 0.0 0.0 \n", "\n", " LN_Rebound_Distance_Traveled_byAngle Regressed_Shooting_Indexed \\\n", "0 0.0 1.00000 \n", "1 0.0 1.00000 \n", "2 0.0 1.00000 \n", "3 0.0 0.99734 \n", "4 0.0 1.00000 \n", "\n", " Type_BACKHAND Type_DEFLECTED Type_SLAP SHOT Type_WRAP-AROUND \\\n", "0 0 0 1 0 \n", "1 0 0 1 0 \n", "2 0 0 0 0 \n", "3 0 0 1 0 \n", "4 0 0 0 0 \n", "\n", " Type_WRIST SHOT Shooter_State Goalie_State Handed_Class2_Opposite \\\n", "0 0 5 5 1 \n", "1 0 5 5 0 \n", "2 1 5 5 0 \n", "3 0 5 4 1 \n", "4 1 3 5 1 \n", "\n", " Handed_Class2_Same Handed_Class2_U Player_Position2_D \\\n", "0 0 0 0 \n", "1 1 0 1 \n", "2 1 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " Player_Position2_F Shot_Distance Shot_Distance^2 Shot_Distance^3 \\\n", "0 1 40.311289 1625.0 65505.844205 \n", "1 0 58.137767 3380.0 196505.653863 \n", "2 1 31.016125 962.0 29837.512095 \n", "3 1 40.311289 1625.0 65505.844205 \n", "4 1 37.443290 1402.0 52495.493216 \n", "\n", " Shot_Angle Shot_Angle^2 Shot_Angle^3 season_model \n", "0 66.614779 4437.528774 295604.998305 2011_2012 \n", "1 63.434949 4023.992732 255261.773029 2011_2012 \n", "2 1.847610 3.413664 6.307120 2011_2012 \n", "3 82.874984 6868.262915 569207.176806 2011_2012 \n", "4 55.885527 3123.192134 174541.238511 2011_2012 " ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model_data['season_model'] = model_data.apply(lambda x: '2011_2012' if x.season in ['20102011','20112012'] else\n", " '2013_2014' if x.season in ['20122013','20132014'] else\n", " '2015_2016' if x.season in ['20142015','20152016'] else\n", " '2017_2018' if x.season in ['20162017','20172018'] else 0, axis = 1)\n", "\n", "#(744586, 30)\n", "model_data.head()" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "season_model\n", "2011_2012 218979\n", "2013_2014 175529\n", "2015_2016 217610\n", "2017_2018 141696\n", "Name: Goal, dtype: int64" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Check block counts\n", "model_data.groupby(['season_model'])['Goal'].count()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Fit and Score xG and xR Models\n", "\n", "Create function to score xG model based on model_vars, return ROC AUC, then score xR model, return ROC AUC. Save both models and print coefficients.\n", "Both models use Logitstic Regression, 10-fold cross-validation" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def All_Model_Scoring(model_data, data, szn):\n", " print (szn)\n", " \n", " from sklearn.cross_validation import KFold\n", " from sklearn.linear_model import LogisticRegression\n", " from sklearn.grid_search import GridSearchCV\n", " from sklearn.linear_model import LogisticRegressionCV\n", " import pickle\n", "\n", " model_vars = ['EmptyNet_SA', 'is_Rebound', 'is_Rush', 'LN_Last_Event_Time',\n", " 'LastEV_Off_Faceoff', 'LastEV_Def_Faceoff', 'LastEV_Neu_Faceoff',\n", " 'LastEV_Off_Shot', 'LastEV_Def_Shot', 'LastEV_Neu_Shot',\n", " 'LastEV_Off_Give', 'LastEV_Def_Give', 'LastEV_Neu_Give',\n", " 'LN_Rebound_Distance_Traveled_byAngle', 'Regressed_Shooting_Indexed',\n", " 'Type_BACKHAND', 'Type_DEFLECTED', 'Type_SLAP SHOT', 'Type_WRAP-AROUND',\n", " 'Type_WRIST SHOT', 'Shooter_State', 'Goalie_State',\n", " 'Handed_Class2_Opposite',\n", " 'Player_Position2_F', 'Shot_Distance',\n", " 'Shot_Distance^2', 'Shot_Distance^3', 'Shot_Angle', 'Shot_Angle^2',\n", " 'Shot_Angle^3']\n", "\n", " rebound_vars = ['xG_raw', 'EmptyNet_SA', 'is_Rebound', 'is_Rush', 'LN_Last_Event_Time',\n", " 'LastEV_Off_Faceoff', 'LastEV_Def_Faceoff', 'LastEV_Neu_Faceoff',\n", " 'LastEV_Off_Shot', 'LastEV_Def_Shot', 'LastEV_Neu_Shot',\n", " 'LastEV_Off_Give', 'LastEV_Def_Give', 'LastEV_Neu_Give',\n", " 'LN_Rebound_Distance_Traveled_byAngle', 'Regressed_Shooting_Indexed',\n", " 'Type_BACKHAND', 'Type_DEFLECTED', 'Type_SLAP SHOT', 'Type_WRAP-AROUND',\n", " 'Type_WRIST SHOT', 'Shooter_State', 'Goalie_State',\n", " 'Handed_Class2_Opposite',\n", " 'Player_Position2_F', 'Shot_Distance',\n", " 'Shot_Distance^2', 'Shot_Distance^3', 'Shot_Angle', 'Shot_Angle^2',\n", " 'Shot_Angle^3']\n", "\n", " ## Subset data to block, create model data, convert to matrix\n", " szn_data = data.loc[data.season_model == szn, :]\n", " szn_model_data = model_data.loc[model_data.season_model == szn, :].fillna(0)\n", " szn_model_mat = szn_model_data.loc[szn_model_data.season_model == szn, model_vars].as_matrix().astype(np.float)\n", "\n", " ### Train xG Model\n", " goal = szn_model_data.Goal\n", " print (str(szn) + ' seasons dimensions: ' + str(szn_model_mat.shape))\n", " print (str(szn) + ' seasons shooting%: ' + str(sum(goal) / len(goal)))\n", " \n", " ## xG Model\n", " fold = KFold(len(goal), n_folds=10, shuffle=True, random_state=777)\n", "\n", " xG_model_CV = LogisticRegressionCV(\n", " Cs=list(np.power(10.0, np.arange(-10, 10)))\n", " ,penalty='l2'\n", " ,scoring='roc_auc'\n", " ,cv=fold\n", " ,random_state=777\n", " ,max_iter=10000\n", " ,fit_intercept=True\n", " ,solver='newton-cg'\n", " ,tol=10\n", " ) \n", " \n", " ## Fit model\n", " xG_model_CV.fit(szn_model_mat, goal)\n", " \n", " ## Save Model\n", " filename = 'xG_Model_' + str(szn) + '_obj.sav'\n", " pickle.dump(xG_model_CV, open(filename, 'wb'))\n", " \n", " print (str(szn) + 'Max auc_roc:', xG_model_CV.scores_[1].max())\n", " \n", " ## Score Model\n", " xG_raw = xG_model_CV.predict_proba(szn_model_mat)[:,1]\n", "\n", " print (str(szn) + ' seasons goals: ' + str(sum(goal)) + ', season xG: ' + str(sum(xG_raw)))\n", "\n", " ### Assemble data and train xRebound Model\n", " rebound = szn_model_data.Results_inRebound.fillna(0)\n", " print (str(szn) + ' goals scored: ' + str(sum(szn_data.Goal)))\n", " print (str(szn) + ' xG scored: ' + str(sum(xG_raw)))\n", "\n", " print (str(szn) + ' seasons dimensions: ' + str(szn_model_mat.shape))\n", " print (str(szn) + ' seasons rebound%: ' + str(sum(rebound) / len(rebound)))\n", " \n", " fold = KFold(len(rebound), n_folds=10, shuffle=True, random_state=777)\n", "\n", " szn_model_mat = pd.concat([szn_model_data.reset_index(drop=True), \n", " pd.DataFrame(xG_raw, columns = ['xG_raw']).reset_index(drop=True)], axis=1).loc[:,rebound_vars].as_matrix()\n", "\n", " xR_model_CV = LogisticRegressionCV(\n", " Cs=list(np.power(10.0, np.arange(-10, 10)))\n", " ,penalty='l2'\n", " ,scoring='roc_auc'\n", " ,cv=fold\n", " ,random_state=777\n", " ,max_iter=10000\n", " ,fit_intercept=True\n", " ,solver='newton-cg'\n", " ,tol=10\n", " ) \n", " \n", "\n", " xR_model_CV.fit(szn_model_mat, rebound)\n", " \n", " filename = 'xR_Model_' + str(szn) + '_obj.sav'\n", " pickle.dump(xR_model_CV, open(filename, 'wb'))\n", " \n", " print (str(szn) + ' Max auc_roc:', xR_model_CV.scores_[1].max())\n", " \n", " xR_raw = xR_model_CV.predict_proba(szn_model_mat)[:,1]\n", "\n", " print (str(szn) + ' seasons rebounds: ' + str(sum(rebound)) + ', season xR: ' + str(sum(xR_raw)))\n", "\n", " coefs = pd.DataFrame(list(zip(np.array(rebound_vars),xR_model_CV.coef_.T)), \n", " columns = ['Variable','Coef']).sort_values(['Coef'], ascending=False)\n", " \n", " scored_data = pd.concat([\n", " pd.DataFrame(xG_raw, columns = ['xG_raw']).reset_index(drop=True),\n", " pd.DataFrame(xR_raw, columns = ['xR']).reset_index(drop=True),\n", " szn_data.reset_index(drop=True)\n", " ], axis=1)\n", "\n", " scored_data.to_csv(\"scored_data\" + str(szn) + \".csv\", index=False)\n", " \n", " return coefs.T" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2017_2018\n", "2017_2018 seasons dimensions: (141696, 30)\n", "2017_2018 seasons shooting%: 0.0633539408311\n", "2017_2018Max auc_roc: 0.775398676368\n", "2017_2018 seasons goals: 8977, season xG: 8976.17408044\n", "2017_2018 goals scored: 8977\n", "2017_2018 xG scored: 8976.17408044\n", "2017_2018 seasons dimensions: (141696, 30)\n", "2017_2018 seasons rebound%: 0.0323932926829\n", "2017_2018 Max auc_roc: 0.679599683931\n", "2017_2018 seasons rebounds: 4590.0, season xR: 4591.12339643\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
2121991418320152385112629302728425121660242210131177
VariableShooter_Stateis_ReboundType_WRAP-AROUNDLastEV_Def_ShotLN_Rebound_Distance_Traveled_byAngleType_SLAP SHOTis_RushType_WRIST SHOTRegressed_Shooting_IndexedHanded_Class2_OppositeLastEV_Off_ShotLastEV_Off_FaceoffLastEV_Off_GiveShot_Distance^2Shot_Angle^2Shot_Angle^3Shot_Distance^3Shot_AngleLN_Last_Event_TimeShot_DistanceLastEV_Def_GiveType_BACKHANDLastEV_Def_FaceoffxG_rawPlayer_Position2_FGoalie_StateLastEV_Neu_ShotLastEV_Neu_GiveEmptyNet_SAType_DEFLECTEDLastEV_Neu_Faceoff
Coef[0.443230762862][0.242593069594][0.236305225426][0.196529997088][0.0938102952462][0.0880709042958][0.0704929456784][0.0687385433112][0.0545326544149][0.0411686094917][0.0204143408809][0.00932446948272][0.00491453331725][0.000910514894216][0.000151931517311][-4.32632277469e-07][-3.29846116297e-06][-0.00738119577147][-0.0102381420226][-0.0786000143722][-0.122034340806][-0.141906765043][-0.144939916331][-0.172781889777][-0.215843836221][-0.317256602872][-0.377557564797][-0.392747493842][-0.512845561177][-0.549577304022][-0.716628355712]
\n", "
" ], "text/plain": [ " 21 2 19 \\\n", "Variable Shooter_State is_Rebound Type_WRAP-AROUND \n", "Coef [0.443230762862] [0.242593069594] [0.236305225426] \n", "\n", " 9 14 \\\n", "Variable LastEV_Def_Shot LN_Rebound_Distance_Traveled_byAngle \n", "Coef [0.196529997088] [0.0938102952462] \n", "\n", " 18 3 20 \\\n", "Variable Type_SLAP SHOT is_Rush Type_WRIST SHOT \n", "Coef [0.0880709042958] [0.0704929456784] [0.0687385433112] \n", "\n", " 15 23 \\\n", "Variable Regressed_Shooting_Indexed Handed_Class2_Opposite \n", "Coef [0.0545326544149] [0.0411686094917] \n", "\n", " 8 5 11 \\\n", "Variable LastEV_Off_Shot LastEV_Off_Faceoff LastEV_Off_Give \n", "Coef [0.0204143408809] [0.00932446948272] [0.00491453331725] \n", "\n", " 26 29 30 \\\n", "Variable Shot_Distance^2 Shot_Angle^2 Shot_Angle^3 \n", "Coef [0.000910514894216] [0.000151931517311] [-4.32632277469e-07] \n", "\n", " 27 28 4 \\\n", "Variable Shot_Distance^3 Shot_Angle LN_Last_Event_Time \n", "Coef [-3.29846116297e-06] [-0.00738119577147] [-0.0102381420226] \n", "\n", " 25 12 16 \\\n", "Variable Shot_Distance LastEV_Def_Give Type_BACKHAND \n", "Coef [-0.0786000143722] [-0.122034340806] [-0.141906765043] \n", "\n", " 6 0 24 \\\n", "Variable LastEV_Def_Faceoff xG_raw Player_Position2_F \n", "Coef [-0.144939916331] [-0.172781889777] [-0.215843836221] \n", "\n", " 22 10 13 \\\n", "Variable Goalie_State LastEV_Neu_Shot LastEV_Neu_Give \n", "Coef [-0.317256602872] [-0.377557564797] [-0.392747493842] \n", "\n", " 1 17 7 \n", "Variable EmptyNet_SA Type_DEFLECTED LastEV_Neu_Faceoff \n", "Coef [-0.512845561177] [-0.549577304022] [-0.716628355712] " ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "All_Model_Scoring(model_data, shot_data_all2, '2017_2018')" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2015_2016\n", "2015_2016 seasons dimensions: (217610, 30)\n", "2015_2016 seasons shooting%: 0.0625292955287\n", "2015_2016Max auc_roc: 0.777002681465\n", "2015_2016 seasons goals: 13607, season xG: 13605.6948619\n", "2015_2016 goals scored: 13607\n", "2015_2016 xG scored: 13605.6948619\n", "2015_2016 seasons dimensions: (217610, 30)\n", "2015_2016 seasons rebound%: 0.030462754469\n", "2015_2016 Max auc_roc: 0.676234656964\n", "2015_2016 seasons rebounds: 6629.0, season xR: 6629.73767749\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
2121918142023328526302729411981525161262402213101177
VariableShooter_Stateis_ReboundType_WRAP-AROUNDType_SLAP SHOTLN_Rebound_Distance_Traveled_byAngleType_WRIST SHOTHanded_Class2_Oppositeis_RushShot_AngleLastEV_Off_FaceoffShot_Distance^2Shot_Angle^3Shot_Distance^3Shot_Angle^2LN_Last_Event_TimeLastEV_Off_GiveLastEV_Def_ShotLastEV_Off_ShotRegressed_Shooting_IndexedShot_DistanceType_BACKHANDLastEV_Def_GiveLastEV_Def_FaceoffPlayer_Position2_FxG_rawGoalie_StateLastEV_Neu_GiveLastEV_Neu_ShotEmptyNet_SAType_DEFLECTEDLastEV_Neu_Faceoff
Coef[0.434352784035][0.217387469412][0.214124051205][0.171990320473][0.13015520102][0.115009004209][0.0473993716049][0.016660341811][0.00603524793123][0.00288536951206][0.000794841008729][4.33940532904e-07][-2.53664345805e-06][-4.37267199967e-05][-0.00227667337587][-0.00596937422759][-0.0164444195784][-0.0333510073401][-0.0632778157823][-0.0745497498176][-0.0918397745913][-0.137538495271][-0.222625390936][-0.229699315968][-0.261292995408][-0.355609845331][-0.376252326754][-0.405605383995][-0.509204826468][-0.535699799812][-0.566667016696]
\n", "
" ], "text/plain": [ " 21 2 19 \\\n", "Variable Shooter_State is_Rebound Type_WRAP-AROUND \n", "Coef [0.434352784035] [0.217387469412] [0.214124051205] \n", "\n", " 18 14 \\\n", "Variable Type_SLAP SHOT LN_Rebound_Distance_Traveled_byAngle \n", "Coef [0.171990320473] [0.13015520102] \n", "\n", " 20 23 3 \\\n", "Variable Type_WRIST SHOT Handed_Class2_Opposite is_Rush \n", "Coef [0.115009004209] [0.0473993716049] [0.016660341811] \n", "\n", " 28 5 26 \\\n", "Variable Shot_Angle LastEV_Off_Faceoff Shot_Distance^2 \n", "Coef [0.00603524793123] [0.00288536951206] [0.000794841008729] \n", "\n", " 30 27 29 \\\n", "Variable Shot_Angle^3 Shot_Distance^3 Shot_Angle^2 \n", "Coef [4.33940532904e-07] [-2.53664345805e-06] [-4.37267199967e-05] \n", "\n", " 4 11 9 \\\n", "Variable LN_Last_Event_Time LastEV_Off_Give LastEV_Def_Shot \n", "Coef [-0.00227667337587] [-0.00596937422759] [-0.0164444195784] \n", "\n", " 8 15 25 \\\n", "Variable LastEV_Off_Shot Regressed_Shooting_Indexed Shot_Distance \n", "Coef [-0.0333510073401] [-0.0632778157823] [-0.0745497498176] \n", "\n", " 16 12 6 \\\n", "Variable Type_BACKHAND LastEV_Def_Give LastEV_Def_Faceoff \n", "Coef [-0.0918397745913] [-0.137538495271] [-0.222625390936] \n", "\n", " 24 0 22 \\\n", "Variable Player_Position2_F xG_raw Goalie_State \n", "Coef [-0.229699315968] [-0.261292995408] [-0.355609845331] \n", "\n", " 13 10 1 \\\n", "Variable LastEV_Neu_Give LastEV_Neu_Shot EmptyNet_SA \n", "Coef [-0.376252326754] [-0.405605383995] [-0.509204826468] \n", "\n", " 17 7 \n", "Variable Type_DEFLECTED LastEV_Neu_Faceoff \n", "Coef [-0.535699799812] [-0.566667016696] " ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "All_Model_Scoring(model_data, shot_data_all2, '2015_2016')" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2013_2014\n", "2013_2014 seasons dimensions: (175529, 30)\n", "2013_2014 seasons shooting%: 0.0626904955876\n", "2013_2014Max auc_roc: 0.770487378959\n", "2013_2014 seasons goals: 11004, season xG: 11002.8547599\n", "2013_2014 goals scored: 11004\n", "2013_2014 xG scored: 11002.8547599\n", "2013_2014 seasons dimensions: (175529, 30)\n", "2013_2014 seasons rebound%: 0.0305818411772\n", "2013_2014 Max auc_roc: 0.654241854268\n", "2013_2014 seasons rebounds: 5368.0, season xR: 5369.87685727\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
2119182014265118282629302743239162512150241322710117
VariableShooter_StateType_WRAP-AROUNDType_SLAP SHOTType_WRIST SHOTLN_Rebound_Distance_Traveled_byAngleis_ReboundLastEV_Def_FaceoffLastEV_Off_FaceoffLastEV_Off_GiveLastEV_Off_ShotShot_AngleShot_Distance^2Shot_Angle^2Shot_Angle^3Shot_Distance^3LN_Last_Event_Timeis_RushHanded_Class2_OppositeLastEV_Def_ShotType_BACKHANDShot_DistanceLastEV_Def_GiveRegressed_Shooting_IndexedxG_rawPlayer_Position2_FLastEV_Neu_GiveGoalie_StateLastEV_Neu_FaceoffLastEV_Neu_ShotEmptyNet_SAType_DEFLECTED
Coef[0.421541250626][0.195991297068][0.180731390782][0.14615833415][0.101966515186][0.0995412193419][0.0874812352645][0.0320194794607][0.00778680101523][0.00497227573604][0.00173310331803][0.000885809270975][0.000119358110981][-9.5847370416e-07][-2.92774036083e-06][-0.0104735349129][-0.0149812990545][-0.0217751440318][-0.0265930399084][-0.0665984909775][-0.0780029052968][-0.0879427972701][-0.129339881986][-0.199616920194][-0.203785946043][-0.276553511017][-0.292844672162][-0.310902202645][-0.32141440785][-0.395679564539][-0.623100975278]
\n", "
" ], "text/plain": [ " 21 19 18 \\\n", "Variable Shooter_State Type_WRAP-AROUND Type_SLAP SHOT \n", "Coef [0.421541250626] [0.195991297068] [0.180731390782] \n", "\n", " 20 14 \\\n", "Variable Type_WRIST SHOT LN_Rebound_Distance_Traveled_byAngle \n", "Coef [0.14615833415] [0.101966515186] \n", "\n", " 2 6 5 \\\n", "Variable is_Rebound LastEV_Def_Faceoff LastEV_Off_Faceoff \n", "Coef [0.0995412193419] [0.0874812352645] [0.0320194794607] \n", "\n", " 11 8 28 \\\n", "Variable LastEV_Off_Give LastEV_Off_Shot Shot_Angle \n", "Coef [0.00778680101523] [0.00497227573604] [0.00173310331803] \n", "\n", " 26 29 30 \\\n", "Variable Shot_Distance^2 Shot_Angle^2 Shot_Angle^3 \n", "Coef [0.000885809270975] [0.000119358110981] [-9.5847370416e-07] \n", "\n", " 27 4 3 \\\n", "Variable Shot_Distance^3 LN_Last_Event_Time is_Rush \n", "Coef [-2.92774036083e-06] [-0.0104735349129] [-0.0149812990545] \n", "\n", " 23 9 16 \\\n", "Variable Handed_Class2_Opposite LastEV_Def_Shot Type_BACKHAND \n", "Coef [-0.0217751440318] [-0.0265930399084] [-0.0665984909775] \n", "\n", " 25 12 15 \\\n", "Variable Shot_Distance LastEV_Def_Give Regressed_Shooting_Indexed \n", "Coef [-0.0780029052968] [-0.0879427972701] [-0.129339881986] \n", "\n", " 0 24 13 \\\n", "Variable xG_raw Player_Position2_F LastEV_Neu_Give \n", "Coef [-0.199616920194] [-0.203785946043] [-0.276553511017] \n", "\n", " 22 7 10 \\\n", "Variable Goalie_State LastEV_Neu_Faceoff LastEV_Neu_Shot \n", "Coef [-0.292844672162] [-0.310902202645] [-0.32141440785] \n", "\n", " 1 17 \n", "Variable EmptyNet_SA Type_DEFLECTED \n", "Coef [-0.395679564539] [-0.623100975278] " ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "All_Model_Scoring(model_data, shot_data_all2, '2013_2014')" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2011_2012\n", "2011_2012 seasons dimensions: (218979, 30)\n", "2011_2012 seasons shooting%: 0.0630745413944\n", "2011_2012Max auc_roc: 0.781397035575\n", "2011_2012 seasons goals: 13812, season xG: 13811.9586764\n", "2011_2012 goals scored: 13812\n", "2011_2012 xG scored: 13811.9586764\n", "2011_2012 seasons dimensions: (218979, 30)\n", "2011_2012 seasons rebound%: 0.0304732417264\n", "2011_2012 Max auc_roc: 0.662957428347\n", "2011_2012 seasons rebounds: 6673.0, season xR: 6674.41284226\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
2118201914232511826293027289124153251660242213101717
VariableShooter_StateType_SLAP SHOTType_WRIST SHOTType_WRAP-AROUNDLN_Rebound_Distance_Traveled_byAngleHanded_Class2_Oppositeis_ReboundLastEV_Off_FaceoffLastEV_Off_GiveLastEV_Off_ShotShot_Distance^2Shot_Angle^2Shot_Angle^3Shot_Distance^3Shot_AngleLastEV_Def_ShotLastEV_Def_GiveLN_Last_Event_TimeRegressed_Shooting_Indexedis_RushShot_DistanceType_BACKHANDLastEV_Def_FaceoffxG_rawPlayer_Position2_FGoalie_StateLastEV_Neu_GiveLastEV_Neu_ShotEmptyNet_SALastEV_Neu_FaceoffType_DEFLECTED
Coef[0.383646161431][0.256764086948][0.142488000303][0.113918135507][0.0975769536292][0.0559406316352][0.0344296756424][0.0182337373267][0.0143922911474][0.00225758886072][0.00085221817346][0.000243772347851][-1.78506003457e-06][-2.72353916895e-06][-0.00345344999923][-0.00861552976001][-0.0195882791768][-0.0211591668439][-0.0297224028101][-0.0498855393577][-0.0789407644771][-0.0912119624353][-0.128691218921][-0.204174082684][-0.271887137976][-0.290430989778][-0.366768312254][-0.389375254075][-0.397079300866][-0.510863084948][-0.526980726973]
\n", "
" ], "text/plain": [ " 21 18 20 \\\n", "Variable Shooter_State Type_SLAP SHOT Type_WRIST SHOT \n", "Coef [0.383646161431] [0.256764086948] [0.142488000303] \n", "\n", " 19 14 \\\n", "Variable Type_WRAP-AROUND LN_Rebound_Distance_Traveled_byAngle \n", "Coef [0.113918135507] [0.0975769536292] \n", "\n", " 23 2 5 \\\n", "Variable Handed_Class2_Opposite is_Rebound LastEV_Off_Faceoff \n", "Coef [0.0559406316352] [0.0344296756424] [0.0182337373267] \n", "\n", " 11 8 26 \\\n", "Variable LastEV_Off_Give LastEV_Off_Shot Shot_Distance^2 \n", "Coef [0.0143922911474] [0.00225758886072] [0.00085221817346] \n", "\n", " 29 30 27 \\\n", "Variable Shot_Angle^2 Shot_Angle^3 Shot_Distance^3 \n", "Coef [0.000243772347851] [-1.78506003457e-06] [-2.72353916895e-06] \n", "\n", " 28 9 12 \\\n", "Variable Shot_Angle LastEV_Def_Shot LastEV_Def_Give \n", "Coef [-0.00345344999923] [-0.00861552976001] [-0.0195882791768] \n", "\n", " 4 15 3 \\\n", "Variable LN_Last_Event_Time Regressed_Shooting_Indexed is_Rush \n", "Coef [-0.0211591668439] [-0.0297224028101] [-0.0498855393577] \n", "\n", " 25 16 6 \\\n", "Variable Shot_Distance Type_BACKHAND LastEV_Def_Faceoff \n", "Coef [-0.0789407644771] [-0.0912119624353] [-0.128691218921] \n", "\n", " 0 24 22 \\\n", "Variable xG_raw Player_Position2_F Goalie_State \n", "Coef [-0.204174082684] [-0.271887137976] [-0.290430989778] \n", "\n", " 13 10 1 \\\n", "Variable LastEV_Neu_Give LastEV_Neu_Shot EmptyNet_SA \n", "Coef [-0.366768312254] [-0.389375254075] [-0.397079300866] \n", "\n", " 7 17 \n", "Variable LastEV_Neu_Faceoff Type_DEFLECTED \n", "Coef [-0.510863084948] [-0.526980726973] " ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "All_Model_Scoring(model_data, shot_data_all2, '2011_2012')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Fin " ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 2 }