{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Getting Play by Play data from nba.com with players on the floor" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import json\n", "import urllib2\n", "import pandas as pd\n", "\n", "# base url for play by play for game id\n", "GAME_BASE_URL = \"http://stats.nba.com/stats/playbyplayv2?EndPeriod=10&EndRange=55800&GameID=&RangeType=2&Season=2014-15&SeasonType=Regular+Season&StartPeriod=1&StartRange=0\"\n", "# base url for moment data for event id\n", "MOMENT_BASE_URL = \"http://stats.nba.com/stats/locations_getmoments/?eventid=&gameid=\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "First let's create a function to get the play by play for a single game.\n", "\n", "To see what it looks like as JSON check out this link - http://stats.nba.com/stats/playbyplayv2?EndPeriod=10&EndRange=55800&GameID=0021400001&RangeType=2&Season=2014-15&SeasonType=Regular+Season&StartPeriod=1&StartRange=0" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def getRawPbpForGame(game_id):\n", " # for a given game_id, return a pandas data frame with the raw play by play\n", " url = GAME_BASE_URL.replace(\"\", game_id)\n", " \n", " response = urllib2.urlopen(url)\n", " data = json.loads(response.read())\n", "\n", " game_info = []\n", " plays = []\n", " for line in data['resultSets']:\n", " if 'name' in line.keys() and line['name'] == 'PlayByPlay':\n", " for event in line['rowSet']:\n", " row = dict(zip([header for header in line['headers']],event))\n", " plays.append(row)\n", "\n", " return pd.DataFrame(plays)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EVENTMSGACTIONTYPEEVENTMSGTYPEEVENTNUMGAME_IDHOMEDESCRIPTIONNEUTRALDESCRIPTIONPCTIMESTRINGPERIODPERSON1TYPEPERSON2TYPEPERSON3TYPEPLAYER1_IDPLAYER1_NAMEPLAYER1_TEAM_ABBREVIATIONPLAYER1_TEAM_CITYPLAYER1_TEAM_IDPLAYER1_TEAM_NICKNAMEPLAYER2_IDPLAYER2_NAMEPLAYER2_TEAM_ABBREVIATION
0 0 12 0 0021400001 None None 12:00 1 0 0 0 0 None None None NaN None 0 None None...
1 0 10 1 0021400001 Jump Ball Davis vs. Vucevic: Tip to Holiday None 12:00 1 4 5 4 203076 Anthony Davis NOP New Orleans 1610612740 Pelicans 202696 Nikola Vucevic ORL...
2 1 2 2 0021400001 MISS Davis 20' Jump Shot None 11:43 1 4 0 0 203076 Anthony Davis NOP New Orleans 1610612740 Pelicans 0 None None...
3 0 4 3 0021400001 None None 11:42 1 5 0 0 203095 Evan Fournier ORL Orlando 1610612753 Magic 0 None None...
4 66 1 4 0021400001 None None 11:31 1 5 5 0 202696 Nikola Vucevic ORL Orlando 1610612753 Magic 203901 Elfrid Payton ORL...
\n", "

5 rows × 33 columns

\n", "
" ], "text/plain": [ " EVENTMSGACTIONTYPE EVENTMSGTYPE EVENTNUM GAME_ID \\\n", "0 0 12 0 0021400001 \n", "1 0 10 1 0021400001 \n", "2 1 2 2 0021400001 \n", "3 0 4 3 0021400001 \n", "4 66 1 4 0021400001 \n", "\n", " HOMEDESCRIPTION NEUTRALDESCRIPTION \\\n", "0 None None \n", "1 Jump Ball Davis vs. Vucevic: Tip to Holiday None \n", "2 MISS Davis 20' Jump Shot None \n", "3 None None \n", "4 None None \n", "\n", " PCTIMESTRING PERIOD PERSON1TYPE PERSON2TYPE PERSON3TYPE PLAYER1_ID \\\n", "0 12:00 1 0 0 0 0 \n", "1 12:00 1 4 5 4 203076 \n", "2 11:43 1 4 0 0 203076 \n", "3 11:42 1 5 0 0 203095 \n", "4 11:31 1 5 5 0 202696 \n", "\n", " PLAYER1_NAME PLAYER1_TEAM_ABBREVIATION PLAYER1_TEAM_CITY \\\n", "0 None None None \n", "1 Anthony Davis NOP New Orleans \n", "2 Anthony Davis NOP New Orleans \n", "3 Evan Fournier ORL Orlando \n", "4 Nikola Vucevic ORL Orlando \n", "\n", " PLAYER1_TEAM_ID PLAYER1_TEAM_NICKNAME PLAYER2_ID PLAYER2_NAME \\\n", "0 NaN None 0 None \n", "1 1610612740 Pelicans 202696 Nikola Vucevic \n", "2 1610612740 Pelicans 0 None \n", "3 1610612753 Magic 0 None \n", "4 1610612753 Magic 203901 Elfrid Payton \n", "\n", " PLAYER2_TEAM_ABBREVIATION \n", "0 None ... \n", "1 ORL ... \n", "2 None ... \n", "3 None ... \n", "4 ORL ... \n", "\n", "[5 rows x 33 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "getRawPbpForGame(\"0021400001\").head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's make use of the player tracking moment data on nba.com to find the players on the floor for a given event.\n", "\n", "Here is what the data looks like for one event - http://stats.nba.com/stats/locations_getmoments/?eventid=1&gameid=0021400001\n", "\n", "For a single moment it looks like this:\n", "\n", "[1, 1414541586032, 720.0, 24.0, None, [[-1, -1, 47.4393, 25.94672, 10.65305], [1610612740, 201569, 48.29735, 18.68403, 0.0], [1610612740, 201600, 58.46317, 20.60878, 0.0], [1610612740, 201950, 64.77708, 25.10907, 0.0], [1610612740, 201936, 47.45251, 33.84332, 0.0], [1610612740, 203076, 48.0291, 24.93867, 0.0], [1610612753, 202696, 46.04217, 26.94592, 0.0], [1610612753, 203124, 28.82642, 25.30571, 0.0], [1610612753, 202699, 45.89401, 34.01147, 0.0], [1610612753, 203901, 19.42869, 25.41476, 0.0], [1610612753, 203095, 45.82049, 18.77761, 0.0]]]\n", "\n", "\n", "Here is what all this list represents:\n", "\n", "[period, unix timestamp in ms, seconds remaining in period, seconds remaining on shot clock, ??, [list of player/ball data]]\n", "\n", "And then the list of player/ball data represents:\n", "\n", "[team id, player id, x, y, z]\n", "\n", "\n", "For the ball the team id and player id are -1. Only the ball has z coordinates\n", "\n", "Using this, we can find out who was on the floor for a given event id\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def getPlayersOnFloorForMoment(game_id, event_id):\n", " # for a given game_id and event_id, return a dict with a list players on the floor for each team and team ids\n", " url = MOMENT_BASE_URL.replace(\"\", str(event_id))\n", " url = url + game_id\n", " response = urllib2.urlopen(url)\n", " data = json.loads(response.read())\n", " players = {}\n", " players['home_team_id'] = data[\"moments\"][0][5][1][0]\n", " players['away_team_id'] = data[\"moments\"][0][5][6][0]\n", " players['home_player_ids'] =[]\n", " players['away_player_ids'] =[]\n", " for i in range(1,6):\n", " players['home_player_ids'].append(data[\"moments\"][0][5][i][1])\n", " for i in range(6,11):\n", " players['away_player_ids'].append(data[\"moments\"][0][5][i][1])\n", " return players" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'away_player_ids': [202696, 203124, 202699, 203901, 203095],\n", " 'away_team_id': 1610612753,\n", " 'home_player_ids': [201569, 201600, 201950, 201936, 203076],\n", " 'home_team_id': 1610612740}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "getPlayersOnFloorForMoment(\"0021400001\", 1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Rather than using the above function to get the players on the floor for every event id we can simplify things and use it to get the players that start each period and use the play by play data to substitute players in and out. In the play by play this is when EVENTMSGTYPE = 8. PLAYER1_ID is the player getting subbed out and PLAYER2_ID is the player getting subbed in." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def getPlayersOnFloorForPeriod(period):\n", " # for a given period data frame, return a data frame with new columns for the players on the floor\n", " period = period.reset_index(drop=True)\n", " start_event_num = period['EVENTNUM'].min()\n", " period_number = period['PERIOD'].mean()\n", " if period_number == 1 or period_number == 3 or period_number > 4:\n", " start_event_num += 1\n", " period_starters = getPlayersOnFloorForMoment(game_id, start_event_num)\n", " period['HOME_TEAM_ID'] = period_starters[\"home_team_id\"]\n", " period['AWAY_TEAM_ID'] = period_starters[\"away_team_id\"]\n", " period['HOME_PLAYER1_ID'] = period_starters['home_player_ids'][0]\n", " period['HOME_PLAYER2_ID'] = period_starters['home_player_ids'][1]\n", " period['HOME_PLAYER3_ID'] = period_starters['home_player_ids'][2]\n", " period['HOME_PLAYER4_ID'] = period_starters['home_player_ids'][3]\n", " period['HOME_PLAYER5_ID'] = period_starters['home_player_ids'][4]\n", " period['AWAY_PLAYER1_ID'] = period_starters['away_player_ids'][0]\n", " period['AWAY_PLAYER2_ID'] = period_starters['away_player_ids'][1]\n", " period['AWAY_PLAYER3_ID'] = period_starters['away_player_ids'][2]\n", " period['AWAY_PLAYER4_ID'] = period_starters['away_player_ids'][3]\n", " period['AWAY_PLAYER5_ID'] = period_starters['away_player_ids'][4]\n", " \n", " # get index for all substitutions and for each one sub in and out appropriate players\n", " subs = period[period['EVENTMSGTYPE'] == 8].index.tolist()\n", " end = len(period.index)\n", " for i in range(len(subs)):\n", " if str(period['HOME_PLAYER1_ID'].iloc[subs[i]]) == str(period['PLAYER1_ID'][subs[i]]):\n", " period.ix[subs[i]:end, 'HOME_PLAYER1_ID'] = str(period['PLAYER2_ID'][subs[i]])\n", " elif str(period['HOME_PLAYER2_ID'].iloc[subs[i]]) == str(period['PLAYER1_ID'][subs[i]]):\n", " period.ix[subs[i]:end, 'HOME_PLAYER2_ID'] = str(period['PLAYER2_ID'][subs[i]])\n", " elif str(period['HOME_PLAYER3_ID'].iloc[subs[i]]) == str(period['PLAYER1_ID'][subs[i]]):\n", " period.ix[subs[i]:end, 'HOME_PLAYER3_ID'] = str(period['PLAYER2_ID'][subs[i]])\n", " elif str(period['HOME_PLAYER4_ID'].iloc[subs[i]]) == str(period['PLAYER1_ID'][subs[i]]):\n", " period.ix[subs[i]:end, 'HOME_PLAYER4_ID'] = str(period['PLAYER2_ID'][subs[i]])\n", " elif str(period['HOME_PLAYER5_ID'].iloc[subs[i]]) == str(period['PLAYER1_ID'][subs[i]]):\n", " period.ix[subs[i]:end, 'HOME_PLAYER5_ID'] = str(period['PLAYER2_ID'][subs[i]])\n", " elif str(period['AWAY_PLAYER1_ID'].iloc[subs[i]]) == str(period['PLAYER1_ID'][subs[i]]):\n", " period.ix[subs[i]:end, 'AWAY_PLAYER1_ID'] = str(period['PLAYER2_ID'][subs[i]])\n", " elif str(period['AWAY_PLAYER2_ID'].iloc[subs[i]]) == str(period['PLAYER1_ID'][subs[i]]):\n", " period.ix[subs[i]:end, 'AWAY_PLAYER2_ID'] = str(period['PLAYER2_ID'][subs[i]])\n", " elif str(period['AWAY_PLAYER3_ID'].iloc[subs[i]]) == str(period['PLAYER1_ID'][subs[i]]):\n", " period.ix[subs[i]:end, 'AWAY_PLAYER3_ID'] = str(period['PLAYER2_ID'][subs[i]])\n", " elif str(period['AWAY_PLAYER4_ID'].iloc[subs[i]]) == str(period['PLAYER1_ID'][subs[i]]):\n", " period.ix[subs[i]:end, 'AWAY_PLAYER4_ID'] = str(period['PLAYER2_ID'][subs[i]])\n", " elif str(period['AWAY_PLAYER5_ID'].iloc[subs[i]]) == str(period['PLAYER1_ID'][subs[i]]):\n", " period.ix[subs[i]:end, 'AWAY_PLAYER5_ID'] = str(period['PLAYER2_ID'][subs[i]])\n", " return period" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Putting it all together we can get the play by play data with players on the floor for a full game" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [ "game_id = \"0021400001\"\n", "pbp = getRawPbpForGame(game_id)\n", "pbp_with_lineups = pbp.groupby(\"PERIOD\").apply(getPlayersOnFloorForPeriod)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HOME_PLAYER1_IDHOME_PLAYER2_IDHOME_PLAYER3_IDHOME_PLAYER4_IDHOME_PLAYER5_IDAWAY_PLAYER1_IDAWAY_PLAYER2_IDAWAY_PLAYER3_IDAWAY_PLAYER4_IDAWAY_PLAYER5_ID
PERIOD
10 201569 201600 201950 201936 203076 202696 203124 202699 203901 203095
1 201569 201600 201950 201936 203076 202696 203124 202699 203901 203095
2 201569 201600 201950 201936 203076 202696 203124 202699 203901 203095
3 201569 201600 201950 201936 203076 202696 203124 202699 203901 203095
4 201569 201600 201950 201936 203076 202696 203124 202699 203901 203095
\n", "

5 rows × 10 columns

\n", "
" ], "text/plain": [ " HOME_PLAYER1_ID HOME_PLAYER2_ID HOME_PLAYER3_ID HOME_PLAYER4_ID \\\n", "PERIOD \n", "1 0 201569 201600 201950 201936 \n", " 1 201569 201600 201950 201936 \n", " 2 201569 201600 201950 201936 \n", " 3 201569 201600 201950 201936 \n", " 4 201569 201600 201950 201936 \n", "\n", " HOME_PLAYER5_ID AWAY_PLAYER1_ID AWAY_PLAYER2_ID AWAY_PLAYER3_ID \\\n", "PERIOD \n", "1 0 203076 202696 203124 202699 \n", " 1 203076 202696 203124 202699 \n", " 2 203076 202696 203124 202699 \n", " 3 203076 202696 203124 202699 \n", " 4 203076 202696 203124 202699 \n", "\n", " AWAY_PLAYER4_ID AWAY_PLAYER5_ID \n", "PERIOD \n", "1 0 203901 203095 \n", " 1 203901 203095 \n", " 2 203901 203095 \n", " 3 203901 203095 \n", " 4 203901 203095 \n", "\n", "[5 rows x 10 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pbp_with_lineups[['HOME_PLAYER1_ID', 'HOME_PLAYER2_ID', 'HOME_PLAYER3_ID', 'HOME_PLAYER4_ID', 'HOME_PLAYER5_ID', 'AWAY_PLAYER1_ID', 'AWAY_PLAYER2_ID', 'AWAY_PLAYER3_ID', 'AWAY_PLAYER4_ID', 'AWAY_PLAYER5_ID']].head()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# write to a csv file\n", "pbp_with_lineups.to_csv('pbp_with_lineups.csv',index=False,header=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 0 }