{ "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8-final" }, "orig_nbformat": 2, "kernelspec": { "name": "python3", "display_name": "Python 3.8.8 64-bit", "metadata": { "interpreter": { "hash": "b3ba2566441a7c06988d0923437866b63cedc61552a5af99d1f4fb67d367b25f" } } } }, "nbformat": 4, "nbformat_minor": 2, "cells": [ { "cell_type": "code", "execution_count": 184, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "import numpy as np\n", "import pandas as pd\n", "#from pandas.api.types import CategoricalDtype\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "import altair as alt\n", "\n", "from sklearn.preprocessing import MinMaxScaler\n", "\n", "from ipywidgets import interact, interactive, fixed, interact_manual\n", "import ipywidgets as widgets" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "years = [2020]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "data = pd.DataFrame()\n", "for i in years: \n", " i_data = pd.read_csv('https://github.com/guga31bb/nflfastR-data/blob/master/data/' \\\n", " 'play_by_play_' + str(i) + '.csv.gz?raw=True',\n", " compression='gzip', low_memory=False)\n", " data = data.append(i_data, sort=True)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "data = data.drop(columns=data.columns[data.columns.str.contains('epa') | data.columns.str.contains('wp')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('home') | data.columns.str.contains('away')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('tackle') | data.columns.str.contains('fumble')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('punt') | data.columns.str.contains('kick')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('xyac') | data.columns.str.contains('prob')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('stadium')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('blocked')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('extra')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('drive')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('fantasy')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('field_goal')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('jersey')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('lateral')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('pass_def')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('qb_hit')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('return')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('run')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('rush')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('two')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('_down_')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('end_')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('_post')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('series_')])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('td_')])\n", "data = data.drop(index=data.index[data.play_type_nfl != 'PASS'])\n", "data = data.drop(columns=data.columns[data.columns.str.contains('penalty')])\n", "data = data.drop(columns=['cp', 'cpoe', 'ep', 'pass_attempt', 'pass_oe', 'sp', 'touchback', 'weather'])\n", "#data = data.drop(columns=data.columns[data.columns.str.contains('end_')])\n", "#data = data.drop(columns=['cp', 'cpoe'])\n", "#data.columns[data.columns.str.contains('end_')]" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(18983, 95)" ] }, "metadata": {}, "execution_count": 5 } ], "source": [ "data.shape" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Index(['aborted_play', 'air_yards', 'complete_pass', 'defteam',\n", " 'defteam_score', 'defteam_timeouts_remaining', 'desc', 'div_game',\n", " 'down', 'first_down', 'game_date', 'game_half', 'game_id',\n", " 'game_seconds_remaining', 'goal_to_go', 'half_seconds_remaining', 'id',\n", " 'incomplete_pass', 'interception', 'interception_player_id',\n", " 'interception_player_name', 'location', 'name', 'nfl_api_id',\n", " 'no_huddle', 'old_game_id', 'order_sequence', 'out_of_bounds', 'pass',\n", " 'pass_length', 'pass_location', 'pass_touchdown', 'passer', 'passer_id',\n", " 'passer_player_id', 'passer_player_name', 'passing_yards', 'play',\n", " 'play_clock', 'play_deleted', 'play_id', 'play_type', 'play_type_nfl',\n", " 'posteam', 'posteam_score', 'posteam_timeouts_remaining',\n", " 'posteam_type', 'qb_dropback', 'qb_kneel', 'qb_scramble', 'qb_spike',\n", " 'qtr', 'quarter_end', 'quarter_seconds_remaining', 'receiver',\n", " 'receiver_id', 'receiver_player_id', 'receiver_player_name',\n", " 'receiving_yards', 'replay_or_challenge', 'replay_or_challenge_result',\n", " 'result', 'roof', 'sack', 'safety', 'score_differential', 'season',\n", " 'season_type', 'series', 'shotgun', 'side_of_field', 'special',\n", " 'special_teams_play', 'spread_line', 'st_play_type', 'start_time',\n", " 'success', 'surface', 'temp', 'time', 'time_of_day', 'timeout',\n", " 'timeout_team', 'total', 'total_line', 'touchdown', 'week', 'wind',\n", " 'xpass', 'yardline_100', 'yards_after_catch', 'yards_gained', 'ydsnet',\n", " 'ydstogo', 'yrdln'],\n", " dtype='object')" ] }, "metadata": {}, "execution_count": 6 } ], "source": [ "data.columns" ] }, { "source": [ "# Seaborn Violin Plot" ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# https://stackoverflow.com/questions/40372030/pandas-round-to-the-nearest-n\n", "def custom_round(x, base=5):\n", " return (x // base) * base" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def plot_completions_violin(week='ALL', team='ALL'):\n", " if (team == 'ALL') and (week == 'ALL'):\n", " plot_subdata = data\n", " elif (team == 'ALL'):\n", " plot_subdata = data[(data.week == int(week))]\n", " elif (week == 'ALL'):\n", " plot_subdata = data[(data.posteam == team)]\n", " else:\n", " plot_subdata = data[(data.week == int(week)) & (data.posteam == team)]\n", "\n", " completions = int(sum(plot_subdata.complete_pass))\n", " total_passes = plot_subdata.shape[0]\n", " \n", " if plot_subdata.shape[0] == 0:\n", " return None\n", "\n", " plt.figure(figsize=(15, 5))\n", " \n", " ax = sns.violinplot(data=plot_subdata,\n", " x='air_yards', y='pass_location', order=['left', 'middle', 'right'],\n", " hue='complete_pass', split=True, hue_order=[1.0, 0.0],\n", " inner='quartile',\n", " orient='h',\n", " palette={1.0: '#1479FC', 0.0: '0.85'},\n", " linewidth=0.5)\n", " \n", " ax.xaxis.set_major_locator(plt.MultipleLocator(10))\n", " ax.xaxis.set_minor_locator(plt.MultipleLocator(5))\n", " ax.grid(b=True, which='minor', axis='x', linewidth=0.5)\n", "\n", " sns.set_theme(rc={\n", " 'axes.facecolor': '#196F0C',\n", " 'figure.facecolor': '#196F0C',\n", "\n", " 'axes.labelcolor': 'white',\n", " 'axes.titlecolor': 'white',\n", " 'axes.titlesize': 20,\n", "\n", " 'xtick.color': 'white',\n", " 'ytick.color': 'white',\n", " })\n", " plt.setp(ax.collections, edgecolor='black')\n", "\n", " plt.xlabel('Air Yards (from LOS)')\n", " plt.ylabel('Pass Location')\n", " plt.title(f'Passing success ({completions}/{total_passes})')\n", " plt.legend(title = 'Completions')\n", " \n", " plt.show()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "weeks = data.week.unique()\n", "teams = data.posteam.unique()\n", "teams.sort()\n", "\n", "weeks = np.append(weeks, 'ALL')\n", "teams = np.append(teams, 'ALL')" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "output_type": "display_data", "data": { "text/plain": "interactive(children=(Dropdown(description='week', options=('1', '2', '3', '4', '5', '6', '7', '8', '9', '10',…", "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "3073dd28a789473383582006c6ac3112" } }, "metadata": {} } ], "source": [ "@interact\n", "def f(week=weeks, team=teams):\n", " return plot_completions_violin(week, team)" ] }, { "source": [ "# Matplotlib Plot" ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "#one_game = data[(data.week == 12) & (data.posteam == 'KC')].copy()\n", "def prepare_passing_accuracy_data(one_game):\n", " one_game['air_yards_rounded'] = custom_round(one_game.air_yards)\n", " one_game = one_game.groupby(by=['air_yards_rounded', 'pass_location'], as_index=False).agg(\n", " {\n", " 'complete_pass': ['sum', 'count'] \n", " }\n", " )\n", "\n", " one_game.columns = ['air_yards_rounded', 'location', 'completions', 'passes']\n", " one_game['location_int'] = one_game.location.replace({'left': 2, 'middle': 1, 'right': 0})\n", " one_game['location'] = one_game.location.str.capitalize()\n", " one_game['accuracy'] = one_game.completions / one_game.passes\n", " one_game['text'] = one_game.completions.astype('int64').astype(str) + '/' + one_game.passes.astype(str)\n", " one_game = one_game.sort_values(by=['location_int'])\n", " return one_game" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "def normalize_passes_size(passes):\n", " max_passes = passes.max()\n", " if max_passes > 1000:\n", " min_range, max_range = (1000, 7500)\n", " elif max_passes > 50:\n", " min_range, max_range = (1000, 6000)\n", " else:\n", " min_range, max_range = (500, 5000)\n", "\n", " return (max_range - min_range) * (passes - passes.min()) / (passes.max() - passes.min()) + min_range " ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0 7500.000000\n", "1 1117.234469\n", "2 1013.026052\n", "3 1000.000000\n", "dtype: float64" ] }, "metadata": {}, "execution_count": 31 } ], "source": [ "normalize_passes_size(pd.Series([10000,200, 40, 20]))" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "def plot_completions_matplotlib(week='ALL', team='ALL'):\n", " if (team == 'ALL') and (week == 'ALL'):\n", " one_game = data\n", " elif (team == 'ALL'):\n", " one_game = data[(data.week == int(week))]\n", " elif (week == 'ALL'):\n", " one_game = data[(data.posteam == team)]\n", " else:\n", " one_game = data[(data.week == int(week)) & (data.posteam == team)]\n", " \n", " one_game = prepare_passing_accuracy_data(one_game)\n", " max_passes = one_game.passes.max()\n", " total_completions = int(sum(one_game.completions))\n", " total_passes = int(sum(one_game.passes))\n", "\n", " fig, ax = plt.subplots(1,1,figsize=(20,10))\n", "\n", " # sns.set_theme(rc={\n", " # 'axes.facecolor': '#196F0C',\n", " # 'figure.facecolor': '#19300C',\n", "\n", " # 'axes.labelcolor': 'white',\n", " # 'axes.titlecolor': 'white',\n", " \n", " # 'xtick.color': 'white',\n", " # 'ytick.color': 'white',\n", " # })\n", "\n", " plt.xlim(-20, 70)\n", " plt.ylim(-1, 3)\n", " plt.xticks(fontsize=20, color='white')\n", " plt.yticks(fontsize=20, color='white')\n", " \n", " ax.xaxis.set_major_locator(plt.MultipleLocator(10))\n", " ax.xaxis.set_minor_locator(plt.MultipleLocator(5))\n", " ax.grid(b=True, which='both', axis='x', linewidth=2.5)\n", " #ax.grid(b=True, which='minor', axis='x', linewidth=1)\n", " ax.grid(b=False, which='both', axis='y')\n", " ax.set_axisbelow(True)\n", "\n", " # if max_passes > 50:\n", " # one_game['passes_size'] = ((6000 - 500) * one_game.passes / max_passes) + 500\n", " # else:\n", " # one_game['passes_size'] = ((5000 - 500) * one_game.passes / max_passes) + 500\n", " one_game['passes_size'] = normalize_passes_size(one_game.passes)\n", "\n", " plt.scatter(x=one_game.air_yards_rounded,\n", " y=one_game.location, \n", " s=one_game.passes_size,\n", " c=one_game.accuracy,\n", " cmap='Oranges',\n", " edgecolors='black')\n", "\n", " for x, y, text in zip(one_game.air_yards_rounded, one_game.location, one_game.text):\n", " plt.text(x=x, y=y, s=text, \n", " ha = 'center', va = 'center',\n", " fontsize=15)\n", "\n", " fig.set_facecolor('#196F0C')\n", " ax.set_facecolor('#196F0C')\n", " #ax.set_labelcolor('white')\n", " #ax.set_titlecolor('white')\n", "\n", " plt.xlabel('Air yards (from LOS)', fontsize=15, color='white')\n", " plt.ylabel('')\n", " plt.title(f'Pass success for Week={week} and Team={team} ({total_completions}/{total_passes})', fontsize=30, color='white')\n", "\n", " plt.show()\n", " plt.rcParams.update(plt.rcParamsDefault)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "output_type": "display_data", "data": { "text/plain": "interactive(children=(Dropdown(description='week', options=('1', '2', '3', '4', '5', '6', '7', '8', '9', '10',…", "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "a92c9d62e5d44f6581d005bfc7f29dfc" } }, "metadata": {} } ], "source": [ "#plot_completions_matplotlib(5, 'KC')\n", "@interact\n", "def f(week=weeks, team=teams):\n", " return plot_completions_matplotlib(week, team)" ] }, { "source": [ "# Altair Plot" ], "cell_type": "markdown", "metadata": {} }, { "cell_type": "code", "execution_count": 118, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "\n
\n", "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "execution_count": 118 } ], "source": [ "team = 'KC'\n", "week = 'ALL'\n", "if (team == 'ALL') and (week == 'ALL'):\n", " one_game = data\n", "elif (team == 'ALL'):\n", " one_game = data[(data.week == int(week))]\n", "elif (week == 'ALL'):\n", " one_game = data[(data.posteam == team)]\n", "else:\n", " one_game = data[(data.week == int(week)) & (data.posteam == team)]\n", "\n", "one_game = prepare_passing_accuracy_data(one_game)\n", "total_completions = int(sum(one_game.completions))\n", "total_passes = int(sum(one_game.passes))\n", "\n", "points = alt.Chart(one_game).mark_circle().encode(\n", " alt.X('air_yards_rounded', title='Air Yards from Line of Scrimmage', \n", " axis=alt.Axis(values=list(range(-20,70,5))),\n", " scale=alt.Scale(domain=[-20, 70])),\n", " alt.Y('location', title=None),\n", " alt.Size('passes', legend=None),\n", " alt.Color('accuracy', scale=alt.Scale(scheme='lightgreyred'), legend=alt.Legend(title='Accuracy')),\n", ")\n", "\n", "text = alt.Chart(one_game).mark_text(\n", " align='center',\n", " baseline='middle',\n", " fontSize=10,\n", " color='black'\n", ").encode(\n", " x='air_yards_rounded',\n", " y='location',\n", " text='text'\n", ")\n", "\n", "alt.layer(\n", " points, \n", " text\n", ").properties(\n", " title=f'Pass success for Week={week} and Team={team} ({total_completions}/{total_passes})',\n", " #subtitle='here',\n", " width=1000,\n", " height=500\n", ").configure(\n", " background='#196F0C'\n", ").configure_axis(\n", " labelColor='white',\n", " labelFontSize=20,\n", " titleColor='white',\n", " titleFontSize=30,\n", ").configure_scale(\n", " maxSize=6000\n", ").configure_title(\n", " color='white',\n", " fontSize=30\n", ").configure_view(\n", " fill='#19500C'\n", ")#.interactive()" ] }, { "cell_type": "code", "execution_count": 205, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(1093, 9)" ] }, "metadata": {}, "execution_count": 205 } ], "source": [ "season = data.copy()\n", "season['air_yards_rounded'] = custom_round(season.air_yards)\n", "season = season.groupby(by=['posteam', 'air_yards_rounded', 'pass_location'], as_index=False).agg(\n", " {\n", " 'complete_pass': ['sum', 'count'] \n", " }\n", ")\n", "\n", "season.columns = ['team', 'air_yards_rounded', 'location', 'completions', 'passes']\n", "season['location_int'] = season.location.replace({'left': 2, 'middle': 1, 'right': 0})\n", "season['location'] = season.location.str.capitalize()\n", "season['accuracy'] = season.completions / season.passes\n", "season['text'] = season.completions.astype('int64').astype(str) + '/' + season.passes.astype(str)\n", "season = season.sort_values(by=['location_int'])\n", "season = season.reset_index()\n", "\n", "season_2020_url = 'season_2020.json'\n", "season.to_json(season_2020_url, orient='records')\n", "season.shape" ] }, { "cell_type": "code", "execution_count": 208, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/html": "\n\n", "text/plain": [ "alt.LayerChart(...)" ] }, "metadata": {}, "execution_count": 208 } ], "source": [ "input_dropdown = alt.binding_select(options=np.sort(season.team.unique()), name='Team: ')\n", "selection = alt.selection_single(fields=['team'], bind=input_dropdown, init={'team': 'NE'})\n", "github_season_2020_url = 'https://raw.githubusercontent.com/javendano585/NFL_Data/main/season_2020.json'\n", "\n", "points = alt.Chart(github_season_2020_url).mark_circle().encode(\n", " alt.X('air_yards_rounded:Q', title='Air Yards from Line of Scrimmage', \n", " axis=alt.Axis(values=list(range(-20,70,5))),\n", " scale=alt.Scale(domain=[-20, 70])),\n", " alt.Y('location:N', title=None),\n", " alt.Size('passes:Q', legend=None),\n", " alt.Color('accuracy:Q', scale=alt.Scale(scheme='lightgreyred'), legend=alt.Legend(title='Accuracy')),\n", " tooltip='text:N'\n", ")\n", "\n", "text = alt.Chart(github_season_2020_url).mark_text(\n", " align='center',\n", " baseline='middle',\n", " fontSize=10,\n", " color='black'\n", ").encode(\n", " x='air_yards_rounded:Q',\n", " y='location:N',\n", " text='text:N'\n", ")\n", "\n", "passing_chart = alt.layer(\n", " points, \n", " text\n", ").properties(\n", " title={\n", " \"text\": 'Pass success for 2020 Season', \n", " #\"subtitle\": f'Accuracy = {(total_completions / total_passes):.3f}% ({total_completions}/{total_passes})'\n", " },\n", " width=1000,\n", " height=500\n", ").configure(\n", " background='#196F0C'\n", ").configure_axis(\n", " labelColor='white',\n", " labelFontSize=20,\n", " titleColor='white',\n", " titleFontSize=30,\n", ").configure_scale(\n", " maxSize=6000\n", ").configure_title(\n", " anchor='start',\n", " color='white',\n", " fontSize=30,\n", " subtitleFontSize=15,\n", " subtitleColor='white'\n", ").configure_view(\n", " fill='#19500C'\n", ").add_selection(\n", " selection\n", ").transform_filter(\n", " selection\n", ")#.interactive()\n", "\n", "passing_chart.save('Passing_2020.html')\n", "passing_chart" ] } ] }