{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# World Cup Analysis\n",
    "\n",
    "In this notebook I present the code required to replicate my World Cup analysis post using event data.\n",
    "\n",
    "## Required Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.ticker as ticker\n",
    "import matplotlib.patheffects as path_effects\n",
    "import matplotlib.font_manager as fm\n",
    "from matplotlib.colors import LinearSegmentedColormap, Normalize\n",
    "import matplotlib.patches as mpatches\n",
    "from matplotlib import cm\n",
    "import numpy as np\n",
    "from matplotlib import cm\n",
    "from highlight_text import fig_text, ax_text\n",
    "from ast import literal_eval\n",
    "\n",
    "from mplsoccer import Pitch, VerticalPitch\n",
    "\n",
    "from PIL import Image\n",
    "import urllib\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "font_path = \"../assets/fonts\"\n",
    "\n",
    "# Register every .ttf/.otf file found one folder below `font_path` with\n",
    "# matplotlib's font manager, so those families can be selected by name\n",
    "# (the rcParams line at the end of this cell selects 'DM Sans').\n",
    "for family_folder in sorted(os.listdir(font_path)):\n",
    "    family_dir = os.path.join(font_path, family_folder)\n",
    "    # Skip stray files (e.g. .DS_Store): listing a non-directory would raise.\n",
    "    if not os.path.isdir(family_dir):\n",
    "        continue\n",
    "    for font_file in sorted(os.listdir(family_dir)):\n",
    "        if os.path.splitext(font_file)[1].lower() in ('.ttf', '.otf'):\n",
    "            fm.fontManager.addfont(os.path.join(family_dir, font_file))\n",
    "\n",
    "plt.style.use(\"../assets/stylesheets/soc_base.mplstyle\")\n",
    "plt.rcParams['font.family'] = 'DM Sans'"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Define SOC colormap"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "colors = [\n",
    "    '#efe9e6',\n",
    "    '#e9e1dd',\n",
    "    '#e3dad3',\n",
    "    '#dcd3c9',\n",
    "    '#d4ccc0',\n",
    "    '#ccc6b7',\n",
    "    '#c3bfae',\n",
    "    '#bab9a6',\n",
    "    '#b0b39e',\n",
    "    '#a6ad96',\n",
    "    '#9ba790',\n",
    "    '#90a18a',\n",
    "    '#849b84',\n",
    "    
'#789680',\n",
    "    '#6c907c',\n",
    "    '#608a79',\n",
    "    '#538476',\n",
    "    '#467e74',\n",
    "    '#387872',\n",
    "    '#287271',\n",
    "]\n",
    "soc_cm = LinearSegmentedColormap.from_list('SOC', colors, N=50)\n",
    "\n",
    "# cm.register_cmap was deprecated in matplotlib 3.7 and removed in 3.9, so use\n",
    "# the colormap registry when it exists. force=True lets this cell be re-run\n",
    "# without an 'already registered' error.\n",
    "import matplotlib\n",
    "\n",
    "if hasattr(matplotlib, 'colormaps'):\n",
    "    matplotlib.colormaps.register(soc_cm, name='SOC', force=True)\n",
    "else:\n",
    "    cm.register_cmap(name='SOC', cmap=soc_cm)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the dataset\n",
    "\n",
    "The following CSV file contains event level data for every match played during the 2022 FIFA World Cup."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = pd.read_csv('data/world_cup_data.csv', index_col=0, low_memory=False)\n",
    "\n",
    "# These columns hold the text of Python literals in the CSV; parse each value\n",
    "# back into the Python object it describes.\n",
    "for literal_column in ('qualifiers', 'satisfied_events_types'):\n",
    "    data[literal_column] = [literal_eval(raw_value) for raw_value in data[literal_column]]"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Viz 1. The player with most passes made during a single match\n",
    "\n",
    "For our first chart we will look at the player that made the most passes during a single match in the World Cup (excluding extra-time).\n",
    "\n",
    "Let's begin by performing some computations on our dataset to find which player fits the criteria and then proceed with our visual."
] },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flag throw-ins and corners so they can be left out of the pass counts.\n",
    "excluded_qualifiers = {'ThrowIn', 'CornerTaken'}\n",
    "\n",
    "data_passes = data.copy()\n",
    "# Each event's qualifiers are a list of dicts; an event is flagged when any of\n",
    "# those dicts has one of the excluded keys. Assigning a plain list matches rows\n",
    "# by position, so the flags stay correct whatever index labels `data` carries.\n",
    "data_passes['is_throw_or_corner'] = [\n",
    "    any(qualifier.keys() & excluded_qualifiers for qualifier in qualifiers)\n",
    "    for qualifiers in data_passes['qualifiers']\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The player with the most passes in a single match was Rodri in Japan - Spain, with 215 passes made.\n"
     ]
    }
   ],
   "source": [
    "# Keep only passes from the two regular halves that are not throw-ins or corners.\n",
    "in_regular_time = data_passes['period'].isin(['FirstHalf', 'SecondHalf'])\n",
    "is_counted_pass = (data_passes['event_type'] == 'Pass') & (~data_passes['is_throw_or_corner'])\n",
    "\n",
    "# One row per player per match, ordered from most to fewest passes.\n",
    "data_most_passes = (\n",
    "    data_passes[in_regular_time & is_counted_pass]\n",
    "    .groupby(['match_id', 'player_name', 'player_id', 'match_string']).size()\n",
    "    .reset_index(name='total_passes')\n",
    "    .sort_values(by='total_passes', ascending=False)\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "# After the final reset_index, label 0 is the top row of the ranking.\n",
    "player_most_passes = data_most_passes.loc[0]\n",
    "print(f'The player with the most passes in a single match was {player_most_passes[\"player_name\"]} in {player_most_passes[\"match_string\"]}, with {player_most_passes[\"total_passes\"]:.0f} passes made.')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | event_id | \n", "minute | \n", "second | \n", "team_id | \n", "x | \n", "y | \n", "expanded_minute | \n", "period | \n", "outcome_type | \n", "qualifiers | \n", "... | \n", "is_goal | \n", "player_name | \n", "event_type | \n", "pass_recipient | \n", "team_name | \n", "match_date | \n", "match_id | \n", "match_string | \n", "is_own_goal | \n", "is_throw_or_corner | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "2.488483e+09 | \n", "0 | \n", "15.0 | \n", "338 | \n", "14.7 | \n", "17.3 | \n", "0 | \n", "FirstHalf | \n", "Successful | \n", "[{'Angle': '1.3'}, {'Length': '30.6'}, {'Zone'... | \n", "... | \n", "NaN | \n", "Rodri | \n", "Pass | \n", "Pau Torres | \n", "Spain | \n", "2022-12-01T19:00:00 | \n", "1632107 | \n", "Japan - Spain | \n", "NaN | \n", "False | \n", "
1 | \n", "2.488483e+09 | \n", "0 | \n", "30.0 | \n", "338 | \n", "43.7 | \n", "38.8 | \n", "0 | \n", "FirstHalf | \n", "Successful | \n", "[{'StandingSave': True}, {'Length': '18.5'}, {... | \n", "... | \n", "NaN | \n", "Rodri | \n", "Pass | \n", "Pau Torres | \n", "Spain | \n", "2022-12-01T19:00:00 | \n", "1632107 | \n", "Japan - Spain | \n", "NaN | \n", "False | \n", "
2 | \n", "2.488483e+09 | \n", "0 | \n", "38.0 | \n", "338 | \n", "44.0 | \n", "42.7 | \n", "0 | \n", "FirstHalf | \n", "Successful | \n", "[{'Length': '19.1'}, {'PassEndY': '14.8'}, {'P... | \n", "... | \n", "NaN | \n", "Rodri | \n", "Pass | \n", "César Azpilicueta | \n", "Spain | \n", "2022-12-01T19:00:00 | \n", "1632107 | \n", "Japan - Spain | \n", "NaN | \n", "False | \n", "
3 | \n", "2.488483e+09 | \n", "0 | \n", "47.0 | \n", "338 | \n", "34.9 | \n", "44.2 | \n", "0 | \n", "FirstHalf | \n", "Successful | \n", "[{'PassEndX': '26.5'}, {'Zone': 'Back'}, {'Sta... | \n", "... | \n", "NaN | \n", "Rodri | \n", "Pass | \n", "Pau Torres | \n", "Spain | \n", "2022-12-01T19:00:00 | \n", "1632107 | \n", "Japan - Spain | \n", "NaN | \n", "False | \n", "
4 | \n", "2.488483e+09 | \n", "0 | \n", "57.0 | \n", "338 | \n", "38.7 | \n", "28.3 | \n", "0 | \n", "FirstHalf | \n", "Successful | \n", "[{'Zone': 'Back'}, {'StandingSave': True}, {'A... | \n", "... | \n", "NaN | \n", "Rodri | \n", "Pass | \n", "Pau Torres | \n", "Spain | \n", "2022-12-01T19:00:00 | \n", "1632107 | \n", "Japan - Spain | \n", "NaN | \n", "False | \n", "
5 rows × 33 columns
\n", "\n", " | match_id | \n", "team_name | \n", "team_id | \n", "match_string | \n", "total_recoveries | \n", "
---|---|---|---|---|---|
127 | \n", "1632119 | \n", "Switzerland | \n", "423 | \n", "Serbia - Switzerland | \n", "2 | \n", "
123 | \n", "1638014 | \n", "Wales | \n", "421 | \n", "Wales - England | \n", "3 | \n", "
126 | \n", "1697162 | \n", "Netherlands | \n", "335 | \n", "Netherlands - USA | \n", "3 | \n", "
125 | \n", "1632105 | \n", "Japan | \n", "986 | \n", "Germany - Japan | \n", "3 | \n", "
124 | \n", "1697399 | \n", "South Korea | \n", "1159 | \n", "Brazil - South Korea | \n", "3 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
4 | \n", "1632102 | \n", "Denmark | \n", "425 | \n", "Denmark - Tunisia | \n", "21 | \n", "
3 | \n", "1632101 | \n", "Mexico | \n", "972 | \n", "Saudi Arabia - Mexico | \n", "23 | \n", "
2 | \n", "1638013 | \n", "Wales | \n", "421 | \n", "Wales - Iran | \n", "23 | \n", "
1 | \n", "1632105 | \n", "Germany | \n", "336 | \n", "Germany - Japan | \n", "23 | \n", "
0 | \n", "1697726 | \n", "England | \n", "345 | \n", "England - France | \n", "24 | \n", "
128 rows × 5 columns
\n", "\n", " | event_id | \n", "minute | \n", "second | \n", "team_id | \n", "x | \n", "y | \n", "expanded_minute | \n", "period | \n", "outcome_type | \n", "qualifiers | \n", "... | \n", "is_goal | \n", "player_name | \n", "event_type | \n", "pass_recipient | \n", "team_name | \n", "match_date | \n", "match_id | \n", "match_string | \n", "is_own_goal | \n", "won_possession | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1338 | \n", "2.490214e+09 | \n", "2 | \n", "45.0 | \n", "345 | \n", "68.4 | \n", "39.8 | \n", "2 | \n", "FirstHalf | \n", "Successful | \n", "[] | \n", "... | \n", "NaN | \n", "Bukayo Saka | \n", "BallRecovery | \n", "NaN | \n", "England | \n", "2022-12-10T19:00:00 | \n", "1697726 | \n", "England - France | \n", "NaN | \n", "True | \n", "
1339 | \n", "2.490214e+09 | \n", "3 | \n", "12.0 | \n", "345 | \n", "76.2 | \n", "14.7 | \n", "3 | \n", "FirstHalf | \n", "Successful | \n", "[] | \n", "... | \n", "NaN | \n", "Jordan Henderson | \n", "BallRecovery | \n", "NaN | \n", "England | \n", "2022-12-10T19:00:00 | \n", "1697726 | \n", "England - France | \n", "NaN | \n", "True | \n", "
1340 | \n", "2.490215e+09 | \n", "4 | \n", "23.0 | \n", "345 | \n", "71.1 | \n", "94.7 | \n", "4 | \n", "FirstHalf | \n", "Successful | \n", "[{'Foul': True}, {'Offensive': True}, {'Opposi... | \n", "... | \n", "NaN | \n", "Phil Foden | \n", "Foul | \n", "NaN | \n", "England | \n", "2022-12-10T19:00:00 | \n", "1697726 | \n", "England - France | \n", "NaN | \n", "True | \n", "
1342 | \n", "2.490218e+09 | \n", "18 | \n", "42.0 | \n", "345 | \n", "76.9 | \n", "33.0 | \n", "18 | \n", "FirstHalf | \n", "Successful | \n", "[{'Zone': 'Center'}, {'OppositeRelatedEvent': ... | \n", "... | \n", "NaN | \n", "Bukayo Saka | \n", "Foul | \n", "NaN | \n", "England | \n", "2022-12-10T19:00:00 | \n", "1697726 | \n", "England - France | \n", "NaN | \n", "True | \n", "
1343 | \n", "2.490218e+09 | \n", "21 | \n", "33.0 | \n", "345 | \n", "96.7 | \n", "56.8 | \n", "21 | \n", "FirstHalf | \n", "Successful | \n", "[{'Zone': 'Center'}, {'OppositeRelatedEvent': ... | \n", "... | \n", "NaN | \n", "Jude Bellingham | \n", "Aerial | \n", "NaN | \n", "England | \n", "2022-12-10T19:00:00 | \n", "1697726 | \n", "England - France | \n", "NaN | \n", "True | \n", "
5 rows × 33 columns
\n", "\n", " | match_id | \n", "match_string | \n", "
---|---|---|
20586 | \n", "1632113 | \n", "Canada - Morocco | \n", "
47738 | \n", "1632108 | \n", "Belgium - Canada | \n", "
71171 | \n", "1632111 | \n", "Croatia - Canada | \n", "