{ "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The autoreload extension is already loaded. To reload it, use:\n", " %reload_ext autoreload\n" ] } ], "source": [
 "%load_ext autoreload\n",
 "%autoreload 2\n",
 "\n",
 "# Standard library\n",
 "import os\n",
 "import sys\n",
 "import warnings\n",
 "\n",
 "# Put the parent directory on the import path before the remaining imports run\n",
 "sys.path.append('../')\n",
 "\n",
 "# Third-party\n",
 "import pandas as pd\n",
 "import tqdm\n",
 "\n",
 "# Ignore pandas PerformanceWarning from here on\n",
 "warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)\n",
 "\n",
 "import socceraction.vaep as vaep"
 ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [
 "# File and folder locations used by the cells below\n",
 "datafolder = \"../data\"\n",
 "spadl_h5, predictions_h5 = (\n",
 "    os.path.join(datafolder, filename)\n",
 "    for filename in (\"spadl-statsbomb.h5\", \"predictions.h5\")\n",
 ")"
 ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "nb of games: 64\n" ] } ], "source": [
 "# Keep only the games whose competition_name is \"FIFA World Cup\"\n",
 "games = (\n",
 "    pd.read_hdf(spadl_h5, key=\"games\")\n",
 "    .loc[lambda df: df[\"competition_name\"] == \"FIFA World Cup\"]\n",
 ")\n",
 "print(\"nb of games:\", len(games))"
 ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 64/64 [00:04<00:00, 15.32it/s]\n" ] }, { "data": { "text/plain": [ "Index(['game_id', 'period_id', 'time_seconds', 'timestamp', 'team_id',\n", " 'player_id', 'start_x', 'start_y', 'end_x', 'end_y', 'type_id',\n", " 'result_id', 'bodypart_id', 'type_name', 'result_name', 'bodypart_name',\n", " 'player_name', 'player_nickname', 'jersey_number', 'country_id',\n", " 'country_name', 'extra', 'team_name', 'scores', 'concedes',\n", " 'offensive_value', 'defensive_value', 'vaep_value'],\n", " dtype='object')" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "players = pd.read_hdf(spadl_h5,\"players\")\n",
 "teams = pd.read_hdf(spadl_h5,\"teams\")\n",
"actiontypes = pd.read_hdf(spadl_h5, \"actiontypes\")\n", "bodyparts = pd.read_hdf(spadl_h5, \"bodyparts\")\n", "results = pd.read_hdf(spadl_h5, \"results\")\n", "\n", "A = []\n", "for game in tqdm.tqdm(list(games.itertuples())):\n", " actions = pd.read_hdf(spadl_h5,f\"actions/game_{game.game_id}\")\n", " actions = (\n", " actions.merge(actiontypes)\n", " .merge(results)\n", " .merge(bodyparts)\n", " .merge(players,\"left\",on=\"player_id\")\n", " .merge(teams,\"left\",on=\"team_id\")\n", " .sort_values([\"period_id\", \"time_seconds\", \"timestamp\"])\n", " .reset_index(drop=True)\n", " )\n", " preds = pd.read_hdf(predictions_h5,f\"game_{game.game_id}\")\n", " values = vaep.value(actions,preds.scores,preds.concedes)\n", " A.append(pd.concat([actions,preds,values],axis=1))\n", "A = pd.concat(A).sort_values([\"game_id\",\"period_id\", \"time_seconds\", \"timestamp\"]).reset_index(drop=True)\n", "A.columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### (optional) inspect Belgium's top 10 most valuable non-shot actions" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2018-06-28 England 0-1 Belgium 9' dribble Michy Batshuayi Tunga\n" ] }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "2018-06-28 England 0-1 Belgium 8' pass Marouane Fellaini-Bakkioui\n" ] }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "2018-07-02 Belgium 3-2 Japan 77' cross Nacer Chadli\n" ] }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "2018-07-02 Belgium 3-2 Japan 24' cross Dries Mertens\n" ] }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "2018-07-02 Belgium 3-2 Japan 73' cross Eden Hazard\n" ] }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "2018-06-23 Belgium 5-2 Tunisia 80' cross Kevin De Bruyne\n" ] }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "2018-07-10 France 1-0 Belgium 21' corner_crossed Nacer Chadli\n" ] }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "2018-06-23 Belgium 5-2 Tunisia 89' cross Youri Tielemans\n" ] }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "2018-06-23 Belgium 5-2 Tunisia 47' pass Thomas Meunier\n" ] }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "2018-06-23 Belgium 5-2 Tunisia 91' dribble Michy Batshuayi Tunga\n" ] }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [
 "import matplotsoccer\n",
 "\n",
 "# Minute at which each period kicks off: two 45-minute halves, two 15-minute\n",
 "# extra-time periods, then the penalty shoot-out.\n",
 "PERIOD_START_MINUTE = {1: 0, 2: 45, 3: 90, 4: 105, 5: 120}\n",
 "\n",
 "def get_minute(period_id, time_seconds):\n",
 "    \"\"\"Return the match minute for a time counted from the start of a period.\"\"\"\n",
 "    period = int(period_id)\n",
 "    # Period ids outside the table keep the old 45-minutes-per-period estimate.\n",
 "    start = PERIOD_START_MINUTE.get(period, (period - 1) * 45)\n",
 "    return int(start + time_seconds // 60)\n",
 "\n",
 "def get_time(period_id, time_seconds):\n",
 "    \"\"\"Format a period-relative time as '<minute>m<second>s'.\"\"\"\n",
 "    m = get_minute(period_id, time_seconds)\n",
 "    s = time_seconds % 60\n",
 "    if s == int(s):\n",
 "        s = int(s)  # print whole seconds without a trailing '.0'\n",
 "    return f\"{m}m{s}s\"\n",
 "\n",
 "sorted_A = A.sort_values(\"vaep_value\", ascending=False)\n",
 "sorted_A = sorted_A[sorted_A.team_name == \"Belgium\"] # view only actions from Belgium\n",
 "sorted_A = sorted_A[~sorted_A.type_name.str.contains(\"shot\")] # eliminate shots\n",
 "\n",
 "# head(10) also copes with fewer than ten matching actions.\n",
 "for row in sorted_A.head(10).itertuples():\n",
 "    i = row.Index  # A was built with reset_index(drop=True): label == position\n",
 "    # Context window: up to three actions before and one after. The start is\n",
 "    # clamped because a negative slice bound would wrap around, and rows from\n",
 "    # a neighbouring game are dropped.\n",
 "    a = A.iloc[max(i - 3, 0) : i + 2]\n",
 "    a = a[a.game_id == row.game_id].copy()\n",
 "\n",
 "    # Prefer the nickname; a missing (None/NaN) or empty one falls back to the\n",
 "    # full name. NaN is truthy, so a plain truthiness test is not enough.\n",
 "    has_nickname = a.player_nickname.notna() & (a.player_nickname != \"\")\n",
 "    a[\"player\"] = a.player_nickname.where(has_nickname, a.player_name)\n",
 "\n",
 "    # Look the match up by the highlighted action's own game, not by the first\n",
 "    # context row.\n",
 "    g = next(iter(games[games.game_id == row.game_id].itertuples()))\n",
 "    game_info = f\"{g.match_date} {g.home_team_name} {g.home_score}-{g.away_score} {g.away_team_name}\"\n",
 "    minute = get_minute(row.period_id, row.time_seconds)\n",
 "    print(f\"{game_info} {minute}' {row.type_name} {row.player_name}\")\n",
 "\n",
 "    # Pre-format the numeric columns for the label table drawn with the plot.\n",
 "    a[\"scores\"] = a.scores.map(\"{:.3f}\".format)\n",
 "    a[\"vaep_value\"] = a.vaep_value.map(\"{:.3f}\".format)\n",
 "    a[\"time\"] = [get_time(p, t) for p, t in zip(a.period_id, a.time_seconds)]\n",
 "    cols = [\"time\", \"type_name\", \"player\", \"team_name\", \"scores\", \"vaep_value\"]\n",
 "    matplotsoccer.actions(\n",
 "        a[[\"start_x\", \"start_y\", \"end_x\", \"end_y\"]],\n",
 "        a.type_name,\n",
 "        team=a.team_name,\n",
 "        result=a.result_name == \"success\",\n",
 "        label=a[cols],\n",
 "        labeltitle=cols,\n",
 "        zoom=False,\n",
 "    )"
 ] } ], "metadata": { "kernelspec": { "display_name": "soccer_rating_pressing-3.6.2", "language": "python", "name": "soccer_rating_pressing-3.6.2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "version": "3.6.2" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": true } }, "nbformat": 4, "nbformat_minor": 2 }