{ "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3-final" }, "orig_nbformat": 2, "kernelspec": { "name": "python38364bit88793c7347b34f2aaf15ee9ecb7c5f7a", "display_name": "Python 3.8.3 64-bit" } }, "nbformat": 4, "nbformat_minor": 2, "cells": [
{ "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [
 "import pandas as pd\n",
 "import numpy as np\n",
 "from tqdm.notebook import tqdm\n",
 "import matplotlib.pyplot as plt\n",
 "\n",
 "%matplotlib inline" ] },
{ "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [
 "from xgboost import XGBClassifier\n",
 "from sklearn.metrics import brier_score_loss, roc_auc_score\n",
 "from sklearn.model_selection import train_test_split, GridSearchCV\n",
 "from sklearn.calibration import CalibratedClassifierCV\n",
 "from scikitplot.metrics import plot_calibration_curve" ] },
{ "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [
 "import warnings\n",
 "\n",
 "# Silence pandas' PerformanceWarning. The class is referenced through the\n",
 "# public pandas.errors namespace rather than the pd.io.pytables re-export.\n",
 "warnings.filterwarnings('ignore', category=pd.errors.PerformanceWarning)" ] },
{ "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [
 "import os\n",
 "\n",
 "# Directory holding the HDF5 inputs, relative to the working directory.\n",
 "# The trailing '' component makes the result end with a path separator,\n",
 "# so `data_dir + 'file.h5'` style concatenation keeps working.\n",
 "data_dir = os.path.join(os.getcwd(), 'data', 'wy_scout', '')" ] },
{ "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": " game_id competition_id season_id game_date home_team_id \\\n54 2057984 28 10078 2018-06-17 15:00:00 3148 \n55 2057979 28 10078 2018-06-17 12:00:00 16871 \n56 2057973 28 10078 2018-06-16 19:00:00 9598 \n57 2057967 28 10078 2018-06-16 16:00:00 15594 \n58 2057972 28 10078 2018-06-16 13:00:00 12274 \n59 2057966 28 10078 2018-06-16 10:00:00 4418 \n60 2057960 28 10078 2018-06-15 18:00:00 9905 \n61 2057961 28 10078 2018-06-15 15:00:00 16216 \n62 
2057955 28 10078 2018-06-15 12:00:00 16129 \n63 2057954 28 10078 2018-06-14 15:00:00 14358 \n\n away_team_id \n54 15473 \n55 17322 \n56 16823 \n57 7712 \n58 7839 \n59 8493 \n60 1598 \n61 10840 \n62 15670 \n63 16521 ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
game_idcompetition_idseason_idgame_datehome_team_idaway_team_id
54205798428100782018-06-17 15:00:00314815473
55205797928100782018-06-17 12:00:001687117322
56205797328100782018-06-16 19:00:00959816823
57205796728100782018-06-16 16:00:00155947712
58205797228100782018-06-16 13:00:00122747839
59205796628100782018-06-16 10:00:0044188493
60205796028100782018-06-15 18:00:0099051598
61205796128100782018-06-15 15:00:001621610840
62205795528100782018-06-15 12:00:001612915670
63205795428100782018-06-14 15:00:001435816521
\n
" }, "metadata": {}, "execution_count": 27 } ], "source": [
 "# Read the table stored under the 'games' key of spadl.h5 and preview its last rows.\n",
 "df_games = pd.read_hdf(path_or_buf=data_dir + 'spadl.h5', key='games')\n",
 "df_games.tail(10)" ] },
{ "cell_type": "code", "execution_count": 28, "metadata": { "tags": [] }, "outputs": [
 { "output_type": "display_data", "data": { "text/plain": "HBox(children=(FloatProgress(value=0.0, max=64.0), HTML(value='')))", "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "eab8cfe83e204c669288ef67d6921e48" } }, "metadata": {} },
 { "output_type": "stream", "name": "stdout", "text": "\n" },
 { "output_type": "execute_result", "data": { "text/plain": " type_pass_a0 type_cross_a0 type_throw_in_a0 \\\n80969 False False False \n80970 True False False \n80971 True False False \n80972 False False False \n80973 True False False \n80974 True False False \n80975 False False False \n80976 False False False \n80977 False False False \n80978 True False False \n\n type_freekick_crossed_a0 type_freekick_short_a0 \\\n80969 False False \n80970 False False \n80971 False False \n80972 False False \n80973 False False \n80974 False False \n80975 False False \n80976 False False \n80977 False False \n80978 False False \n\n type_corner_crossed_a0 type_corner_short_a0 type_take_on_a0 \\\n80969 False False False \n80970 False False False \n80971 False False False \n80972 False False False \n80973 False False False \n80974 False False False \n80975 False False False \n80976 False False False \n80977 False False False \n80978 False False False \n\n type_foul_a0 type_tackle_a0 ... end_angle_to_goal_a0 \\\n80969 False True ... 0.088083 \n80970 False False ... 0.313155 \n80971 False False ... 0.163867 \n80972 False False ... 0.284537 \n80973 False False ... 0.485553 \n80974 False False ... 0.532102 \n80975 False True ... 0.108906 \n80976 True False ... 0.108906 \n80977 False False ... 1.570796 \n80978 False False ... 
1.570796 \n\n end_dist_to_goal_a1 end_angle_to_goal_a1 end_dist_to_goal_a2 \\\n80969 46.801607 0.160512 38.019553 \n80970 46.379806 0.088083 46.801607 \n80971 37.524973 0.313155 46.379806 \n80972 50.020077 0.163867 37.524973 \n80973 33.913609 0.284537 50.020077 \n80974 32.055017 0.485553 33.913609 \n80975 83.021503 0.164555 78.096249 \n80976 81.331839 0.108906 83.021503 \n80977 25.717078 0.350897 25.717078 \n80978 105.055033 0.032370 81.331839 \n\n end_angle_to_goal_a2 team_1 team_2 time_delta_1 time_delta_2 \\\n80969 0.107520 True True 3.724954 5.269374 \n80970 0.160512 True True 1.175848 4.900802 \n80971 0.088083 True True 1.412850 2.588698 \n80972 0.313155 True True 1.416485 2.829335 \n80973 0.163867 True True 1.416484 2.832969 \n80974 0.284537 True True 2.531683 3.948167 \n80975 0.192750 False False 2.483783 5.015466 \n80976 0.164555 True False 1.862786 4.346569 \n80977 0.350897 False False 66.594979 68.457765 \n80978 0.108906 False True 43.943937 110.538916 \n\n game_id \n80969 2057954 \n80970 2057954 \n80971 2057954 \n80972 2057954 \n80973 2057954 \n80974 2057954 \n80975 2057954 \n80976 2057954 \n80977 2057954 \n80978 2057954 \n\n[10 rows x 143 columns]", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
type_pass_a0type_cross_a0type_throw_in_a0type_freekick_crossed_a0type_freekick_short_a0type_corner_crossed_a0type_corner_short_a0type_take_on_a0type_foul_a0type_tackle_a0...end_angle_to_goal_a0end_dist_to_goal_a1end_angle_to_goal_a1end_dist_to_goal_a2end_angle_to_goal_a2team_1team_2time_delta_1time_delta_2game_id
80969FalseFalseFalseFalseFalseFalseFalseFalseFalseTrue...0.08808346.8016070.16051238.0195530.107520TrueTrue3.7249545.2693742057954
80970TrueFalseFalseFalseFalseFalseFalseFalseFalseFalse...0.31315546.3798060.08808346.8016070.160512TrueTrue1.1758484.9008022057954
80971TrueFalseFalseFalseFalseFalseFalseFalseFalseFalse...0.16386737.5249730.31315546.3798060.088083TrueTrue1.4128502.5886982057954
80972FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse...0.28453750.0200770.16386737.5249730.313155TrueTrue1.4164852.8293352057954
80973TrueFalseFalseFalseFalseFalseFalseFalseFalseFalse...0.48555333.9136090.28453750.0200770.163867TrueTrue1.4164842.8329692057954
80974TrueFalseFalseFalseFalseFalseFalseFalseFalseFalse...0.53210232.0550170.48555333.9136090.284537TrueTrue2.5316833.9481672057954
80975FalseFalseFalseFalseFalseFalseFalseFalseFalseTrue...0.10890683.0215030.16455578.0962490.192750FalseFalse2.4837835.0154662057954
80976FalseFalseFalseFalseFalseFalseFalseFalseTrueFalse...0.10890681.3318390.10890683.0215030.164555TrueFalse1.8627864.3465692057954
80977FalseFalseFalseFalseFalseFalseFalseFalseFalseFalse...1.57079625.7170780.35089725.7170780.350897FalseFalse66.59497968.4577652057954
80978TrueFalseFalseFalseFalseFalseFalseFalseFalseFalse...1.570796105.0550330.03237081.3318390.108906FalseTrue43.943937110.5389162057954
\n

10 rows × 143 columns

\n
" }, "metadata": {}, "execution_count": 28 } ], "source": [
 "# Load each game's feature frame from features.h5 (one key per game) and\n",
 "# tag its rows with the game id they belong to.\n",
 "features_path = data_dir + 'features.h5'\n",
 "dfs_features = []\n",
 "for game_id in tqdm(df_games['game_id'], total=len(df_games)):\n",
 "    df_game_features = pd.read_hdf(features_path, key=f'game_{game_id}')\n",
 "    # assign() returns a new frame, so the loaded frame is not mutated and\n",
 "    # the name df_features is kept for the combined result only.\n",
 "    dfs_features.append(df_game_features.assign(game_id=game_id))\n",
 "\n",
 "# Stack all games into a single frame with a fresh 0..n-1 index.\n",
 "df_features = pd.concat(dfs_features).reset_index(drop=True)\n",
 "df_features.tail(10)" ] },
{ "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [
 "# TODO: load the per-game labels into this list, one frame per game in\n",
 "# df_games. The earlier draft of that loop was kept inside a string literal\n",
 "# (so its text was echoed as the cell output) and called pd.read_hdf()\n",
 "# without a path or key; it needs the labels file and key before it can run.\n",
 "dfs_labels = []" ] } ] }