{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "import os\n", "import warnings\n", "import tqdm\n", "import pandas as pd\n", "warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)\n", "\n", "import socceraction.vaep.features as fs\n", "import socceraction.vaep.labels as lab" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "## Configure file and folder names\n", "datafolder = \"../data-fifa\"\n", "spadl_h5 = os.path.join(datafolder, \"spadl-statsbomb.h5\")\n", "features_h5 = os.path.join(datafolder, \"features.h5\")\n", "labels_h5 = os.path.join(datafolder, \"labels.h5\")\n", "predictions_h5 = os.path.join(datafolder, \"predictions.h5\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "nb of games: 64\n" ] } ], "source": [ "games = pd.read_hdf(spadl_h5, \"games\")\n", "games = games[games.competition_id == 43]\n", "traingames = games\n", "testgames = games\n", "print(\"nb of games:\", len(games))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": true }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Selecting features: 100%|██████████| 64/64 [00:02<00:00, 31.97it/s]\n", "Selecting label: 100%|██████████| 64/64 [00:00<00:00, 87.65it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "X: ['type_id_a0', 'type_pass_a0', 'type_cross_a0', 'type_throw_in_a0', 'type_freekick_crossed_a0', 'type_freekick_short_a0', 'type_corner_crossed_a0', 'type_corner_short_a0', 'type_take_on_a0', 'type_foul_a0', 'type_tackle_a0', 'type_interception_a0', 'type_shot_a0', 'type_shot_penalty_a0', 'type_shot_freekick_a0', 'type_keeper_save_a0', 'type_keeper_claim_a0', 'type_keeper_punch_a0', 'type_keeper_pick_up_a0', 'type_clearance_a0', 'type_bad_touch_a0', 'type_non_action_a0', 'type_dribble_a0', 'type_goalkick_a0', 'bodypart_foot_a0', 'bodypart_head_a0', 'bodypart_other_a0', 'bodypart_head/other_a0', 'result_id_a0', 'result_fail_a0', 'result_success_a0', 'result_offside_a0', 'result_owngoal_a0', 'result_yellow_card_a0', 'result_red_card_a0', 'goalscore_team', 'goalscore_opponent', 'goalscore_diff', 'start_x_a0', 'start_y_a0', 'end_x_a0', 'end_y_a0', 'dx_a0', 'dy_a0', 'movement_a0', 'start_dist_to_goal_a0', 'start_angle_to_goal_a0', 'end_dist_to_goal_a0', 'end_angle_to_goal_a0']\n", "Y: ['scores', 'concedes']\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "# 1. Select feature set X\n", "xfns = [\n", " fs.actiontype,\n", " fs.actiontype_onehot,\n", " #fs.bodypart,\n", " fs.bodypart_onehot,\n", " fs.result,\n", " fs.result_onehot,\n", " fs.goalscore,\n", " fs.startlocation,\n", " fs.endlocation,\n", " fs.movement,\n", " fs.space_delta,\n", " fs.startpolar,\n", " fs.endpolar,\n", " fs.team,\n", " #fs.time,\n", " fs.time_delta,\n", " #fs.actiontype_result_onehot\n", "]\n", "nb_prev_actions = 1\n", "\n", "Xcols = fs.feature_column_names(xfns, nb_prev_actions)\n", "\n", "def getXY(games,Xcols):\n", " # generate the columns of the selected feature\n", " X = []\n", " for game_id in tqdm.tqdm(games.game_id, desc=\"Selecting features\"):\n", " Xi = pd.read_hdf(features_h5, f\"game_{game_id}\")\n", " X.append(Xi[Xcols])\n", " X = pd.concat(X).reset_index(drop=True)\n", "\n", " # 2. Select label Y\n", " Ycols = [\"scores\",\"concedes\"]\n", " Y = []\n", " for game_id in tqdm.tqdm(games.game_id, desc=\"Selecting label\"):\n", " Yi = pd.read_hdf(labels_h5, f\"game_{game_id}\")\n", " Y.append(Yi[Ycols])\n", " Y = pd.concat(Y).reset_index(drop=True)\n", " return X, Y\n", "\n", "X, Y = getXY(traingames,Xcols)\n", "print(\"X:\", list(X.columns))\n", "print(\"Y:\", list(Y.columns))\n", "X = X.fillna(0)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/cw/dtaijupiter/NoCsBack/dtai/pieterr/Projects/socceraction/.venv/lib/python3.6/site-packages/xgboost/sklearn.py:888: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n", " warnings.warn(label_encoder_deprecation_msg, UserWarning)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] WARNING: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:01] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 8 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 8 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 0 pruned nodes, max_depth=3\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:02] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] WARNING: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/learner.cc:1061: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:03] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 8 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 10 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 8 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:04] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 6 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:05] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 12 extra nodes, 0 pruned nodes, max_depth=3\n", "[16:03:05] INFO: /tmp/pip-build-5lk6kad5/xgboost/build/temp.linux-x86_64-3.6/xgboost/src/tree/updater_prune.cc:101: tree pruning end, 14 extra nodes, 0 pruned nodes, max_depth=3\n", "CPU times: user 28.1 s, sys: 151 ms, total: 28.2 s\n", "Wall time: 4.29 s\n" ] } ], "source": [ "%%time\n", "# 3. train classifiers F(X) = Y\n", "import xgboost\n", "\n", "Y_hat = pd.DataFrame()\n", "models = {}\n", "for col in list(Y.columns):\n", " model = xgboost.XGBClassifier(n_estimators=50, max_depth=3, n_jobs=-3, verbosity=2)\n", " model.fit(X, Y[col])\n", " models[col] = model" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "### Y: scores ###\n", " Brier score: 0.00907 (0.82108)\n", " log loss score: 0.04501 (0.73379)\n", " ROC AUC: 0.85998\n", "### Y: concedes ###\n", " Brier score: 0.00235 (0.80988)\n", " log loss score: 0.01342 (0.67484)\n", " ROC AUC: 0.89972\n" ] } ], "source": [ "from sklearn.metrics import brier_score_loss, roc_auc_score, log_loss\n", "\n", "testX, testY = X, Y\n", "\n", "def evaluate(y, y_hat):\n", " p = sum(y) / len(y)\n", " base = [p] * len(y)\n", " brier = brier_score_loss(y, y_hat)\n", " print(f\" Brier score: %.5f (%.5f)\" % (brier, brier / brier_score_loss(y, base)))\n", " ll = log_loss(y, y_hat)\n", " print(f\" log loss score: %.5f (%.5f)\" % (ll, ll / log_loss(y, base)))\n", " print(f\" ROC AUC: %.5f\" % roc_auc_score(y, y_hat))\n", "\n", "for col in testY.columns:\n", " Y_hat[col] = [p[1] for p in models[col].predict_proba(testX)]\n", " print(f\"### Y: {col} ###\")\n", " evaluate(testY[col], Y_hat[col])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Save predictions" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Loading game ids: 100%|██████████| 64/64 [00:01<00:00, 49.43it/s]\n", "Saving predictions per game: 100%|██████████| 64/64 [00:03<00:00, 16.09it/s]\n" ] } ], "source": [ "# get rows with game id per action\n", "A = []\n", "for game_id in tqdm.tqdm(games.game_id, \"Loading game ids\"):\n", " Ai = pd.read_hdf(spadl_h5, f\"actions/game_{game_id}\")\n", " A.append(Ai[[\"game_id\"]])\n", "A = pd.concat(A)\n", "A = A.reset_index(drop=True)\n", "\n", "# concatenate action game id rows with predictions and save per game\n", "grouped_predictions = pd.concat([A, Y_hat], axis=1).groupby(\"game_id\")\n", "for k,df in tqdm.tqdm(grouped_predictions, desc=\"Saving predictions per game\"):\n", " df = df.reset_index(drop=True)\n", " df[Y_hat.columns].to_hdf(predictions_h5, f\"game_{int(k)}\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "socceraction", "language": "python", "name": "socceraction" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.2" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": true }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }