{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Gwent Pro Rank analysis\n", "\n", "First we'll need to get the data from the playgwent website. Here requests and BeautifulSoup are used to get the data and parse the html the comes back. Nothing to difficult here. There is a little clean up in the code and there are two lines of code to check if a player was in the top 500 last season. In case the data already exists it is loaded from disk." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████| 149/149 [03:16<00:00, 1.32s/it]\n" ] } ], "source": [ "from tqdm import tqdm\n", "import requests\n", "from bs4 import BeautifulSoup\n", "import os\n", "import pandas as pd\n", "import numpy as np\n", "\n", "# Note that seasons should be in order (for the top 500 spots to be picked up correctly in the next step)\n", "seasons = [\n", " ('M2_01 Wolf 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-wolf/1/{page}', './output/season_of_the_wolf_2020.xlsx'),\n", " ('M2_02 Love 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-love/1/{page}', './output/season_of_love_2020.xlsx'),\n", " ('M2_03 Bear 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-bear/1/{page}', './output/season_of_the_bear_2020.xlsx'),\n", " ('M2_04 Elf 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-elf/1/{page}', './output/season_of_the_elf_2020.xlsx'),\n", " ('M2_05 Viper 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-viper/1/{page}', './output/season_of_the_viper_2020.xlsx'),\n", " ('M2_06 Magic 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-magic/1/{page}', './output/season_of_magic_2020.xlsx'),\n", " ('M2_07 Griffin 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-griffin/1/{page}', './output/season_of_the_griffin_2020.xlsx'),\n", " ('M2_08 Draconid 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-draconid/1/{page}', './output/season_of_the_draconid_2020.xlsx'),\n", " ('M2_09 Dryad 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-dryad/1/{page}', './output/season_of_the_dryad_2020.xlsx'),\n", " ('M2_10 Cat 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-cat/1/{page}', './output/season_of_the_cat_2020.xlsx'),\n", " ('M2_11 Mahakam 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-mahakam/1/{page}', './output/season_of_the_mahakam_2020.xlsx'),\n", " ('M2_12 Wild Hunt 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-wild-hunt/1/{page}', './output/season_of_the_wild_hunt_2020.xlsx'),\n", " ('M3_01 Wolf 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-wolf/1/{page}', './output/season_of_the_wolf_2021.xlsx'),\n", " ('M3_02 Love 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-love/1/{page}', './output/season_of_love_2021.xlsx'),\n", " ('M3_03 Bear 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-bear/1/{page}', './output/season_of_the_bear_2021.xlsx'),\n", " ('M3_04 Elf 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-elf/1/{page}', './output/season_of_the_elf_2021.xlsx'),\n", " ('M3_05 Viper 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-viper/1/{page}', './output/season_of_the_viper_2021.xlsx'),\n", " ('M3_06 Magic 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-magic/1/{page}', './output/season_of_the_magic_2021.xlsx'),\n", " ('M3_07 Griffin 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-griffin/1/{page}', './output/season_of_the_griffin_2021.xlsx'),\n", " ('M3_08 Draconid 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-draconid/1/{page}', './output/season_of_the_draconid_2021.xlsx'),\n", " ('M3_09 Dryad 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-dryad/1/{page}', './output/season_of_the_dryad_2021.xlsx'),\n", " ('M3_10 Cat 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-cat/1/{page}', './output/season_of_the_cat_2021.xlsx'),\n", " ('M3_11 Mahakam 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-mahakam/1/{page}', './output/season_of_the_mahakam_2021.xlsx'),\n", " ('M3_12 Wild Hunt 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-wild-hunt/1/{page}', './output/season_of_the_wild_hunt_2021.xlsx'),\n", " ('M4_01 Wolf 2022', 'https://masters.playgwent.com/en/rankings/masters-4/season-of-the-wolf/1/{page}', './output/season_of_the_wolf_2022.xlsx'),\n", " ('M4_02 Love 2022', 'https://masters.playgwent.com/en/rankings/masters-4/season-of-the-love/1/{page}', './output/season_of_love_2022.xlsx'),\n", "\n", "]\n", "\n", "os.makedirs('./output', exist_ok=True)\n", "\n", "dataframes = []\n", "top500 = []\n", "\n", "\n", "for season, url_template, output_path in seasons:\n", " if os.path.exists(output_path):\n", " df = pd.read_excel(output_path).drop(['Unnamed: 0'], axis=1)\n", " else:\n", " output = []\n", "\n", " for i in tqdm(range(1, 150)):\n", " url = url_template.replace('{page}', str(i))\n", " try:\n", " r = requests.get(url)\n", " soup = BeautifulSoup(r.text, 'html.parser')\n", "\n", " rows = soup.findAll(\"div\", {\"class\": \"c-ranking-table__tr\"})\n", " for row in rows:\n", " flag = row.find(\"i\", {\"class\": \"flag-icon\"})[\"class\"][1]\n", " new_record = {\n", " 'rank': int(row.find(\"div\", {\"class\": \"td-number\"}).text.strip()),\n", " 'name': row.find(\"div\", {\"class\": \"td-nick\"}).text.strip(),\n", " 'country': flag.replace('flag-icon-', '').upper(),\n", " 'matches': int(row.find(\"div\", {\"class\": \"td-matches\"}).text.strip().replace(' matches', '')),\n", " 'mmr': int(row.find(\"div\", {\"class\": \"td-mmr\"}).text.strip().replace(',', '')),\n", " 'season': season\n", " }\n", " output.append(new_record)\n", " except:\n", " pass\n", "\n", "\n", " df = pd.DataFrame(output).drop_duplicates()\n", " df.to_excel(output_path)\n", " \n", " # We'll add a column here if the player was in the top 500 last season\n", " df['previous_top500'] = ['yes' if name in top500 else 'no' for name in df['name']] \n", " top500 = list(df[df['rank'] <= 500]['name'])\n", " \n", " dataframes.append(df)\n", " \n", "# Concatenate dataframes and drop any records with missing values\n", "full_df = pd.concat(dataframes).dropna()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Human readable countries\n", "\n", "We have the 2 letter code for the country of each participant, let's use the pycountry library to convert that to the full name." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ranknamecountrymatchesmmrseasonprevious_top500
01kolemoenGermany43110484M2_01 Wolf 2020no
12kams134Poland92310477M2_01 Wolf 2020no
23TailBotPoland53810472M2_01 Wolf 2020no
34PajabolPoland82010471M2_01 Wolf 2020no
45AdzikovPoland110510442M2_01 Wolf 2020no
\n", "
" ], "text/plain": [ " rank name country matches mmr season previous_top500\n", "0 1 kolemoen Germany 431 10484 M2_01 Wolf 2020 no\n", "1 2 kams134 Poland 923 10477 M2_01 Wolf 2020 no\n", "2 3 TailBot Poland 538 10472 M2_01 Wolf 2020 no\n", "3 4 Pajabol Poland 820 10471 M2_01 Wolf 2020 no\n", "4 5 Adzikov Poland 1105 10442 M2_01 Wolf 2020 no" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pycountry\n", "\n", "def convert_to_full(two_letter_code):\n", " try:\n", " out = pycountry.countries.get(alpha_2=two_letter_code).name\n", " except:\n", " print(f'could not convert {two_letter_code}')\n", " out = two_letter_code\n", " return out\n", "\n", "full_df['country'] = full_df['country'].apply(lambda x: convert_to_full(x))\n", "full_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# National rank\n", "\n", "Apart from the global rank, we'll add a participant's national rank as well." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "full_df['national_rank'] = full_df.groupby(['country','season'])[\"mmr\"].rank(\"first\", ascending=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Ladder Efficiency\n", "\n", "While getting to a higher MMR is the main goal of ladder. There is an argument to be made that the number of games a participant plays to reach a certain point should not be overlooked. Someone that played fewer games to get to a certain MMR climbed the ladder in a more efficient way. There are two metrics that can be used, we take the MMR of a player, substract the minimal MMR, which is 9600, and divide that by the number of matches played. Though, as the higher up you are in ladder, the more fierce the competition becomes, to take this into account Lerio2 from Team Legacy proposed to divide by the square-root of the number of matches instead.\n", "\n", "One additional consideration is that players that were in the top 500 the previous season didn't have to climb back up to pro rank. Both metrics actually give them a major advantage. So when comparing efficiency between players the field previous_top500 could be used to allow a fair comparison." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "full_df['efficiency'] = ((full_df['mmr']-9600))/full_df['matches']\n", "full_df['lei'] = ((full_df['mmr']-9600))/np.sqrt(full_df['matches'])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ranknamecountrymatchesmmrseasonprevious_top500national_rankefficiencylei
01kolemoenGermany43110484M2_01 Wolf 2020no1.02.05104442.580782
12kams134Poland92310477M2_01 Wolf 2020no1.00.95016328.866807
23TailBotPoland53810472M2_01 Wolf 2020no2.01.62081837.594590
34PajabolPoland82010471M2_01 Wolf 2020no3.01.06219530.416639
45AdzikovPoland110510442M2_01 Wolf 2020no4.00.76199125.329753
\n", "
" ], "text/plain": [ " rank name country matches mmr season previous_top500 \\\n", "0 1 kolemoen Germany 431 10484 M2_01 Wolf 2020 no \n", "1 2 kams134 Poland 923 10477 M2_01 Wolf 2020 no \n", "2 3 TailBot Poland 538 10472 M2_01 Wolf 2020 no \n", "3 4 Pajabol Poland 820 10471 M2_01 Wolf 2020 no \n", "4 5 Adzikov Poland 1105 10442 M2_01 Wolf 2020 no \n", "\n", " national_rank efficiency lei \n", "0 1.0 2.051044 42.580782 \n", "1 1.0 0.950163 28.866807 \n", "2 2.0 1.620818 37.594590 \n", "3 3.0 1.062195 30.416639 \n", "4 4.0 0.761991 25.329753 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "full_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Statistics per Season" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
seasonmin_mmrmax_mmrnum_matchestop500_cutofftop200_cutofftop64_cutoff
0M2_01 Wolf 20202407104846994969749987210061
1M2_02 Love 20207776105377691729832995210117
2M2_03 Bear 20209427106698622839867999510204
3M2_04 Elf 2020966610751100460399521008710293
4M2_05 Viper 202096351062285964099101002810255
5M2_06 Magic 202096241059779301398961000210191
6M2_07 Griffin 202096981066799651699781010010289
7M2_08 Draconid 202096661054683754599461006110246
8M2_09 Dryad 202096781072585459399461004610183
9M2_10 Cat 202097031080492884599771006710176
10M2_11 Mahakam 2020970610783983150100001009010216
11M2_12 Wild Hunt 20209756107241182353100701017210313
12M3_01 Wolf 202196371065380865199161004410295
13M3_02 Love 202196841071491702799751009710325
14M3_03 Bear 202196371057676650299141002610230
15M3_04 Elf 202196861067894432399921010210323
16M3_05 Viper 202197011075395648499981010610300
17M3_06 Magic 202196811063286926299741008210278
18M3_07 Griffin 202196691063385610399581006710287
19M3_08 Draconid 202196811076791127399541007910281
20M3_09 Dryad 202196881080994065599681007810250
21M3_10 Cat 20219614103667196969879997410083
22M3_11 Mahakam 20219725105801017256100121012210252
23M3_12 Wild Hunt 20219735107141044941100321014010271
24M4_01 Wolf 202296461068488388199411009310340
25M4_02 Love 202296381065285286799321006810312
\n", "
" ], "text/plain": [ " season min_mmr max_mmr num_matches top500_cutoff \\\n", "0 M2_01 Wolf 2020 2407 10484 699496 9749 \n", "1 M2_02 Love 2020 7776 10537 769172 9832 \n", "2 M2_03 Bear 2020 9427 10669 862283 9867 \n", "3 M2_04 Elf 2020 9666 10751 1004603 9952 \n", "4 M2_05 Viper 2020 9635 10622 859640 9910 \n", "5 M2_06 Magic 2020 9624 10597 793013 9896 \n", "6 M2_07 Griffin 2020 9698 10667 996516 9978 \n", "7 M2_08 Draconid 2020 9666 10546 837545 9946 \n", "8 M2_09 Dryad 2020 9678 10725 854593 9946 \n", "9 M2_10 Cat 2020 9703 10804 928845 9977 \n", "10 M2_11 Mahakam 2020 9706 10783 983150 10000 \n", "11 M2_12 Wild Hunt 2020 9756 10724 1182353 10070 \n", "12 M3_01 Wolf 2021 9637 10653 808651 9916 \n", "13 M3_02 Love 2021 9684 10714 917027 9975 \n", "14 M3_03 Bear 2021 9637 10576 766502 9914 \n", "15 M3_04 Elf 2021 9686 10678 944323 9992 \n", "16 M3_05 Viper 2021 9701 10753 956484 9998 \n", "17 M3_06 Magic 2021 9681 10632 869262 9974 \n", "18 M3_07 Griffin 2021 9669 10633 856103 9958 \n", "19 M3_08 Draconid 2021 9681 10767 911273 9954 \n", "20 M3_09 Dryad 2021 9688 10809 940655 9968 \n", "21 M3_10 Cat 2021 9614 10366 719696 9879 \n", "22 M3_11 Mahakam 2021 9725 10580 1017256 10012 \n", "23 M3_12 Wild Hunt 2021 9735 10714 1044941 10032 \n", "24 M4_01 Wolf 2022 9646 10684 883881 9941 \n", "25 M4_02 Love 2022 9638 10652 852867 9932 \n", "\n", " top200_cutoff top64_cutoff \n", "0 9872 10061 \n", "1 9952 10117 \n", "2 9995 10204 \n", "3 10087 10293 \n", "4 10028 10255 \n", "5 10002 10191 \n", "6 10100 10289 \n", "7 10061 10246 \n", "8 10046 10183 \n", "9 10067 10176 \n", "10 10090 10216 \n", "11 10172 10313 \n", "12 10044 10295 \n", "13 10097 10325 \n", "14 10026 10230 \n", "15 10102 10323 \n", "16 10106 10300 \n", "17 10082 10278 \n", "18 10067 10287 \n", "19 10079 10281 \n", "20 10078 10250 \n", "21 9974 10083 \n", "22 10122 10252 \n", "23 10140 10271 \n", "24 10093 10340 \n", "25 10068 10312 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "per_season_df = full_df.groupby(['season']).agg(\n", " min_mmr = pd.NamedAgg('mmr', 'min'),\n", " max_mmr = pd.NamedAgg('mmr', 'max'),\n", " num_matches = pd.NamedAgg('matches', 'sum')\n", ").reset_index()\n", "\n", "top500_cutoffs = full_df[full_df['rank'] == 500][['season', 'mmr']].rename(columns={'mmr': 'top500_cutoff'})\n", "top200_cutoffs = full_df[full_df['rank'] == 200][['season', 'mmr']].rename(columns={'mmr': 'top200_cutoff'})\n", "top64_cutoffs = full_df[full_df['rank'] == 64][['season', 'mmr']].rename(columns={'mmr': 'top64_cutoff'})\n", "\n", "per_season_df = pd.merge(per_season_df, top500_cutoffs, on='season')\n", "per_season_df = pd.merge(per_season_df, top200_cutoffs, on='season')\n", "per_season_df = pd.merge(per_season_df, top64_cutoffs, on='season')\n", "per_season_df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Player Summaries\n", "\n", "We can summerize player stats over all seasons easily using the code below. We'll store the number of times they were on pro ladder, the worst, best and average MMR scores, ..." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namecountryappearancesmin_mmrmean_mmrmax_mmrmean_matchesnum_matchesbest_rankbest_national_rank
0$Lancer$China2971097109792545.5010912011272.0
1$rBuRn$Spain2976797679831225.00450117629.0
2$r_BuRn$Spain4970197019848123.7549596825.0
3++++Gwynbleidd++++Poland1980398039803231.002311482122.0
4+Nightshadow+Ukraine1967796779677365.003652261187.0
\n", "
" ], "text/plain": [ " name country appearances min_mmr mean_mmr max_mmr \\\n", "0 $Lancer$ China 2 9710 9710 9792 \n", "1 $rBuRn$ Spain 2 9767 9767 9831 \n", "2 $r_BuRn$ Spain 4 9701 9701 9848 \n", "3 ++++Gwynbleidd++++ Poland 1 9803 9803 9803 \n", "4 +Nightshadow+ Ukraine 1 9677 9677 9677 \n", "\n", " mean_matches num_matches best_rank best_national_rank \n", "0 545.50 1091 2011 272.0 \n", "1 225.00 450 1176 29.0 \n", "2 123.75 495 968 25.0 \n", "3 231.00 231 1482 122.0 \n", "4 365.00 365 2261 187.0 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "player_summaries = full_df.groupby(['name', 'country']).agg(\n", " appearances = pd.NamedAgg('matches', 'count'),\n", " min_mmr = pd.NamedAgg('mmr', 'min'),\n", " mean_mmr = pd.NamedAgg('mmr', 'min'),\n", " max_mmr = pd.NamedAgg('mmr', 'max'),\n", " mean_matches = pd.NamedAgg('matches', 'mean'),\n", " num_matches = pd.NamedAgg('matches', 'sum'),\n", " best_rank = pd.NamedAgg('rank', 'min'),\n", " best_national_rank = pd.NamedAgg('national_rank', 'min'),\n", ").reset_index()\n", "\n", "player_summaries.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namecountryappearancesmin_mmrmean_mmrmax_mmrmean_matchesnum_matchesbest_rankbest_national_rank
21691seproBelgium7972497249903259.14285718146802.0
\n", "
" ], "text/plain": [ " name country appearances min_mmr mean_mmr max_mmr mean_matches \\\n", "21691 sepro Belgium 7 9724 9724 9903 259.142857 \n", "\n", " num_matches best_rank best_national_rank \n", "21691 1814 680 2.0 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "player_summaries[player_summaries['name'] == 'sepro']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Get population for each country\n", "\n", "To calculate the number of pro players per capita we'll need the number of inhabitants per country. On Kaggle a dataset was available for 2019. As this doesn't differ much from one year to another (and those will only result into changes several digits behind the decimal point it) the 2019 list is enough for our purpose. The code below loads the data, and generates country names from the two character ISO code using the same package as used before to avoid mismatches.\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "could not convert nan\n" ] } ], "source": [ "# data from kaggle https://www.kaggle.com/erikbruin/countries-of-the-world-iso-codes-and-population/data#\n", "# the number are indicated very strangely in the csv with a decimal point and need to be multiplied with 1000\n", "\n", "population_df = pd.read_csv('./data/countries_by_population_2019.csv')[['name', 'pop2019']]\n", "country_codes = pd.read_csv('./data/country_codes_2020.csv')[['name', 'cca2']]\n", "population_df = pd.merge(population_df, country_codes, how='left', left_on='name', right_on='name')\n", "\n", "population_df['country'] = population_df['cca2'].apply(lambda x: convert_to_full(x))\n", "population_df['pop2019'] = (population_df['pop2019'] * 1000).astype(int)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
seasoncountrytotal_matchesnum_playerspop2019pro_players_per_millionmatches_per_player
0M2_01 Wolf 2020Algeria250143053054.00.023227250.000000
1M2_01 Wolf 2020Argentina1480444780677.00.089324370.000000
2M2_01 Wolf 2020Armenia54612957731.00.338097546.000000
3M2_01 Wolf 2020Australia61932725203198.01.071293229.370370
4M2_01 Wolf 2020Austria3981168955102.01.786691248.812500
........................
2313M4_02 Love 2022United Kingdom206567167530172.01.051382290.929577
2314M4_02 Love 2022United States47946161329064917.00.489265297.801242
2315M4_02 Love 2022Uzbekistan1116532981716.00.151599223.200000
2316M4_02 Love 2022Viet Nam22771096462106.00.103668227.700000
2317M4_02 Love 2022Zimbabwe199114645468.00.068281199.000000
\n", "

2318 rows × 7 columns

\n", "
" ], "text/plain": [ " season country total_matches num_players \\\n", "0 M2_01 Wolf 2020 Algeria 250 1 \n", "1 M2_01 Wolf 2020 Argentina 1480 4 \n", "2 M2_01 Wolf 2020 Armenia 546 1 \n", "3 M2_01 Wolf 2020 Australia 6193 27 \n", "4 M2_01 Wolf 2020 Austria 3981 16 \n", "... ... ... ... ... \n", "2313 M4_02 Love 2022 United Kingdom 20656 71 \n", "2314 M4_02 Love 2022 United States 47946 161 \n", "2315 M4_02 Love 2022 Uzbekistan 1116 5 \n", "2316 M4_02 Love 2022 Viet Nam 2277 10 \n", "2317 M4_02 Love 2022 Zimbabwe 199 1 \n", "\n", " pop2019 pro_players_per_million matches_per_player \n", "0 43053054.0 0.023227 250.000000 \n", "1 44780677.0 0.089324 370.000000 \n", "2 2957731.0 0.338097 546.000000 \n", "3 25203198.0 1.071293 229.370370 \n", "4 8955102.0 1.786691 248.812500 \n", "... ... ... ... \n", "2313 67530172.0 1.051382 290.929577 \n", "2314 329064917.0 0.489265 297.801242 \n", "2315 32981716.0 0.151599 223.200000 \n", "2316 96462106.0 0.103668 227.700000 \n", "2317 14645468.0 0.068281 199.000000 \n", "\n", "[2318 rows x 7 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Lets do some counts per season and country\n", "national_stats = full_df.groupby(['season','country']).agg(\n", " total_matches = pd.NamedAgg('matches', 'sum'),\n", " num_players = pd.NamedAgg('name', 'count')\n", ").reset_index()\n", "\n", "# Add the population (from 2019) to each country and calculate the number of pro players (top 2860) per million inhabitants\n", "national_stats = pd.merge(national_stats, population_df.drop(['name', 'cca2'], axis=1), on='country', how='left')\n", "national_stats['pro_players_per_million'] = (national_stats['num_players']*1000000)/national_stats['pop2019']\n", "national_stats['matches_per_player'] = national_stats['total_matches']/national_stats['num_players']\n", "\n", "national_stats" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Adding popularity metrics\n", "\n", "\n", "We can now calculate the number of pro players per million inhabitants, this could be a proxy for Gwent's popularity in \n", "different parts of the world" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
seasoncountrytotal_matchesnum_playerspro_players_per_millionmatches_per_player
657M2_08 Draconid 2020Poland722252677.047129270.505618
617M2_08 Draconid 2020Estonia172675.280436246.571429
660M2_08 Draconid 2020Russian Federation1955946724.606770291.062500
599M2_08 Draconid 2020Belarus10261394.125931263.102564
677M2_08 Draconid 2020Ukraine523331623.682351323.043210
626M2_08 Draconid 2020Iceland30712.949583307.000000
618M2_08 Draconid 2020Finland4053142.530659289.500000
624M2_08 Draconid 2020Hong Kong5154182.420606286.333333
644M2_08 Draconid 2020Malta22912.270807229.000000
610M2_08 Draconid 2020Croatia224792.179016249.666667
\n", "
" ], "text/plain": [ " season country total_matches num_players \\\n", "657 M2_08 Draconid 2020 Poland 72225 267 \n", "617 M2_08 Draconid 2020 Estonia 1726 7 \n", "660 M2_08 Draconid 2020 Russian Federation 195594 672 \n", "599 M2_08 Draconid 2020 Belarus 10261 39 \n", "677 M2_08 Draconid 2020 Ukraine 52333 162 \n", "626 M2_08 Draconid 2020 Iceland 307 1 \n", "618 M2_08 Draconid 2020 Finland 4053 14 \n", "624 M2_08 Draconid 2020 Hong Kong 5154 18 \n", "644 M2_08 Draconid 2020 Malta 229 1 \n", "610 M2_08 Draconid 2020 Croatia 2247 9 \n", "\n", " pro_players_per_million matches_per_player \n", "657 7.047129 270.505618 \n", "617 5.280436 246.571429 \n", "660 4.606770 291.062500 \n", "599 4.125931 263.102564 \n", "677 3.682351 323.043210 \n", "626 2.949583 307.000000 \n", "618 2.530659 289.500000 \n", "624 2.420606 286.333333 \n", "644 2.270807 229.000000 \n", "610 2.179016 249.666667 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top_popularity = national_stats[national_stats['season'] == 'M2_08 Draconid 2020'].sort_values('pro_players_per_million', ascending=False).drop(columns=['pop2019'])\n", "top_popularity.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "An alternative way is to find places where the die-hard players are located, those that play the most games per season on average." ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
seasoncountrytotal_matchesnum_playerspro_players_per_millionmatches_per_player
611M2_08 Draconid 2020Cyprus121721.668648608.500000
608M2_08 Draconid 2020Congo, The Democratic Republic of the57010.011522570.000000
593M2_08 Draconid 2020Armenia100220.676194501.000000
631M2_08 Draconid 2020Ireland298861.228880498.000000
596M2_08 Draconid 2020Azerbaijan144730.298575482.333333
654M2_08 Draconid 2020Panama45310.235491453.000000
669M2_08 Draconid 2020Switzerland258060.698376430.000000
603M2_08 Draconid 2020Bulgaria5516131.857111424.307692
662M2_08 Draconid 2020Serbia4074101.139960407.400000
656M2_08 Draconid 2020Philippines5347140.129490381.928571
\n", "
" ], "text/plain": [ " season country \\\n", "611 M2_08 Draconid 2020 Cyprus \n", "608 M2_08 Draconid 2020 Congo, The Democratic Republic of the \n", "593 M2_08 Draconid 2020 Armenia \n", "631 M2_08 Draconid 2020 Ireland \n", "596 M2_08 Draconid 2020 Azerbaijan \n", "654 M2_08 Draconid 2020 Panama \n", "669 M2_08 Draconid 2020 Switzerland \n", "603 M2_08 Draconid 2020 Bulgaria \n", "662 M2_08 Draconid 2020 Serbia \n", "656 M2_08 Draconid 2020 Philippines \n", "\n", " total_matches num_players pro_players_per_million matches_per_player \n", "611 1217 2 1.668648 608.500000 \n", "608 570 1 0.011522 570.000000 \n", "593 1002 2 0.676194 501.000000 \n", "631 2988 6 1.228880 498.000000 \n", "596 1447 3 0.298575 482.333333 \n", "654 453 1 0.235491 453.000000 \n", "669 2580 6 0.698376 430.000000 \n", "603 5516 13 1.857111 424.307692 \n", "662 4074 10 1.139960 407.400000 \n", "656 5347 14 0.129490 381.928571 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top_popularity = national_stats[national_stats['season'] == 'M2_08 Draconid 2020'].sort_values('matches_per_player', ascending=False).drop(columns=['pop2019'])\n", "top_popularity.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Creating a ranking of nations\n", "\n", "We'll select countries with three or more playing in the pro rank, select the top 3 players and calculate the average MMR and total MMR for the top\n", "3 players of the country.\n", "\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
seasoncountryranknamematchesmmrprevious_top500national_rankefficiencylei
0M2_01 Wolf 2020Argentina174Srchenko3939887no1.00.73028014.477235
1M2_01 Wolf 2020Argentina471eltroco3399758no2.00.4660778.581382
2M2_01 Wolf 2020Argentina846MorgannFreemann2089660no3.00.2884624.160251
4M2_01 Wolf 2020Australia135whitecool4049914no1.00.77722815.622084
5M2_01 Wolf 2020Australia211Pacifier37504539863no2.00.58057412.356818
.................................
73410M4_02 Love 2022Uzbekistan1045Frag.1569781no2.01.16025614.491598
73411M4_02 Love 2022Uzbekistan1119SMEAGOOL1809770no3.00.94444412.671052
73414M4_02 Love 2022Viet Nam655HaoThien3259868no1.00.82461514.865965
73415M4_02 Love 2022Viet Nam940captainstef4019800no2.00.4987539.987523
73416M4_02 Love 2022Viet Nam1381__cAnh2929734no3.00.4589047.841757
\n", "

4956 rows × 10 columns

\n", "
" ], "text/plain": [ " season country rank name matches mmr \\\n", "0 M2_01 Wolf 2020 Argentina 174 Srchenko 393 9887 \n", "1 M2_01 Wolf 2020 Argentina 471 eltroco 339 9758 \n", "2 M2_01 Wolf 2020 Argentina 846 MorgannFreemann 208 9660 \n", "4 M2_01 Wolf 2020 Australia 135 whitecool 404 9914 \n", "5 M2_01 Wolf 2020 Australia 211 Pacifier3750 453 9863 \n", "... ... ... ... ... ... ... \n", "73410 M4_02 Love 2022 Uzbekistan 1045 Frag. 156 9781 \n", "73411 M4_02 Love 2022 Uzbekistan 1119 SMEAGOOL 180 9770 \n", "73414 M4_02 Love 2022 Viet Nam 655 HaoThien 325 9868 \n", "73415 M4_02 Love 2022 Viet Nam 940 captainstef 401 9800 \n", "73416 M4_02 Love 2022 Viet Nam 1381 __cAnh 292 9734 \n", "\n", " previous_top500 national_rank efficiency lei \n", "0 no 1.0 0.730280 14.477235 \n", "1 no 2.0 0.466077 8.581382 \n", "2 no 3.0 0.288462 4.160251 \n", "4 no 1.0 0.777228 15.622084 \n", "5 no 2.0 0.580574 12.356818 \n", "... ... ... ... ... \n", "73410 no 2.0 1.160256 14.491598 \n", "73411 no 3.0 0.944444 12.671052 \n", "73414 no 1.0 0.824615 14.865965 \n", "73415 no 2.0 0.498753 9.987523 \n", "73416 no 3.0 0.458904 7.841757 \n", "\n", "[4956 rows x 10 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "num_player_included = 3 # Here we define the number of players in Pro Ladder to be considered\n", "\n", "players_per_country = full_df.groupby(['season','country']).agg(\n", " num_players = pd.NamedAgg('name', 'count')\n", ").reset_index()\n", "\n", "players_per_country = players_per_country[players_per_country['num_players'] >= num_player_included]\n", "\n", "top_per_country = pd.merge(players_per_country[['season', 'country']], full_df, how='inner', on=['season', 'country'])\n", "top_per_country = top_per_country[top_per_country['national_rank'] <= num_player_included]\n", "top_per_country" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
seasoncountrymean_mmrtotal_mmrmean_matches_per_playertotal_matchesnation_rankefficiencylei
0M2_01 Wolf 2020Argentina9768.33333329305313.33333394034.00.5372349.509700
1M2_01 Wolf 2020Australia9869.66666729609336.333333100922.00.80178414.704231
2M2_01 Wolf 2020Austria9905.66666729717392.333333117719.00.77909915.431938
3M2_01 Wolf 2020Belarus10064.33333330193390.000000117011.01.19059823.512432
4M2_01 Wolf 2020Belgium9901.00000029703415.666667124721.00.72413814.763655
..............................
1647M4_02 Love 2022Ukraine10458.00000031374748.66666722463.01.14603731.357616
1648M4_02 Love 2022United Kingdom10223.66666730671692.000000207612.00.90125223.708249
1649M4_02 Love 2022United States10201.33333330604562.666667168814.01.06872025.350684
1650M4_02 Love 2022Uzbekistan9810.33333329431220.33333366153.00.95461414.169940
1651M4_02 Love 2022Viet Nam9800.66666729402339.333333101856.00.59135610.893363
\n", "

1652 rows × 9 columns

\n", "
" ], "text/plain": [ " season country mean_mmr total_mmr \\\n", "0 M2_01 Wolf 2020 Argentina 9768.333333 29305 \n", "1 M2_01 Wolf 2020 Australia 9869.666667 29609 \n", "2 M2_01 Wolf 2020 Austria 9905.666667 29717 \n", "3 M2_01 Wolf 2020 Belarus 10064.333333 30193 \n", "4 M2_01 Wolf 2020 Belgium 9901.000000 29703 \n", "... ... ... ... ... \n", "1647 M4_02 Love 2022 Ukraine 10458.000000 31374 \n", "1648 M4_02 Love 2022 United Kingdom 10223.666667 30671 \n", "1649 M4_02 Love 2022 United States 10201.333333 30604 \n", "1650 M4_02 Love 2022 Uzbekistan 9810.333333 29431 \n", "1651 M4_02 Love 2022 Viet Nam 9800.666667 29402 \n", "\n", " mean_matches_per_player total_matches nation_rank efficiency \\\n", "0 313.333333 940 34.0 0.537234 \n", "1 336.333333 1009 22.0 0.801784 \n", "2 392.333333 1177 19.0 0.779099 \n", "3 390.000000 1170 11.0 1.190598 \n", "4 415.666667 1247 21.0 0.724138 \n", "... ... ... ... ... \n", "1647 748.666667 2246 3.0 1.146037 \n", "1648 692.000000 2076 12.0 0.901252 \n", "1649 562.666667 1688 14.0 1.068720 \n", "1650 220.333333 661 53.0 0.954614 \n", "1651 339.333333 1018 56.0 0.591356 \n", "\n", " lei \n", "0 9.509700 \n", "1 14.704231 \n", "2 15.431938 \n", "3 23.512432 \n", "4 14.763655 \n", "... ... \n", "1647 31.357616 \n", "1648 23.708249 \n", "1649 25.350684 \n", "1650 14.169940 \n", "1651 10.893363 \n", "\n", "[1652 rows x 9 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top_stats = top_per_country.groupby(['season','country']).agg(\n", " mean_mmr = pd.NamedAgg('mmr', 'mean'),\n", " total_mmr = pd.NamedAgg('mmr', 'sum'),\n", " mean_matches_per_player = pd.NamedAgg('matches', 'mean'),\n", " total_matches = pd.NamedAgg('matches', 'sum')\n", ").reset_index()\n", "\n", "# Here we add the same ranking and effeciency statistics for national teams as for individual players\n", "top_stats['nation_rank'] = top_stats.groupby(['season'])[\"mean_mmr\"].rank(\"first\", ascending=False)\n", "top_stats['efficiency'] = ((top_stats['mean_mmr']-9600))/top_stats['mean_matches_per_player']\n", "top_stats['lei'] = ((top_stats['mean_mmr']-9600))/np.sqrt(top_stats['mean_matches_per_player'])\n", "\n", "top_stats" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
seasoncountrymean_mmrtotal_mmrmean_matches_per_playertotal_matchesnation_rankefficiencylei
432M2_08 Draconid 2020China10489.33333331468409.00000012271.02.17440943.974703
466M2_08 Draconid 2020Russian Federation10479.66666731439636.00000019082.01.38312434.881052
463M2_08 Draconid 2020Poland10439.33333331318657.00000019713.01.27752432.745512
440M2_08 Draconid 2020Germany10399.66666731199440.00000013204.01.81742438.122613
450M2_08 Draconid 2020Japan10345.00000031035708.66666721265.01.05127027.985642
..............................
446M2_08 Draconid 2020Iran, Islamic Republic of9787.00000029361264.00000079260.00.70833311.509054
431M2_08 Draconid 2020Chile9782.66666729348310.00000093061.00.58924710.374770
469M2_08 Draconid 2020Singapore9772.66666729318325.00000097562.00.5312829.577823
483M2_08 Draconid 2020Venezuela, Bolivarian Republic of9766.00000029298271.33333381463.00.61179410.077586
471M2_08 Draconid 2020Slovenia9743.66666729231273.33333382064.00.5256108.689799
\n", "

64 rows × 9 columns

\n", "
" ], "text/plain": [ " season country mean_mmr \\\n", "432 M2_08 Draconid 2020 China 10489.333333 \n", "466 M2_08 Draconid 2020 Russian Federation 10479.666667 \n", "463 M2_08 Draconid 2020 Poland 10439.333333 \n", "440 M2_08 Draconid 2020 Germany 10399.666667 \n", "450 M2_08 Draconid 2020 Japan 10345.000000 \n", ".. ... ... ... \n", "446 M2_08 Draconid 2020 Iran, Islamic Republic of 9787.000000 \n", "431 M2_08 Draconid 2020 Chile 9782.666667 \n", "469 M2_08 Draconid 2020 Singapore 9772.666667 \n", "483 M2_08 Draconid 2020 Venezuela, Bolivarian Republic of 9766.000000 \n", "471 M2_08 Draconid 2020 Slovenia 9743.666667 \n", "\n", " total_mmr mean_matches_per_player total_matches nation_rank \\\n", "432 31468 409.000000 1227 1.0 \n", "466 31439 636.000000 1908 2.0 \n", "463 31318 657.000000 1971 3.0 \n", "440 31199 440.000000 1320 4.0 \n", "450 31035 708.666667 2126 5.0 \n", ".. ... ... ... ... \n", "446 29361 264.000000 792 60.0 \n", "431 29348 310.000000 930 61.0 \n", "469 29318 325.000000 975 62.0 \n", "483 29298 271.333333 814 63.0 \n", "471 29231 273.333333 820 64.0 \n", "\n", " efficiency lei \n", "432 2.174409 43.974703 \n", "466 1.383124 34.881052 \n", "463 1.277524 32.745512 \n", "440 1.817424 38.122613 \n", "450 1.051270 27.985642 \n", ".. ... ... \n", "446 0.708333 11.509054 \n", "431 0.589247 10.374770 \n", "469 0.531282 9.577823 \n", "483 0.611794 10.077586 \n", "471 0.525610 8.689799 \n", "\n", "[64 rows x 9 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top_stats[top_stats['season'] == 'M2_08 Draconid 2020'].sort_values('nation_rank')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Export everything\n", "\n", "Lets exports all data to Excel files so they can be further analysed without pandas/python. " ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "full_df.to_excel('./output/player_stats.xlsx')\n", "national_stats.to_excel('./output/national_stats.xlsx')\n", "per_season_df.to_excel('./output/seasonal_stats.xlsx')\n", "player_summaries.to_excel('./output/player_summaries.xlsx')" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
seasonprevious_top500count
2M2_02 Love 2020yes304
4M2_03 Bear 2020yes313
6M2_04 Elf 2020yes294
8M2_05 Viper 2020yes317
10M2_06 Magic 2020yes314
12M2_07 Griffin 2020yes277
14M2_08 Draconid 2020yes299
16M2_09 Dryad 2020yes264
18M2_10 Cat 2020yes277
20M2_11 Mahakam 2020yes266
22M2_12 Wild Hunt 2020yes242
24M3_01 Wolf 2021yes267
26M3_02 Love 2021yes286
28M3_03 Bear 2021yes286
30M3_04 Elf 2021yes266
32M3_05 Viper 2021yes284
34M3_06 Magic 2021yes270
36M3_07 Griffin 2021yes257
38M3_08 Draconid 2021yes230
40M3_09 Dryad 2021yes258
42M3_10 Cat 2021yes210
44M3_11 Mahakam 2021yes233
46M3_12 Wild Hunt 2021yes249
48M4_01 Wolf 2022yes299
50M4_02 Love 2022yes275
\n", "
" ], "text/plain": [ " season previous_top500 count\n", "2 M2_02 Love 2020 yes 304\n", "4 M2_03 Bear 2020 yes 313\n", "6 M2_04 Elf 2020 yes 294\n", "8 M2_05 Viper 2020 yes 317\n", "10 M2_06 Magic 2020 yes 314\n", "12 M2_07 Griffin 2020 yes 277\n", "14 M2_08 Draconid 2020 yes 299\n", "16 M2_09 Dryad 2020 yes 264\n", "18 M2_10 Cat 2020 yes 277\n", "20 M2_11 Mahakam 2020 yes 266\n", "22 M2_12 Wild Hunt 2020 yes 242\n", "24 M3_01 Wolf 2021 yes 267\n", "26 M3_02 Love 2021 yes 286\n", "28 M3_03 Bear 2021 yes 286\n", "30 M3_04 Elf 2021 yes 266\n", "32 M3_05 Viper 2021 yes 284\n", "34 M3_06 Magic 2021 yes 270\n", "36 M3_07 Griffin 2021 yes 257\n", "38 M3_08 Draconid 2021 yes 230\n", "40 M3_09 Dryad 2021 yes 258\n", "42 M3_10 Cat 2021 yes 210\n", "44 M3_11 Mahakam 2021 yes 233\n", "46 M3_12 Wild Hunt 2021 yes 249\n", "48 M4_01 Wolf 2022 yes 299\n", "50 M4_02 Love 2022 yes 275" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bracket = 500\n", "\n", "returning = full_df[full_df['rank'] <= bracket].groupby(['season', 'previous_top500']).agg(\n", " count = pd.NamedAgg('name', 'count')\n", ").reset_index()\n", "returning = returning[returning.previous_top500 == 'yes']\n", "returning" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "54.695999999999984" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "returning['%'] = returning['count'] * 100/bracket\n", "np.mean(returning['%'])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }