{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Gwent Pro Rank analysis\n", "\n", "First we'll need to get the data from the playgwent website. Here requests and BeautifulSoup are used to get the data and parse the html the comes back. Nothing to difficult here. There is a little clean up in the code and there are two lines of code to check if a player was in the top 500 last season. In case the data already exists it is loaded from disk." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|████████████████████████████████████████████████████████████████████████████████| 149/149 [03:16<00:00, 1.32s/it]\n" ] } ], "source": [ "from tqdm import tqdm\n", "import requests\n", "from bs4 import BeautifulSoup\n", "import os\n", "import pandas as pd\n", "import numpy as np\n", "\n", "# Note that seasons should be in order (for the top 500 spots to be picked up correctly in the next step)\n", "seasons = [\n", " ('M2_01 Wolf 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-wolf/1/{page}', './output/season_of_the_wolf_2020.xlsx'),\n", " ('M2_02 Love 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-love/1/{page}', './output/season_of_love_2020.xlsx'),\n", " ('M2_03 Bear 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-bear/1/{page}', './output/season_of_the_bear_2020.xlsx'),\n", " ('M2_04 Elf 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-elf/1/{page}', './output/season_of_the_elf_2020.xlsx'),\n", " ('M2_05 Viper 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-viper/1/{page}', './output/season_of_the_viper_2020.xlsx'),\n", " ('M2_06 Magic 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-magic/1/{page}', './output/season_of_magic_2020.xlsx'),\n", " ('M2_07 Griffin 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-griffin/1/{page}', './output/season_of_the_griffin_2020.xlsx'),\n", " ('M2_08 Draconid 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-draconid/1/{page}', './output/season_of_the_draconid_2020.xlsx'),\n", " ('M2_09 Dryad 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-dryad/1/{page}', './output/season_of_the_dryad_2020.xlsx'),\n", " ('M2_10 Cat 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-cat/1/{page}', './output/season_of_the_cat_2020.xlsx'),\n", " ('M2_11 Mahakam 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-mahakam/1/{page}', './output/season_of_the_mahakam_2020.xlsx'),\n", " ('M2_12 Wild Hunt 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-wild-hunt/1/{page}', './output/season_of_the_wild_hunt_2020.xlsx'),\n", " ('M3_01 Wolf 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-wolf/1/{page}', './output/season_of_the_wolf_2021.xlsx'),\n", " ('M3_02 Love 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-love/1/{page}', './output/season_of_love_2021.xlsx'),\n", " ('M3_03 Bear 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-bear/1/{page}', './output/season_of_the_bear_2021.xlsx'),\n", " ('M3_04 Elf 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-elf/1/{page}', './output/season_of_the_elf_2021.xlsx'),\n", " ('M3_05 Viper 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-viper/1/{page}', './output/season_of_the_viper_2021.xlsx'),\n", " ('M3_06 Magic 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-magic/1/{page}', './output/season_of_the_magic_2021.xlsx'),\n", " ('M3_07 Griffin 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-griffin/1/{page}', './output/season_of_the_griffin_2021.xlsx'),\n", " ('M3_08 Draconid 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-draconid/1/{page}', './output/season_of_the_draconid_2021.xlsx'),\n", " ('M3_09 Dryad 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-dryad/1/{page}', './output/season_of_the_dryad_2021.xlsx'),\n", " ('M3_10 Cat 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-cat/1/{page}', './output/season_of_the_cat_2021.xlsx'),\n", " ('M3_11 Mahakam 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-mahakam/1/{page}', './output/season_of_the_mahakam_2021.xlsx'),\n", " ('M3_12 Wild Hunt 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-wild-hunt/1/{page}', './output/season_of_the_wild_hunt_2021.xlsx'),\n", " ('M4_01 Wolf 2022', 'https://masters.playgwent.com/en/rankings/masters-4/season-of-the-wolf/1/{page}', './output/season_of_the_wolf_2022.xlsx'),\n", " ('M4_02 Love 2022', 'https://masters.playgwent.com/en/rankings/masters-4/season-of-the-love/1/{page}', './output/season_of_love_2022.xlsx'),\n", "\n", "]\n", "\n", "os.makedirs('./output', exist_ok=True)\n", "\n", "dataframes = []\n", "top500 = []\n", "\n", "\n", "for season, url_template, output_path in seasons:\n", " if os.path.exists(output_path):\n", " df = pd.read_excel(output_path).drop(['Unnamed: 0'], axis=1)\n", " else:\n", " output = []\n", "\n", " for i in tqdm(range(1, 150)):\n", " url = url_template.replace('{page}', str(i))\n", " try:\n", " r = requests.get(url)\n", " soup = BeautifulSoup(r.text, 'html.parser')\n", "\n", " rows = soup.findAll(\"div\", {\"class\": \"c-ranking-table__tr\"})\n", " for row in rows:\n", " flag = row.find(\"i\", {\"class\": \"flag-icon\"})[\"class\"][1]\n", " new_record = {\n", " 'rank': int(row.find(\"div\", {\"class\": \"td-number\"}).text.strip()),\n", " 'name': row.find(\"div\", {\"class\": \"td-nick\"}).text.strip(),\n", " 'country': flag.replace('flag-icon-', '').upper(),\n", " 'matches': int(row.find(\"div\", {\"class\": \"td-matches\"}).text.strip().replace(' matches', '')),\n", " 'mmr': int(row.find(\"div\", {\"class\": \"td-mmr\"}).text.strip().replace(',', '')),\n", " 'season': season\n", " }\n", " output.append(new_record)\n", " except:\n", " pass\n", "\n", "\n", " df = pd.DataFrame(output).drop_duplicates()\n", " df.to_excel(output_path)\n", " \n", " # We'll add a column here if the player was in the top 500 last season\n", " df['previous_top500'] = ['yes' if name in top500 else 'no' for name in df['name']] \n", " top500 = list(df[df['rank'] <= 500]['name'])\n", " \n", " dataframes.append(df)\n", " \n", "# Concatenate dataframes and drop any records with missing values\n", "full_df = pd.concat(dataframes).dropna()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Human readable countries\n", "\n", "We have the 2 letter code for the country of each participant, let's use the pycountry library to convert that to the full name." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>rank</th>\n", " <th>name</th>\n", " <th>country</th>\n", " <th>matches</th>\n", " <th>mmr</th>\n", " <th>season</th>\n", " <th>previous_top500</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>kolemoen</td>\n", " <td>Germany</td>\n", " <td>431</td>\n", " <td>10484</td>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>no</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2</td>\n", " <td>kams134</td>\n", " <td>Poland</td>\n", " <td>923</td>\n", " <td>10477</td>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>no</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>3</td>\n", " <td>TailBot</td>\n", " <td>Poland</td>\n", " <td>538</td>\n", " <td>10472</td>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>no</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>4</td>\n", " <td>Pajabol</td>\n", " <td>Poland</td>\n", " <td>820</td>\n", " <td>10471</td>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>no</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>5</td>\n", " <td>Adzikov</td>\n", " <td>Poland</td>\n", " <td>1105</td>\n", " <td>10442</td>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>no</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " rank name country matches mmr season previous_top500\n", "0 1 kolemoen Germany 431 10484 M2_01 Wolf 2020 no\n", "1 2 kams134 Poland 923 10477 M2_01 Wolf 2020 no\n", "2 3 TailBot Poland 538 10472 M2_01 Wolf 2020 no\n", "3 4 Pajabol Poland 820 10471 M2_01 Wolf 2020 no\n", "4 5 Adzikov Poland 1105 10442 M2_01 Wolf 2020 no" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pycountry\n", "\n", "def convert_to_full(two_letter_code):\n", " try:\n", " out = pycountry.countries.get(alpha_2=two_letter_code).name\n", " except:\n", " print(f'could not convert {two_letter_code}')\n", " out = two_letter_code\n", " return out\n", "\n", "full_df['country'] = full_df['country'].apply(lambda x: convert_to_full(x))\n", "full_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# National rank\n", "\n", "Apart from the global rank, we'll add a participant's national rank as well." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "full_df['national_rank'] = full_df.groupby(['country','season'])[\"mmr\"].rank(\"first\", ascending=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Ladder Efficiency\n", "\n", "While getting to a higher MMR is the main goal of ladder. There is an argument to be made that the number of games a participant plays to reach a certain point should not be overlooked. Someone that played fewer games to get to a certain MMR climbed the ladder in a more efficient way. There are two metrics that can be used, we take the MMR of a player, substract the minimal MMR, which is 9600, and divide that by the number of matches played. Though, as the higher up you are in ladder, the more fierce the competition becomes, to take this into account Lerio2 from Team Legacy proposed to divide by the square-root of the number of matches instead.\n", "\n", "One additional consideration is that players that were in the top 500 the previous season didn't have to climb back up to pro rank. Both metrics actually give them a major advantage. So when comparing efficiency between players the field previous_top500 could be used to allow a fair comparison." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "full_df['efficiency'] = ((full_df['mmr']-9600))/full_df['matches']\n", "full_df['lei'] = ((full_df['mmr']-9600))/np.sqrt(full_df['matches'])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>rank</th>\n", " <th>name</th>\n", " <th>country</th>\n", " <th>matches</th>\n", " <th>mmr</th>\n", " <th>season</th>\n", " <th>previous_top500</th>\n", " <th>national_rank</th>\n", " <th>efficiency</th>\n", " <th>lei</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>kolemoen</td>\n", " <td>Germany</td>\n", " <td>431</td>\n", " <td>10484</td>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>no</td>\n", " <td>1.0</td>\n", " <td>2.051044</td>\n", " <td>42.580782</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2</td>\n", " <td>kams134</td>\n", " <td>Poland</td>\n", " <td>923</td>\n", " <td>10477</td>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>no</td>\n", " <td>1.0</td>\n", " <td>0.950163</td>\n", " <td>28.866807</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>3</td>\n", " <td>TailBot</td>\n", " <td>Poland</td>\n", " <td>538</td>\n", " <td>10472</td>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>no</td>\n", " <td>2.0</td>\n", " <td>1.620818</td>\n", " <td>37.594590</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>4</td>\n", " <td>Pajabol</td>\n", " <td>Poland</td>\n", " <td>820</td>\n", " <td>10471</td>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>no</td>\n", " <td>3.0</td>\n", " <td>1.062195</td>\n", " <td>30.416639</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>5</td>\n", " <td>Adzikov</td>\n", " <td>Poland</td>\n", " <td>1105</td>\n", " <td>10442</td>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>no</td>\n", " <td>4.0</td>\n", " <td>0.761991</td>\n", " <td>25.329753</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " rank name country matches mmr season previous_top500 \\\n", "0 1 kolemoen Germany 431 10484 M2_01 Wolf 2020 no \n", "1 2 kams134 Poland 923 10477 M2_01 Wolf 2020 no \n", "2 3 TailBot Poland 538 10472 M2_01 Wolf 2020 no \n", "3 4 Pajabol Poland 820 10471 M2_01 Wolf 2020 no \n", "4 5 Adzikov Poland 1105 10442 M2_01 Wolf 2020 no \n", "\n", " national_rank efficiency lei \n", "0 1.0 2.051044 42.580782 \n", "1 1.0 0.950163 28.866807 \n", "2 2.0 1.620818 37.594590 \n", "3 3.0 1.062195 30.416639 \n", "4 4.0 0.761991 25.329753 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "full_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Statistics per Season" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>season</th>\n", " <th>min_mmr</th>\n", " <th>max_mmr</th>\n", " <th>num_matches</th>\n", " <th>top500_cutoff</th>\n", " <th>top200_cutoff</th>\n", " <th>top64_cutoff</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>2407</td>\n", " <td>10484</td>\n", " <td>699496</td>\n", " <td>9749</td>\n", " <td>9872</td>\n", " <td>10061</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>M2_02 Love 2020</td>\n", " <td>7776</td>\n", " <td>10537</td>\n", " <td>769172</td>\n", " <td>9832</td>\n", " <td>9952</td>\n", " <td>10117</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>M2_03 Bear 2020</td>\n", " <td>9427</td>\n", " <td>10669</td>\n", " <td>862283</td>\n", " <td>9867</td>\n", " <td>9995</td>\n", " <td>10204</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>M2_04 Elf 2020</td>\n", " <td>9666</td>\n", " <td>10751</td>\n", " <td>1004603</td>\n", " <td>9952</td>\n", " <td>10087</td>\n", " <td>10293</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>M2_05 Viper 2020</td>\n", " <td>9635</td>\n", " <td>10622</td>\n", " <td>859640</td>\n", " <td>9910</td>\n", " <td>10028</td>\n", " <td>10255</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>M2_06 Magic 2020</td>\n", " <td>9624</td>\n", " <td>10597</td>\n", " <td>793013</td>\n", " <td>9896</td>\n", " <td>10002</td>\n", " <td>10191</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>M2_07 Griffin 2020</td>\n", " <td>9698</td>\n", " <td>10667</td>\n", " <td>996516</td>\n", " <td>9978</td>\n", " <td>10100</td>\n", " <td>10289</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>9666</td>\n", " <td>10546</td>\n", " <td>837545</td>\n", " <td>9946</td>\n", " <td>10061</td>\n", " <td>10246</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>M2_09 Dryad 2020</td>\n", " <td>9678</td>\n", " <td>10725</td>\n", " <td>854593</td>\n", " <td>9946</td>\n", " <td>10046</td>\n", " <td>10183</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>M2_10 Cat 2020</td>\n", " <td>9703</td>\n", " <td>10804</td>\n", " <td>928845</td>\n", " <td>9977</td>\n", " <td>10067</td>\n", " <td>10176</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>M2_11 Mahakam 2020</td>\n", " <td>9706</td>\n", " <td>10783</td>\n", " <td>983150</td>\n", " <td>10000</td>\n", " <td>10090</td>\n", " <td>10216</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>M2_12 Wild Hunt 2020</td>\n", " <td>9756</td>\n", " <td>10724</td>\n", " <td>1182353</td>\n", " <td>10070</td>\n", " <td>10172</td>\n", " <td>10313</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>M3_01 Wolf 2021</td>\n", " <td>9637</td>\n", " <td>10653</td>\n", " <td>808651</td>\n", " <td>9916</td>\n", " <td>10044</td>\n", " <td>10295</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>M3_02 Love 2021</td>\n", " <td>9684</td>\n", " <td>10714</td>\n", " <td>917027</td>\n", " <td>9975</td>\n", " <td>10097</td>\n", " <td>10325</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>M3_03 Bear 2021</td>\n", " <td>9637</td>\n", " <td>10576</td>\n", " <td>766502</td>\n", " <td>9914</td>\n", " <td>10026</td>\n", " <td>10230</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>M3_04 Elf 2021</td>\n", " <td>9686</td>\n", " <td>10678</td>\n", " <td>944323</td>\n", " <td>9992</td>\n", " <td>10102</td>\n", " <td>10323</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>M3_05 Viper 2021</td>\n", " <td>9701</td>\n", " <td>10753</td>\n", " <td>956484</td>\n", " <td>9998</td>\n", " <td>10106</td>\n", " <td>10300</td>\n", " </tr>\n", " <tr>\n", " <th>17</th>\n", " <td>M3_06 Magic 2021</td>\n", " <td>9681</td>\n", " <td>10632</td>\n", " <td>869262</td>\n", " <td>9974</td>\n", " <td>10082</td>\n", " <td>10278</td>\n", " </tr>\n", " <tr>\n", " <th>18</th>\n", " <td>M3_07 Griffin 2021</td>\n", " <td>9669</td>\n", " <td>10633</td>\n", " <td>856103</td>\n", " <td>9958</td>\n", " <td>10067</td>\n", " <td>10287</td>\n", " </tr>\n", " <tr>\n", " <th>19</th>\n", " <td>M3_08 Draconid 2021</td>\n", " <td>9681</td>\n", " <td>10767</td>\n", " <td>911273</td>\n", " <td>9954</td>\n", " <td>10079</td>\n", " <td>10281</td>\n", " </tr>\n", " <tr>\n", " <th>20</th>\n", " <td>M3_09 Dryad 2021</td>\n", " <td>9688</td>\n", " <td>10809</td>\n", " <td>940655</td>\n", " <td>9968</td>\n", " <td>10078</td>\n", " <td>10250</td>\n", " </tr>\n", " <tr>\n", " <th>21</th>\n", " <td>M3_10 Cat 2021</td>\n", " <td>9614</td>\n", " <td>10366</td>\n", " <td>719696</td>\n", " <td>9879</td>\n", " <td>9974</td>\n", " <td>10083</td>\n", " </tr>\n", " <tr>\n", " <th>22</th>\n", " <td>M3_11 Mahakam 2021</td>\n", " <td>9725</td>\n", " <td>10580</td>\n", " <td>1017256</td>\n", " <td>10012</td>\n", " <td>10122</td>\n", " <td>10252</td>\n", " </tr>\n", " <tr>\n", " <th>23</th>\n", " <td>M3_12 Wild Hunt 2021</td>\n", " <td>9735</td>\n", " <td>10714</td>\n", " <td>1044941</td>\n", " <td>10032</td>\n", " <td>10140</td>\n", " <td>10271</td>\n", " </tr>\n", " <tr>\n", " <th>24</th>\n", " <td>M4_01 Wolf 2022</td>\n", " <td>9646</td>\n", " <td>10684</td>\n", " <td>883881</td>\n", " <td>9941</td>\n", " <td>10093</td>\n", " <td>10340</td>\n", " </tr>\n", " <tr>\n", " <th>25</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>9638</td>\n", " <td>10652</td>\n", " <td>852867</td>\n", " <td>9932</td>\n", " <td>10068</td>\n", " <td>10312</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " season min_mmr max_mmr num_matches top500_cutoff \\\n", "0 M2_01 Wolf 2020 2407 10484 699496 9749 \n", "1 M2_02 Love 2020 7776 10537 769172 9832 \n", "2 M2_03 Bear 2020 9427 10669 862283 9867 \n", "3 M2_04 Elf 2020 9666 10751 1004603 9952 \n", "4 M2_05 Viper 2020 9635 10622 859640 9910 \n", "5 M2_06 Magic 2020 9624 10597 793013 9896 \n", "6 M2_07 Griffin 2020 9698 10667 996516 9978 \n", "7 M2_08 Draconid 2020 9666 10546 837545 9946 \n", "8 M2_09 Dryad 2020 9678 10725 854593 9946 \n", "9 M2_10 Cat 2020 9703 10804 928845 9977 \n", "10 M2_11 Mahakam 2020 9706 10783 983150 10000 \n", "11 M2_12 Wild Hunt 2020 9756 10724 1182353 10070 \n", "12 M3_01 Wolf 2021 9637 10653 808651 9916 \n", "13 M3_02 Love 2021 9684 10714 917027 9975 \n", "14 M3_03 Bear 2021 9637 10576 766502 9914 \n", "15 M3_04 Elf 2021 9686 10678 944323 9992 \n", "16 M3_05 Viper 2021 9701 10753 956484 9998 \n", "17 M3_06 Magic 2021 9681 10632 869262 9974 \n", "18 M3_07 Griffin 2021 9669 10633 856103 9958 \n", "19 M3_08 Draconid 2021 9681 10767 911273 9954 \n", "20 M3_09 Dryad 2021 9688 10809 940655 9968 \n", "21 M3_10 Cat 2021 9614 10366 719696 9879 \n", "22 M3_11 Mahakam 2021 9725 10580 1017256 10012 \n", "23 M3_12 Wild Hunt 2021 9735 10714 1044941 10032 \n", "24 M4_01 Wolf 2022 9646 10684 883881 9941 \n", "25 M4_02 Love 2022 9638 10652 852867 9932 \n", "\n", " top200_cutoff top64_cutoff \n", "0 9872 10061 \n", "1 9952 10117 \n", "2 9995 10204 \n", "3 10087 10293 \n", "4 10028 10255 \n", "5 10002 10191 \n", "6 10100 10289 \n", "7 10061 10246 \n", "8 10046 10183 \n", "9 10067 10176 \n", "10 10090 10216 \n", "11 10172 10313 \n", "12 10044 10295 \n", "13 10097 10325 \n", "14 10026 10230 \n", "15 10102 10323 \n", "16 10106 10300 \n", "17 10082 10278 \n", "18 10067 10287 \n", "19 10079 10281 \n", "20 10078 10250 \n", "21 9974 10083 \n", "22 10122 10252 \n", "23 10140 10271 \n", "24 10093 10340 \n", "25 10068 10312 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "per_season_df = full_df.groupby(['season']).agg(\n", " min_mmr = pd.NamedAgg('mmr', 'min'),\n", " max_mmr = pd.NamedAgg('mmr', 'max'),\n", " num_matches = pd.NamedAgg('matches', 'sum')\n", ").reset_index()\n", "\n", "top500_cutoffs = full_df[full_df['rank'] == 500][['season', 'mmr']].rename(columns={'mmr': 'top500_cutoff'})\n", "top200_cutoffs = full_df[full_df['rank'] == 200][['season', 'mmr']].rename(columns={'mmr': 'top200_cutoff'})\n", "top64_cutoffs = full_df[full_df['rank'] == 64][['season', 'mmr']].rename(columns={'mmr': 'top64_cutoff'})\n", "\n", "per_season_df = pd.merge(per_season_df, top500_cutoffs, on='season')\n", "per_season_df = pd.merge(per_season_df, top200_cutoffs, on='season')\n", "per_season_df = pd.merge(per_season_df, top64_cutoffs, on='season')\n", "per_season_df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Player Summaries\n", "\n", "We can summerize player stats over all seasons easily using the code below. We'll store the number of times they were on pro ladder, the worst, best and average MMR scores, ..." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>name</th>\n", " <th>country</th>\n", " <th>appearances</th>\n", " <th>min_mmr</th>\n", " <th>mean_mmr</th>\n", " <th>max_mmr</th>\n", " <th>mean_matches</th>\n", " <th>num_matches</th>\n", " <th>best_rank</th>\n", " <th>best_national_rank</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>$Lancer$</td>\n", " <td>China</td>\n", " <td>2</td>\n", " <td>9710</td>\n", " <td>9710</td>\n", " <td>9792</td>\n", " <td>545.50</td>\n", " <td>1091</td>\n", " <td>2011</td>\n", " <td>272.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>$rBuRn$</td>\n", " <td>Spain</td>\n", " <td>2</td>\n", " <td>9767</td>\n", " <td>9767</td>\n", " <td>9831</td>\n", " <td>225.00</td>\n", " <td>450</td>\n", " <td>1176</td>\n", " <td>29.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>$r_BuRn$</td>\n", " <td>Spain</td>\n", " <td>4</td>\n", " <td>9701</td>\n", " <td>9701</td>\n", " <td>9848</td>\n", " <td>123.75</td>\n", " <td>495</td>\n", " <td>968</td>\n", " <td>25.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>++++Gwynbleidd++++</td>\n", " <td>Poland</td>\n", " <td>1</td>\n", " <td>9803</td>\n", " <td>9803</td>\n", " <td>9803</td>\n", " <td>231.00</td>\n", " <td>231</td>\n", " <td>1482</td>\n", " <td>122.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>+Nightshadow+</td>\n", " <td>Ukraine</td>\n", " <td>1</td>\n", " <td>9677</td>\n", " <td>9677</td>\n", " <td>9677</td>\n", " <td>365.00</td>\n", " <td>365</td>\n", " <td>2261</td>\n", " <td>187.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " name country appearances min_mmr mean_mmr max_mmr \\\n", "0 $Lancer$ China 2 9710 9710 9792 \n", "1 $rBuRn$ Spain 2 9767 9767 9831 \n", "2 $r_BuRn$ Spain 4 9701 9701 9848 \n", "3 ++++Gwynbleidd++++ Poland 1 9803 9803 9803 \n", "4 +Nightshadow+ Ukraine 1 9677 9677 9677 \n", "\n", " mean_matches num_matches best_rank best_national_rank \n", "0 545.50 1091 2011 272.0 \n", "1 225.00 450 1176 29.0 \n", "2 123.75 495 968 25.0 \n", "3 231.00 231 1482 122.0 \n", "4 365.00 365 2261 187.0 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "player_summaries = full_df.groupby(['name', 'country']).agg(\n", " appearances = pd.NamedAgg('matches', 'count'),\n", " min_mmr = pd.NamedAgg('mmr', 'min'),\n", " mean_mmr = pd.NamedAgg('mmr', 'min'),\n", " max_mmr = pd.NamedAgg('mmr', 'max'),\n", " mean_matches = pd.NamedAgg('matches', 'mean'),\n", " num_matches = pd.NamedAgg('matches', 'sum'),\n", " best_rank = pd.NamedAgg('rank', 'min'),\n", " best_national_rank = pd.NamedAgg('national_rank', 'min'),\n", ").reset_index()\n", "\n", "player_summaries.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>name</th>\n", " <th>country</th>\n", " <th>appearances</th>\n", " <th>min_mmr</th>\n", " <th>mean_mmr</th>\n", " <th>max_mmr</th>\n", " <th>mean_matches</th>\n", " <th>num_matches</th>\n", " <th>best_rank</th>\n", " <th>best_national_rank</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>21691</th>\n", " <td>sepro</td>\n", " <td>Belgium</td>\n", " <td>7</td>\n", " <td>9724</td>\n", " <td>9724</td>\n", " <td>9903</td>\n", " <td>259.142857</td>\n", " <td>1814</td>\n", " <td>680</td>\n", " <td>2.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " name country appearances min_mmr mean_mmr max_mmr mean_matches \\\n", "21691 sepro Belgium 7 9724 9724 9903 259.142857 \n", "\n", " num_matches best_rank best_national_rank \n", "21691 1814 680 2.0 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "player_summaries[player_summaries['name'] == 'sepro']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Get population for each country\n", "\n", "To calculate the number of pro players per capita we'll need the number of inhabitants per country. On Kaggle a dataset was available for 2019. As this doesn't differ much from one year to another (and those will only result into changes several digits behind the decimal point it) the 2019 list is enough for our purpose. The code below loads the data, and generates country names from the two character ISO code using the same package as used before to avoid mismatches.\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "could not convert nan\n" ] } ], "source": [ "# data from kaggle https://www.kaggle.com/erikbruin/countries-of-the-world-iso-codes-and-population/data#\n", "# the number are indicated very strangely in the csv with a decimal point and need to be multiplied with 1000\n", "\n", "population_df = pd.read_csv('./data/countries_by_population_2019.csv')[['name', 'pop2019']]\n", "country_codes = pd.read_csv('./data/country_codes_2020.csv')[['name', 'cca2']]\n", "population_df = pd.merge(population_df, country_codes, how='left', left_on='name', right_on='name')\n", "\n", "population_df['country'] = population_df['cca2'].apply(lambda x: convert_to_full(x))\n", "population_df['pop2019'] = (population_df['pop2019'] * 1000).astype(int)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>season</th>\n", " <th>country</th>\n", " <th>total_matches</th>\n", " <th>num_players</th>\n", " <th>pop2019</th>\n", " <th>pro_players_per_million</th>\n", " <th>matches_per_player</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>Algeria</td>\n", " <td>250</td>\n", " <td>1</td>\n", " <td>43053054.0</td>\n", " <td>0.023227</td>\n", " <td>250.000000</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>Argentina</td>\n", " <td>1480</td>\n", " <td>4</td>\n", " <td>44780677.0</td>\n", " <td>0.089324</td>\n", " <td>370.000000</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>Armenia</td>\n", " <td>546</td>\n", " <td>1</td>\n", " <td>2957731.0</td>\n", " <td>0.338097</td>\n", " <td>546.000000</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>Australia</td>\n", " <td>6193</td>\n", " <td>27</td>\n", " <td>25203198.0</td>\n", " <td>1.071293</td>\n", " <td>229.370370</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>Austria</td>\n", " <td>3981</td>\n", " <td>16</td>\n", " <td>8955102.0</td>\n", " <td>1.786691</td>\n", " <td>248.812500</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>2313</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>United Kingdom</td>\n", " <td>20656</td>\n", " <td>71</td>\n", " <td>67530172.0</td>\n", " <td>1.051382</td>\n", " <td>290.929577</td>\n", " </tr>\n", " <tr>\n", " <th>2314</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>United States</td>\n", " <td>47946</td>\n", " <td>161</td>\n", " <td>329064917.0</td>\n", " <td>0.489265</td>\n", " <td>297.801242</td>\n", " </tr>\n", " <tr>\n", " <th>2315</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>Uzbekistan</td>\n", " <td>1116</td>\n", " <td>5</td>\n", " <td>32981716.0</td>\n", " <td>0.151599</td>\n", " <td>223.200000</td>\n", " </tr>\n", " <tr>\n", " <th>2316</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>Viet Nam</td>\n", " <td>2277</td>\n", " <td>10</td>\n", " <td>96462106.0</td>\n", " <td>0.103668</td>\n", " <td>227.700000</td>\n", " </tr>\n", " <tr>\n", " <th>2317</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>Zimbabwe</td>\n", " <td>199</td>\n", " <td>1</td>\n", " <td>14645468.0</td>\n", " <td>0.068281</td>\n", " <td>199.000000</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>2318 rows × 7 columns</p>\n", "</div>" ], "text/plain": [ " season country total_matches num_players \\\n", "0 M2_01 Wolf 2020 Algeria 250 1 \n", "1 M2_01 Wolf 2020 Argentina 1480 4 \n", "2 M2_01 Wolf 2020 Armenia 546 1 \n", "3 M2_01 Wolf 2020 Australia 6193 27 \n", "4 M2_01 Wolf 2020 Austria 3981 16 \n", "... ... ... ... ... \n", "2313 M4_02 Love 2022 United Kingdom 20656 71 \n", "2314 M4_02 Love 2022 United States 47946 161 \n", "2315 M4_02 Love 2022 Uzbekistan 1116 5 \n", "2316 M4_02 Love 2022 Viet Nam 2277 10 \n", "2317 M4_02 Love 2022 Zimbabwe 199 1 \n", "\n", " pop2019 pro_players_per_million matches_per_player \n", "0 43053054.0 0.023227 250.000000 \n", "1 44780677.0 0.089324 370.000000 \n", "2 2957731.0 0.338097 546.000000 \n", "3 25203198.0 1.071293 229.370370 \n", "4 8955102.0 1.786691 248.812500 \n", "... ... ... ... \n", "2313 67530172.0 1.051382 290.929577 \n", "2314 329064917.0 0.489265 297.801242 \n", "2315 32981716.0 0.151599 223.200000 \n", "2316 96462106.0 0.103668 227.700000 \n", "2317 14645468.0 0.068281 199.000000 \n", "\n", "[2318 rows x 7 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Lets do some counts per season and country\n", "national_stats = full_df.groupby(['season','country']).agg(\n", " total_matches = pd.NamedAgg('matches', 'sum'),\n", " num_players = pd.NamedAgg('name', 'count')\n", ").reset_index()\n", "\n", "# Add the population (from 2019) to each country and calculate the number of pro players (top 2860) per million inhabitants\n", "national_stats = pd.merge(national_stats, population_df.drop(['name', 'cca2'], axis=1), on='country', how='left')\n", "national_stats['pro_players_per_million'] = (national_stats['num_players']*1000000)/national_stats['pop2019']\n", "national_stats['matches_per_player'] = national_stats['total_matches']/national_stats['num_players']\n", "\n", "national_stats" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Adding popularity metrics\n", "\n", "\n", "We can now calculate the number of pro players per million inhabitants, this could be a proxy for Gwent's popularity in \n", "different parts of the world" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>season</th>\n", " <th>country</th>\n", " <th>total_matches</th>\n", " <th>num_players</th>\n", " <th>pro_players_per_million</th>\n", " <th>matches_per_player</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>657</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Poland</td>\n", " <td>72225</td>\n", " <td>267</td>\n", " <td>7.047129</td>\n", " <td>270.505618</td>\n", " </tr>\n", " <tr>\n", " <th>617</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Estonia</td>\n", " <td>1726</td>\n", " <td>7</td>\n", " <td>5.280436</td>\n", " <td>246.571429</td>\n", " </tr>\n", " <tr>\n", " <th>660</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Russian Federation</td>\n", " <td>195594</td>\n", " <td>672</td>\n", " <td>4.606770</td>\n", " <td>291.062500</td>\n", " </tr>\n", " <tr>\n", " <th>599</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Belarus</td>\n", " <td>10261</td>\n", " <td>39</td>\n", " <td>4.125931</td>\n", " <td>263.102564</td>\n", " </tr>\n", " <tr>\n", " <th>677</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Ukraine</td>\n", " <td>52333</td>\n", " <td>162</td>\n", " <td>3.682351</td>\n", " <td>323.043210</td>\n", " </tr>\n", " <tr>\n", " <th>626</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Iceland</td>\n", " <td>307</td>\n", " <td>1</td>\n", " <td>2.949583</td>\n", " <td>307.000000</td>\n", " </tr>\n", " <tr>\n", " <th>618</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Finland</td>\n", " <td>4053</td>\n", " <td>14</td>\n", " <td>2.530659</td>\n", " <td>289.500000</td>\n", " </tr>\n", " <tr>\n", " <th>624</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Hong Kong</td>\n", " <td>5154</td>\n", " <td>18</td>\n", " <td>2.420606</td>\n", " <td>286.333333</td>\n", " </tr>\n", " <tr>\n", " <th>644</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Malta</td>\n", " <td>229</td>\n", " <td>1</td>\n", " <td>2.270807</td>\n", " <td>229.000000</td>\n", " </tr>\n", " <tr>\n", " <th>610</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Croatia</td>\n", " <td>2247</td>\n", " <td>9</td>\n", " <td>2.179016</td>\n", " <td>249.666667</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " season country total_matches num_players \\\n", "657 M2_08 Draconid 2020 Poland 72225 267 \n", "617 M2_08 Draconid 2020 Estonia 1726 7 \n", "660 M2_08 Draconid 2020 Russian Federation 195594 672 \n", "599 M2_08 Draconid 2020 Belarus 10261 39 \n", "677 M2_08 Draconid 2020 Ukraine 52333 162 \n", "626 M2_08 Draconid 2020 Iceland 307 1 \n", "618 M2_08 Draconid 2020 Finland 4053 14 \n", "624 M2_08 Draconid 2020 Hong Kong 5154 18 \n", "644 M2_08 Draconid 2020 Malta 229 1 \n", "610 M2_08 Draconid 2020 Croatia 2247 9 \n", "\n", " pro_players_per_million matches_per_player \n", "657 7.047129 270.505618 \n", "617 5.280436 246.571429 \n", "660 4.606770 291.062500 \n", "599 4.125931 263.102564 \n", "677 3.682351 323.043210 \n", "626 2.949583 307.000000 \n", "618 2.530659 289.500000 \n", "624 2.420606 286.333333 \n", "644 2.270807 229.000000 \n", "610 2.179016 249.666667 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top_popularity = national_stats[national_stats['season'] == 'M2_08 Draconid 2020'].sort_values('pro_players_per_million', ascending=False).drop(columns=['pop2019'])\n", "top_popularity.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "An alternative way is to find places where the die-hard players are located, those that play the most games per season on average." ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>season</th>\n", " <th>country</th>\n", " <th>total_matches</th>\n", " <th>num_players</th>\n", " <th>pro_players_per_million</th>\n", " <th>matches_per_player</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>611</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Cyprus</td>\n", " <td>1217</td>\n", " <td>2</td>\n", " <td>1.668648</td>\n", " <td>608.500000</td>\n", " </tr>\n", " <tr>\n", " <th>608</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Congo, The Democratic Republic of the</td>\n", " <td>570</td>\n", " <td>1</td>\n", " <td>0.011522</td>\n", " <td>570.000000</td>\n", " </tr>\n", " <tr>\n", " <th>593</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Armenia</td>\n", " <td>1002</td>\n", " <td>2</td>\n", " <td>0.676194</td>\n", " <td>501.000000</td>\n", " </tr>\n", " <tr>\n", " <th>631</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Ireland</td>\n", " <td>2988</td>\n", " <td>6</td>\n", " <td>1.228880</td>\n", " <td>498.000000</td>\n", " </tr>\n", " <tr>\n", " <th>596</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Azerbaijan</td>\n", " <td>1447</td>\n", " <td>3</td>\n", " <td>0.298575</td>\n", " <td>482.333333</td>\n", " </tr>\n", " <tr>\n", " <th>654</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Panama</td>\n", " <td>453</td>\n", " <td>1</td>\n", " <td>0.235491</td>\n", " <td>453.000000</td>\n", " </tr>\n", " <tr>\n", " <th>669</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Switzerland</td>\n", " <td>2580</td>\n", " <td>6</td>\n", " <td>0.698376</td>\n", " <td>430.000000</td>\n", " </tr>\n", " <tr>\n", " <th>603</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Bulgaria</td>\n", " <td>5516</td>\n", " <td>13</td>\n", " <td>1.857111</td>\n", " <td>424.307692</td>\n", " </tr>\n", " <tr>\n", " <th>662</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Serbia</td>\n", " <td>4074</td>\n", " <td>10</td>\n", " <td>1.139960</td>\n", " <td>407.400000</td>\n", " </tr>\n", " <tr>\n", " <th>656</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Philippines</td>\n", " <td>5347</td>\n", " <td>14</td>\n", " <td>0.129490</td>\n", " <td>381.928571</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " season country \\\n", "611 M2_08 Draconid 2020 Cyprus \n", "608 M2_08 Draconid 2020 Congo, The Democratic Republic of the \n", "593 M2_08 Draconid 2020 Armenia \n", "631 M2_08 Draconid 2020 Ireland \n", "596 M2_08 Draconid 2020 Azerbaijan \n", "654 M2_08 Draconid 2020 Panama \n", "669 M2_08 Draconid 2020 Switzerland \n", "603 M2_08 Draconid 2020 Bulgaria \n", "662 M2_08 Draconid 2020 Serbia \n", "656 M2_08 Draconid 2020 Philippines \n", "\n", " total_matches num_players pro_players_per_million matches_per_player \n", "611 1217 2 1.668648 608.500000 \n", "608 570 1 0.011522 570.000000 \n", "593 1002 2 0.676194 501.000000 \n", "631 2988 6 1.228880 498.000000 \n", "596 1447 3 0.298575 482.333333 \n", "654 453 1 0.235491 453.000000 \n", "669 2580 6 0.698376 430.000000 \n", "603 5516 13 1.857111 424.307692 \n", "662 4074 10 1.139960 407.400000 \n", "656 5347 14 0.129490 381.928571 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top_popularity = national_stats[national_stats['season'] == 'M2_08 Draconid 2020'].sort_values('matches_per_player', ascending=False).drop(columns=['pop2019'])\n", "top_popularity.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Creating a ranking of nations\n", "\n", "We'll select countries with three or more playing in the pro rank, select the top 3 players and calculate the average MMR and total MMR for the top\n", "3 players of the country.\n", "\n" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>season</th>\n", " <th>country</th>\n", " <th>rank</th>\n", " <th>name</th>\n", " <th>matches</th>\n", " <th>mmr</th>\n", " <th>previous_top500</th>\n", " <th>national_rank</th>\n", " <th>efficiency</th>\n", " <th>lei</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>Argentina</td>\n", " <td>174</td>\n", " <td>Srchenko</td>\n", " <td>393</td>\n", " <td>9887</td>\n", " <td>no</td>\n", " <td>1.0</td>\n", " <td>0.730280</td>\n", " <td>14.477235</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>Argentina</td>\n", " <td>471</td>\n", " <td>eltroco</td>\n", " <td>339</td>\n", " <td>9758</td>\n", " <td>no</td>\n", " <td>2.0</td>\n", " <td>0.466077</td>\n", " <td>8.581382</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>Argentina</td>\n", " <td>846</td>\n", " <td>MorgannFreemann</td>\n", " <td>208</td>\n", " <td>9660</td>\n", " <td>no</td>\n", " <td>3.0</td>\n", " <td>0.288462</td>\n", " <td>4.160251</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>Australia</td>\n", " <td>135</td>\n", " <td>whitecool</td>\n", " <td>404</td>\n", " <td>9914</td>\n", " <td>no</td>\n", " <td>1.0</td>\n", " <td>0.777228</td>\n", " <td>15.622084</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>Australia</td>\n", " <td>211</td>\n", " <td>Pacifier3750</td>\n", " <td>453</td>\n", " <td>9863</td>\n", " <td>no</td>\n", " <td>2.0</td>\n", " <td>0.580574</td>\n", " <td>12.356818</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>73410</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>Uzbekistan</td>\n", " <td>1045</td>\n", " <td>Frag.</td>\n", " <td>156</td>\n", " <td>9781</td>\n", " <td>no</td>\n", " <td>2.0</td>\n", " <td>1.160256</td>\n", " <td>14.491598</td>\n", " </tr>\n", " <tr>\n", " <th>73411</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>Uzbekistan</td>\n", " <td>1119</td>\n", " <td>SMEAGOOL</td>\n", " <td>180</td>\n", " <td>9770</td>\n", " <td>no</td>\n", " <td>3.0</td>\n", " <td>0.944444</td>\n", " <td>12.671052</td>\n", " </tr>\n", " <tr>\n", " <th>73414</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>Viet Nam</td>\n", " <td>655</td>\n", " <td>HaoThien</td>\n", " <td>325</td>\n", " <td>9868</td>\n", " <td>no</td>\n", " <td>1.0</td>\n", " <td>0.824615</td>\n", " <td>14.865965</td>\n", " </tr>\n", " <tr>\n", " <th>73415</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>Viet Nam</td>\n", " <td>940</td>\n", " <td>captainstef</td>\n", " <td>401</td>\n", " <td>9800</td>\n", " <td>no</td>\n", " <td>2.0</td>\n", " <td>0.498753</td>\n", " <td>9.987523</td>\n", " </tr>\n", " <tr>\n", " <th>73416</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>Viet Nam</td>\n", " <td>1381</td>\n", " <td>__cAnh</td>\n", " <td>292</td>\n", " <td>9734</td>\n", " <td>no</td>\n", " <td>3.0</td>\n", " <td>0.458904</td>\n", " <td>7.841757</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>4956 rows × 10 columns</p>\n", "</div>" ], "text/plain": [ " season country rank name matches mmr \\\n", "0 M2_01 Wolf 2020 Argentina 174 Srchenko 393 9887 \n", "1 M2_01 Wolf 2020 Argentina 471 eltroco 339 9758 \n", "2 M2_01 Wolf 2020 Argentina 846 MorgannFreemann 208 9660 \n", "4 M2_01 Wolf 2020 Australia 135 whitecool 404 9914 \n", "5 M2_01 Wolf 2020 Australia 211 Pacifier3750 453 9863 \n", "... ... ... ... ... ... ... \n", "73410 M4_02 Love 2022 Uzbekistan 1045 Frag. 156 9781 \n", "73411 M4_02 Love 2022 Uzbekistan 1119 SMEAGOOL 180 9770 \n", "73414 M4_02 Love 2022 Viet Nam 655 HaoThien 325 9868 \n", "73415 M4_02 Love 2022 Viet Nam 940 captainstef 401 9800 \n", "73416 M4_02 Love 2022 Viet Nam 1381 __cAnh 292 9734 \n", "\n", " previous_top500 national_rank efficiency lei \n", "0 no 1.0 0.730280 14.477235 \n", "1 no 2.0 0.466077 8.581382 \n", "2 no 3.0 0.288462 4.160251 \n", "4 no 1.0 0.777228 15.622084 \n", "5 no 2.0 0.580574 12.356818 \n", "... ... ... ... ... \n", "73410 no 2.0 1.160256 14.491598 \n", "73411 no 3.0 0.944444 12.671052 \n", "73414 no 1.0 0.824615 14.865965 \n", "73415 no 2.0 0.498753 9.987523 \n", "73416 no 3.0 0.458904 7.841757 \n", "\n", "[4956 rows x 10 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "num_player_included = 3 # Here we define the number of players in Pro Ladder to be considered\n", "\n", "players_per_country = full_df.groupby(['season','country']).agg(\n", " num_players = pd.NamedAgg('name', 'count')\n", ").reset_index()\n", "\n", "players_per_country = players_per_country[players_per_country['num_players'] >= num_player_included]\n", "\n", "top_per_country = pd.merge(players_per_country[['season', 'country']], full_df, how='inner', on=['season', 'country'])\n", "top_per_country = top_per_country[top_per_country['national_rank'] <= num_player_included]\n", "top_per_country" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>season</th>\n", " <th>country</th>\n", " <th>mean_mmr</th>\n", " <th>total_mmr</th>\n", " <th>mean_matches_per_player</th>\n", " <th>total_matches</th>\n", " <th>nation_rank</th>\n", " <th>efficiency</th>\n", " <th>lei</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>Argentina</td>\n", " <td>9768.333333</td>\n", " <td>29305</td>\n", " <td>313.333333</td>\n", " <td>940</td>\n", " <td>34.0</td>\n", " <td>0.537234</td>\n", " <td>9.509700</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>Australia</td>\n", " <td>9869.666667</td>\n", " <td>29609</td>\n", " <td>336.333333</td>\n", " <td>1009</td>\n", " <td>22.0</td>\n", " <td>0.801784</td>\n", " <td>14.704231</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>Austria</td>\n", " <td>9905.666667</td>\n", " <td>29717</td>\n", " <td>392.333333</td>\n", " <td>1177</td>\n", " <td>19.0</td>\n", " <td>0.779099</td>\n", " <td>15.431938</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>Belarus</td>\n", " <td>10064.333333</td>\n", " <td>30193</td>\n", " <td>390.000000</td>\n", " <td>1170</td>\n", " <td>11.0</td>\n", " <td>1.190598</td>\n", " <td>23.512432</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>M2_01 Wolf 2020</td>\n", " <td>Belgium</td>\n", " <td>9901.000000</td>\n", " <td>29703</td>\n", " <td>415.666667</td>\n", " <td>1247</td>\n", " <td>21.0</td>\n", " <td>0.724138</td>\n", " <td>14.763655</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>1647</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>Ukraine</td>\n", " <td>10458.000000</td>\n", " <td>31374</td>\n", " <td>748.666667</td>\n", " <td>2246</td>\n", " <td>3.0</td>\n", " <td>1.146037</td>\n", " <td>31.357616</td>\n", " </tr>\n", " <tr>\n", " <th>1648</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>United Kingdom</td>\n", " <td>10223.666667</td>\n", " <td>30671</td>\n", " <td>692.000000</td>\n", " <td>2076</td>\n", " <td>12.0</td>\n", " <td>0.901252</td>\n", " <td>23.708249</td>\n", " </tr>\n", " <tr>\n", " <th>1649</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>United States</td>\n", " <td>10201.333333</td>\n", " <td>30604</td>\n", " <td>562.666667</td>\n", " <td>1688</td>\n", " <td>14.0</td>\n", " <td>1.068720</td>\n", " <td>25.350684</td>\n", " </tr>\n", " <tr>\n", " <th>1650</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>Uzbekistan</td>\n", " <td>9810.333333</td>\n", " <td>29431</td>\n", " <td>220.333333</td>\n", " <td>661</td>\n", " <td>53.0</td>\n", " <td>0.954614</td>\n", " <td>14.169940</td>\n", " </tr>\n", " <tr>\n", " <th>1651</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>Viet Nam</td>\n", " <td>9800.666667</td>\n", " <td>29402</td>\n", " <td>339.333333</td>\n", " <td>1018</td>\n", " <td>56.0</td>\n", " <td>0.591356</td>\n", " <td>10.893363</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>1652 rows × 9 columns</p>\n", "</div>" ], "text/plain": [ " season country mean_mmr total_mmr \\\n", "0 M2_01 Wolf 2020 Argentina 9768.333333 29305 \n", "1 M2_01 Wolf 2020 Australia 9869.666667 29609 \n", "2 M2_01 Wolf 2020 Austria 9905.666667 29717 \n", "3 M2_01 Wolf 2020 Belarus 10064.333333 30193 \n", "4 M2_01 Wolf 2020 Belgium 9901.000000 29703 \n", "... ... ... ... ... \n", "1647 M4_02 Love 2022 Ukraine 10458.000000 31374 \n", "1648 M4_02 Love 2022 United Kingdom 10223.666667 30671 \n", "1649 M4_02 Love 2022 United States 10201.333333 30604 \n", "1650 M4_02 Love 2022 Uzbekistan 9810.333333 29431 \n", "1651 M4_02 Love 2022 Viet Nam 9800.666667 29402 \n", "\n", " mean_matches_per_player total_matches nation_rank efficiency \\\n", "0 313.333333 940 34.0 0.537234 \n", "1 336.333333 1009 22.0 0.801784 \n", "2 392.333333 1177 19.0 0.779099 \n", "3 390.000000 1170 11.0 1.190598 \n", "4 415.666667 1247 21.0 0.724138 \n", "... ... ... ... ... \n", "1647 748.666667 2246 3.0 1.146037 \n", "1648 692.000000 2076 12.0 0.901252 \n", "1649 562.666667 1688 14.0 1.068720 \n", "1650 220.333333 661 53.0 0.954614 \n", "1651 339.333333 1018 56.0 0.591356 \n", "\n", " lei \n", "0 9.509700 \n", "1 14.704231 \n", "2 15.431938 \n", "3 23.512432 \n", "4 14.763655 \n", "... ... \n", "1647 31.357616 \n", "1648 23.708249 \n", "1649 25.350684 \n", "1650 14.169940 \n", "1651 10.893363 \n", "\n", "[1652 rows x 9 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top_stats = top_per_country.groupby(['season','country']).agg(\n", " mean_mmr = pd.NamedAgg('mmr', 'mean'),\n", " total_mmr = pd.NamedAgg('mmr', 'sum'),\n", " mean_matches_per_player = pd.NamedAgg('matches', 'mean'),\n", " total_matches = pd.NamedAgg('matches', 'sum')\n", ").reset_index()\n", "\n", "# Here we add the same ranking and effeciency statistics for national teams as for individual players\n", "top_stats['nation_rank'] = top_stats.groupby(['season'])[\"mean_mmr\"].rank(\"first\", ascending=False)\n", "top_stats['efficiency'] = ((top_stats['mean_mmr']-9600))/top_stats['mean_matches_per_player']\n", "top_stats['lei'] = ((top_stats['mean_mmr']-9600))/np.sqrt(top_stats['mean_matches_per_player'])\n", "\n", "top_stats" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>season</th>\n", " <th>country</th>\n", " <th>mean_mmr</th>\n", " <th>total_mmr</th>\n", " <th>mean_matches_per_player</th>\n", " <th>total_matches</th>\n", " <th>nation_rank</th>\n", " <th>efficiency</th>\n", " <th>lei</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>432</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>China</td>\n", " <td>10489.333333</td>\n", " <td>31468</td>\n", " <td>409.000000</td>\n", " <td>1227</td>\n", " <td>1.0</td>\n", " <td>2.174409</td>\n", " <td>43.974703</td>\n", " </tr>\n", " <tr>\n", " <th>466</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Russian Federation</td>\n", " <td>10479.666667</td>\n", " <td>31439</td>\n", " <td>636.000000</td>\n", " <td>1908</td>\n", " <td>2.0</td>\n", " <td>1.383124</td>\n", " <td>34.881052</td>\n", " </tr>\n", " <tr>\n", " <th>463</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Poland</td>\n", " <td>10439.333333</td>\n", " <td>31318</td>\n", " <td>657.000000</td>\n", " <td>1971</td>\n", " <td>3.0</td>\n", " <td>1.277524</td>\n", " <td>32.745512</td>\n", " </tr>\n", " <tr>\n", " <th>440</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Germany</td>\n", " <td>10399.666667</td>\n", " <td>31199</td>\n", " <td>440.000000</td>\n", " <td>1320</td>\n", " <td>4.0</td>\n", " <td>1.817424</td>\n", " <td>38.122613</td>\n", " </tr>\n", " <tr>\n", " <th>450</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Japan</td>\n", " <td>10345.000000</td>\n", " <td>31035</td>\n", " <td>708.666667</td>\n", " <td>2126</td>\n", " <td>5.0</td>\n", " <td>1.051270</td>\n", " <td>27.985642</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>446</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Iran, Islamic Republic of</td>\n", " <td>9787.000000</td>\n", " <td>29361</td>\n", " <td>264.000000</td>\n", " <td>792</td>\n", " <td>60.0</td>\n", " <td>0.708333</td>\n", " <td>11.509054</td>\n", " </tr>\n", " <tr>\n", " <th>431</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Chile</td>\n", " <td>9782.666667</td>\n", " <td>29348</td>\n", " <td>310.000000</td>\n", " <td>930</td>\n", " <td>61.0</td>\n", " <td>0.589247</td>\n", " <td>10.374770</td>\n", " </tr>\n", " <tr>\n", " <th>469</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Singapore</td>\n", " <td>9772.666667</td>\n", " <td>29318</td>\n", " <td>325.000000</td>\n", " <td>975</td>\n", " <td>62.0</td>\n", " <td>0.531282</td>\n", " <td>9.577823</td>\n", " </tr>\n", " <tr>\n", " <th>483</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Venezuela, Bolivarian Republic of</td>\n", " <td>9766.000000</td>\n", " <td>29298</td>\n", " <td>271.333333</td>\n", " <td>814</td>\n", " <td>63.0</td>\n", " <td>0.611794</td>\n", " <td>10.077586</td>\n", " </tr>\n", " <tr>\n", " <th>471</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>Slovenia</td>\n", " <td>9743.666667</td>\n", " <td>29231</td>\n", " <td>273.333333</td>\n", " <td>820</td>\n", " <td>64.0</td>\n", " <td>0.525610</td>\n", " <td>8.689799</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>64 rows × 9 columns</p>\n", "</div>" ], "text/plain": [ " season country mean_mmr \\\n", "432 M2_08 Draconid 2020 China 10489.333333 \n", "466 M2_08 Draconid 2020 Russian Federation 10479.666667 \n", "463 M2_08 Draconid 2020 Poland 10439.333333 \n", "440 M2_08 Draconid 2020 Germany 10399.666667 \n", "450 M2_08 Draconid 2020 Japan 10345.000000 \n", ".. ... ... ... \n", "446 M2_08 Draconid 2020 Iran, Islamic Republic of 9787.000000 \n", "431 M2_08 Draconid 2020 Chile 9782.666667 \n", "469 M2_08 Draconid 2020 Singapore 9772.666667 \n", "483 M2_08 Draconid 2020 Venezuela, Bolivarian Republic of 9766.000000 \n", "471 M2_08 Draconid 2020 Slovenia 9743.666667 \n", "\n", " total_mmr mean_matches_per_player total_matches nation_rank \\\n", "432 31468 409.000000 1227 1.0 \n", "466 31439 636.000000 1908 2.0 \n", "463 31318 657.000000 1971 3.0 \n", "440 31199 440.000000 1320 4.0 \n", "450 31035 708.666667 2126 5.0 \n", ".. ... ... ... ... \n", "446 29361 264.000000 792 60.0 \n", "431 29348 310.000000 930 61.0 \n", "469 29318 325.000000 975 62.0 \n", "483 29298 271.333333 814 63.0 \n", "471 29231 273.333333 820 64.0 \n", "\n", " efficiency lei \n", "432 2.174409 43.974703 \n", "466 1.383124 34.881052 \n", "463 1.277524 32.745512 \n", "440 1.817424 38.122613 \n", "450 1.051270 27.985642 \n", ".. ... ... \n", "446 0.708333 11.509054 \n", "431 0.589247 10.374770 \n", "469 0.531282 9.577823 \n", "483 0.611794 10.077586 \n", "471 0.525610 8.689799 \n", "\n", "[64 rows x 9 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "top_stats[top_stats['season'] == 'M2_08 Draconid 2020'].sort_values('nation_rank')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Export everything\n", "\n", "Lets exports all data to Excel files so they can be further analysed without pandas/python. " ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "full_df.to_excel('./output/player_stats.xlsx')\n", "national_stats.to_excel('./output/national_stats.xlsx')\n", "per_season_df.to_excel('./output/seasonal_stats.xlsx')\n", "player_summaries.to_excel('./output/player_summaries.xlsx')" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>season</th>\n", " <th>previous_top500</th>\n", " <th>count</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>2</th>\n", " <td>M2_02 Love 2020</td>\n", " <td>yes</td>\n", " <td>304</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>M2_03 Bear 2020</td>\n", " <td>yes</td>\n", " <td>313</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>M2_04 Elf 2020</td>\n", " <td>yes</td>\n", " <td>294</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>M2_05 Viper 2020</td>\n", " <td>yes</td>\n", " <td>317</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>M2_06 Magic 2020</td>\n", " <td>yes</td>\n", " <td>314</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>M2_07 Griffin 2020</td>\n", " <td>yes</td>\n", " <td>277</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>M2_08 Draconid 2020</td>\n", " <td>yes</td>\n", " <td>299</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>M2_09 Dryad 2020</td>\n", " <td>yes</td>\n", " <td>264</td>\n", " </tr>\n", " <tr>\n", " <th>18</th>\n", " <td>M2_10 Cat 2020</td>\n", " <td>yes</td>\n", " <td>277</td>\n", " </tr>\n", " <tr>\n", " <th>20</th>\n", " <td>M2_11 Mahakam 2020</td>\n", " <td>yes</td>\n", " <td>266</td>\n", " </tr>\n", " <tr>\n", " <th>22</th>\n", " <td>M2_12 Wild Hunt 2020</td>\n", " <td>yes</td>\n", " <td>242</td>\n", " </tr>\n", " <tr>\n", " <th>24</th>\n", " <td>M3_01 Wolf 2021</td>\n", " <td>yes</td>\n", " <td>267</td>\n", " </tr>\n", " <tr>\n", " <th>26</th>\n", " <td>M3_02 Love 2021</td>\n", " <td>yes</td>\n", " <td>286</td>\n", " </tr>\n", " <tr>\n", " <th>28</th>\n", " <td>M3_03 Bear 2021</td>\n", " <td>yes</td>\n", " <td>286</td>\n", " </tr>\n", " <tr>\n", " <th>30</th>\n", " <td>M3_04 Elf 2021</td>\n", " <td>yes</td>\n", " <td>266</td>\n", " </tr>\n", " <tr>\n", " <th>32</th>\n", " <td>M3_05 Viper 2021</td>\n", " <td>yes</td>\n", " <td>284</td>\n", " </tr>\n", " <tr>\n", " <th>34</th>\n", " <td>M3_06 Magic 2021</td>\n", " <td>yes</td>\n", " <td>270</td>\n", " </tr>\n", " <tr>\n", " <th>36</th>\n", " <td>M3_07 Griffin 2021</td>\n", " <td>yes</td>\n", " <td>257</td>\n", " </tr>\n", " <tr>\n", " <th>38</th>\n", " <td>M3_08 Draconid 2021</td>\n", " <td>yes</td>\n", " <td>230</td>\n", " </tr>\n", " <tr>\n", " <th>40</th>\n", " <td>M3_09 Dryad 2021</td>\n", " <td>yes</td>\n", " <td>258</td>\n", " </tr>\n", " <tr>\n", " <th>42</th>\n", " <td>M3_10 Cat 2021</td>\n", " <td>yes</td>\n", " <td>210</td>\n", " </tr>\n", " <tr>\n", " <th>44</th>\n", " <td>M3_11 Mahakam 2021</td>\n", " <td>yes</td>\n", " <td>233</td>\n", " </tr>\n", " <tr>\n", " <th>46</th>\n", " <td>M3_12 Wild Hunt 2021</td>\n", " <td>yes</td>\n", " <td>249</td>\n", " </tr>\n", " <tr>\n", " <th>48</th>\n", " <td>M4_01 Wolf 2022</td>\n", " <td>yes</td>\n", " <td>299</td>\n", " </tr>\n", " <tr>\n", " <th>50</th>\n", " <td>M4_02 Love 2022</td>\n", " <td>yes</td>\n", " <td>275</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " season previous_top500 count\n", "2 M2_02 Love 2020 yes 304\n", "4 M2_03 Bear 2020 yes 313\n", "6 M2_04 Elf 2020 yes 294\n", "8 M2_05 Viper 2020 yes 317\n", "10 M2_06 Magic 2020 yes 314\n", "12 M2_07 Griffin 2020 yes 277\n", "14 M2_08 Draconid 2020 yes 299\n", "16 M2_09 Dryad 2020 yes 264\n", "18 M2_10 Cat 2020 yes 277\n", "20 M2_11 Mahakam 2020 yes 266\n", "22 M2_12 Wild Hunt 2020 yes 242\n", "24 M3_01 Wolf 2021 yes 267\n", "26 M3_02 Love 2021 yes 286\n", "28 M3_03 Bear 2021 yes 286\n", "30 M3_04 Elf 2021 yes 266\n", "32 M3_05 Viper 2021 yes 284\n", "34 M3_06 Magic 2021 yes 270\n", "36 M3_07 Griffin 2021 yes 257\n", "38 M3_08 Draconid 2021 yes 230\n", "40 M3_09 Dryad 2021 yes 258\n", "42 M3_10 Cat 2021 yes 210\n", "44 M3_11 Mahakam 2021 yes 233\n", "46 M3_12 Wild Hunt 2021 yes 249\n", "48 M4_01 Wolf 2022 yes 299\n", "50 M4_02 Love 2022 yes 275" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "bracket = 500\n", "\n", "returning = full_df[full_df['rank'] <= bracket].groupby(['season', 'previous_top500']).agg(\n", " count = pd.NamedAgg('name', 'count')\n", ").reset_index()\n", "returning = returning[returning.previous_top500 == 'yes']\n", "returning" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "54.695999999999984" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "returning['%'] = returning['count'] * 100/bracket\n", "np.mean(returning['%'])" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }