{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Gwent Pro Rank analysis\n",
    "\n",
    "First we'll need to get the data from the playgwent website. Here requests and BeautifulSoup are used to get the data and parse the html the comes back. Nothing to difficult here. There is a little clean up in the code and there are two lines of code to check if a player was in the top 500 last season. In case the data already exists it is loaded from disk."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|████████████████████████████████████████████████████████████████████████████████| 149/149 [03:16<00:00,  1.32s/it]\n"
     ]
    }
   ],
   "source": [
    "from tqdm import tqdm\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# Note that seasons should be in order (for the top 500 spots to be picked up correctly in the next step)\n",
    "seasons = [\n",
    "    ('M2_01 Wolf 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-wolf/1/{page}', './output/season_of_the_wolf_2020.xlsx'),\n",
    "    ('M2_02 Love 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-love/1/{page}', './output/season_of_love_2020.xlsx'),\n",
    "    ('M2_03 Bear 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-bear/1/{page}', './output/season_of_the_bear_2020.xlsx'),\n",
    "    ('M2_04 Elf 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-elf/1/{page}', './output/season_of_the_elf_2020.xlsx'),\n",
    "    ('M2_05 Viper 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-viper/1/{page}', './output/season_of_the_viper_2020.xlsx'),\n",
    "    ('M2_06 Magic 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-magic/1/{page}', './output/season_of_magic_2020.xlsx'),\n",
    "    ('M2_07 Griffin 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-griffin/1/{page}', './output/season_of_the_griffin_2020.xlsx'),\n",
    "    ('M2_08 Draconid 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-draconid/1/{page}', './output/season_of_the_draconid_2020.xlsx'),\n",
    "    ('M2_09 Dryad 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-dryad/1/{page}', './output/season_of_the_dryad_2020.xlsx'),\n",
    "    ('M2_10 Cat 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-cat/1/{page}', './output/season_of_the_cat_2020.xlsx'),\n",
    "    ('M2_11 Mahakam 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-mahakam/1/{page}', './output/season_of_the_mahakam_2020.xlsx'),\n",
    "    ('M2_12 Wild Hunt 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-wild-hunt/1/{page}', './output/season_of_the_wild_hunt_2020.xlsx'),\n",
    "    ('M3_01 Wolf 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-wolf/1/{page}', './output/season_of_the_wolf_2021.xlsx'),\n",
    "    ('M3_02 Love 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-love/1/{page}', './output/season_of_love_2021.xlsx'),\n",
    "    ('M3_03 Bear 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-bear/1/{page}', './output/season_of_the_bear_2021.xlsx'),\n",
    "    ('M3_04 Elf 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-elf/1/{page}', './output/season_of_the_elf_2021.xlsx'),\n",
    "    ('M3_05 Viper 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-viper/1/{page}', './output/season_of_the_viper_2021.xlsx'),\n",
    "    ('M3_06 Magic 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-magic/1/{page}', './output/season_of_the_magic_2021.xlsx'),\n",
    "    ('M3_07 Griffin 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-griffin/1/{page}', './output/season_of_the_griffin_2021.xlsx'),\n",
    "    ('M3_08 Draconid 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-draconid/1/{page}', './output/season_of_the_draconid_2021.xlsx'),\n",
    "    ('M3_09 Dryad 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-dryad/1/{page}', './output/season_of_the_dryad_2021.xlsx'),\n",
    "    ('M3_10 Cat 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-cat/1/{page}', './output/season_of_the_cat_2021.xlsx'),\n",
    "    ('M3_11 Mahakam 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-mahakam/1/{page}', './output/season_of_the_mahakam_2021.xlsx'),\n",
    "    ('M3_12 Wild Hunt 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-wild-hunt/1/{page}', './output/season_of_the_wild_hunt_2021.xlsx'),\n",
    "    ('M4_01 Wolf 2022', 'https://masters.playgwent.com/en/rankings/masters-4/season-of-the-wolf/1/{page}', './output/season_of_the_wolf_2022.xlsx'),\n",
    "    ('M4_02 Love 2022', 'https://masters.playgwent.com/en/rankings/masters-4/season-of-the-love/1/{page}', './output/season_of_love_2022.xlsx'),\n",
    "\n",
    "]\n",
    "\n",
    "os.makedirs('./output', exist_ok=True)\n",
    "\n",
    "dataframes = []\n",
    "top500 = []\n",
    "\n",
    "\n",
    "for season, url_template, output_path in seasons:\n",
    "    if os.path.exists(output_path):\n",
    "        df = pd.read_excel(output_path).drop(['Unnamed: 0'], axis=1)\n",
    "    else:\n",
    "        output = []\n",
    "\n",
    "        for i in tqdm(range(1, 150)):\n",
    "            url = url_template.replace('{page}', str(i))\n",
    "            try:\n",
    "                r = requests.get(url)\n",
    "                soup = BeautifulSoup(r.text, 'html.parser')\n",
    "\n",
    "                rows = soup.findAll(\"div\", {\"class\": \"c-ranking-table__tr\"})\n",
    "                for row in rows:\n",
    "                    flag = row.find(\"i\", {\"class\": \"flag-icon\"})[\"class\"][1]\n",
    "                    new_record = {\n",
    "                        'rank': int(row.find(\"div\", {\"class\": \"td-number\"}).text.strip()),\n",
    "                        'name': row.find(\"div\", {\"class\": \"td-nick\"}).text.strip(),\n",
    "                        'country': flag.replace('flag-icon-', '').upper(),\n",
    "                        'matches': int(row.find(\"div\", {\"class\": \"td-matches\"}).text.strip().replace(' matches', '')),\n",
    "                        'mmr': int(row.find(\"div\", {\"class\": \"td-mmr\"}).text.strip().replace(',', '')),\n",
    "                        'season': season\n",
    "                    }\n",
    "                    output.append(new_record)\n",
    "            except:\n",
    "                pass\n",
    "\n",
    "\n",
    "        df = pd.DataFrame(output).drop_duplicates()\n",
    "        df.to_excel(output_path)\n",
    "    \n",
    "    # We'll add a column here if the player was in the top 500 last season\n",
    "    df['previous_top500'] = ['yes' if name in top500 else 'no' for name in df['name']]    \n",
    "    top500 = list(df[df['rank'] <= 500]['name'])\n",
    "    \n",
    "    dataframes.append(df)\n",
    "    \n",
    "# Concatenate dataframes and drop any records with missing values\n",
    "full_df = pd.concat(dataframes).dropna()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Human readable countries\n",
    "\n",
    "We have the 2 letter code for the country of each participant, let's use the pycountry library to convert that to the full name."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>rank</th>\n",
       "      <th>name</th>\n",
       "      <th>country</th>\n",
       "      <th>matches</th>\n",
       "      <th>mmr</th>\n",
       "      <th>season</th>\n",
       "      <th>previous_top500</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>kolemoen</td>\n",
       "      <td>Germany</td>\n",
       "      <td>431</td>\n",
       "      <td>10484</td>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>kams134</td>\n",
       "      <td>Poland</td>\n",
       "      <td>923</td>\n",
       "      <td>10477</td>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>TailBot</td>\n",
       "      <td>Poland</td>\n",
       "      <td>538</td>\n",
       "      <td>10472</td>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Pajabol</td>\n",
       "      <td>Poland</td>\n",
       "      <td>820</td>\n",
       "      <td>10471</td>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Adzikov</td>\n",
       "      <td>Poland</td>\n",
       "      <td>1105</td>\n",
       "      <td>10442</td>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>no</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   rank      name  country  matches    mmr           season previous_top500\n",
       "0     1  kolemoen  Germany      431  10484  M2_01 Wolf 2020              no\n",
       "1     2   kams134   Poland      923  10477  M2_01 Wolf 2020              no\n",
       "2     3   TailBot   Poland      538  10472  M2_01 Wolf 2020              no\n",
       "3     4   Pajabol   Poland      820  10471  M2_01 Wolf 2020              no\n",
       "4     5   Adzikov   Poland     1105  10442  M2_01 Wolf 2020              no"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pycountry\n",
    "\n",
    "def convert_to_full(two_letter_code):\n",
    "    try:\n",
    "        out = pycountry.countries.get(alpha_2=two_letter_code).name\n",
    "    except:\n",
    "        print(f'could not convert {two_letter_code}')\n",
    "        out = two_letter_code\n",
    "    return out\n",
    "\n",
    "full_df['country'] = full_df['country'].apply(lambda x: convert_to_full(x))\n",
    "full_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# National rank\n",
    "\n",
    "Apart from the global rank, we'll add a participant's national rank as well."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "full_df['national_rank'] = full_df.groupby(['country','season'])[\"mmr\"].rank(\"first\", ascending=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ladder Efficiency\n",
    "\n",
    "While getting to a higher MMR is the main goal of ladder. There is an argument to be made that the number of games a participant plays to reach a certain point should not be overlooked. Someone that played fewer games to get to a certain MMR climbed the ladder in a more efficient way. There are two metrics that can be used, we take the MMR of a player, substract the minimal MMR, which is 9600, and divide that by the number of matches played. Though, as the higher up you are in ladder, the more fierce the competition becomes, to take this into account Lerio2 from Team Legacy proposed to divide by the square-root of the number of matches instead.\n",
    "\n",
    "One additional consideration is that players that were in the top 500 the previous season didn't have to climb back up to pro rank. Both metrics actually give them a major advantage. So when comparing efficiency between players the field previous_top500 could be used to allow a fair comparison."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "full_df['efficiency'] = ((full_df['mmr']-9600))/full_df['matches']\n",
    "full_df['lei'] = ((full_df['mmr']-9600))/np.sqrt(full_df['matches'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>rank</th>\n",
       "      <th>name</th>\n",
       "      <th>country</th>\n",
       "      <th>matches</th>\n",
       "      <th>mmr</th>\n",
       "      <th>season</th>\n",
       "      <th>previous_top500</th>\n",
       "      <th>national_rank</th>\n",
       "      <th>efficiency</th>\n",
       "      <th>lei</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>kolemoen</td>\n",
       "      <td>Germany</td>\n",
       "      <td>431</td>\n",
       "      <td>10484</td>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>no</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.051044</td>\n",
       "      <td>42.580782</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>kams134</td>\n",
       "      <td>Poland</td>\n",
       "      <td>923</td>\n",
       "      <td>10477</td>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>no</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.950163</td>\n",
       "      <td>28.866807</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>TailBot</td>\n",
       "      <td>Poland</td>\n",
       "      <td>538</td>\n",
       "      <td>10472</td>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>no</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.620818</td>\n",
       "      <td>37.594590</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Pajabol</td>\n",
       "      <td>Poland</td>\n",
       "      <td>820</td>\n",
       "      <td>10471</td>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>no</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.062195</td>\n",
       "      <td>30.416639</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Adzikov</td>\n",
       "      <td>Poland</td>\n",
       "      <td>1105</td>\n",
       "      <td>10442</td>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>no</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.761991</td>\n",
       "      <td>25.329753</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   rank      name  country  matches    mmr           season previous_top500  \\\n",
       "0     1  kolemoen  Germany      431  10484  M2_01 Wolf 2020              no   \n",
       "1     2   kams134   Poland      923  10477  M2_01 Wolf 2020              no   \n",
       "2     3   TailBot   Poland      538  10472  M2_01 Wolf 2020              no   \n",
       "3     4   Pajabol   Poland      820  10471  M2_01 Wolf 2020              no   \n",
       "4     5   Adzikov   Poland     1105  10442  M2_01 Wolf 2020              no   \n",
       "\n",
       "   national_rank  efficiency        lei  \n",
       "0            1.0    2.051044  42.580782  \n",
       "1            1.0    0.950163  28.866807  \n",
       "2            2.0    1.620818  37.594590  \n",
       "3            3.0    1.062195  30.416639  \n",
       "4            4.0    0.761991  25.329753  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "full_df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Statistics per Season"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>season</th>\n",
       "      <th>min_mmr</th>\n",
       "      <th>max_mmr</th>\n",
       "      <th>num_matches</th>\n",
       "      <th>top500_cutoff</th>\n",
       "      <th>top200_cutoff</th>\n",
       "      <th>top64_cutoff</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>2407</td>\n",
       "      <td>10484</td>\n",
       "      <td>699496</td>\n",
       "      <td>9749</td>\n",
       "      <td>9872</td>\n",
       "      <td>10061</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>M2_02 Love 2020</td>\n",
       "      <td>7776</td>\n",
       "      <td>10537</td>\n",
       "      <td>769172</td>\n",
       "      <td>9832</td>\n",
       "      <td>9952</td>\n",
       "      <td>10117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M2_03 Bear 2020</td>\n",
       "      <td>9427</td>\n",
       "      <td>10669</td>\n",
       "      <td>862283</td>\n",
       "      <td>9867</td>\n",
       "      <td>9995</td>\n",
       "      <td>10204</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>M2_04 Elf 2020</td>\n",
       "      <td>9666</td>\n",
       "      <td>10751</td>\n",
       "      <td>1004603</td>\n",
       "      <td>9952</td>\n",
       "      <td>10087</td>\n",
       "      <td>10293</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>M2_05 Viper 2020</td>\n",
       "      <td>9635</td>\n",
       "      <td>10622</td>\n",
       "      <td>859640</td>\n",
       "      <td>9910</td>\n",
       "      <td>10028</td>\n",
       "      <td>10255</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>M2_06 Magic 2020</td>\n",
       "      <td>9624</td>\n",
       "      <td>10597</td>\n",
       "      <td>793013</td>\n",
       "      <td>9896</td>\n",
       "      <td>10002</td>\n",
       "      <td>10191</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>M2_07 Griffin 2020</td>\n",
       "      <td>9698</td>\n",
       "      <td>10667</td>\n",
       "      <td>996516</td>\n",
       "      <td>9978</td>\n",
       "      <td>10100</td>\n",
       "      <td>10289</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>9666</td>\n",
       "      <td>10546</td>\n",
       "      <td>837545</td>\n",
       "      <td>9946</td>\n",
       "      <td>10061</td>\n",
       "      <td>10246</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>M2_09 Dryad 2020</td>\n",
       "      <td>9678</td>\n",
       "      <td>10725</td>\n",
       "      <td>854593</td>\n",
       "      <td>9946</td>\n",
       "      <td>10046</td>\n",
       "      <td>10183</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>M2_10 Cat 2020</td>\n",
       "      <td>9703</td>\n",
       "      <td>10804</td>\n",
       "      <td>928845</td>\n",
       "      <td>9977</td>\n",
       "      <td>10067</td>\n",
       "      <td>10176</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>M2_11 Mahakam 2020</td>\n",
       "      <td>9706</td>\n",
       "      <td>10783</td>\n",
       "      <td>983150</td>\n",
       "      <td>10000</td>\n",
       "      <td>10090</td>\n",
       "      <td>10216</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>M2_12 Wild Hunt 2020</td>\n",
       "      <td>9756</td>\n",
       "      <td>10724</td>\n",
       "      <td>1182353</td>\n",
       "      <td>10070</td>\n",
       "      <td>10172</td>\n",
       "      <td>10313</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>M3_01 Wolf 2021</td>\n",
       "      <td>9637</td>\n",
       "      <td>10653</td>\n",
       "      <td>808651</td>\n",
       "      <td>9916</td>\n",
       "      <td>10044</td>\n",
       "      <td>10295</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>M3_02 Love 2021</td>\n",
       "      <td>9684</td>\n",
       "      <td>10714</td>\n",
       "      <td>917027</td>\n",
       "      <td>9975</td>\n",
       "      <td>10097</td>\n",
       "      <td>10325</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>M3_03 Bear 2021</td>\n",
       "      <td>9637</td>\n",
       "      <td>10576</td>\n",
       "      <td>766502</td>\n",
       "      <td>9914</td>\n",
       "      <td>10026</td>\n",
       "      <td>10230</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>M3_04 Elf 2021</td>\n",
       "      <td>9686</td>\n",
       "      <td>10678</td>\n",
       "      <td>944323</td>\n",
       "      <td>9992</td>\n",
       "      <td>10102</td>\n",
       "      <td>10323</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>M3_05 Viper 2021</td>\n",
       "      <td>9701</td>\n",
       "      <td>10753</td>\n",
       "      <td>956484</td>\n",
       "      <td>9998</td>\n",
       "      <td>10106</td>\n",
       "      <td>10300</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>M3_06 Magic 2021</td>\n",
       "      <td>9681</td>\n",
       "      <td>10632</td>\n",
       "      <td>869262</td>\n",
       "      <td>9974</td>\n",
       "      <td>10082</td>\n",
       "      <td>10278</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>M3_07 Griffin 2021</td>\n",
       "      <td>9669</td>\n",
       "      <td>10633</td>\n",
       "      <td>856103</td>\n",
       "      <td>9958</td>\n",
       "      <td>10067</td>\n",
       "      <td>10287</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>M3_08 Draconid 2021</td>\n",
       "      <td>9681</td>\n",
       "      <td>10767</td>\n",
       "      <td>911273</td>\n",
       "      <td>9954</td>\n",
       "      <td>10079</td>\n",
       "      <td>10281</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>M3_09 Dryad 2021</td>\n",
       "      <td>9688</td>\n",
       "      <td>10809</td>\n",
       "      <td>940655</td>\n",
       "      <td>9968</td>\n",
       "      <td>10078</td>\n",
       "      <td>10250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>M3_10 Cat 2021</td>\n",
       "      <td>9614</td>\n",
       "      <td>10366</td>\n",
       "      <td>719696</td>\n",
       "      <td>9879</td>\n",
       "      <td>9974</td>\n",
       "      <td>10083</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>M3_11 Mahakam 2021</td>\n",
       "      <td>9725</td>\n",
       "      <td>10580</td>\n",
       "      <td>1017256</td>\n",
       "      <td>10012</td>\n",
       "      <td>10122</td>\n",
       "      <td>10252</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>M3_12 Wild Hunt 2021</td>\n",
       "      <td>9735</td>\n",
       "      <td>10714</td>\n",
       "      <td>1044941</td>\n",
       "      <td>10032</td>\n",
       "      <td>10140</td>\n",
       "      <td>10271</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>M4_01 Wolf 2022</td>\n",
       "      <td>9646</td>\n",
       "      <td>10684</td>\n",
       "      <td>883881</td>\n",
       "      <td>9941</td>\n",
       "      <td>10093</td>\n",
       "      <td>10340</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>9638</td>\n",
       "      <td>10652</td>\n",
       "      <td>852867</td>\n",
       "      <td>9932</td>\n",
       "      <td>10068</td>\n",
       "      <td>10312</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  season  min_mmr  max_mmr  num_matches  top500_cutoff  \\\n",
       "0        M2_01 Wolf 2020     2407    10484       699496           9749   \n",
       "1        M2_02 Love 2020     7776    10537       769172           9832   \n",
       "2        M2_03 Bear 2020     9427    10669       862283           9867   \n",
       "3         M2_04 Elf 2020     9666    10751      1004603           9952   \n",
       "4       M2_05 Viper 2020     9635    10622       859640           9910   \n",
       "5       M2_06 Magic 2020     9624    10597       793013           9896   \n",
       "6     M2_07 Griffin 2020     9698    10667       996516           9978   \n",
       "7    M2_08 Draconid 2020     9666    10546       837545           9946   \n",
       "8       M2_09 Dryad 2020     9678    10725       854593           9946   \n",
       "9         M2_10 Cat 2020     9703    10804       928845           9977   \n",
       "10    M2_11 Mahakam 2020     9706    10783       983150          10000   \n",
       "11  M2_12 Wild Hunt 2020     9756    10724      1182353          10070   \n",
       "12       M3_01 Wolf 2021     9637    10653       808651           9916   \n",
       "13       M3_02 Love 2021     9684    10714       917027           9975   \n",
       "14       M3_03 Bear 2021     9637    10576       766502           9914   \n",
       "15        M3_04 Elf 2021     9686    10678       944323           9992   \n",
       "16      M3_05 Viper 2021     9701    10753       956484           9998   \n",
       "17      M3_06 Magic 2021     9681    10632       869262           9974   \n",
       "18    M3_07 Griffin 2021     9669    10633       856103           9958   \n",
       "19   M3_08 Draconid 2021     9681    10767       911273           9954   \n",
       "20      M3_09 Dryad 2021     9688    10809       940655           9968   \n",
       "21        M3_10 Cat 2021     9614    10366       719696           9879   \n",
       "22    M3_11 Mahakam 2021     9725    10580      1017256          10012   \n",
       "23  M3_12 Wild Hunt 2021     9735    10714      1044941          10032   \n",
       "24       M4_01 Wolf 2022     9646    10684       883881           9941   \n",
       "25       M4_02 Love 2022     9638    10652       852867           9932   \n",
       "\n",
       "    top200_cutoff  top64_cutoff  \n",
       "0            9872         10061  \n",
       "1            9952         10117  \n",
       "2            9995         10204  \n",
       "3           10087         10293  \n",
       "4           10028         10255  \n",
       "5           10002         10191  \n",
       "6           10100         10289  \n",
       "7           10061         10246  \n",
       "8           10046         10183  \n",
       "9           10067         10176  \n",
       "10          10090         10216  \n",
       "11          10172         10313  \n",
       "12          10044         10295  \n",
       "13          10097         10325  \n",
       "14          10026         10230  \n",
       "15          10102         10323  \n",
       "16          10106         10300  \n",
       "17          10082         10278  \n",
       "18          10067         10287  \n",
       "19          10079         10281  \n",
       "20          10078         10250  \n",
       "21           9974         10083  \n",
       "22          10122         10252  \n",
       "23          10140         10271  \n",
       "24          10093         10340  \n",
       "25          10068         10312  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "per_season_df = full_df.groupby(['season']).agg(\n",
    "    min_mmr     = pd.NamedAgg('mmr', 'min'),\n",
    "    max_mmr     = pd.NamedAgg('mmr', 'max'),\n",
    "    num_matches = pd.NamedAgg('matches', 'sum')\n",
    ").reset_index()\n",
    "\n",
    "top500_cutoffs = full_df[full_df['rank'] == 500][['season', 'mmr']].rename(columns={'mmr': 'top500_cutoff'})\n",
    "top200_cutoffs = full_df[full_df['rank'] == 200][['season', 'mmr']].rename(columns={'mmr': 'top200_cutoff'})\n",
    "top64_cutoffs  = full_df[full_df['rank'] == 64][['season', 'mmr']].rename(columns={'mmr': 'top64_cutoff'})\n",
    "\n",
    "per_season_df = pd.merge(per_season_df, top500_cutoffs, on='season')\n",
    "per_season_df = pd.merge(per_season_df, top200_cutoffs, on='season')\n",
    "per_season_df = pd.merge(per_season_df, top64_cutoffs, on='season')\n",
    "per_season_df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Player Summaries\n",
    "\n",
    "We can summerize player stats over all seasons easily using the code below. We'll store the number of times they were on pro ladder, the worst, best and average MMR scores, ..."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>country</th>\n",
       "      <th>appearances</th>\n",
       "      <th>min_mmr</th>\n",
       "      <th>mean_mmr</th>\n",
       "      <th>max_mmr</th>\n",
       "      <th>mean_matches</th>\n",
       "      <th>num_matches</th>\n",
       "      <th>best_rank</th>\n",
       "      <th>best_national_rank</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>$Lancer$</td>\n",
       "      <td>China</td>\n",
       "      <td>2</td>\n",
       "      <td>9710</td>\n",
       "      <td>9710</td>\n",
       "      <td>9792</td>\n",
       "      <td>545.50</td>\n",
       "      <td>1091</td>\n",
       "      <td>2011</td>\n",
       "      <td>272.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>$rBuRn$</td>\n",
       "      <td>Spain</td>\n",
       "      <td>2</td>\n",
       "      <td>9767</td>\n",
       "      <td>9767</td>\n",
       "      <td>9831</td>\n",
       "      <td>225.00</td>\n",
       "      <td>450</td>\n",
       "      <td>1176</td>\n",
       "      <td>29.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>$r_BuRn$</td>\n",
       "      <td>Spain</td>\n",
       "      <td>4</td>\n",
       "      <td>9701</td>\n",
       "      <td>9701</td>\n",
       "      <td>9848</td>\n",
       "      <td>123.75</td>\n",
       "      <td>495</td>\n",
       "      <td>968</td>\n",
       "      <td>25.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>++++Gwynbleidd++++</td>\n",
       "      <td>Poland</td>\n",
       "      <td>1</td>\n",
       "      <td>9803</td>\n",
       "      <td>9803</td>\n",
       "      <td>9803</td>\n",
       "      <td>231.00</td>\n",
       "      <td>231</td>\n",
       "      <td>1482</td>\n",
       "      <td>122.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>+Nightshadow+</td>\n",
       "      <td>Ukraine</td>\n",
       "      <td>1</td>\n",
       "      <td>9677</td>\n",
       "      <td>9677</td>\n",
       "      <td>9677</td>\n",
       "      <td>365.00</td>\n",
       "      <td>365</td>\n",
       "      <td>2261</td>\n",
       "      <td>187.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 name  country  appearances  min_mmr  mean_mmr  max_mmr  \\\n",
       "0            $Lancer$    China            2     9710      9710     9792   \n",
       "1             $rBuRn$    Spain            2     9767      9767     9831   \n",
       "2            $r_BuRn$    Spain            4     9701      9701     9848   \n",
       "3  ++++Gwynbleidd++++   Poland            1     9803      9803     9803   \n",
       "4       +Nightshadow+  Ukraine            1     9677      9677     9677   \n",
       "\n",
       "   mean_matches  num_matches  best_rank  best_national_rank  \n",
       "0        545.50         1091       2011               272.0  \n",
       "1        225.00          450       1176                29.0  \n",
       "2        123.75          495        968                25.0  \n",
       "3        231.00          231       1482               122.0  \n",
       "4        365.00          365       2261               187.0  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "player_summaries = full_df.groupby(['name', 'country']).agg(\n",
    "    appearances = pd.NamedAgg('matches', 'count'),\n",
    "    min_mmr     = pd.NamedAgg('mmr', 'min'),\n",
    "    mean_mmr     = pd.NamedAgg('mmr', 'min'),\n",
    "    max_mmr     = pd.NamedAgg('mmr', 'max'),\n",
    "    mean_matches = pd.NamedAgg('matches', 'mean'),\n",
    "    num_matches = pd.NamedAgg('matches', 'sum'),\n",
    "    best_rank = pd.NamedAgg('rank', 'min'),\n",
    "    best_national_rank = pd.NamedAgg('national_rank', 'min'),\n",
    ").reset_index()\n",
    "\n",
    "player_summaries.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>name</th>\n",
       "      <th>country</th>\n",
       "      <th>appearances</th>\n",
       "      <th>min_mmr</th>\n",
       "      <th>mean_mmr</th>\n",
       "      <th>max_mmr</th>\n",
       "      <th>mean_matches</th>\n",
       "      <th>num_matches</th>\n",
       "      <th>best_rank</th>\n",
       "      <th>best_national_rank</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>21691</th>\n",
       "      <td>sepro</td>\n",
       "      <td>Belgium</td>\n",
       "      <td>7</td>\n",
       "      <td>9724</td>\n",
       "      <td>9724</td>\n",
       "      <td>9903</td>\n",
       "      <td>259.142857</td>\n",
       "      <td>1814</td>\n",
       "      <td>680</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        name  country  appearances  min_mmr  mean_mmr  max_mmr  mean_matches  \\\n",
       "21691  sepro  Belgium            7     9724      9724     9903    259.142857   \n",
       "\n",
       "       num_matches  best_rank  best_national_rank  \n",
       "21691         1814        680                 2.0  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "player_summaries[player_summaries['name'] == 'sepro']"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Get population for each country\n",
    "\n",
    "To calculate the number of pro players per capita we'll need the number of inhabitants per country. On Kaggle a dataset was available for 2019. As this doesn't differ much from one year to another (and those will only result into changes several digits behind the decimal point it) the 2019 list is enough for our purpose. The code below loads the data, and generates country names from the two character ISO code using the same package as used before to avoid mismatches.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "could not convert nan\n"
     ]
    }
   ],
   "source": [
    "# data from kaggle https://www.kaggle.com/erikbruin/countries-of-the-world-iso-codes-and-population/data#\n",
    "# the number are indicated very strangely in the csv with a decimal point and need to be multiplied with 1000\n",
    "\n",
    "population_df = pd.read_csv('./data/countries_by_population_2019.csv')[['name', 'pop2019']]\n",
    "country_codes = pd.read_csv('./data/country_codes_2020.csv')[['name', 'cca2']]\n",
    "population_df = pd.merge(population_df, country_codes, how='left', left_on='name', right_on='name')\n",
    "\n",
    "population_df['country'] = population_df['cca2'].apply(lambda x: convert_to_full(x))\n",
    "population_df['pop2019'] = (population_df['pop2019'] * 1000).astype(int)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>season</th>\n",
       "      <th>country</th>\n",
       "      <th>total_matches</th>\n",
       "      <th>num_players</th>\n",
       "      <th>pop2019</th>\n",
       "      <th>pro_players_per_million</th>\n",
       "      <th>matches_per_player</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>Algeria</td>\n",
       "      <td>250</td>\n",
       "      <td>1</td>\n",
       "      <td>43053054.0</td>\n",
       "      <td>0.023227</td>\n",
       "      <td>250.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>Argentina</td>\n",
       "      <td>1480</td>\n",
       "      <td>4</td>\n",
       "      <td>44780677.0</td>\n",
       "      <td>0.089324</td>\n",
       "      <td>370.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>Armenia</td>\n",
       "      <td>546</td>\n",
       "      <td>1</td>\n",
       "      <td>2957731.0</td>\n",
       "      <td>0.338097</td>\n",
       "      <td>546.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>Australia</td>\n",
       "      <td>6193</td>\n",
       "      <td>27</td>\n",
       "      <td>25203198.0</td>\n",
       "      <td>1.071293</td>\n",
       "      <td>229.370370</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>Austria</td>\n",
       "      <td>3981</td>\n",
       "      <td>16</td>\n",
       "      <td>8955102.0</td>\n",
       "      <td>1.786691</td>\n",
       "      <td>248.812500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2313</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>20656</td>\n",
       "      <td>71</td>\n",
       "      <td>67530172.0</td>\n",
       "      <td>1.051382</td>\n",
       "      <td>290.929577</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2314</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>United States</td>\n",
       "      <td>47946</td>\n",
       "      <td>161</td>\n",
       "      <td>329064917.0</td>\n",
       "      <td>0.489265</td>\n",
       "      <td>297.801242</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2315</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>Uzbekistan</td>\n",
       "      <td>1116</td>\n",
       "      <td>5</td>\n",
       "      <td>32981716.0</td>\n",
       "      <td>0.151599</td>\n",
       "      <td>223.200000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2316</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>Viet Nam</td>\n",
       "      <td>2277</td>\n",
       "      <td>10</td>\n",
       "      <td>96462106.0</td>\n",
       "      <td>0.103668</td>\n",
       "      <td>227.700000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2317</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>Zimbabwe</td>\n",
       "      <td>199</td>\n",
       "      <td>1</td>\n",
       "      <td>14645468.0</td>\n",
       "      <td>0.068281</td>\n",
       "      <td>199.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>2318 rows × 7 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "               season         country  total_matches  num_players  \\\n",
       "0     M2_01 Wolf 2020         Algeria            250            1   \n",
       "1     M2_01 Wolf 2020       Argentina           1480            4   \n",
       "2     M2_01 Wolf 2020         Armenia            546            1   \n",
       "3     M2_01 Wolf 2020       Australia           6193           27   \n",
       "4     M2_01 Wolf 2020         Austria           3981           16   \n",
       "...               ...             ...            ...          ...   \n",
       "2313  M4_02 Love 2022  United Kingdom          20656           71   \n",
       "2314  M4_02 Love 2022   United States          47946          161   \n",
       "2315  M4_02 Love 2022      Uzbekistan           1116            5   \n",
       "2316  M4_02 Love 2022        Viet Nam           2277           10   \n",
       "2317  M4_02 Love 2022        Zimbabwe            199            1   \n",
       "\n",
       "          pop2019  pro_players_per_million  matches_per_player  \n",
       "0      43053054.0                 0.023227          250.000000  \n",
       "1      44780677.0                 0.089324          370.000000  \n",
       "2       2957731.0                 0.338097          546.000000  \n",
       "3      25203198.0                 1.071293          229.370370  \n",
       "4       8955102.0                 1.786691          248.812500  \n",
       "...           ...                      ...                 ...  \n",
       "2313   67530172.0                 1.051382          290.929577  \n",
       "2314  329064917.0                 0.489265          297.801242  \n",
       "2315   32981716.0                 0.151599          223.200000  \n",
       "2316   96462106.0                 0.103668          227.700000  \n",
       "2317   14645468.0                 0.068281          199.000000  \n",
       "\n",
       "[2318 rows x 7 columns]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Lets do some counts per season and country\n",
    "national_stats = full_df.groupby(['season','country']).agg(\n",
    "    total_matches = pd.NamedAgg('matches', 'sum'),\n",
    "    num_players = pd.NamedAgg('name', 'count')\n",
    ").reset_index()\n",
    "\n",
    "# Add the population (from 2019) to each country and calculate the number of pro players (top 2860) per million inhabitants\n",
    "national_stats = pd.merge(national_stats, population_df.drop(['name', 'cca2'], axis=1), on='country', how='left')\n",
    "national_stats['pro_players_per_million'] = (national_stats['num_players']*1000000)/national_stats['pop2019']\n",
    "national_stats['matches_per_player'] = national_stats['total_matches']/national_stats['num_players']\n",
    "\n",
    "national_stats"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Adding popularity metrics\n",
    "\n",
    "\n",
    "We can now calculate the number of pro players per million inhabitants, this could be a proxy for Gwent's popularity in \n",
    "different parts of the world"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>season</th>\n",
       "      <th>country</th>\n",
       "      <th>total_matches</th>\n",
       "      <th>num_players</th>\n",
       "      <th>pro_players_per_million</th>\n",
       "      <th>matches_per_player</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>657</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Poland</td>\n",
       "      <td>72225</td>\n",
       "      <td>267</td>\n",
       "      <td>7.047129</td>\n",
       "      <td>270.505618</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>617</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Estonia</td>\n",
       "      <td>1726</td>\n",
       "      <td>7</td>\n",
       "      <td>5.280436</td>\n",
       "      <td>246.571429</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>660</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Russian Federation</td>\n",
       "      <td>195594</td>\n",
       "      <td>672</td>\n",
       "      <td>4.606770</td>\n",
       "      <td>291.062500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>599</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Belarus</td>\n",
       "      <td>10261</td>\n",
       "      <td>39</td>\n",
       "      <td>4.125931</td>\n",
       "      <td>263.102564</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>677</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Ukraine</td>\n",
       "      <td>52333</td>\n",
       "      <td>162</td>\n",
       "      <td>3.682351</td>\n",
       "      <td>323.043210</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>626</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Iceland</td>\n",
       "      <td>307</td>\n",
       "      <td>1</td>\n",
       "      <td>2.949583</td>\n",
       "      <td>307.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>618</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Finland</td>\n",
       "      <td>4053</td>\n",
       "      <td>14</td>\n",
       "      <td>2.530659</td>\n",
       "      <td>289.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>624</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Hong Kong</td>\n",
       "      <td>5154</td>\n",
       "      <td>18</td>\n",
       "      <td>2.420606</td>\n",
       "      <td>286.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>644</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Malta</td>\n",
       "      <td>229</td>\n",
       "      <td>1</td>\n",
       "      <td>2.270807</td>\n",
       "      <td>229.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>610</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Croatia</td>\n",
       "      <td>2247</td>\n",
       "      <td>9</td>\n",
       "      <td>2.179016</td>\n",
       "      <td>249.666667</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  season             country  total_matches  num_players  \\\n",
       "657  M2_08 Draconid 2020              Poland          72225          267   \n",
       "617  M2_08 Draconid 2020             Estonia           1726            7   \n",
       "660  M2_08 Draconid 2020  Russian Federation         195594          672   \n",
       "599  M2_08 Draconid 2020             Belarus          10261           39   \n",
       "677  M2_08 Draconid 2020             Ukraine          52333          162   \n",
       "626  M2_08 Draconid 2020             Iceland            307            1   \n",
       "618  M2_08 Draconid 2020             Finland           4053           14   \n",
       "624  M2_08 Draconid 2020           Hong Kong           5154           18   \n",
       "644  M2_08 Draconid 2020               Malta            229            1   \n",
       "610  M2_08 Draconid 2020             Croatia           2247            9   \n",
       "\n",
       "     pro_players_per_million  matches_per_player  \n",
       "657                 7.047129          270.505618  \n",
       "617                 5.280436          246.571429  \n",
       "660                 4.606770          291.062500  \n",
       "599                 4.125931          263.102564  \n",
       "677                 3.682351          323.043210  \n",
       "626                 2.949583          307.000000  \n",
       "618                 2.530659          289.500000  \n",
       "624                 2.420606          286.333333  \n",
       "644                 2.270807          229.000000  \n",
       "610                 2.179016          249.666667  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "top_popularity = national_stats[national_stats['season'] == 'M2_08 Draconid 2020'].sort_values('pro_players_per_million', ascending=False).drop(columns=['pop2019'])\n",
    "top_popularity.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "An alternative way is to find places where the die-hard players are located, those that play the most games per season on average."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>season</th>\n",
       "      <th>country</th>\n",
       "      <th>total_matches</th>\n",
       "      <th>num_players</th>\n",
       "      <th>pro_players_per_million</th>\n",
       "      <th>matches_per_player</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>611</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Cyprus</td>\n",
       "      <td>1217</td>\n",
       "      <td>2</td>\n",
       "      <td>1.668648</td>\n",
       "      <td>608.500000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>608</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Congo, The Democratic Republic of the</td>\n",
       "      <td>570</td>\n",
       "      <td>1</td>\n",
       "      <td>0.011522</td>\n",
       "      <td>570.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>593</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Armenia</td>\n",
       "      <td>1002</td>\n",
       "      <td>2</td>\n",
       "      <td>0.676194</td>\n",
       "      <td>501.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>631</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Ireland</td>\n",
       "      <td>2988</td>\n",
       "      <td>6</td>\n",
       "      <td>1.228880</td>\n",
       "      <td>498.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>596</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Azerbaijan</td>\n",
       "      <td>1447</td>\n",
       "      <td>3</td>\n",
       "      <td>0.298575</td>\n",
       "      <td>482.333333</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>654</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Panama</td>\n",
       "      <td>453</td>\n",
       "      <td>1</td>\n",
       "      <td>0.235491</td>\n",
       "      <td>453.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>669</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Switzerland</td>\n",
       "      <td>2580</td>\n",
       "      <td>6</td>\n",
       "      <td>0.698376</td>\n",
       "      <td>430.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>603</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Bulgaria</td>\n",
       "      <td>5516</td>\n",
       "      <td>13</td>\n",
       "      <td>1.857111</td>\n",
       "      <td>424.307692</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>662</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Serbia</td>\n",
       "      <td>4074</td>\n",
       "      <td>10</td>\n",
       "      <td>1.139960</td>\n",
       "      <td>407.400000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>656</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Philippines</td>\n",
       "      <td>5347</td>\n",
       "      <td>14</td>\n",
       "      <td>0.129490</td>\n",
       "      <td>381.928571</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  season                                country  \\\n",
       "611  M2_08 Draconid 2020                                 Cyprus   \n",
       "608  M2_08 Draconid 2020  Congo, The Democratic Republic of the   \n",
       "593  M2_08 Draconid 2020                                Armenia   \n",
       "631  M2_08 Draconid 2020                                Ireland   \n",
       "596  M2_08 Draconid 2020                             Azerbaijan   \n",
       "654  M2_08 Draconid 2020                                 Panama   \n",
       "669  M2_08 Draconid 2020                            Switzerland   \n",
       "603  M2_08 Draconid 2020                               Bulgaria   \n",
       "662  M2_08 Draconid 2020                                 Serbia   \n",
       "656  M2_08 Draconid 2020                            Philippines   \n",
       "\n",
       "     total_matches  num_players  pro_players_per_million  matches_per_player  \n",
       "611           1217            2                 1.668648          608.500000  \n",
       "608            570            1                 0.011522          570.000000  \n",
       "593           1002            2                 0.676194          501.000000  \n",
       "631           2988            6                 1.228880          498.000000  \n",
       "596           1447            3                 0.298575          482.333333  \n",
       "654            453            1                 0.235491          453.000000  \n",
       "669           2580            6                 0.698376          430.000000  \n",
       "603           5516           13                 1.857111          424.307692  \n",
       "662           4074           10                 1.139960          407.400000  \n",
       "656           5347           14                 0.129490          381.928571  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "top_popularity = national_stats[national_stats['season'] == 'M2_08 Draconid 2020'].sort_values('matches_per_player', ascending=False).drop(columns=['pop2019'])\n",
    "top_popularity.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Creating a ranking of nations\n",
    "\n",
    "We'll select countries with three or more playing in the pro rank, select the top 3 players and calculate the average MMR and total MMR for the top\n",
    "3 players of the country.\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>season</th>\n",
       "      <th>country</th>\n",
       "      <th>rank</th>\n",
       "      <th>name</th>\n",
       "      <th>matches</th>\n",
       "      <th>mmr</th>\n",
       "      <th>previous_top500</th>\n",
       "      <th>national_rank</th>\n",
       "      <th>efficiency</th>\n",
       "      <th>lei</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>Argentina</td>\n",
       "      <td>174</td>\n",
       "      <td>Srchenko</td>\n",
       "      <td>393</td>\n",
       "      <td>9887</td>\n",
       "      <td>no</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.730280</td>\n",
       "      <td>14.477235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>Argentina</td>\n",
       "      <td>471</td>\n",
       "      <td>eltroco</td>\n",
       "      <td>339</td>\n",
       "      <td>9758</td>\n",
       "      <td>no</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.466077</td>\n",
       "      <td>8.581382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>Argentina</td>\n",
       "      <td>846</td>\n",
       "      <td>MorgannFreemann</td>\n",
       "      <td>208</td>\n",
       "      <td>9660</td>\n",
       "      <td>no</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.288462</td>\n",
       "      <td>4.160251</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>Australia</td>\n",
       "      <td>135</td>\n",
       "      <td>whitecool</td>\n",
       "      <td>404</td>\n",
       "      <td>9914</td>\n",
       "      <td>no</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.777228</td>\n",
       "      <td>15.622084</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>Australia</td>\n",
       "      <td>211</td>\n",
       "      <td>Pacifier3750</td>\n",
       "      <td>453</td>\n",
       "      <td>9863</td>\n",
       "      <td>no</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.580574</td>\n",
       "      <td>12.356818</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73410</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>Uzbekistan</td>\n",
       "      <td>1045</td>\n",
       "      <td>Frag.</td>\n",
       "      <td>156</td>\n",
       "      <td>9781</td>\n",
       "      <td>no</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.160256</td>\n",
       "      <td>14.491598</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73411</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>Uzbekistan</td>\n",
       "      <td>1119</td>\n",
       "      <td>SMEAGOOL</td>\n",
       "      <td>180</td>\n",
       "      <td>9770</td>\n",
       "      <td>no</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.944444</td>\n",
       "      <td>12.671052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73414</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>Viet Nam</td>\n",
       "      <td>655</td>\n",
       "      <td>HaoThien</td>\n",
       "      <td>325</td>\n",
       "      <td>9868</td>\n",
       "      <td>no</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.824615</td>\n",
       "      <td>14.865965</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73415</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>Viet Nam</td>\n",
       "      <td>940</td>\n",
       "      <td>captainstef</td>\n",
       "      <td>401</td>\n",
       "      <td>9800</td>\n",
       "      <td>no</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.498753</td>\n",
       "      <td>9.987523</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73416</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>Viet Nam</td>\n",
       "      <td>1381</td>\n",
       "      <td>__cAnh</td>\n",
       "      <td>292</td>\n",
       "      <td>9734</td>\n",
       "      <td>no</td>\n",
       "      <td>3.0</td>\n",
       "      <td>0.458904</td>\n",
       "      <td>7.841757</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>4956 rows × 10 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                season     country  rank             name  matches   mmr  \\\n",
       "0      M2_01 Wolf 2020   Argentina   174         Srchenko      393  9887   \n",
       "1      M2_01 Wolf 2020   Argentina   471          eltroco      339  9758   \n",
       "2      M2_01 Wolf 2020   Argentina   846  MorgannFreemann      208  9660   \n",
       "4      M2_01 Wolf 2020   Australia   135        whitecool      404  9914   \n",
       "5      M2_01 Wolf 2020   Australia   211     Pacifier3750      453  9863   \n",
       "...                ...         ...   ...              ...      ...   ...   \n",
       "73410  M4_02 Love 2022  Uzbekistan  1045            Frag.      156  9781   \n",
       "73411  M4_02 Love 2022  Uzbekistan  1119         SMEAGOOL      180  9770   \n",
       "73414  M4_02 Love 2022    Viet Nam   655         HaoThien      325  9868   \n",
       "73415  M4_02 Love 2022    Viet Nam   940      captainstef      401  9800   \n",
       "73416  M4_02 Love 2022    Viet Nam  1381           __cAnh      292  9734   \n",
       "\n",
       "      previous_top500  national_rank  efficiency        lei  \n",
       "0                  no            1.0    0.730280  14.477235  \n",
       "1                  no            2.0    0.466077   8.581382  \n",
       "2                  no            3.0    0.288462   4.160251  \n",
       "4                  no            1.0    0.777228  15.622084  \n",
       "5                  no            2.0    0.580574  12.356818  \n",
       "...               ...            ...         ...        ...  \n",
       "73410              no            2.0    1.160256  14.491598  \n",
       "73411              no            3.0    0.944444  12.671052  \n",
       "73414              no            1.0    0.824615  14.865965  \n",
       "73415              no            2.0    0.498753   9.987523  \n",
       "73416              no            3.0    0.458904   7.841757  \n",
       "\n",
       "[4956 rows x 10 columns]"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "num_player_included = 3   # Here we define the number of players in Pro Ladder to be considered\n",
    "\n",
    "players_per_country = full_df.groupby(['season','country']).agg(\n",
    "    num_players = pd.NamedAgg('name', 'count')\n",
    ").reset_index()\n",
    "\n",
    "players_per_country = players_per_country[players_per_country['num_players'] >= num_player_included]\n",
    "\n",
    "top_per_country = pd.merge(players_per_country[['season', 'country']], full_df, how='inner', on=['season', 'country'])\n",
    "top_per_country = top_per_country[top_per_country['national_rank'] <= num_player_included]\n",
    "top_per_country"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>season</th>\n",
       "      <th>country</th>\n",
       "      <th>mean_mmr</th>\n",
       "      <th>total_mmr</th>\n",
       "      <th>mean_matches_per_player</th>\n",
       "      <th>total_matches</th>\n",
       "      <th>nation_rank</th>\n",
       "      <th>efficiency</th>\n",
       "      <th>lei</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>Argentina</td>\n",
       "      <td>9768.333333</td>\n",
       "      <td>29305</td>\n",
       "      <td>313.333333</td>\n",
       "      <td>940</td>\n",
       "      <td>34.0</td>\n",
       "      <td>0.537234</td>\n",
       "      <td>9.509700</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>Australia</td>\n",
       "      <td>9869.666667</td>\n",
       "      <td>29609</td>\n",
       "      <td>336.333333</td>\n",
       "      <td>1009</td>\n",
       "      <td>22.0</td>\n",
       "      <td>0.801784</td>\n",
       "      <td>14.704231</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>Austria</td>\n",
       "      <td>9905.666667</td>\n",
       "      <td>29717</td>\n",
       "      <td>392.333333</td>\n",
       "      <td>1177</td>\n",
       "      <td>19.0</td>\n",
       "      <td>0.779099</td>\n",
       "      <td>15.431938</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>Belarus</td>\n",
       "      <td>10064.333333</td>\n",
       "      <td>30193</td>\n",
       "      <td>390.000000</td>\n",
       "      <td>1170</td>\n",
       "      <td>11.0</td>\n",
       "      <td>1.190598</td>\n",
       "      <td>23.512432</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>Belgium</td>\n",
       "      <td>9901.000000</td>\n",
       "      <td>29703</td>\n",
       "      <td>415.666667</td>\n",
       "      <td>1247</td>\n",
       "      <td>21.0</td>\n",
       "      <td>0.724138</td>\n",
       "      <td>14.763655</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1647</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>Ukraine</td>\n",
       "      <td>10458.000000</td>\n",
       "      <td>31374</td>\n",
       "      <td>748.666667</td>\n",
       "      <td>2246</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.146037</td>\n",
       "      <td>31.357616</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1648</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>10223.666667</td>\n",
       "      <td>30671</td>\n",
       "      <td>692.000000</td>\n",
       "      <td>2076</td>\n",
       "      <td>12.0</td>\n",
       "      <td>0.901252</td>\n",
       "      <td>23.708249</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1649</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>United States</td>\n",
       "      <td>10201.333333</td>\n",
       "      <td>30604</td>\n",
       "      <td>562.666667</td>\n",
       "      <td>1688</td>\n",
       "      <td>14.0</td>\n",
       "      <td>1.068720</td>\n",
       "      <td>25.350684</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1650</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>Uzbekistan</td>\n",
       "      <td>9810.333333</td>\n",
       "      <td>29431</td>\n",
       "      <td>220.333333</td>\n",
       "      <td>661</td>\n",
       "      <td>53.0</td>\n",
       "      <td>0.954614</td>\n",
       "      <td>14.169940</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1651</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>Viet Nam</td>\n",
       "      <td>9800.666667</td>\n",
       "      <td>29402</td>\n",
       "      <td>339.333333</td>\n",
       "      <td>1018</td>\n",
       "      <td>56.0</td>\n",
       "      <td>0.591356</td>\n",
       "      <td>10.893363</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1652 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "               season         country      mean_mmr  total_mmr  \\\n",
       "0     M2_01 Wolf 2020       Argentina   9768.333333      29305   \n",
       "1     M2_01 Wolf 2020       Australia   9869.666667      29609   \n",
       "2     M2_01 Wolf 2020         Austria   9905.666667      29717   \n",
       "3     M2_01 Wolf 2020         Belarus  10064.333333      30193   \n",
       "4     M2_01 Wolf 2020         Belgium   9901.000000      29703   \n",
       "...               ...             ...           ...        ...   \n",
       "1647  M4_02 Love 2022         Ukraine  10458.000000      31374   \n",
       "1648  M4_02 Love 2022  United Kingdom  10223.666667      30671   \n",
       "1649  M4_02 Love 2022   United States  10201.333333      30604   \n",
       "1650  M4_02 Love 2022      Uzbekistan   9810.333333      29431   \n",
       "1651  M4_02 Love 2022        Viet Nam   9800.666667      29402   \n",
       "\n",
       "      mean_matches_per_player  total_matches  nation_rank  efficiency  \\\n",
       "0                  313.333333            940         34.0    0.537234   \n",
       "1                  336.333333           1009         22.0    0.801784   \n",
       "2                  392.333333           1177         19.0    0.779099   \n",
       "3                  390.000000           1170         11.0    1.190598   \n",
       "4                  415.666667           1247         21.0    0.724138   \n",
       "...                       ...            ...          ...         ...   \n",
       "1647               748.666667           2246          3.0    1.146037   \n",
       "1648               692.000000           2076         12.0    0.901252   \n",
       "1649               562.666667           1688         14.0    1.068720   \n",
       "1650               220.333333            661         53.0    0.954614   \n",
       "1651               339.333333           1018         56.0    0.591356   \n",
       "\n",
       "            lei  \n",
       "0      9.509700  \n",
       "1     14.704231  \n",
       "2     15.431938  \n",
       "3     23.512432  \n",
       "4     14.763655  \n",
       "...         ...  \n",
       "1647  31.357616  \n",
       "1648  23.708249  \n",
       "1649  25.350684  \n",
       "1650  14.169940  \n",
       "1651  10.893363  \n",
       "\n",
       "[1652 rows x 9 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "top_stats = top_per_country.groupby(['season','country']).agg(\n",
    "    mean_mmr                = pd.NamedAgg('mmr', 'mean'),\n",
    "    total_mmr               = pd.NamedAgg('mmr', 'sum'),\n",
    "    mean_matches_per_player = pd.NamedAgg('matches', 'mean'),\n",
    "    total_matches           = pd.NamedAgg('matches', 'sum')\n",
    ").reset_index()\n",
    "\n",
    "# Here we add the same ranking and effeciency statistics for national teams as for individual players\n",
    "top_stats['nation_rank'] = top_stats.groupby(['season'])[\"mean_mmr\"].rank(\"first\", ascending=False)\n",
    "top_stats['efficiency'] = ((top_stats['mean_mmr']-9600))/top_stats['mean_matches_per_player']\n",
    "top_stats['lei'] = ((top_stats['mean_mmr']-9600))/np.sqrt(top_stats['mean_matches_per_player'])\n",
    "\n",
    "top_stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>season</th>\n",
       "      <th>country</th>\n",
       "      <th>mean_mmr</th>\n",
       "      <th>total_mmr</th>\n",
       "      <th>mean_matches_per_player</th>\n",
       "      <th>total_matches</th>\n",
       "      <th>nation_rank</th>\n",
       "      <th>efficiency</th>\n",
       "      <th>lei</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>432</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>China</td>\n",
       "      <td>10489.333333</td>\n",
       "      <td>31468</td>\n",
       "      <td>409.000000</td>\n",
       "      <td>1227</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.174409</td>\n",
       "      <td>43.974703</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>466</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Russian Federation</td>\n",
       "      <td>10479.666667</td>\n",
       "      <td>31439</td>\n",
       "      <td>636.000000</td>\n",
       "      <td>1908</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.383124</td>\n",
       "      <td>34.881052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>463</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Poland</td>\n",
       "      <td>10439.333333</td>\n",
       "      <td>31318</td>\n",
       "      <td>657.000000</td>\n",
       "      <td>1971</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.277524</td>\n",
       "      <td>32.745512</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>440</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Germany</td>\n",
       "      <td>10399.666667</td>\n",
       "      <td>31199</td>\n",
       "      <td>440.000000</td>\n",
       "      <td>1320</td>\n",
       "      <td>4.0</td>\n",
       "      <td>1.817424</td>\n",
       "      <td>38.122613</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>450</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Japan</td>\n",
       "      <td>10345.000000</td>\n",
       "      <td>31035</td>\n",
       "      <td>708.666667</td>\n",
       "      <td>2126</td>\n",
       "      <td>5.0</td>\n",
       "      <td>1.051270</td>\n",
       "      <td>27.985642</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>446</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Iran, Islamic Republic of</td>\n",
       "      <td>9787.000000</td>\n",
       "      <td>29361</td>\n",
       "      <td>264.000000</td>\n",
       "      <td>792</td>\n",
       "      <td>60.0</td>\n",
       "      <td>0.708333</td>\n",
       "      <td>11.509054</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>431</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Chile</td>\n",
       "      <td>9782.666667</td>\n",
       "      <td>29348</td>\n",
       "      <td>310.000000</td>\n",
       "      <td>930</td>\n",
       "      <td>61.0</td>\n",
       "      <td>0.589247</td>\n",
       "      <td>10.374770</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>469</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Singapore</td>\n",
       "      <td>9772.666667</td>\n",
       "      <td>29318</td>\n",
       "      <td>325.000000</td>\n",
       "      <td>975</td>\n",
       "      <td>62.0</td>\n",
       "      <td>0.531282</td>\n",
       "      <td>9.577823</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>483</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Venezuela, Bolivarian Republic of</td>\n",
       "      <td>9766.000000</td>\n",
       "      <td>29298</td>\n",
       "      <td>271.333333</td>\n",
       "      <td>814</td>\n",
       "      <td>63.0</td>\n",
       "      <td>0.611794</td>\n",
       "      <td>10.077586</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>471</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>Slovenia</td>\n",
       "      <td>9743.666667</td>\n",
       "      <td>29231</td>\n",
       "      <td>273.333333</td>\n",
       "      <td>820</td>\n",
       "      <td>64.0</td>\n",
       "      <td>0.525610</td>\n",
       "      <td>8.689799</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>64 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                  season                            country      mean_mmr  \\\n",
       "432  M2_08 Draconid 2020                              China  10489.333333   \n",
       "466  M2_08 Draconid 2020                 Russian Federation  10479.666667   \n",
       "463  M2_08 Draconid 2020                             Poland  10439.333333   \n",
       "440  M2_08 Draconid 2020                            Germany  10399.666667   \n",
       "450  M2_08 Draconid 2020                              Japan  10345.000000   \n",
       "..                   ...                                ...           ...   \n",
       "446  M2_08 Draconid 2020          Iran, Islamic Republic of   9787.000000   \n",
       "431  M2_08 Draconid 2020                              Chile   9782.666667   \n",
       "469  M2_08 Draconid 2020                          Singapore   9772.666667   \n",
       "483  M2_08 Draconid 2020  Venezuela, Bolivarian Republic of   9766.000000   \n",
       "471  M2_08 Draconid 2020                           Slovenia   9743.666667   \n",
       "\n",
       "     total_mmr  mean_matches_per_player  total_matches  nation_rank  \\\n",
       "432      31468               409.000000           1227          1.0   \n",
       "466      31439               636.000000           1908          2.0   \n",
       "463      31318               657.000000           1971          3.0   \n",
       "440      31199               440.000000           1320          4.0   \n",
       "450      31035               708.666667           2126          5.0   \n",
       "..         ...                      ...            ...          ...   \n",
       "446      29361               264.000000            792         60.0   \n",
       "431      29348               310.000000            930         61.0   \n",
       "469      29318               325.000000            975         62.0   \n",
       "483      29298               271.333333            814         63.0   \n",
       "471      29231               273.333333            820         64.0   \n",
       "\n",
       "     efficiency        lei  \n",
       "432    2.174409  43.974703  \n",
       "466    1.383124  34.881052  \n",
       "463    1.277524  32.745512  \n",
       "440    1.817424  38.122613  \n",
       "450    1.051270  27.985642  \n",
       "..          ...        ...  \n",
       "446    0.708333  11.509054  \n",
       "431    0.589247  10.374770  \n",
       "469    0.531282   9.577823  \n",
       "483    0.611794  10.077586  \n",
       "471    0.525610   8.689799  \n",
       "\n",
       "[64 rows x 9 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "top_stats[top_stats['season'] == 'M2_08 Draconid 2020'].sort_values('nation_rank')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Export everything\n",
    "\n",
    "Lets exports all data to Excel files so they can be further analysed without pandas/python. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "full_df.to_excel('./output/player_stats.xlsx')\n",
    "national_stats.to_excel('./output/national_stats.xlsx')\n",
    "per_season_df.to_excel('./output/seasonal_stats.xlsx')\n",
    "player_summaries.to_excel('./output/player_summaries.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>season</th>\n",
       "      <th>previous_top500</th>\n",
       "      <th>count</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>M2_02 Love 2020</td>\n",
       "      <td>yes</td>\n",
       "      <td>304</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>M2_03 Bear 2020</td>\n",
       "      <td>yes</td>\n",
       "      <td>313</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>M2_04 Elf 2020</td>\n",
       "      <td>yes</td>\n",
       "      <td>294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>M2_05 Viper 2020</td>\n",
       "      <td>yes</td>\n",
       "      <td>317</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>M2_06 Magic 2020</td>\n",
       "      <td>yes</td>\n",
       "      <td>314</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>M2_07 Griffin 2020</td>\n",
       "      <td>yes</td>\n",
       "      <td>277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>M2_08 Draconid 2020</td>\n",
       "      <td>yes</td>\n",
       "      <td>299</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>M2_09 Dryad 2020</td>\n",
       "      <td>yes</td>\n",
       "      <td>264</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>M2_10 Cat 2020</td>\n",
       "      <td>yes</td>\n",
       "      <td>277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>M2_11 Mahakam 2020</td>\n",
       "      <td>yes</td>\n",
       "      <td>266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>M2_12 Wild Hunt 2020</td>\n",
       "      <td>yes</td>\n",
       "      <td>242</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>M3_01 Wolf 2021</td>\n",
       "      <td>yes</td>\n",
       "      <td>267</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>M3_02 Love 2021</td>\n",
       "      <td>yes</td>\n",
       "      <td>286</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>M3_03 Bear 2021</td>\n",
       "      <td>yes</td>\n",
       "      <td>286</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>M3_04 Elf 2021</td>\n",
       "      <td>yes</td>\n",
       "      <td>266</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>M3_05 Viper 2021</td>\n",
       "      <td>yes</td>\n",
       "      <td>284</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>M3_06 Magic 2021</td>\n",
       "      <td>yes</td>\n",
       "      <td>270</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>M3_07 Griffin 2021</td>\n",
       "      <td>yes</td>\n",
       "      <td>257</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>M3_08 Draconid 2021</td>\n",
       "      <td>yes</td>\n",
       "      <td>230</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>M3_09 Dryad 2021</td>\n",
       "      <td>yes</td>\n",
       "      <td>258</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>M3_10 Cat 2021</td>\n",
       "      <td>yes</td>\n",
       "      <td>210</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>M3_11 Mahakam 2021</td>\n",
       "      <td>yes</td>\n",
       "      <td>233</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>M3_12 Wild Hunt 2021</td>\n",
       "      <td>yes</td>\n",
       "      <td>249</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>M4_01 Wolf 2022</td>\n",
       "      <td>yes</td>\n",
       "      <td>299</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>M4_02 Love 2022</td>\n",
       "      <td>yes</td>\n",
       "      <td>275</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  season previous_top500  count\n",
       "2        M2_02 Love 2020             yes    304\n",
       "4        M2_03 Bear 2020             yes    313\n",
       "6         M2_04 Elf 2020             yes    294\n",
       "8       M2_05 Viper 2020             yes    317\n",
       "10      M2_06 Magic 2020             yes    314\n",
       "12    M2_07 Griffin 2020             yes    277\n",
       "14   M2_08 Draconid 2020             yes    299\n",
       "16      M2_09 Dryad 2020             yes    264\n",
       "18        M2_10 Cat 2020             yes    277\n",
       "20    M2_11 Mahakam 2020             yes    266\n",
       "22  M2_12 Wild Hunt 2020             yes    242\n",
       "24       M3_01 Wolf 2021             yes    267\n",
       "26       M3_02 Love 2021             yes    286\n",
       "28       M3_03 Bear 2021             yes    286\n",
       "30        M3_04 Elf 2021             yes    266\n",
       "32      M3_05 Viper 2021             yes    284\n",
       "34      M3_06 Magic 2021             yes    270\n",
       "36    M3_07 Griffin 2021             yes    257\n",
       "38   M3_08 Draconid 2021             yes    230\n",
       "40      M3_09 Dryad 2021             yes    258\n",
       "42        M3_10 Cat 2021             yes    210\n",
       "44    M3_11 Mahakam 2021             yes    233\n",
       "46  M3_12 Wild Hunt 2021             yes    249\n",
       "48       M4_01 Wolf 2022             yes    299\n",
       "50       M4_02 Love 2022             yes    275"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "bracket = 500\n",
    "\n",
    "returning = full_df[full_df['rank'] <= bracket].groupby(['season', 'previous_top500']).agg(\n",
    "    count = pd.NamedAgg('name', 'count')\n",
    ").reset_index()\n",
    "returning = returning[returning.previous_top500 == 'yes']\n",
    "returning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "54.695999999999984"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "returning['%'] = returning['count'] * 100/bracket\n",
    "np.mean(returning['%'])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}