{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Getting stats for more players\n",
    "\n",
    "This is a quick version that will only check the top500 players from last season to estimate the total number of pro players. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from tqdm import tqdm\n",
    "import requests\n",
    "from bs4 import BeautifulSoup\n",
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>rank</th>\n",
       "      <th>name</th>\n",
       "      <th>country</th>\n",
       "      <th>matches</th>\n",
       "      <th>mmr</th>\n",
       "      <th>season</th>\n",
       "      <th>previous_top500</th>\n",
       "      <th>national_rank</th>\n",
       "      <th>efficiency</th>\n",
       "      <th>lei</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>kolemoen</td>\n",
       "      <td>Germany</td>\n",
       "      <td>431</td>\n",
       "      <td>10484</td>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>no</td>\n",
       "      <td>1</td>\n",
       "      <td>2.051044</td>\n",
       "      <td>42.580782</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>kams134</td>\n",
       "      <td>Poland</td>\n",
       "      <td>923</td>\n",
       "      <td>10477</td>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>no</td>\n",
       "      <td>1</td>\n",
       "      <td>0.950163</td>\n",
       "      <td>28.866807</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>TailBot</td>\n",
       "      <td>Poland</td>\n",
       "      <td>538</td>\n",
       "      <td>10472</td>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>no</td>\n",
       "      <td>2</td>\n",
       "      <td>1.620818</td>\n",
       "      <td>37.594590</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Pajabol</td>\n",
       "      <td>Poland</td>\n",
       "      <td>820</td>\n",
       "      <td>10471</td>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>no</td>\n",
       "      <td>3</td>\n",
       "      <td>1.062195</td>\n",
       "      <td>30.416639</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Adzikov</td>\n",
       "      <td>Poland</td>\n",
       "      <td>1105</td>\n",
       "      <td>10442</td>\n",
       "      <td>M2_01 Wolf 2020</td>\n",
       "      <td>no</td>\n",
       "      <td>4</td>\n",
       "      <td>0.761991</td>\n",
       "      <td>25.329753</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   rank      name  country  matches    mmr           season previous_top500  \\\n",
       "0     1  kolemoen  Germany      431  10484  M2_01 Wolf 2020              no   \n",
       "1     2   kams134   Poland      923  10477  M2_01 Wolf 2020              no   \n",
       "2     3   TailBot   Poland      538  10472  M2_01 Wolf 2020              no   \n",
       "3     4   Pajabol   Poland      820  10471  M2_01 Wolf 2020              no   \n",
       "4     5   Adzikov   Poland     1105  10442  M2_01 Wolf 2020              no   \n",
       "\n",
       "   national_rank  efficiency        lei  \n",
       "0              1    2.051044  42.580782  \n",
       "1              1    0.950163  28.866807  \n",
       "2              2    1.620818  37.594590  \n",
       "3              3    1.062195  30.416639  \n",
       "4              4    0.761991  25.329753  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read list of players\n",
    "players_df = pd.read_excel('./output/player_stats.xlsx').drop(columns=['Unnamed: 0'])\n",
    "players_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "./output/season_of_the_wolf_2020_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_love_2020_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_bear_2020_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_elf_2020_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_viper_2020_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_magic_2020_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_griffin_2020_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_draconid_2020_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_dryad_2020_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_cat_2020_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_mahakam_2020_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_wild_hunt_2020_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_wolf_2021_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_love_2021_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_bear_2021_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_elf_2021_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_viper_2021_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_magic_2021_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_griffin_2021_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_draconid_2021_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_dryad_2021_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_cat_2021_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_mahakam_2021_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_wild_hunt_2021_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_the_wolf_2022_extra.xlsx exists, loading file instead of downloading ...\n",
      "./output/season_of_love_2022_extra.xlsx exists, loading file instead of downloading ...\n"
     ]
    }
   ],
   "source": [
    "all_players = []\n",
    "seasons = [\n",
    "    ('M2_01 Wolf 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-wolf/1/1/{user}', './output/season_of_the_wolf_2020_extra.xlsx'),\n",
    "    ('M2_02 Love 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-love/1/1/{user}', './output/season_of_love_2020_extra.xlsx'),\n",
    "    ('M2_03 Bear 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-bear/1/1/{user}', './output/season_of_the_bear_2020_extra.xlsx'),\n",
    "    ('M2_04 Elf 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-elf/1/1/{user}', './output/season_of_the_elf_2020_extra.xlsx'),\n",
    "    ('M2_05 Viper 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-viper/1/1/{user}', './output/season_of_the_viper_2020_extra.xlsx'),\n",
    "    ('M2_06 Magic 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-magic/1/1/{user}', './output/season_of_magic_2020_extra.xlsx'),\n",
    "    ('M2_07 Griffin 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-griffin/1/1/{user}', './output/season_of_the_griffin_2020_extra.xlsx'),\n",
    "    ('M2_08 Draconid 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-draconid/1/1/{user}', './output/season_of_the_draconid_2020_extra.xlsx'),\n",
    "    ('M2_09 Dryad 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-dryad/1/1/{user}', './output/season_of_the_dryad_2020_extra.xlsx'),\n",
    "    ('M2_10 Cat 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-cat/1/1/{user}', './output/season_of_the_cat_2020_extra.xlsx'),\n",
    "    ('M2_11 Mahakam 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-mahakam/1/1/{user}', './output/season_of_the_mahakam_2020_extra.xlsx'),\n",
    "    ('M2_12 Wild Hunt 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-wild-hunt/1/1/{user}', './output/season_of_the_wild_hunt_2020_extra.xlsx'),\n",
    "    ('M3_01 Wolf 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-wolf/1/1/{user}', './output/season_of_the_wolf_2021_extra.xlsx'),\n",
    "    ('M3_02 Love 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-love/1/1/{user}', './output/season_of_love_2021_extra.xlsx'),\n",
    "    ('M3_03 Bear 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-bear/1/1/{user}', './output/season_of_the_bear_2021_extra.xlsx'),\n",
    "    ('M3_04 Elf 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-elf/1/1/{user}', './output/season_of_the_elf_2021_extra.xlsx'),\n",
    "    ('M3_05 Viper 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-viper/1/1/{user}', './output/season_of_the_viper_2021_extra.xlsx'),\n",
    "    ('M3_06 Magic 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-magic/1/1/{user}', './output/season_of_magic_2021_extra.xlsx'),\n",
    "    ('M3_07 Griffin 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-griffin/1/1/{user}', './output/season_of_griffin_2021_extra.xlsx'),\n",
    "    ('M3_08 Draconid 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-draconid/1/1/{user}', './output/season_of_the_draconid_2021_extra.xlsx'),\n",
    "    ('M3_09 Dryad 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-dryad/1/1/{user}', './output/season_of_the_dryad_2021_extra.xlsx'),\n",
    "    ('M3_10 Cat 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-cat/1/1/{user}', './output/season_of_the_cat_2021_extra.xlsx'),\n",
    "    ('M3_11 Mahakam 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-mahakam/1/1/{user}', './output/season_of_the_mahakam_2021_extra.xlsx'),\n",
    "    ('M3_12 Wild Hunt 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-wild-hunt/1/1/{user}', './output/season_of_the_wild_hunt_2021_extra.xlsx'),\n",
    "    ('M4_01 Wolf 2022', 'https://masters.playgwent.com/en/rankings/masters-4/season-of-the-wolf/1/1/{user}', './output/season_of_the_wolf_2022_extra.xlsx'),\n",
    "    ('M4_02 Love 2022', 'https://masters.playgwent.com/en/rankings/masters-4/season-of-the-love/1/1/{user}', './output/season_of_love_2022_extra.xlsx'),\n",
    "\n",
    "]\n",
    "\n",
    "for season, url_template, output_path in seasons:\n",
    "    if os.path.exists(output_path):\n",
    "        print(f\"{output_path} exists, loading file instead of downloading ...\")\n",
    "        df = pd.read_excel(output_path).drop(['Unnamed: 0'], axis=1)\n",
    "        all_players = players_df[(players_df.season == season) & (players_df['rank'] <=500)].name.unique()\n",
    "    else:\n",
    "        output = []\n",
    "        known_players = players_df[players_df.season == season].name.values\n",
    "        unknown_players = [n for n in all_players if n not in known_players]\n",
    "        \n",
    "        for player in tqdm(unknown_players):       \n",
    "            url = url_template.replace('{user}', str(player))\n",
    "            try:\n",
    "                r = requests.get(url)\n",
    "                soup = BeautifulSoup(r.text, 'html.parser')\n",
    "                rows = soup.find_all(\"div\", {\"class\": \"c-ranking__inner-frame-found\"})\n",
    "                for row in rows[:1]:\n",
    "                    flag = row.find(\"i\", {\"class\": \"flag-icon\"})[\"class\"][1]\n",
    "                    new_record = {\n",
    "                        'rank': int(row.find(\"div\", {\"class\": \"td-number\"}).text.strip()),\n",
    "                        'name': row.find(\"div\", {\"class\": \"td-nick\"}).text.strip(),\n",
    "                        'country': flag.replace('flag-icon-', '').upper(),\n",
    "                        'matches': int(row.find(\"div\", {\"class\": \"td-matches\"}).text.strip().replace(' matches', '')),\n",
    "                        'mmr': int(row.find(\"div\", {\"class\": \"td-mmr\"}).text.strip().replace(',', '')),\n",
    "                        'season': season\n",
    "                    }\n",
    "                    if 0 < new_record['matches']:\n",
    "                        output.append(new_record)\n",
    "            except:\n",
    "                pass\n",
    "            \n",
    "        df = pd.DataFrame(output).drop_duplicates()\n",
    "        df.to_excel(output_path)\n",
    "        all_players = players_df[(players_df.season == season) & (players_df['rank'] <= 500)].name.unique()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}