# %% [markdown]
# # Getting stats for more players
#
# This is a quick version: for every season it only looks up the top-500 players
# of the *previous* season that are not yet listed for the current season in
# `player_stats.xlsx`. The result is used to estimate the total number of pro players.

# %%
import os
from urllib.parse import quote

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm

# %%
# Configuration: everything a reader might want to tune.
PLAYER_STATS_PATH = './output/player_stats.xlsx'
TOP_N = 500            # rank cut-off used to pick the players carried over to the next season
REQUEST_TIMEOUT = 30   # seconds; without a timeout a stalled connection blocks the run forever

# Errors that mean "this single lookup failed". Anything else (notably
# KeyboardInterrupt) is deliberately allowed to propagate so a long scrape
# can still be interrupted.
LOOKUP_ERRORS = (
    requests.RequestException,
    AttributeError,
    TypeError,
    ValueError,
    IndexError,
    KeyError,
)

# %% [markdown]
# ## Load the known player stats

# %%
# Read list of players
players_df = pd.read_excel(PLAYER_STATS_PATH).drop(columns=['Unnamed: 0'])
players_df.head()

# %% [markdown]
# ## Seasons to process
#
# Each entry is `(season label, ranking URL template, cache file)`. The `{user}`
# placeholder in the URL is replaced by the player name.

# %%
# NOTE(review): the URL slugs and file names are not uniform
# (`season-of-love` vs `season-of-the-love`, `season-of-the-mahakam` vs
# `season-of-mahakam`, `season_of_the_griffin` vs `season_of_griffin`).
# They are kept exactly as found; confirm the slugs against the live site,
# since a wrong slug would presumably just produce no matches.
seasons = [
    ('M2_01 Wolf 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-wolf/1/1/{user}', './output/season_of_the_wolf_2020_extra.xlsx'),
    ('M2_02 Love 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-love/1/1/{user}', './output/season_of_love_2020_extra.xlsx'),
    ('M2_03 Bear 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-bear/1/1/{user}', './output/season_of_the_bear_2020_extra.xlsx'),
    ('M2_04 Elf 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-elf/1/1/{user}', './output/season_of_the_elf_2020_extra.xlsx'),
    ('M2_05 Viper 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-viper/1/1/{user}', './output/season_of_the_viper_2020_extra.xlsx'),
    ('M2_06 Magic 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-magic/1/1/{user}', './output/season_of_magic_2020_extra.xlsx'),
    ('M2_07 Griffin 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-griffin/1/1/{user}', './output/season_of_the_griffin_2020_extra.xlsx'),
    ('M2_08 Draconid 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-draconid/1/1/{user}', './output/season_of_the_draconid_2020_extra.xlsx'),
    ('M2_09 Dryad 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-dryad/1/1/{user}', './output/season_of_the_dryad_2020_extra.xlsx'),
    ('M2_10 Cat 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-cat/1/1/{user}', './output/season_of_the_cat_2020_extra.xlsx'),
    ('M2_11 Mahakam 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-mahakam/1/1/{user}', './output/season_of_the_mahakam_2020_extra.xlsx'),
    ('M2_12 Wild Hunt 2020', 'https://masters.playgwent.com/en/rankings/masters-2/season-of-the-wild-hunt/1/1/{user}', './output/season_of_the_wild_hunt_2020_extra.xlsx'),
    ('M3_01 Wolf 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-wolf/1/1/{user}', './output/season_of_the_wolf_2021_extra.xlsx'),
    ('M3_02 Love 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-love/1/1/{user}', './output/season_of_love_2021_extra.xlsx'),
    ('M3_03 Bear 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-bear/1/1/{user}', './output/season_of_the_bear_2021_extra.xlsx'),
    ('M3_04 Elf 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-elf/1/1/{user}', './output/season_of_the_elf_2021_extra.xlsx'),
    ('M3_05 Viper 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-viper/1/1/{user}', './output/season_of_the_viper_2021_extra.xlsx'),
    ('M3_06 Magic 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-magic/1/1/{user}', './output/season_of_magic_2021_extra.xlsx'),
    ('M3_07 Griffin 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-griffin/1/1/{user}', './output/season_of_griffin_2021_extra.xlsx'),
    ('M3_08 Draconid 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-draconid/1/1/{user}', './output/season_of_the_draconid_2021_extra.xlsx'),
    ('M3_09 Dryad 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-dryad/1/1/{user}', './output/season_of_the_dryad_2021_extra.xlsx'),
    ('M3_10 Cat 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-cat/1/1/{user}', './output/season_of_the_cat_2021_extra.xlsx'),
    ('M3_11 Mahakam 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-mahakam/1/1/{user}', './output/season_of_the_mahakam_2021_extra.xlsx'),
    ('M3_12 Wild Hunt 2021', 'https://masters.playgwent.com/en/rankings/masters-3/season-of-the-wild-hunt/1/1/{user}', './output/season_of_the_wild_hunt_2021_extra.xlsx'),
    ('M4_01 Wolf 2022', 'https://masters.playgwent.com/en/rankings/masters-4/season-of-the-wolf/1/1/{user}', './output/season_of_the_wolf_2022_extra.xlsx'),
    ('M4_02 Love 2022', 'https://masters.playgwent.com/en/rankings/masters-4/season-of-the-love/1/1/{user}', './output/season_of_love_2022_extra.xlsx'),
]

# %% [markdown]
# ## Helpers

# %%
def top_player_names(stats, season, top_n=TOP_N):
    """Return the unique names in `stats` ranked `top_n` or better in `season`.

    Parameters
    ----------
    stats : pandas.DataFrame
        Frame with at least the columns `season`, `rank` and `name`.
    season : str
        Season label to filter on (compared against `stats.season`).
    top_n : int
        Inclusive rank cut-off.

    Returns
    -------
    numpy.ndarray
        Unique player names, in order of first appearance.
    """
    in_top = (stats.season == season) & (stats['rank'] <= top_n)
    return stats[in_top].name.unique()


def parse_player_row(row, season):
    """Turn one "player found" ranking element into a record dict.

    Parameters
    ----------
    row : bs4.element.Tag
        The `div.c-ranking__inner-frame-found` element of a ranking page.
    season : str
        Season label stored verbatim in the record.

    Returns
    -------
    dict
        Keys `rank`, `name`, `country`, `matches`, `mmr`, `season`.

    Raises
    ------
    AttributeError, TypeError, ValueError, IndexError, KeyError
        If the element does not have the expected children or the numeric
        fields cannot be parsed.
    """
    # The second CSS class of the flag icon looks like 'flag-icon-xx'.
    # NOTE(review): this yields an upper-cased code, whereas `players_df.country`
    # holds full country names (e.g. 'Germany') - confirm how the two are reconciled.
    flag = row.find("i", {"class": "flag-icon"})["class"][1]
    return {
        'rank': int(row.find("div", {"class": "td-number"}).text.strip()),
        'name': row.find("div", {"class": "td-nick"}).text.strip(),
        'country': flag.replace('flag-icon-', '').upper(),
        'matches': int(row.find("div", {"class": "td-matches"}).text.strip().replace(' matches', '')),
        'mmr': int(row.find("div", {"class": "td-mmr"}).text.strip().replace(',', '')),
        'season': season,
    }


def fetch_player_record(player, season, url_template, timeout=REQUEST_TIMEOUT):
    """Download the ranking page of `player` and return the parsed record.

    Parameters
    ----------
    player : object
        Player name; converted with `str()` and percent-encoded before being
        substituted for `{user}` so that characters such as `/`, `?`, `#` or
        spaces cannot change the request path.
    season : str
        Season label stored in the record.
    url_template : str
        URL containing the literal placeholder `{user}`.
    timeout : float
        Seconds passed to `requests.get`.

    Returns
    -------
    dict or None
        The record, or `None` when the page contains no "found" element or
        the player has no matches in that season.

    Raises
    ------
    requests.RequestException
        On network problems or timeout.
    AttributeError, TypeError, ValueError, IndexError, KeyError
        If the page layout is not as expected (see `parse_player_row`).
    """
    url = url_template.replace('{user}', quote(str(player), safe=''))
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Only the first "found" element is relevant.
    row = soup.find("div", {"class": "c-ranking__inner-frame-found"})
    if row is None:
        return None

    record = parse_player_row(row, season)
    return record if record['matches'] > 0 else None


# %% [markdown]
# ## Download (or load cached) extra player stats per season
#
# Seasons are processed in order. For every season whose cache file is missing,
# the top players of the previously processed season that are not yet known for
# the current season are looked up one by one.

# %%
# Top-N names of the previously processed season; empty for the first season,
# so nothing is looked up for it.
all_players = []

for season, url_template, output_path in seasons:
    if os.path.exists(output_path):
        print(f"{output_path} exists, loading file instead of downloading ...")
        # errors='ignore': a cache written from an empty result may lack the index column.
        # `df` is not used further in this notebook; it is kept for interactive inspection.
        df = pd.read_excel(output_path).drop(columns=['Unnamed: 0'], errors='ignore')
    else:
        # A set gives constant-time membership tests instead of scanning an array per name.
        known_players = set(players_df[players_df.season == season].name)
        unknown_players = [n for n in all_players if n not in known_players]

        output = []
        failed = []
        for player in tqdm(unknown_players):
            try:
                record = fetch_player_record(player, season, url_template)
            except LOOKUP_ERRORS as err:
                # Best effort: remember the failure and carry on with the next player.
                failed.append((player, err))
                continue
            if record is not None:
                output.append(record)

        if failed:
            first_player, first_error = failed[0]
            print(
                f"{season}: {len(failed)} of {len(unknown_players)} lookups failed, "
                f"e.g. {first_player!r}: {first_error!r}"
            )

        df = pd.DataFrame(output).drop_duplicates()
        # The index column is written on purpose so the file layout matches the
        # existing cache files (read back with the 'Unnamed: 0' column dropped).
        df.to_excel(output_path)

    # The top players of this season are the candidates for the next one.
    all_players = top_player_names(players_df, season)