# Scraping all expert draft rankings

First scrape the FantasyPros (FP) expert list, then use that list to scrape each expert's rankings.

In [1]:
import datetime
import os

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
# Run-level configuration shared by the scraping cells.
# Scoring format to scrape; the URL-selection cell recognises 'ppr' and 'standard'.
SCORING_TYPE = 'ppr'
# Date stamp (YYYY_MM_DD) embedded in the output file names.
TODAYS_DATE = "{:%Y_%m_%d}".format(datetime.date.today())

## Scrape the FP expert list

In [4]:
# Map each supported scoring format to its FantasyPros cheat-sheet page.
FP_CHEATSHEET_URLS = {
    'ppr': 'https://www.fantasypros.com/nfl/rankings/ppr-cheatsheets.php',
    'standard': 'https://www.fantasypros.com/nfl/rankings/consensus-cheatsheets.php',
}
if SCORING_TYPE not in FP_CHEATSHEET_URLS:
    # Fail with a clear message instead of a NameError on an undefined fp_url.
    raise ValueError(
        "Unsupported SCORING_TYPE {!r}; expected one of {}".format(
            SCORING_TYPE, sorted(FP_CHEATSHEET_URLS)))
fp_url = FP_CHEATSHEET_URLS[SCORING_TYPE]

# A timeout keeps the cell from hanging indefinitely; raise_for_status surfaces
# HTTP errors here rather than as a missing table in a later cell.
r = requests.get(fp_url, timeout=30)
r.raise_for_status()

In [5]:
# Parse the cheat-sheet HTML and locate the expert table (<table id="experts">).
soup = BeautifulSoup(r.text, 'lxml')
expert_table = soup.find('table', {'id': 'experts'})
if expert_table is None:
    # soup.find returns None when nothing matches; stop here with a clear message
    # instead of an AttributeError in the row-parsing cell.
    raise RuntimeError("Could not find the 'experts' table at {}".format(r.url))

In [6]:
# Build one record per expert from the rows of the experts table.
experts = []
for tr in expert_table.find_all('tr'):  # find_all replaces the deprecated findAll alias
    columns = tr.find_all('td')
    if not columns:
        # Rows without <td> cells carry no expert data.
        continue
    experts.append({
        # The expert id is the value of the <input> in the first cell.
        'expert_id': columns[0].find('input').get('value'),
        'name': columns[1].text.strip(),
        'source': columns[2].text.strip(),
        # Trim whitespace before removing '#', so a padded "#104" is cleaned too.
        'in_season_rank': columns[3].text.strip().strip('#').strip(),
        'draft_rank': columns[4].text.strip().strip('#').strip(),
        'date': columns[5].text.strip(),
    })
print(len(experts))
print(experts[:5])

87
[{'expert_id': '3', 'source': 'ESPN', 'draft_rank': '108', 'in_season_rank': '104', 'name': 'Eric Karabell', 'date': '2017-08-23 12:00:008/23'}, {'expert_id': '5', 'source': 'ESPN', 'draft_rank': '', 'in_season_rank': '', 'name': 'Staff Composite', 'date': '2017-08-23 12:00:008/23'}, {'expert_id': '7', 'source': 'Yahoo! Sports', 'draft_rank': '69', 'in_season_rank': '65', 'name': 'Andy Behrens', 'date': '2017-08-25 10:51:138/25'}, {'expert_id': '9', 'source': 'Yahoo! Sports', 'draft_rank': '28', 'in_season_rank': '66', 'name': 'Scott Pianowski', 'date': '2017-08-25 02:21:318/24'}, {'expert_id': '15', 'source': 'ScoutFantasy', 'draft_rank': '76', 'in_season_rank': '8', 'name': 'Staff Rankings', 'date': '2017-08-23 07:47:338/23'}]


In [7]:
# Tabulate the scraped expert records (one row per expert) and preview the first five.
expert_df = pd.DataFrame(data=experts)
print(expert_df.head(n=5))

 date draft_rank expert_id in_season_rank \
0 2017-08-23 12:00:008/23 108 3 104 
1 2017-08-23 12:00:008/23 5 
2 2017-08-25 10:51:138/25 69 7 65 
3 2017-08-25 02:21:318/24 28 9 66 
4 2017-08-23 07:47:338/23 76 15 8 

 name source 
0 Eric Karabell ESPN 
1 Staff Composite ESPN 
2 Andy Behrens Yahoo! Sports 
3 Scott Pianowski Yahoo! Sports 
4 Staff Rankings ScoutFantasy 


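The `date` field is messy: a timestamp and a short date are fused together (e.g. `2017-08-23 12:00:008/23`), so we keep only the leading `YYYY-MM-DD` part.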

In [8]:
# Keep only the text before the first space, i.e. the leading YYYY-MM-DD date.
expert_df['date'] = expert_df['date'].str.split(' ').str[0]
print(expert_df.head(n=5))

 date draft_rank expert_id in_season_rank name \
0 2017-08-23 108 3 104 Eric Karabell 
1 2017-08-23 5 Staff Composite 
2 2017-08-25 69 7 65 Andy Behrens 
3 2017-08-25 28 9 66 Scott Pianowski 
4 2017-08-23 76 15 8 Staff Rankings 

 source 
0 ESPN 
1 ESPN 
2 Yahoo! Sports 
3 Yahoo! Sports 
4 ScoutFantasy 


In [19]:
# Save the expert list as a TSV named after the scoring type and today's date.
expert_list_file = 'data/fp_experts_{}_{}.tsv'.format(SCORING_TYPE, TODAYS_DATE)
# to_csv does not create missing directories, so make sure data/ exists first.
os.makedirs(os.path.dirname(expert_list_file), exist_ok=True)
expert_df.to_csv(expert_list_file, sep='\t', index=False)

## Scrape each individual FP expert ranking

In [10]:
# Pull the expert ids into a plain list to iterate over when scraping rankings.
expert_ids = expert_df.loc[:, 'expert_id'].tolist()
print(len(expert_ids))

87


Here's the function we'll use to scrape each expert's ranking.

In [11]:
def get_expert_rankings(expert_id, score_type, year="2017", timeout=30):
    """Scrape one expert's rankings from the FantasyPros staff-rankings widget.

    Args:
        expert_id: Expert id sent as the widget's ``id`` parameter and copied
            into every returned row.
        score_type: Value sent as the widget's ``scoring`` parameter.
            NOTE(review): the original inline comment listed "PPR or STD" while
            the notebook passes SCORING_TYPE ('ppr'); confirm which spellings
            the widget accepts.
        year: Season sent as the widget's ``year`` parameter. Defaults to
            "2017", the value that used to be hard-coded.
        timeout: Seconds to wait for the HTTP response before requests raises,
            so a stalled connection cannot hang the scraping loop.

    Returns:
        A list of dicts, one per table row that has ``<td>`` cells, with keys
        ``rank`` (int), ``player_name``, ``position``, ``team`` and
        ``expert_id``. The list is empty when the response has no ``<table>``.
    """
    payload = {
        "source": '2',
        "id": expert_id,
        "year": year,
        "position": "ALL",
        "scoring": score_type,
        "week": "0",
        "ajax": "true",
    }
    widget_url = "https://partners.fantasypros.com/external/widget/nfl-staff-rankings.php"
    r = requests.get(widget_url, params=payload, timeout=timeout)

    soup = BeautifulSoup(r.content, 'lxml')
    ranking_table = soup.find('table')
    if ranking_table is None:
        # No table in the response: report no rankings for this expert.
        return []

    return_rows = []
    for tr in ranking_table.find_all('tr'):  # find_all replaces the deprecated findAll
        columns = tr.find_all('td')
        if not columns:
            # Rows without <td> cells hold no ranking data.
            continue
        return_row = {}
        return_row['rank'] = int(columns[0].text.strip())
        return_row['player_name'] = columns[1].find('a').text
        rest = columns[1].find('small').text
        if "-" in rest:
            # The text before the first comma is "<position> - <team>".
            position_team = rest.split(',')[0]
            return_row['position'], return_row['team'] = position_team.split(" - ")
        else:
            # Entries without a dash are recorded as a D/ST whose team is the
            # listed name.
            return_row['position'] = 'D/ST'
            return_row['team'] = return_row['player_name']
        return_row['expert_id'] = expert_id
        return_rows.append(return_row)
    return return_rows

In [13]:
# Smoke-test the scraper on a single expert (id '7') and peek at the top three rows.
test_ranks = get_expert_rankings(expert_id='7', score_type=SCORING_TYPE)
print(test_ranks[0:3])

[{'player_name': 'David Johnson', 'expert_id': '7', 'position': 'RB', 'rank': 1, 'team': 'ARI'}, {'player_name': "Le'Veon Bell", 'expert_id': '7', 'position': 'RB', 'rank': 2, 'team': 'PIT'}, {'player_name': 'Antonio Brown', 'expert_id': '7', 'position': 'WR', 'rank': 3, 'team': 'PIT'}]


In [14]:
from tqdm import tqdm_notebook

In [15]:
# Scrape every expert in turn, accumulating all ranking rows in one flat list.
all_ranks = []
for expert_id in tqdm_notebook(expert_ids):
    all_ranks.extend(get_expert_rankings(expert_id, SCORING_TYPE))




In [16]:
# Combine every scraped row into one DataFrame, then show a preview and the row count.
rankings_df = pd.DataFrame(data=all_ranks)
print(rankings_df.head(n=5))
print(rankings_df.shape[0])

 expert_id player_name position rank team
0 3 David Johnson RB 1 ARI
1 3 Le'Veon Bell RB 2 PIT
2 3 Antonio Brown WR 3 PIT
3 3 Odell Beckham Jr. WR 4 NYG
4 3 Mike Evans WR 5 TB
21793


Across the 87 expert rankings, we've managed to put together a dataset of 21,793 player/expert/rank observations.

In [18]:
# Save all expert rankings as a TSV named after the scoring type and today's date.
expert_rankings_file = 'data/fp_rankings_{}_{}.tsv'.format(SCORING_TYPE, TODAYS_DATE)
# to_csv does not create missing directories, so make sure data/ exists first.
os.makedirs(os.path.dirname(expert_rankings_file), exist_ok=True)
rankings_df.to_csv(expert_rankings_file, sep='\t', index=False)