### European Soccer Leagues, Interactive Standings Visualization

Love and Live the Game!

1. Scrape the current table standings of each league (La Liga, Bundesliga, Premier League, Serie A, Ligue 1).
2. Visualize the standings of each league in various interactive plots (bubble_2d, bubble_3d, boxplot, density).

> To reproduce the plots you need an `api_key` to sign in to [Plotly](https://plot.ly/settings/api).


In [1]:
%%bash
whoami
date

Aziz
Wed Dec  2 19:10:12 EST 2015


<hr>
Standings labels

```
P: Games Played
W: Games Won
D: Games Drawn
L: Games Lost
GS: Goals Scored
GA: Goals Against
Diff: Goals Difference
Pts: Points
```

### **Preview tables (sample) data**

In [22]:
# Show the first rows of every league table, separated by a blank line.
for name, df in leagues.items():
    print(name)
    print(df.head())
    # A bare `print` is a no-op expression on Python 3; printing an empty
    # string emits the separating blank line on both Python 2 and 3.
    print('')

Premier League
        P  W  D  L  GS  GA  Diff  Pts
Team                                 
1-MCI  14  9  2  3  30  14    16   29
2-LEI  14  8  5  1  29  21     8   29
3-MUN  14  8  4  2  20  10    10   28
4-ARS  14  8  3  3  24  12    12   27
5-TOT  14  6  7  1  24  11    13   25

Bundesliga
        P   W  D  L  GS  GA  Diff  Pts
Team                                  
1-BAY  14  13  1  0  42   5    37   40
2-BVB  14  10  2  2  40  19    21   32
3-WOB  14   7  4  3  23  15     8   25
4-BMG  14   7  2  5  28  22     6   23
5-HER  14   7  2  5  18  17     1   23

Ligue 1
        P   W  D  L  GS  GA  Diff  Pts
Team                                  
1-PSG  16  13  3  0  37   8    29   42
2-CAE  16   9  2  5  19  16     3   29
3-ANG  16   7  6  3  14   9     5   27
4-LYO  16   7  5  4  21  14     7   26
5-NIC  16   7  4  5  30  19    11   25

Serie A
        P  W  D  L  GS  GA  Diff  Pts
Team                                 
1-NAP  14  9  4  1  26   9    17   31
2-INT  14  9  3  2  17   9     8   30

### Standings in an interactive `bubble_2d` plot

In [23]:
py.iplot_mpl(figs_2d[0])

In [24]:
py.iplot_mpl(figs_2d[1])

In [25]:
py.iplot_mpl(figs_2d[2])

In [26]:
py.iplot_mpl(figs_2d[3])

In [27]:
py.iplot_mpl(figs_2d[4])

### Standings in an interactive `bubble_3d` plot

In [28]:
py.iplot(figs_3d[0])

In [29]:
py.iplot(figs_3d[1])

In [30]:
py.iplot(figs_3d[2])

In [31]:
py.iplot(figs_3d[3])

In [32]:
py.iplot(figs_3d[4])

### League in an interactive `boxplot` plot

In [33]:
py.iplot(figs_box[0])

In [34]:
py.iplot(figs_box[1])

In [35]:
py.iplot(figs_box[2])

In [36]:
py.iplot(figs_box[3])

In [37]:
py.iplot(figs_box[4])

### League in an interactive `density` plot

In [43]:
py.iplot_mpl(figs_kde[0])

In [39]:
py.iplot_mpl(figs_kde[1])

In [40]:
py.iplot_mpl(figs_kde[2])

In [41]:
py.iplot_mpl(figs_kde[3])

In [42]:
py.iplot_mpl(figs_kde[4])

<hr>
# How?

## 1. Scrape the leagues' standings data into DataFrames

In [2]:
from bs4 import BeautifulSoup as bs
import requests
import pandas as pd

In [3]:
# League name -> goal.com page holding that league's standings table.
urls = {
    'La Liga': 'http://www.goal.com/en/tables/primera-divisi%C3%B3n/7',
    'Bundesliga': 'http://www.goal.com/en/tables/bundesliga/9?ICID=SP_TN_112',
    'Premier League': 'http://www.goal.com/en/tables/premier-league/8?ICID=TA',
    'Serie A': 'http://www.goal.com/en/tables/serie-a/13?ICID=SP_TN_114',
    'Ligue 1': 'http://www.goal.com/en/tables/ligue-1/16?ICID=SP_TN_114',
}

In [4]:
def scrape_table(url, timeout=30):
    """Download a league page and return its standings rows as string lists.

    Args:
        url: Address of the league standings page.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list with one entry per ``<tr>`` inside the ``<tbody>`` of the
        page's ``<table class="short">``. Each entry is the list of that
        row's non-empty cell texts, stripped and reduced to ASCII.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the page contains no matching standings table.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here with a clear HTTP error instead of later while parsing an
    # error page that has no standings table.
    response.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = bs(response.text, 'html.parser')
    table = soup.find('table', class_='short')
    standings = table.find('tbody') if table is not None else None
    if standings is None:
        raise ValueError('no standings table found at {}'.format(url))

    teams = []
    for row in standings.find_all('tr'):
        # Drop non-ASCII characters, then decode back to text: calling
        # str() on the encoded bytes would produce "b'...'" on Python 3.
        cells = [
            cell.text.strip().encode('ascii', 'ignore').decode('ascii')
            for cell in row.find_all('td')
        ]
        # Keep only the cells that actually carry a value.
        teams.append([cell for cell in cells if cell])

    return teams

In [5]:
def to_df(teams):
    """Create a DataFrame from the teams' standings lists.

    Args:
        teams: Iterable of rows, each a sequence of 18 values in the
            column order listed in ``cols`` below.

    Returns:
        A DataFrame with one row per team, indexed 0..n-1, holding the
        values exactly as given (no type conversion).

    Raises:
        ValueError: If a row does not have one value per column.
    """
    cols = ['pos', 'full_name', 'Team', 'PtsF', 'P', 'W', 'D', 'L',
            'WH', 'DH', 'LH', 'WA', 'DA', 'LA', 'GS', 'GA', 'Diff', 'Pts']

    rows = [list(team) for team in teams]
    # Check widths up front: the constructor would silently pad short rows
    # when a longer row is present, hiding a malformed scrape.
    for i, row in enumerate(rows):
        if len(row) != len(cols):
            raise ValueError(
                'row {} has {} values, expected {}'.format(
                    i, len(row), len(cols)))

    # Build the frame in one call instead of growing it row by row.
    return pd.DataFrame(rows, columns=cols)

In [6]:
def remove_cols(df):
    """Delete, in place, the columns the plots do not use.

    The frame passed in is modified directly and nothing is returned.
    A KeyError is raised if one of the columns is absent.
    """
    unwanted = ('pos', 'full_name', 'PtsF',
                'WH', 'WA', 'DH', 'DA', 'LH', 'LA')
    for name in unwanted:
        del df[name]

def apply_int(df):
    """Convert every column of ``df`` to integers and return ``df``.

    Each value is passed through ``int``; the columns are replaced on the
    frame that was passed in, which is also the object returned.
    """
    for column in df.columns:
        as_ints = df[column].map(int)
        df[column] = as_ints
    return df

In [7]:
def league_df(url):
    """Return the cleaned standings DataFrame for the league at ``url``.

    The page is scraped, loaded into a DataFrame, trimmed to the columns
    used for plotting, indexed by a '<position>-<team>' label and converted
    to integer values.
    """
    standings = to_df(scrape_table(url))

    # Prefix each team with its table position to form the row label.
    labels = []
    for position, team in zip(standings['pos'], standings['Team']):
        labels.append('{}-{}'.format(position, team))
    standings['Team'] = labels

    # Drops the unused columns on `standings` itself (returns nothing).
    remove_cols(standings)

    # Index by team label so only numeric columns remain, then cast them.
    return apply_int(standings.set_index('Team'))

#### Collect data in a dict as `{league : its_table_data_frame}`

In [8]:
# Scrape every league once and store its table under the league's name.
# The loop names `league`, `url` and `df` are kept because the commented-out
# test snippets further down read `df` and `league`.
leagues = dict()
for league in urls:
    url = urls[league]
    df = league_df(url)
    leagues[league] = df

In [9]:
print(leagues.keys())

['Premier League', 'Bundesliga', 'Ligue 1', 'Serie A', 'La Liga']


## 2. Generate interactive plots of the league tables (using Plotly)

In [10]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib
matplotlib.style.use('ggplot')

In [11]:
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
py.sign_in('username', 'api_key')

## Bubble_2D

In [12]:
# ref: https://plot.ly/python/matplotlib-to-plotly-tutorial/#Bubble-Charts

def bubble_2d(df, league='Soccer League'):
    """Draw a league table as a matplotlib bubble chart and return the figure.

    Points go on the x axis and goals scored on the y axis. Each bubble is
    coloured by goals scored, sized by sqrt(Pts**5) and labelled with the
    row's index value.

    Args:
        df: Table with at least 'Pts' and 'GS' columns.
        league: Text used as the plot title.
    """
    figure = plt.figure()            # a new figure for every league
    axes = figure.add_subplot(111)

    axes.set_xlabel('Points')
    axes.set_ylabel('Goals Scored')
    axes.set_title(league)

    points = df['Pts']
    goals = df['GS']
    axes.scatter(
        points,
        goals,
        c=goals,                     # colour scale follows goals scored
        s=np.sqrt(points**5),
        linewidths=2,
        edgecolor='w',
        alpha=0.6
    )

    # Write each row's label at the centre of its bubble.
    for team, row in df.iterrows():
        axes.text(
            row['Pts'],
            row['GS'],
            team,
            size=8,
            horizontalalignment='center'
        )
    return figure

# # Test
# fig = bubble_2d(df, league)
# py.iplot_mpl(fig)

#### Collect leagues' bubble_2d figures

In [19]:
# One bubble_2d figure per league, in the iteration order of `leagues`.
figs_2d = [bubble_2d(table, name) for name, table in leagues.items()]

## Bubble_3D

In [14]:
# https://plot.ly/~jorgesantos/402/cufflinks-bubble-3d-chart/

def bubble_3d(df, league='Soccer League'):
    """Build a Plotly 3-D bubble figure with one trace per table row.

    Goals against, goals scored and points are mapped to the x, y and z
    axes; each bubble's size is 1.5 times the row's points.

    Args:
        df: Table with 'GA', 'GS' and 'Pts' columns; the index value of
            each row is used as the trace name and hover text.
        league: Text used as the figure title.
    """
    def team_trace(team, score):
        # A single-point trace so that every team gets its own legend entry.
        bubble = go.Marker(
            line=go.Line(width=0.5),
            size=score['Pts'] * 1.5,
            symbol='dot'
        )
        return go.Scatter3d(
            x=score['GA'],
            y=score['GS'],
            z=score['Pts'],
            marker=bubble,
            opacity=0.7,
            mode='markers',
            name=team,
            text=team,
        )

    data = go.Data([team_trace(team, score) for team, score in df.iterrows()])

    scene = go.Scene(
        xaxis=go.XAxis(title='Goals Against (x)'),
        yaxis=go.YAxis(title='Goals Scored (y)'),
        zaxis=go.ZAxis(title='Points (z)'),
    )
    layout = go.Layout(scene=scene, title=league)

    return go.Figure(data=data, layout=layout)

#### Collect leagues' bubble_3d figures

In [15]:
# One bubble_3d figure per league, in the iteration order of `leagues`.
figs_3d = [bubble_3d(table, name) for name, table in leagues.items()]

## Boxplot

In [16]:
# ref: https://plot.ly/python/box-plots/

def boxplot(df, league='Soccer League'):
    """Build a Plotly figure with one box per column of ``df`` except 'P'.

    Args:
        df: League table; every column other than 'P' becomes a box trace
            named after the column.
        league: Text used as the figure title.

    Returns:
        The assembled ``go.Figure``.
    """
    # Compare column names by value: `is not 'P'` tests object identity,
    # which only matches when the interpreter happens to reuse the same
    # string object, and triggers a SyntaxWarning on Python 3.8+.
    traces = [
        go.Box(y=df[col].values, name=col)
        for col in df.columns
        if col != 'P'
    ]
    data = go.Data(traces)
    layout = go.Layout(
        title=league
    )
    fig = go.Figure(data=data, layout=layout)
    return fig

# # TEST
# fig = boxplot(df, 'La Liga')
# py.iplot(fig)

#### Collect leagues' boxplot figures

In [17]:
figs_box = []
for l, d in leagues.items():
    fig = boxplot(d, l)
    figs_box.append(fig)

## Densities

In [18]:
def density(df, league='Soccer League'):
    """Plot a kernel density estimate for every column of ``df`` except 'P'.

    Args:
        df: League table; all columns other than 'P' are plotted.
        league: Text used as the plot title. Defaults to the same
            placeholder title as the other plotting helpers.

    Returns:
        The matplotlib figure holding the density curves.
    """
    fig, ax = plt.subplots()
    # Compare column names by value: `is not 'P'` tests object identity,
    # which only matches when the interpreter happens to reuse the same
    # string object, and triggers a SyntaxWarning on Python 3.8+.
    # NOTE(review): 'P' is presumably left out because games played barely
    # varies within a league - confirm.
    cols = [c for c in df.columns if c != 'P']
    df[cols].plot(kind='kde', ax=ax, title=league)
    return fig

# # Test
# fig = density(df, league)
# py.iplot_mpl(fig)

#### Collect leagues' density figures

In [21]:
figs_kde = []
for l, d in leagues.items():
    fig = density(d, l)
    figs_kde.append(fig)