In [1]:
import pandas as pd
from bokeh.layouts import gridplot
from bokeh.plotting import figure, show, output_file
from bokeh.charts import Bar
import numpy as np
import bokeh.plotting as bk
bk.output_notebook()

In [2]:
# Column groups used to slice the match tables.

# Columns that identify a single match.
k_columns = ("Div", "Date", "HomeTeam", "AwayTeam")

# Match identity plus the score/result columns.  FTHG / FTAG are plotted
# further down as home- and away-team goals, FTR as the game result.
p_columns = k_columns + (
    "FTHG", "FTAG", "FTR",
    "HTHG", "HTAG", "HTR",
)

# Match identity plus the per-match statistics columns.
s_columns = k_columns + (
    "Attendance",
    "HS", "AS",
    "HST", "AST",
    "HC", "AC",
    "HF", "AF",
    "HY", "AY",
    "HR", "AR",
)

In [3]:
# Load the 2015/16 match tables, one CSV per division (SP1 and D1).
# The paths are relative to the notebook's working directory.
sp1_1516 = pd.read_csv("./data/SP1_1516.csv")
d1_1516 = pd.read_csv("./data/D1_1516.csv")

In [4]:
# Preview the score/result columns of the D1 table.
# `.loc` replaces the `DataFrame.ix` indexer (deprecated, then removed in
# pandas 1.0); list(...) makes the selection an explicit list of labels.
d1_1516.loc[:, list(p_columns)].head()

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR
0,D1,14/08/15,Bayern Munich,Hamburg,5,0,H,1,0,H
1,D1,15/08/15,Augsburg,Hertha,0,1,A,0,0,D
2,D1,15/08/15,Darmstadt,Hannover,2,2,D,1,0,H
3,D1,15/08/15,Dortmund,M'gladbach,4,0,H,3,0,H
4,D1,15/08/15,Leverkusen,Hoffenheim,2,1,H,1,1,D


In [5]:
# Preview the match-statistics columns of the D1 table.
# `reindex` replaces the removed `DataFrame.ix` indexer.  It is used instead
# of `.loc` because it fills any requested column the table does not provide
# with NaN rather than raising KeyError; the recorded output shows
# "Attendance" entirely empty, so this keeps the cell producing that result.
d1_1516.reindex(columns=list(s_columns)).head()

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,Attendance,HS,AS,HST,AST,HC,AC,HF,AF,HY,AY,HR,AR
0,D1,14/08/15,Bayern Munich,Hamburg,,23,5,9,1,7,0,10,12,2,2,0,0
1,D1,15/08/15,Augsburg,Hertha,,20,11,3,4,7,4,20,22,1,2,1,1
2,D1,15/08/15,Darmstadt,Hannover,,11,14,4,5,5,9,21,22,1,2,0,0
3,D1,15/08/15,Dortmund,M'gladbach,,17,5,7,1,3,5,13,14,0,1,0,0
4,D1,15/08/15,Leverkusen,Hoffenheim,,25,6,9,2,13,5,12,18,1,0,0,0


In [6]:
# Summary statistics for the score columns of the D1 table.
# `.loc` with an explicit label list replaces the removed `DataFrame.ix`.
d1_1516.loc[:, list(p_columns)].describe()

Unnamed: 0,FTHG,FTAG,HTHG,HTAG
count,306.0,306.0,306.0,306.0
mean,1.565359,1.264706,0.696078,0.54902
std,1.363689,1.130151,0.815295,0.676938
min,0.0,0.0,0.0,0.0
25%,1.0,0.0,0.0,0.0
50%,1.0,1.0,1.0,0.0
75%,2.0,2.0,1.0,1.0
max,6.0,5.0,4.0,3.0


In [7]:
# Summary statistics for the match-statistics columns of the D1 table.
# `reindex` replaces the removed `DataFrame.ix` indexer and, unlike `.loc`,
# yields an all-NaN column (count 0 in the recorded output for "Attendance")
# instead of raising KeyError when a requested column is not in the table.
d1_1516.reindex(columns=list(s_columns)).describe()

Unnamed: 0,Attendance,HS,AS,HST,AST,HC,AC,HF,AF,HY,AY,HR,AR
count,0.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0,306.0
mean,,14.035948,11.598039,5.173203,4.333333,5.287582,4.218954,14.183007,14.862745,1.79085,2.003268,0.052288,0.075163
std,,5.65587,4.557667,2.831799,2.337155,3.042159,2.413303,4.053637,4.290376,1.234231,1.299428,0.222971,0.264087
min,,1.0,2.0,0.0,0.0,0.0,0.0,4.0,2.0,0.0,0.0,0.0,0.0
25%,,10.0,8.0,3.0,3.0,3.0,2.25,11.0,12.0,1.0,1.0,0.0,0.0
50%,,14.0,11.0,5.0,4.0,5.0,4.0,14.0,15.0,2.0,2.0,0.0,0.0
75%,,17.0,15.0,7.0,6.0,7.0,6.0,17.0,18.0,3.0,3.0,0.0,0.0
max,,36.0,24.0,14.0,12.0,18.0,13.0,26.0,29.0,6.0,6.0,1.0,1.0


In [8]:
def create_plot(title, hist, edges, fill_color="#00BCD4"):
    """Build a histogram figure from pre-computed bins.

    Keyword arguments:
    title -- caption of the plot
    hist -- bar heights, one per bin (e.g. the first value returned by np.histogram)
    edges -- bin boundaries; edges[:-1] give the left sides and edges[1:] the right sides
    fill_color -- fill colour of the bars (default "#00BCD4")

    Returns the figure object; nothing is displayed here.
    """
    fig = figure(title=title, tools="save", background_fill_color="#E8DDCB")

    # One rectangle per bin, standing on the x-axis.
    left_sides, right_sides = edges[:-1], edges[1:]
    fig.quad(
        top=hist,
        bottom=0,
        left=left_sides,
        right=right_sides,
        fill_color=fill_color,
        line_color="#424242",
    )

    return fig

def show_single_plot(plot):
    """Show one plot in a single-column 400x400 grid without a toolbar.

    Keyword arguments:
    plot -- plot object
    """
    # gridplot's documented first argument is a list of children; passing the
    # bare plot only works with the variadic call form of old bokeh releases,
    # while a one-element list is accepted by both old and newer releases.
    grid = gridplot([plot], ncols=1, plot_width=400, plot_height=400, toolbar_location=None)
    show(grid)

In [9]:
# Creating plots
# Goal counts are integers, so use unit-wide bins centred on 0, 1, ..., max.
# Deriving the edges from the data (instead of a hard-coded bin count) keeps
# exactly one bar per goal count and puts each bar on its integer value.
bins_home = np.arange(d1_1516["FTHG"].max() + 2) - 0.5
hist_home, edges_home = np.histogram(d1_1516["FTHG"], bins=bins_home)
plot_d1_hg = create_plot("Bundesliga D1 15/16 Home Team Goal", hist_home, edges_home)

bins_away = np.arange(d1_1516["FTAG"].max() + 2) - 0.5
hist_away, edges_away = np.histogram(d1_1516["FTAG"], bins=bins_away)
# Title fixed: the original read "D115/16" (missing space).
plot_d1_ag = create_plot("Bundesliga D1 15/16 Away Team Goal", hist_away, edges_away)

In [10]:
# Goals
# Home and away goal histograms side by side.  The plots are passed as a
# list because that is gridplot's documented `children` argument; the
# variadic form only works on old bokeh releases.
show(gridplot([plot_d1_hg, plot_d1_ag], ncols=2, plot_width=400, plot_height=400))

In [11]:
# Number of matches per home-team goal count (FTHG).
# The title previously said "Game Results", which is the title of the FTR
# chart further down; this chart counts FTHG values, so it is labelled like
# the FTHG histogram.
p = Bar(d1_1516, "FTHG", values="FTHG", agg="count", title="Bundesliga D1 15/16 Home Team Goal", legend="")
show_single_plot(p)

# ポアソン分布

# $f(X;\lambda) = \frac{\lambda ^{X}}{X!}e^{-\lambda}$

In [12]:
# Poisson Distribution

# A locally seeded generator makes the figure reproducible on every run
# without touching numpy's global random state.
rng = np.random.RandomState(0)
samples = rng.poisson(lam=1.5, size=10000)

# One unit-wide bin per integer value, centred on 0, 1, ..., max.  The
# previous `bins=max(samples)` produced max bins for max + 1 distinct values,
# so the two largest values were merged into the last bar.
bins_dummy = np.arange(samples.max() + 2) - 0.5
hist_dummy, edges_dummy = np.histogram(samples, density=True, bins=bins_dummy)

p3 = create_plot("Poisson Distribution", hist_dummy, edges_dummy)
# Same single-plot layout as the other cells, via the shared helper.
show_single_plot(p3)

In [13]:
# Shots
# Histogram of the HS column of the D1 table in 15 equal-width bins.
# NOTE(review): HS holds integers (1..36 in the describe() output above), so
# 15 bins are not a whole number of shots wide -- confirm this is intended.
shot_counts = d1_1516["HS"]
hist_shot, edges_shot = np.histogram(shot_counts, bins=15)
plot_shot = create_plot(
    title="Bundesliga D1 15/16 Shots",
    hist=hist_shot,
    edges=edges_shot,
)
show_single_plot(plot_shot)

In [14]:
# Number of matches per FTR value (the game result) in the D1 table.
p = Bar(
    d1_1516,
    "FTR",
    values="FTR",
    agg="count",
    title="Bundesliga D1 15/16 Game Results",
)
show_single_plot(p)

In [15]:
# Creating plots
# One unit-wide bin per goal count, centred on 0, 1, ..., max.  The previous
# fixed `bins=10` covered home goals 0..10 (eleven values), so 9 and 10 goals
# shared the last bar; deriving the edges from the data avoids that.
bins_home = np.arange(sp1_1516["FTHG"].max() + 2) - 0.5
hist_home, edges_home = np.histogram(sp1_1516["FTHG"], bins=bins_home)
plot_sp1_hg = create_plot("Liga Española D1 15/16 Home Team Goal", hist_home, edges_home, "#FF9800")

bins_away = np.arange(sp1_1516["FTAG"].max() + 2) - 0.5
hist_away, edges_away = np.histogram(sp1_1516["FTAG"], bins=bins_away)
plot_sp1_ag = create_plot("Liga Española D1 15/16 Away Team Goal", hist_away, edges_away, "#FF9800")

# 2x2 comparison: Bundesliga on the first row, La Liga on the second.
# The plots are passed as a list, gridplot's documented `children` argument.
show(gridplot([plot_d1_hg, plot_d1_ag, plot_sp1_hg, plot_sp1_ag], ncols=2, plot_width=400, plot_height=400))

In [16]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the SP1 table.
sp1_1516.describe()

Unnamed: 0,FTHG,FTAG,HTHG,HTAG,HS,AS,HST,AST,HF,AF,...,BbAv<2.5,BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA
count,380.0,380.0,380.0,380.0,380.0,380.0,380.0,380.0,380.0,380.0,...,380.0,380.0,380.0,380.0,380.0,380.0,380.0,380.0,380.0,380.0
mean,1.618421,1.126316,0.742105,0.505263,13.236842,10.526316,4.863158,3.757895,13.602632,13.594737,...,2.095368,27.331579,-0.403289,1.994,1.935158,1.994263,1.936579,3.108974,4.817711,6.026421
std,1.450749,1.148518,0.899613,0.70988,4.784177,4.448099,2.670509,2.107736,4.290567,4.346955,...,0.746049,2.341673,1.033264,0.163487,0.152572,0.161801,0.150245,3.567728,3.080678,6.71219
min,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,4.0,2.0,...,1.45,21.0,-3.5,1.63,1.59,1.62,1.58,1.05,2.99,1.09
25%,1.0,0.0,0.0,0.0,10.0,7.0,3.0,2.0,11.0,11.0,...,1.66,26.0,-1.0,1.88,1.8375,1.88,1.83,1.5975,3.4175,2.64
50%,1.0,1.0,1.0,0.0,13.0,10.0,4.5,4.0,13.0,13.0,...,1.82,28.0,-0.25,1.98,1.92,1.99,1.93,2.12,3.69,3.85
75%,2.0,2.0,1.0,1.0,16.0,13.0,6.0,5.0,16.0,16.0,...,2.125,29.0,0.0625,2.09,2.02,2.08,2.02,2.915,4.525,6.39
max,10.0,8.0,5.0,4.0,31.0,28.0,15.0,11.0,29.0,27.0,...,6.22,33.0,2.75,2.55,2.44,2.55,2.42,35.5,24.5,46.0


In [17]:
# Matches in which either side scored more than `threshold` goals.
threshold = 6
high_scoring = (sp1_1516["FTHG"] > threshold) | (sp1_1516["FTAG"] > threshold)
# `.loc` with a boolean row mask and an explicit column list replaces the
# removed `DataFrame.ix` indexer and selects rows and columns in one step.
sp1_1516.loc[high_scoring, list(p_columns)]

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR
157,SP1,20/12/15,Real Madrid,Vallecano,10,2,H,4,2,H
272,SP1,05/03/16,Real Madrid,Celta,7,1,H,1,0,H
333,SP1,20/04/16,La Coruna,Barcelona,0,8,A,0,2,A


In [18]:
# Summary statistics of the score columns for Barcelona's home matches.
# `.loc` with a boolean row mask and an explicit column list replaces the
# removed `DataFrame.ix` indexer.
sp1_1516.loc[sp1_1516["HomeTeam"] == "Barcelona", list(p_columns)].describe()

Unnamed: 0,FTHG,FTAG,HTHG,HTAG
count,19.0,19.0,19.0,19.0
mean,3.526316,0.736842,1.210526,0.368421
std,1.806421,0.805682,1.031662,0.597265
min,1.0,0.0,0.0,0.0
25%,2.0,0.0,0.5,0.0
50%,4.0,1.0,1.0,0.0
75%,5.0,1.0,2.0,1.0
max,6.0,2.0,4.0,2.0


In [19]:
# Top five teams by total goals scored at home.
# Selecting the column before aggregating sums only FTHG (the original summed
# every column and then sliced with the removed `DataFrame.ix`), which also
# avoids aggregating the text columns.  [["FTHG"]] keeps a one-column frame.
(
    sp1_1516.groupby("HomeTeam")[["FTHG"]]
    .sum()
    .sort_values(by="FTHG", ascending=False)
    .head()
)

Unnamed: 0_level_0,FTHG
HomeTeam,Unnamed: 1_level_1
Real Madrid,70
Barcelona,67
Sevilla,38
Ath Bilbao,35
Ath Madrid,33


In [20]:
# Top five teams by total goals scored away.
# Selecting the column before aggregating sums only FTAG (the original summed
# every column and then sliced with the removed `DataFrame.ix`), which also
# avoids aggregating the text columns.  [["FTAG"]] keeps a one-column frame.
(
    sp1_1516.groupby("AwayTeam")[["FTAG"]]
    .sum()
    .sort_values(by="FTAG", ascending=False)
    .head()
)

Unnamed: 0_level_0,FTAG
AwayTeam,Unnamed: 1_level_1
Barcelona,45
Real Madrid,40
Ath Madrid,30
Ath Bilbao,23
Vallecano,23


In [21]:
# ranking = sp1_1516.groupby("HomeTeam").sum().ix[:, ("FTHG",)].sort_values(by="FTHG", ascending=False).index

In [22]:
# Total home goals (sum of FTHG) per home team in the SP1 table.
p = Bar(
    sp1_1516,
    label="HomeTeam",
    values="FTHG",
    agg="sum",
    title="Liga Española D1 15/16 HomeTeam Goals",
    height=400,
    plot_width=900,
    legend="",
)

show(p)