# Soccer analysis example



[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/movingpandas/movingpandas-examples/main?filepath=2-analysis-examples/soccer-game.ipynb)
[![IPYNB](https://img.shields.io/badge/view-ipynb-hotpink)](https://github.com/movingpandas/movingpandas-examples/blob/main/2-analysis-examples/soccer-game.ipynb)
[![HTML](https://img.shields.io/badge/view-html-green)](https://movingpandas.github.io/movingpandas-website/2-analysis-examples/soccer-game.html)

This tutorial uses data extracted from video footage of a soccer game that was published in https://github.com/Friends-of-Tracking-Data-FoTD/Last-Row


In [None]:
import numpy as np
import pandas as pd
import geopandas as gpd
import movingpandas as mpd
import shapely as shp
import holoviews as hv
import hvplot.pandas 
import matplotlib.pyplot as plt

from geopandas import GeoDataFrame, read_file
from shapely.geometry import Point, LineString, Polygon
from datetime import datetime, timedelta
from holoviews import opts, dim
from os.path import exists
from urllib.request import urlretrieve

import warnings
# Ignore every warning for the rest of the session
# (presumably to keep the notebook output uncluttered).
warnings.filterwarnings('ignore')

# Keyword arguments shared by the trajectory hvplot calls in this notebook.
hvplot_defaults = dict(
    line_width=5,
    frame_height=350,
    frame_width=700,
    colorbar=True,
    tiles=None,
    geo=False,
)

# Report the installed library versions (presumably MovingPandas and its
# dependencies), which helps when reproducing this notebook.
mpd.show_versions()

## Loading soccer dataset from GitHub


In [None]:
def get_file_from_url(url):
    """Download ``url`` into the working directory unless it is already there.

    The local file name is the last segment of the URL path; any query
    string or fragment is ignored so that it cannot end up in the name.
    An existing file of that name is reused as-is, so repeated notebook
    runs do not download the same data again.

    Parameters
    ----------
    url : str
        Address of the file to fetch.

    Returns
    -------
    str
        Name of the local file, relative to the working directory.

    Raises
    ------
    ValueError
        If the URL path does not end in a file name.
    """
    # Function-scope import: urlsplit is not among the notebook's
    # top-level imports.
    from urllib.parse import urlsplit

    file = urlsplit(url).path.rsplit('/', 1)[-1]
    if not file:
        raise ValueError(f'Cannot derive a file name from URL: {url!r}')
    # Only hit the network when there is no local copy yet.
    if not exists(file):
        urlretrieve(url, file)
    return file

def get_df_from_gh_url(url):
    """Fetch the CSV at ``url`` (reusing a local copy if present) and parse it.

    Returns the parsed contents as a pandas DataFrame.
    """
    return pd.read_csv(get_file_from_url(url))

In [None]:
# Positional data CSV from the Last-Row repository.
input_file = "https://raw.githubusercontent.com/Friends-of-Tracking-Data-FoTD/Last-Row/master/datasets/positional_data/liverpool_2019.csv"
# 'Unnamed: 0' is dropped right away (presumably a leftover index column
# from when the CSV was written).
df = get_df_from_gh_url(input_file).drop(columns=['Unnamed: 0'])
print(f'Number of records: {len(df)}')

In [None]:
# Preview the first rows of the raw table.
df.head()

From the metadata: 

> * play: the scoreline after the goal. The team who scored the goal is the one next to the brackets.
> * frame: the frame number for the current location. Data provided has 20 frames per second.
> * player: the id of the player. The id is consistent within a play but not between plays.
> * player_num: the player jersey number. This number is the official one, and did not change for Liverpool in 2019. You can check the corresponding names at this wikipedia link.
> * x, y: coordinates for the player/ball. Pitch coordinates go from 0 to 100 on each axis.
> * dx, dy: change in (x,y) coordinates from last frame to current frame
> * z: height, from 0 to 1.5 (only filled for the ball)
> * bgcolor: the main color for the team (used as background color)
> * edgecolor: the secondary color (used as edge color)

And according to https://en.wikipedia.org/wiki/Football_pitch 

> the preferred size for many professional teams' stadiums is 105 by 68 metres

In [None]:
# Distinct play labels; a play's position in this list is what
# to_timestamp uses to assign it a calendar day.
plays = list(df['play'].unique())

def to_timestamp(row, play_order=None):
    """Build a synthetic timestamp for one tracking record.

    Every play is placed on its own calendar day, starting at 12:00 on
    2019-01-01 for the first play, and the frame number is converted to an
    offset from that start at 20 frames per second.

    Parameters
    ----------
    row : object with ``play`` and ``frame`` attributes
        One record of the tracking table, e.g. a row passed in by
        ``DataFrame.apply(..., axis=1)``.
    play_order : sequence, optional
        Ordered play labels; a play's position selects its day. Defaults to
        the notebook-level ``plays`` list.

    Returns
    -------
    datetime.datetime
        12:00 on the play's day plus the frame offset.

    Raises
    ------
    ValueError
        If ``row.play`` is not contained in the play order.
    """
    if play_order is None:
        play_order = plays
    frames_per_second = 20
    first_start = datetime(2019, 1, 1, 12, 0, 0)
    # Offset by whole days instead of using the play index as day-of-month,
    # so more than 31 plays roll over into February instead of raising.
    play_start = first_start + timedelta(days=play_order.index(row.play))
    # Frames to elapsed time within the play.
    frame_offset = timedelta(milliseconds=1000 / frames_per_second * row.frame)
    return play_start + frame_offset

# Derive a timestamp per record (the data has 20 frames per second) and use
# it as the index.
df['time'] = df.apply(to_timestamp, axis=1)
df = df.set_index('time')

# Rescale the 0-100 pitch coordinates to metres. The preferred size for many
# professional teams' stadiums is 105 by 68 metres, according to
# https://en.wikipedia.org/wiki/Football_pitch
pitch_length = 105
pitch_width = 68
df['x'] = df['x'] / 100 * pitch_length
df['y'] = df['y'] / 100 * pitch_width

df

In [None]:
# Bar chart of the number of records per team.
df['team'].value_counts().plot.bar(title='team', figsize=(15, 3))

In [None]:
# Bar chart of the number of records per jersey number.
df['player_num'].value_counts().plot.bar(title='player_num', figsize=(15, 3))

In [None]:
# Store the identifier columns as ordered categoricals.
for column in ('team', 'player', 'player_num'):
    df[column] = df[column].astype('category').cat.as_ordered()

Finally, let's create trajectories:

## Trajectories


In [None]:
%%time
CRS = None
tc = mpd.TrajectoryCollection(df, 'player', x='x', y='y', crs=CRS)
mpd.TemporalSplitter(tc).split(mode="day")
print(f"Finished creating {len(tc)} trajectories")

In [None]:
# Rectangle spanning the whole pitch, with the origin in one corner.
pitch_corners = [(0, 0), (0, pitch_width), (pitch_length, pitch_width), (pitch_length, 0), (0, 0)]
pitch = Polygon(pitch_corners)
pitch_gdf = GeoDataFrame(pd.DataFrame([{'geometry': pitch, 'id': 1}]), crs=CRS)
# Semi-transparent white outline used as a backdrop for later plots.
plotted_pitch = pitch_gdf.hvplot(color='white', alpha=0.5)

In [None]:
# Overlay the trajectories of jersey number 20 on the pitch outline.
plotted_pitch * tc.filter('player_num', 20).hvplot(**hvplot_defaults)

## Plays

In [None]:
# Index into `plays` of the play inspected in the following cells.
PLAY = 2
# Trajectories belonging to the selected play.
play_trajs = tc.filter('play', plays[PLAY])
# Plot title reused by the charts below.
title = f'Play {PLAY} {plays[PLAY]}'
play_trajs

In [None]:
# Static plot of the selected play, coloured by team.
play_trajs.plot(column='team', colormap={'attack':'hotpink', 'defense':'turquoise'})

In [None]:
# Generalize the play's trajectories with a time tolerance of half a second
# (presumably thinning consecutive points to at least that spacing; see the
# MovingPandas documentation).
min_gap = timedelta(seconds=0.5)
generalized = mpd.MinTimeDeltaGeneralizer(play_trajs).generalize(tolerance=min_gap)

In [None]:
# Compute speed values for the generalized trajectories; the 'speed' column
# is what the following plots colour by.
generalized.add_speed()

In [None]:
# Interactive plot of the generalized play, coloured by speed, with player
# and team shown on hover.
generalized.hvplot(title=title, c='speed', hover_cols=['player', 'team'], **hvplot_defaults)

In [None]:
# Same speed-coloured view, drawn on top of the pitch outline.
speed_plot = generalized.hvplot(title=title, c='speed', hover_cols=['player'], cmap='Viridis', **hvplot_defaults)
plotted_pitch * speed_plot

In [None]:
# Fetch the pitch background image (skipped when a local copy exists).
get_file_from_url('https://github.com/movingpandas/movingpandas/raw/main/tutorials/data/soccer_field.png')

# Stretch the image over the full pitch extent so it lines up with the data.
pitch_img = hv.RGB.load_image('soccer_field.png', bounds=(0, 0, pitch_length, pitch_width))
team_colors = {'attack': 'limegreen', 'defense': 'purple'}
team_plot = generalized.hvplot(title=title, c='team', colormap=team_colors, hover_cols=['team'], **hvplot_defaults)
# Mark where each trajectory starts.
start_plot = generalized.get_start_locations().hvplot(label='start', color='orange')
pitch_img * team_plot * start_plot

In [None]:
# Variant of the previous view in which the start markers, rather than the
# trajectory lines, carry the explicit team colour map.
team_plot = generalized.hvplot(title=title, c='team', hover_cols=['team'], **hvplot_defaults)
start_plot = generalized.get_start_locations().hvplot(
    label='start',
    c='team',
    hover_cols=['team'],
    colormap={'attack': 'limegreen', 'defense': 'purple'},
    colorbar=True,
    legend=True,
)
pitch_img * team_plot * start_plot

## Continue exploring MovingPandas

1. [Bird migration analysis](bird-migration.ipynb)
1. [Ship data analysis](ship-data.ipynb)
1. [Horse collar data exploration](horse-collar.ipynb)
1. [OSM traces](osm-traces.ipynb)
1. [Soccer game](soccer-game.ipynb)
1. [Mars rover & heli](mars-rover.ipynb)