Skip to content

Commit

Permalink
fix: clean up seaborn chapter to avoid warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
nathanbraun committed Jun 18, 2024
1 parent f3943d2 commit 4e0eec1
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 127 deletions.
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ If I were using Windows, it might look like this:
Set these aside for now and we'll pick them up in chapter 2.

## Changelog
### v0.2.0 (2024-06-18)
Cleaned up the plotting chapter to avoid miscellaneous Seaborn warnings.

### v0.1.1 (2024-04-22)
Fix Pandas `mean` example. More: Pandas changed their defaults to throw an
error if you try to call this on string columns. Fixed example to explicitly
Expand Down
122 changes: 18 additions & 104 deletions code/06_01_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from os import path
import matplotlib.image as mpimg

pd.options.mode.chained_assignment = None
%matplotlib qt

# change this to the directory where the csv files that come with the book are
Expand All @@ -16,11 +15,6 @@
DATA_DIR = './data'
FIG_DIR = './figures'

dftm = dfpm.groupby(['match_id', 'team']).agg(
total_shot=('shot', 'sum'),
total_pass=('pass', 'sum'),
total_goal=('goal', 'sum'))

###############
# distributions
###############
Expand All @@ -34,7 +28,7 @@

# processing
dfpm = pd.merge(dfpm, dfm[['match_id', 'home_team', 'away_team']], how='left')
dfpm['opp'] = np.nan
dfpm['opp'] = pd.NA
dfpm.loc[dfpm['team'] == dfpm['home_team'], 'opp'] = dfpm['away_team']
dfpm.loc[dfpm['team'] == dfpm['away_team'], 'opp'] = dfpm['home_team']

Expand All @@ -60,43 +54,39 @@
##########

# basic displot - all on one line
g = (sns.FacetGrid(dfpm).map(sns.kdeplot, 'pass', shade=True))
g = (sns.FacetGrid(dfpm).map(sns.kdeplot, 'pass', fill=True))
g.set(xlim=(-5, 120))
plt.show()

# on separate lines so it's clearer it's a two-step process
g = (sns.FacetGrid(dfpm)
.map(sns.kdeplot, 'dist', shade=True))
.map(sns.kdeplot, 'dist', fill=True))

# hue
g = (sns.FacetGrid(dfpm, hue='pos')
.map(sns.kdeplot, 'pass', shade=True)
.map(sns.kdeplot, 'pass', fill=True)
.add_legend()
.set(xlim=(-5, 120)))
plt.show()

# add col
g = (sns.FacetGrid(dfpm, hue='pos', col='side')
.map(sns.kdeplot, 'pass', shade=True)
.map(sns.kdeplot, 'pass', fill=True)
.add_legend()
.set(xlim=(-5, 120)))
plt.show()

# add col order
g = (sns.FacetGrid(dfpm, hue='pos', col='side', col_order=['left', 'central',
'right'])
.map(sns.kdeplot, 'pass', shade=True)
.map(sns.kdeplot, 'pass', fill=True)
.add_legend()
.set(xlim=(-5, 160)))
plt.show()

# rows
dfpm.loc[dfpm['pos'] == 'GKP', 'side'] = 'central'
g = (sns.FacetGrid(dfpm, hue='pos', col='side', row='pos',
col_order=['left', 'central', 'right'],
row_order=['FWD', 'MID', 'DEF', 'GKP'],
)
.map(sns.kdeplot, 'pass', shade=True)
.map(sns.kdeplot, 'pass', fill=True)
.add_legend()
.set(xlim=(-5, 160)))

Expand All @@ -116,7 +106,7 @@
.sort_values('date').head())

def home_away_score_df(df, location):
df = df[['match_id', 'date', f'{location}_team', f'{location}_score']]
df = df[['match_id', 'date', f'{location}_team', f'{location}_score']].copy()
df.columns = ['match_id', 'date', 'team', 'score']
df['location'] = location
return df
Expand All @@ -129,9 +119,8 @@ def home_away_score_df(df, location):

# now we can plot the distribution of score by location (home vs away)
g = (sns.FacetGrid(score_long, hue='location')
.map(sns.kdeplot, 'score', shade=True))
.map(sns.kdeplot, 'score', fill=True))
g.add_legend()
plt.show()

#################################
# relationships between variables
Expand All @@ -141,7 +130,7 @@ def home_away_score_df(df, location):
g = sns.relplot(x='weight', y='height', data=dfp)

# with hue
g = sns.relplot(x='jweight', y='jheight', hue='pos', data=dfp)
g = sns.relplot(x='weight', y='height', hue='pos', data=dfp)

# adding jitter
import random
Expand All @@ -161,15 +150,15 @@ def home_away_score_df(df, location):
# and label_order arguments
g = (sns.FacetGrid(dfp, hue='team', col='grouping', col_wrap=2, aspect=2,
col_order=['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'])
.map(sns.kdeplot, 'weight', shade=True)
.map(sns.kdeplot, 'weight', fill=True)
.add_legend())

# contour plots
# shade
# fill
g = (sns.FacetGrid(dfp, col='pos', hue='pos', col_wrap=2, aspect=2)
.map(sns.kdeplot, 'weight', 'height', shade=True))
.map(sns.kdeplot, 'weight', 'height', fill=True))

# no shade
# no fill
g = (sns.FacetGrid(dfp, col='pos', hue='pos', col_wrap=2, aspect=2)
.map(sns.kdeplot, 'weight', 'height'))

Expand Down Expand Up @@ -215,25 +204,22 @@ def home_away_score_df(df, location):
g = sns.relplot(x='min_period', y='dist_m', kind='line', hue='foot',
data=dfs.query("period in ('1H', '2H')"),
row='period')
plt.show()

##############
# plot options
##############

# basic plot
g = (sns.FacetGrid(dfpm, col='pos')
.map(sns.kdeplot, 'pass', shade=True))

plt.show()
.map(sns.kdeplot, 'pass', fill=True))

# wrap columns
g = (sns.FacetGrid(dfpm, col='pos', col_wrap=2)
.map(sns.kdeplot, 'pass', shade=True))
.map(sns.kdeplot, 'pass', fill=True))

# adding a title
g.fig.subplots_adjust(top=0.9)
g.fig.suptitle('Distribution of No of Passes by Position')
g.figure.subplots_adjust(top=0.9)
g.figure.suptitle('Distribution of No of Passes by Position')

# modifying options
g.set(xlim=(-5, 120))
Expand All @@ -243,75 +229,3 @@ def home_away_score_df(df, location):

# saving
g.savefig(path.join(FIG_DIR, 'no_passes_by_position.png'))

#############
# shot charts
#############

dfs[['name', 'dist', 'foot', 'goal', 'x1', 'y1']].head(5)

dfs['x'] = dfs['x1']*120/100
dfs['y'] = (100 - dfs['y1'])*75/100

# shot data
g = sns.relplot(data=dfs, x='x', y='y', kind='scatter')
g.set(yticks=[], xticks=[], xlabel=None, ylabel=None)
g.despine(left=True, bottom=True)

map_img = mpimg.imread('./fig/soccer_field.png')

# scatter plot with field overlay
g = sns.relplot(data=dfs, x='x', y='y', kind='scatter')
g.set(yticks=[], xticks=[], xlabel=None, ylabel=None)
g.despine(left=True, bottom=True)
for ax in g.fig.axes:
ax.imshow(map_img, zorder=0, extent=[0, 120, 0, 75])

plt.show()

# how about a bit of jitter
dfs['xj'] = dfs['x'].apply(lambda x: x + random.gauss(0, 1))
dfs['yj'] = dfs['y'].apply(lambda x: x + random.gauss(0, 1))

g = sns.relplot(data=dfs, x='xj', y='yj', kind='scatter')
for ax in g.fig.axes:
ax.imshow(map_img, zorder=0, extent=[0, 115, 0, 74])
g.set(yticks=[], xticks=[], xlabel=None, ylabel=None)
g.despine(left=True, bottom=True)
plt.show()

# putting it in a function
def shot_chart(df, **kwargs):
g = sns.relplot(data=df, x='xj', y='yj', kind='scatter', **kwargs)
g.set(yticks=[], xticks=[], xlabel=None, ylabel=None)
g.despine(left=True, bottom=True)

for ax in g.fig.axes:
ax.imshow(map_img, zorder=0, extent=[0, 115, 0, 74])

return g

shot_chart(dfs, hue='goal', style='goal')
shot_chart(dfs, col='foot', hue='foot')

# and columns
shot_chart(dfs, hue='goal', style='goal', col='team', height=3, col_wrap=4,
s=10)

# now let's try a contour plot
g = (sns.FacetGrid(dfs, col='foot', hue='foot')
.map(sns.kdeplot, 'x', 'y', alpha=0.5, shade=True)
.add_legend())
g.set(yticks=[], xticks=[], xlabel=None, ylabel=None)
g.despine(left=True, bottom=True)
for ax in g.fig.axes:
ax.imshow(map_img, zorder=0, extent=[0, 115, 0, 74])

# add goal row, turn shading off
g = (sns.FacetGrid(dfs, col='foot', hue='foot')
.map(sns.kdeplot, 'x', 'y', alpha=0.5)
.add_legend())
g.set(yticks=[], xticks=[], xlabel=None, ylabel=None)
g.despine(left=True, bottom=True)
for ax in g.fig.axes:
ax.imshow(map_img, zorder=0, extent=[0, 115, 0, 74])
9 changes: 3 additions & 6 deletions code/06_02_shot_chart.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
import pandas as pd
import random
from pandas import DataFrame
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from os import path

pd.options.mode.chained_assignment = None
%matplotlib qt

DATA_DIR = './data'
Expand Down Expand Up @@ -38,7 +35,7 @@
g.despine(left=True, bottom=True)

import matplotlib.image as mpimg
map_img = mpimg.imread('./fig/soccer_field.png')
map_img = mpimg.imread('./data/soccer_field.png')

# scatter plot with field overlay
g = sns.relplot(data=dfs, x='x', y='y', kind='scatter', size=5)
Expand Down Expand Up @@ -93,13 +90,13 @@ def add2_flexible(num1, num2, **kwargs):
.map(sns.kdeplot, 'x', 'y', alpha=0.5))
g.set(yticks=[], xticks=[], xlabel=None, ylabel=None)
g.despine(left=True, bottom=True)
for ax in g.fig.axes:
for ax in g.figure.axes:
ax.imshow(map_img, zorder=0, extent=[0, 120, 0, 75])

# turn shading off - by team
g = (sns.FacetGrid(dfs, col='team', col_wrap=4, height=2, hue='team')
.map(sns.kdeplot, 'x', 'y', alpha=0.5))
g.set(yticks=[], xticks=[], xlabel=None, ylabel=None)
g.despine(left=True, bottom=True)
for ax in g.fig.axes:
for ax in g.figure.axes:
ax.imshow(map_img, zorder=0, extent=[0, 120, 0, 75])
34 changes: 17 additions & 17 deletions solutions-to-exercises/06_plotting_answers.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@

# 6.1a
g = (sns.FacetGrid(dftm)
.map(sns.kdeplot, 'pass', shade=True))
g.fig.subplots_adjust(top=0.9)
g.fig.suptitle('Distribution of Passes')
.map(sns.kdeplot, 'pass', fill=True))
g.figure.subplots_adjust(top=0.9)
g.figure.suptitle('Distribution of Passes')
g.savefig('./solutions-to-exercises/6-1a.png')

# Now modify your plot to show the distribution of passes by whether the team
Expand All @@ -28,41 +28,41 @@

# 6.1b
g = (sns.FacetGrid(dftm, hue='win')
.map(sns.kdeplot, 'pass', shade=True))
g.fig.subplots_adjust(top=0.9)
g.fig.suptitle('Distribution of Passes by Win/Loss B')
.map(sns.kdeplot, 'pass', fill=True))
g.figure.subplots_adjust(top=0.9)
g.figure.suptitle('Distribution of Passes by Win/Loss B')
g.savefig('./solutions-to-exercises/6-1b.png')

# 6.1c
g = (sns.FacetGrid(dftm, col='win')
.map(sns.kdeplot, 'pass', shade=True))
g.fig.subplots_adjust(top=0.8)
g.fig.suptitle('Distribution of Passes by Win/Loss C')
.map(sns.kdeplot, 'pass', fill=True))
g.figure.subplots_adjust(top=0.8)
g.figure.suptitle('Distribution of Passes by Win/Loss C')
g.savefig('./solutions-to-exercises/6-1c.png')

# 6.1d

g = (sns.FacetGrid(dftm, col='win', hue='win')
.map(sns.kdeplot, 'pass', shade=True))
g.fig.subplots_adjust(top=0.8)
g.fig.suptitle('Distribution of Passes by Win/Loss D')
.map(sns.kdeplot, 'pass', fill=True))
g.figure.subplots_adjust(top=0.8)
g.figure.suptitle('Distribution of Passes by Win/Loss D')
g.savefig('./solutions-to-exercises/6-1d.png')


# 6.1e
g = (sns.FacetGrid(dftm, col='team', col_wrap=6)
.map(sns.kdeplot, 'pass', shade=True))
g.fig.subplots_adjust(top=0.9)
g.fig.suptitle('Distribution of Passes by Team')
.map(sns.kdeplot, 'pass', fill=True))
g.figure.subplots_adjust(top=0.9)
g.figure.suptitle('Distribution of Passes by Team')
g.savefig('./solutions-to-exercises/6-1e.png')

###############################################################################
# 6.2
###############################################################################
# 6.2a
g = sns.relplot(x='pass', y='pass_opp', data=dftm)
g.fig.subplots_adjust(top=0.9)
g.fig.suptitle('Passes vs Opponent Passes')
g.figure.subplots_adjust(top=0.9)
g.figure.suptitle('Passes vs Opponent Passes')
g.savefig('./solutions-to-exercises/6-2a.png')

# 6.2b
Expand Down

0 comments on commit 4e0eec1

Please sign in to comment.