diff --git a/README.md b/README.md index 05b9eb1..8ce922c 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,9 @@ If I were using Windows, it might look like this: Set these aside for now and we'll pick them up in chapter 2. ## Changelog +### v0.2.0 (2024-06-18) +Cleaned up plotting chapter to avoid misc Seaborn warnings. + ### v0.1.1 (2024-04-22) Fix Pandas `mean` example. More: Pandas changed their defaults to throw an error if you try to call this on string columns. Fixed example to explicitely diff --git a/code/06_01_summary.py b/code/06_01_summary.py index bd14407..762a1c3 100644 --- a/code/06_01_summary.py +++ b/code/06_01_summary.py @@ -6,7 +6,6 @@ from os import path import matplotlib.image as mpimg -pd.options.mode.chained_assignment = None %matplotlib qt # change this to the directory where the csv files that come with the book are @@ -16,11 +15,6 @@ DATA_DIR = './data' FIG_DIR = './figures' -dftm = dfpm.groupby(['match_id', 'team']).agg( - total_shot=('shot', 'sum'), - total_pass=('pass', 'sum'), - total_goal=('goal', 'sum')) - ############### # distributions ############### @@ -34,7 +28,7 @@ # processing dfpm = pd.merge(dfpm, dfm[['match_id', 'home_team', 'away_team']], how='left') -dfpm['opp'] = np.nan +dfpm['opp'] = pd.NA dfpm.loc[dfpm['team'] == dfpm['home_team'], 'opp'] = dfpm['away_team'] dfpm.loc[dfpm['team'] == dfpm['away_team'], 'opp'] = dfpm['home_team'] @@ -60,35 +54,31 @@ ########## # basic displot - all on one line -g = (sns.FacetGrid(dfpm).map(sns.kdeplot, 'pass', shade=True)) +g = (sns.FacetGrid(dfpm).map(sns.kdeplot, 'pass', fill=True)) g.set(xlim=(-5, 120)) -plt.show() # on seperate lines so it's clearer it's a two step process g = (sns.FacetGrid(dfpm) - .map(sns.kdeplot, 'dist', shade=True)) + .map(sns.kdeplot, 'dist', fill=True)) # hue g = (sns.FacetGrid(dfpm, hue='pos') - .map(sns.kdeplot, 'pass', shade=True) + .map(sns.kdeplot, 'pass', fill=True) .add_legend() .set(xlim=(-5, 120))) -plt.show() # add col g = (sns.FacetGrid(dfpm, hue='pos', col='side') - .map(sns.kdeplot, 'pass', shade=True) + .map(sns.kdeplot, 'pass', fill=True) .add_legend() .set(xlim=(-5, 120))) -plt.show() # add col order g = (sns.FacetGrid(dfpm, hue='pos', col='side', col_order=['left', 'central', 'right']) - .map(sns.kdeplot, 'pass', shade=True) + .map(sns.kdeplot, 'pass', fill=True) .add_legend() .set(xlim=(-5, 160))) -plt.show() # rows dfpm.loc[dfpm['pos'] == 'GKP', 'side'] = 'central' @@ -96,7 +86,7 @@ col_order=['left', 'central', 'right'], row_order=['FWD', 'MID', 'DEF', 'GKP'], ) - .map(sns.kdeplot, 'pass', shade=True) + .map(sns.kdeplot, 'pass', fill=True) .add_legend() .set(xlim=(-5, 160))) @@ -116,7 +106,7 @@ .sort_values('date').head()) def home_away_score_df(df, location): - df = df[['match_id', 'date', f'{location}_team', f'{location}_score']] + df = df[['match_id', 'date', f'{location}_team', f'{location}_score']].copy() df.columns = ['match_id', 'date', 'team', 'score'] df['location'] = location return df @@ -129,9 +119,8 @@ def home_away_score_df(df, location): # now can plot points by scoring system and position g = (sns.FacetGrid(score_long, hue='location') - .map(sns.kdeplot, 'score', shade=True)) + .map(sns.kdeplot, 'score', fill=True)) g.add_legend() -plt.show() ################################# # relationships between variables @@ -141,7 +130,7 @@ def home_away_score_df(df, location): g = sns.relplot(x='weight', y='height', data=dfp) # with hue -g = sns.relplot(x='jweight', y='jheight', hue='pos', data=dfp) +g = sns.relplot(x='weight', y='height', hue='pos', data=dfp) # adding jitter import random @@ -161,15 +150,15 @@ def home_away_score_df(df, location): # and label_order arguments g = (sns.FacetGrid(dfp, hue='team', col='grouping', col_wrap=2, aspect=2, col_order=['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']) - .map(sns.kdeplot, 'weight', shade=True) + .map(sns.kdeplot, 'weight', fill=True) .add_legend()) # contour plots -# shade +# fill g = (sns.FacetGrid(dfp, col='pos', hue='pos', col_wrap=2, aspect=2) - .map(sns.kdeplot, 'weight', 'height', shade=True)) + .map(sns.kdeplot, 'weight', 'height', fill=True)) -# no shade +# no fill g = (sns.FacetGrid(dfp, col='pos', hue='pos', col_wrap=2, aspect=2) .map(sns.kdeplot, 'weight', 'height')) @@ -215,7 +204,6 @@ def home_away_score_df(df, location): g = sns.relplot(x='min_period', y='dist_m', kind='line', hue='foot', data=dfs.query("period in ('1H', '2H')"), row='period') -plt.show() ############## # plot options @@ -223,17 +211,15 @@ def home_away_score_df(df, location): # basic plot g = (sns.FacetGrid(dfpm, col='pos') - .map(sns.kdeplot, 'pass', shade=True)) - -plt.show() + .map(sns.kdeplot, 'pass', fill=True)) # wrap columns g = (sns.FacetGrid(dfpm, col='pos', col_wrap=2) - .map(sns.kdeplot, 'pass', shade=True)) + .map(sns.kdeplot, 'pass', fill=True)) # adding a title -g.fig.subplots_adjust(top=0.9) -g.fig.suptitle('Distribution of No of Passes by Position') +g.figure.subplots_adjust(top=0.9) +g.figure.suptitle('Distribution of No of Passes by Position') # modifying options g.set(xlim=(-5, 120)) @@ -243,75 +229,3 @@ def home_away_score_df(df, location): # saving g.savefig(path.join(FIG_DIR, 'no_passes_by_position.png')) - -############# -# shot charts -############# - -dfs[['name', 'dist', 'foot', 'goal', 'x1', 'y1']].head(5) - -dfs['x'] = dfs['x1']*120/100 -dfs['y'] = (100 - dfs['y1'])*75/100 - -# shot data -g = sns.relplot(data=dfs, x='x', y='y', kind='scatter') -g.set(yticks=[], xticks=[], xlabel=None, ylabel=None) -g.despine(left=True, bottom=True) - -map_img = mpimg.imread('./fig/soccer_field.png') - -# scatter plot with field overlay -g = sns.relplot(data=dfs, x='x', y='y', kind='scatter') -g.set(yticks=[], xticks=[], xlabel=None, ylabel=None) -g.despine(left=True, bottom=True) -for ax in g.fig.axes: - ax.imshow(map_img, zorder=0, extent=[0, 120, 0, 75]) - -plt.show() - -# how about a bit of jitter -dfs['xj'] = dfs['x'].apply(lambda x: x + random.gauss(0, 1)) -dfs['yj'] = dfs['y'].apply(lambda x: x + random.gauss(0, 1)) - -g = sns.relplot(data=dfs, x='xj', y='yj', kind='scatter') -for ax in g.fig.axes: - ax.imshow(map_img, zorder=0, extent=[0, 115, 0, 74]) -g.set(yticks=[], xticks=[], xlabel=None, ylabel=None) -g.despine(left=True, bottom=True) -plt.show() - -# putting it in a function -def shot_chart(df, **kwargs): - g = sns.relplot(data=df, x='xj', y='yj', kind='scatter', **kwargs) - g.set(yticks=[], xticks=[], xlabel=None, ylabel=None) - g.despine(left=True, bottom=True) - - for ax in g.fig.axes: - ax.imshow(map_img, zorder=0, extent=[0, 115, 0, 74]) - - return g - -shot_chart(dfs, hue='goal', style='goal') -shot_chart(dfs, col='foot', hue='foot') - -# and columns -shot_chart(dfs, hue='goal', style='goal', col='team', height=3, col_wrap=4, - s=10) - -# now let's try a contour plot -g = (sns.FacetGrid(dfs, col='foot', hue='foot') - .map(sns.kdeplot, 'x', 'y', alpha=0.5, shade=True) - .add_legend()) -g.set(yticks=[], xticks=[], xlabel=None, ylabel=None) -g.despine(left=True, bottom=True) -for ax in g.fig.axes: - ax.imshow(map_img, zorder=0, extent=[0, 115, 0, 74]) - -# add goal row, turn shading off -g = (sns.FacetGrid(dfs, col='foot', hue='foot') - .map(sns.kdeplot, 'x', 'y', alpha=0.5) - .add_legend()) -g.set(yticks=[], xticks=[], xlabel=None, ylabel=None) -g.despine(left=True, bottom=True) -for ax in g.fig.axes: - ax.imshow(map_img, zorder=0, extent=[0, 115, 0, 74]) diff --git a/code/06_02_shot_chart.py b/code/06_02_shot_chart.py index ef899cf..e8bb4de 100644 --- a/code/06_02_shot_chart.py +++ b/code/06_02_shot_chart.py @@ -1,12 +1,9 @@ import pandas as pd import random -from pandas import DataFrame import seaborn as sns -import numpy as np import matplotlib.pyplot as plt from os import path -pd.options.mode.chained_assignment = None %matplotlib qt DATA_DIR = './data' @@ -38,7 +35,7 @@ g.despine(left=True, bottom=True) import matplotlib.image as mpimg -map_img = mpimg.imread('./fig/soccer_field.png') +map_img = mpimg.imread('./data/soccer_field.png') # scatter plot with field overlay g = sns.relplot(data=dfs, x='x', y='y', kind='scatter', size=5) @@ -93,7 +90,7 @@ def add2_flexible(num1, num2, **kwargs): .map(sns.kdeplot, 'x', 'y', alpha=0.5)) g.set(yticks=[], xticks=[], xlabel=None, ylabel=None) g.despine(left=True, bottom=True) -for ax in g.fig.axes: +for ax in g.figure.axes: ax.imshow(map_img, zorder=0, extent=[0, 120, 0, 75]) # turn shading off - by team @@ -101,5 +98,5 @@ def add2_flexible(num1, num2, **kwargs): .map(sns.kdeplot, 'x', 'y', alpha=0.5)) g.set(yticks=[], xticks=[], xlabel=None, ylabel=None) g.despine(left=True, bottom=True) -for ax in g.fig.axes: +for ax in g.figure.axes: ax.imshow(map_img, zorder=0, extent=[0, 120, 0, 75]) diff --git a/solutions-to-exercises/06_plotting_answers.py b/solutions-to-exercises/06_plotting_answers.py index d8130b6..57c9540 100644 --- a/solutions-to-exercises/06_plotting_answers.py +++ b/solutions-to-exercises/06_plotting_answers.py @@ -17,9 +17,9 @@ # 6.1a g = (sns.FacetGrid(dftm) - .map(sns.kdeplot, 'pass', shade=True)) -g.fig.subplots_adjust(top=0.9) -g.fig.suptitle('Distribution of Passes') + .map(sns.kdeplot, 'pass', fill=True)) +g.figure.subplots_adjust(top=0.9) +g.figure.suptitle('Distribution of Passes') g.savefig('./solutions-to-exercises/6-1a.png') # Now modify your plot to show the distribution of passes by whether the team @@ -28,32 +28,32 @@ # 6.1b g = (sns.FacetGrid(dftm, hue='win') - .map(sns.kdeplot, 'pass', shade=True)) -g.fig.subplots_adjust(top=0.9) -g.fig.suptitle('Distribution of Passes by Win/Loss B') + .map(sns.kdeplot, 'pass', fill=True)) +g.figure.subplots_adjust(top=0.9) +g.figure.suptitle('Distribution of Passes by Win/Loss B') g.savefig('./solutions-to-exercises/6-1b.png') # 6.1c g = (sns.FacetGrid(dftm, col='win') - .map(sns.kdeplot, 'pass', shade=True)) -g.fig.subplots_adjust(top=0.8) -g.fig.suptitle('Distribution of Passes by Win/Loss C') + .map(sns.kdeplot, 'pass', fill=True)) +g.figure.subplots_adjust(top=0.8) +g.figure.suptitle('Distribution of Passes by Win/Loss C') g.savefig('./solutions-to-exercises/6-1c.png') # 6.1d g = (sns.FacetGrid(dftm, col='win', hue='win') - .map(sns.kdeplot, 'pass', shade=True)) -g.fig.subplots_adjust(top=0.8) -g.fig.suptitle('Distribution of Passes by Win/Loss D') + .map(sns.kdeplot, 'pass', fill=True)) +g.figure.subplots_adjust(top=0.8) +g.figure.suptitle('Distribution of Passes by Win/Loss D') g.savefig('./solutions-to-exercises/6-1d.png') # 6.1e g = (sns.FacetGrid(dftm, col='team', col_wrap=6) - .map(sns.kdeplot, 'pass', shade=True)) -g.fig.subplots_adjust(top=0.9) -g.fig.suptitle('Distribution of Passes by Team') + .map(sns.kdeplot, 'pass', fill=True)) +g.figure.subplots_adjust(top=0.9) +g.figure.suptitle('Distribution of Passes by Team') g.savefig('./solutions-to-exercises/6-1e.png') ############################################################################### @@ -61,8 +61,8 @@ ############################################################################### # 6.2a g = sns.relplot(x='pass', y='pass_opp', data=dftm) -g.fig.subplots_adjust(top=0.9) -g.fig.suptitle('Passes vs Opponent Passes') +g.figure.subplots_adjust(top=0.9) +g.figure.suptitle('Passes vs Opponent Passes') g.savefig('./solutions-to-exercises/6-2a.png') # 6.2b