From 6deb0410d3f4c663ca5dcd819ef152b470189b7f Mon Sep 17 00:00:00 2001 From: thebigpiece <124386347+LukeFabric@users.noreply.github.com> Date: Sun, 16 Jun 2024 19:59:53 +0000 Subject: [PATCH] Added bat tracking and fixed Outs Above Average Leaderboard link (#430) --- pybaseball/__init__.py | 3 ++- pybaseball/statcast_batter.py | 16 ++++++++++++++++ pybaseball/statcast_fielding.py | 2 +- pybaseball/statcast_pitcher.py | 15 +++++++++++++++ .../pybaseball/test_statcast_batter.py | 13 ++++++++++++- .../pybaseball/test_statcast_fielding.py | 4 ++-- .../pybaseball/test_statcast_pitcher.py | 11 ++++++++++- 7 files changed, 58 insertions(+), 6 deletions(-) diff --git a/pybaseball/__init__.py b/pybaseball/__init__.py index 38e235d9..cc223855 100644 --- a/pybaseball/__init__.py +++ b/pybaseball/__init__.py @@ -20,7 +20,8 @@ statcast_batter_exitvelo_barrels, statcast_batter_expected_stats, statcast_batter_percentile_ranks, - statcast_batter_pitch_arsenal + statcast_batter_pitch_arsenal, + statcast_batter_bat_tracking ) from .statcast_running import statcast_sprint_speed, statcast_running_splits from .statcast_fielding import ( diff --git a/pybaseball/statcast_batter.py b/pybaseball/statcast_batter.py index 942e628a..a6cdb24c 100644 --- a/pybaseball/statcast_batter.py +++ b/pybaseball/statcast_batter.py @@ -92,3 +92,19 @@ def statcast_batter_pitch_arsenal(year: int, minPA: int = 25) -> pd.DataFrame: data = pd.read_csv(io.StringIO(res.decode('utf-8'))) data = sanitize_statcast_columns(data) return data +@cache.df_cache() +def statcast_batter_bat_tracking(year: int, minSwings: Union[int,str] = "q" ) -> pd.DataFrame: + """ + Retrieves a player's bat tracking data for a given year. + + ARGUMENTS + year: The year for which you wish to retrieve the bat tracking data. Format: YYYY. + minSwings: The minimum number of competitive swings for each player. If a player falls below this threshold, + they will be excluded from the results. 
If no value is specified, the default number of competitive swings + is qualified. + """ + url = f"https://baseballsavant.mlb.com/leaderboard/bat-tracking?attackZone=&batSide=&contactType=&count=&dateStart={year}-01-01&dateEnd={year}-12-31&gameType=&isHardHit=&minSwings={minSwings}&minGroupSwings=1&pitchHand=&pitchType=&seasonStart=&seasonEnd=&team=&type=batter&csv=true" + res = requests.get(url, timeout=None).content + data = pd.read_csv(io.StringIO(res.decode('utf-8'))) + data = sanitize_statcast_columns(data) + return data diff --git a/pybaseball/statcast_fielding.py b/pybaseball/statcast_fielding.py index f1945a5f..9eb13487 100644 --- a/pybaseball/statcast_fielding.py +++ b/pybaseball/statcast_fielding.py @@ -30,7 +30,7 @@ def statcast_outs_above_average(year: int, pos: Union[int, str], min_att: Union[ # catcher is not included in this leaderboard if pos == "2": raise ValueError("This particular leaderboard does not include catchers!") - url = f"https://baseballsavant.mlb.com/leaderboard/outs_above_average?type={view}&year={year}&team=&range=year&min={min_att}&pos={pos}&roles=&viz=show&csv=true" + url = f"https://baseballsavant.mlb.com/leaderboard/outs_above_average?type={view}&startYear={year}&endYear={year}&split=no&team=&range=year&min={min_att}&pos={pos}&roles=&viz=hide&csv=true" res = requests.get(url, timeout=None).content data = pd.read_csv(io.StringIO(res.decode('utf-8'))) data = sanitize_statcast_columns(data) diff --git a/pybaseball/statcast_pitcher.py b/pybaseball/statcast_pitcher.py index 044bdabd..040fe49d 100644 --- a/pybaseball/statcast_pitcher.py +++ b/pybaseball/statcast_pitcher.py @@ -201,3 +201,18 @@ def statcast_pitcher_spin_dir_comp(year: int, pitch_a: str = "FF", pitch_b: str data = pd.read_csv(io.StringIO(res.decode('utf-8'))) data = sanitize_statcast_columns(data) return data +@cache.df_cache() +def statcast_pitcher_bat_tracking(year: int, minSwings: Union[int,str] = "q") -> pd.DataFrame: + """ + Retrieves the bat tracking data against 
for pitchers. + + ARGUMENTS + year: The year for which you wish to retrieve bat tracking data. Format: YYYY + minSwings: The minimum number of swings batters have taken against a pitcher. If a pitcher falls + below the threshold, they will be excluded from the results. The default value is qualified. + """ + url = f"https://baseballsavant.mlb.com/leaderboard/bat-tracking?attackZone=&batSide=&contactType=&count=&dateStart={year}-01-01&dateEnd={year}-12-31&gameType=&isHardHit=&minSwings={minSwings}&minGroupSwings=1&pitchHand=&pitchType=&seasonStart=&seasonEnd=&team=&type=pitcher&csv=true" + res = requests.get(url, timeout=None).content + data = pd.read_csv(io.StringIO(res.decode('utf-8'))) + data = sanitize_statcast_columns(data) + return data diff --git a/tests/integration/pybaseball/test_statcast_batter.py b/tests/integration/pybaseball/test_statcast_batter.py index 58898066..f5be9c60 100644 --- a/tests/integration/pybaseball/test_statcast_batter.py +++ b/tests/integration/pybaseball/test_statcast_batter.py @@ -7,7 +7,8 @@ statcast_batter_exitvelo_barrels, statcast_batter_expected_stats, statcast_batter_percentile_ranks, - statcast_batter_pitch_arsenal + statcast_batter_pitch_arsenal, + statcast_batter_bat_tracking ) @@ -62,3 +63,13 @@ def test_statcast_batter_pitch_arsenal() -> None: assert len(result.columns) == 21 assert len(result) > 0 assert len(result[result['pa'] < min_pa]) == 0 +def test_statcast_batter_bat_tracking() -> None: + min_pa = 25 + result: pd.DataFrame = statcast_batter_bat_tracking(2024, min_pa) + + assert result is not None + assert not result.empty + + assert len(result.columns) == 18 + assert len(result) > 0 + assert len(result[result['swings_competitive'] < min_pa]) == 0 diff --git a/tests/integration/pybaseball/test_statcast_fielding.py b/tests/integration/pybaseball/test_statcast_fielding.py index eaf675e0..3041d3ff 100644 --- a/tests/integration/pybaseball/test_statcast_fielding.py +++ 
b/tests/integration/pybaseball/test_statcast_fielding.py @@ -18,7 +18,7 @@ def test_statcast_outs_above_average() -> None: assert result is not None assert not result.empty - assert len(result.columns) == 17 + assert len(result.columns) == 16 assert len(result) > 0 def test_statcast_outs_above_average_view() -> None: @@ -101,4 +101,4 @@ def test_statcast_fielding_run_value() -> None: assert len(result) > 0 #test_statcast_outs_above_average_view() -test_statcast_outs_above_average() \ No newline at end of file +test_statcast_outs_above_average() diff --git a/tests/integration/pybaseball/test_statcast_pitcher.py b/tests/integration/pybaseball/test_statcast_pitcher.py index 8b74d213..e8a06721 100644 --- a/tests/integration/pybaseball/test_statcast_pitcher.py +++ b/tests/integration/pybaseball/test_statcast_pitcher.py @@ -11,7 +11,8 @@ statcast_pitcher_percentile_ranks, statcast_pitcher_pitch_arsenal, statcast_pitcher_pitch_movement, - statcast_pitcher_spin_dir_comp + statcast_pitcher_spin_dir_comp, + statcast_pitcher_bat_tracking ) @@ -105,3 +106,11 @@ def test_statcast_pitcher_spin_dir_comp() -> None: assert len(result.columns) == 30 assert len(result) > 100 +def test_statcast_pitcher_bat_tracking() -> None: + result: pd.DataFrame = statcast_pitcher_bat_tracking(2024) + + assert result is not None + assert not result.empty + + assert len(result.columns) == 18 + assert len(result) > 0