From 19bba33206eaba9dd75d3cf2986039252979cb5b Mon Sep 17 00:00:00 2001 From: fstp Date: Tue, 27 Aug 2024 14:46:30 +0200 Subject: [PATCH 1/3] Fix Pandas 2.1.0 warning in data_portal.py When running zipline-reloaded with the latest Pandas version, the following deprecation warning is shown: "DataFrame.fillna with 'method' is deprecated" Simply replacing fillna() with the ffill() method directly works and removes the warning. --- src/zipline/data/data_portal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zipline/data/data_portal.py b/src/zipline/data/data_portal.py index d637195b57..6a3fd68751 100644 --- a/src/zipline/data/data_portal.py +++ b/src/zipline/data/data_portal.py @@ -948,7 +948,7 @@ def get_history_window( df.iloc[0, assets_with_leading_nan] = np.array( initial_values, dtype=np.float64 ) - df.fillna(method="ffill", inplace=True) + df.ffill(inplace=True) # forward-filling will incorrectly produce values after the end of # an asset's lifetime, so write NaNs back over the asset's From 3b01960756797ae148f216157d602daaae799308 Mon Sep 17 00:00:00 2001 From: fstp Date: Tue, 27 Aug 2024 15:46:24 +0200 Subject: [PATCH 2/3] Use new implementation of stack from Pandas 2.1.0 In preparation for Pandas 3.0, a new and improved implementation of the stack() method was introduced. There is a feature flag, future_stack, which, when set to True, enables the new implementation to be used. If the flag is not set, a warning will be emitted. More information about the implementation can be found here: https://pandas.pydata.org/docs/whatsnew/v2.1.0.html Crucially, the behavior regarding NA values has been improved. The new implementation will not create unnecessary NA values, but instead, it will preserve them from the input without the need to use dropna=False. This change avoids the potential issue of generating unnecessary NA values from multiple levels of stacking. 
--- src/zipline/_protocol.pyx | 2 +- src/zipline/pipeline/loaders/earnings_estimates.py | 2 +- tests/pipeline/test_international_markets.py | 2 +- tests/pipeline/test_quarters_estimates.py | 8 ++++---- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/zipline/_protocol.pyx b/src/zipline/_protocol.pyx index d3aef4919b..dceb11b293 100644 --- a/src/zipline/_protocol.pyx +++ b/src/zipline/_protocol.pyx @@ -659,7 +659,7 @@ cdef class BarData: df = (pd.concat(df_dict, keys=df_dict.keys(), names=['fields', dt_label]) - .stack(dropna=False) # ensure we return all fields/assets/dates despite missing values + .stack(future_stack=True) # ensure we return all fields/assets/dates despite missing values .unstack(level='fields')) df.index.set_names([dt_label, 'asset']) return df.sort_index() diff --git a/src/zipline/pipeline/loaders/earnings_estimates.py b/src/zipline/pipeline/loaders/earnings_estimates.py index 3e815735b4..093f91f52e 100644 --- a/src/zipline/pipeline/loaders/earnings_estimates.py +++ b/src/zipline/pipeline/loaders/earnings_estimates.py @@ -702,7 +702,7 @@ def get_last_data_per_qtr( ffill_across_cols(last_per_qtr, columns, self.name_map) # Stack quarter and sid into the index. 
stacked_last_per_qtr = last_per_qtr.stack( - [SID_FIELD_NAME, NORMALIZED_QUARTERS], + [SID_FIELD_NAME, NORMALIZED_QUARTERS], future_stack=True, ) # Set date index name for ease of reference stacked_last_per_qtr.index.set_names( diff --git a/tests/pipeline/test_international_markets.py b/tests/pipeline/test_international_markets.py index 54133e75d9..3e6af0bc0f 100644 --- a/tests/pipeline/test_international_markets.py +++ b/tests/pipeline/test_international_markets.py @@ -128,7 +128,7 @@ def init_class_fixtures(cls): bar_data = cls.daily_bar_data[name] df = ( - pd.concat(bar_data, keys=bar_data.keys()).stack().unstack(0).swaplevel() + pd.concat(bar_data, keys=bar_data.keys()).stack(future_stack=True).unstack(0).swaplevel() ) frames = { field: frame.reset_index(level=0, drop=True) diff --git a/tests/pipeline/test_quarters_estimates.py b/tests/pipeline/test_quarters_estimates.py index 1a613d0185..91d4c3c18e 100644 --- a/tests/pipeline/test_quarters_estimates.py +++ b/tests/pipeline/test_quarters_estimates.py @@ -2666,7 +2666,7 @@ def make_expected_out(cls): .set_index(SID_FIELD_NAME, append=True) .unstack(SID_FIELD_NAME) .reindex(cls.trading_days) - .stack(SID_FIELD_NAME, dropna=False) + .stack(SID_FIELD_NAME, future_stack=True) ) split_adjusted_at_end_boundary = ( @@ -2733,7 +2733,7 @@ def make_expected_out(cls): .set_index(SID_FIELD_NAME, append=True) .unstack(SID_FIELD_NAME) .reindex(cls.trading_days) - .stack(SID_FIELD_NAME, dropna=False) + .stack(SID_FIELD_NAME, future_stack=True) ) split_adjusted_before_start_boundary = split_adjusted_at_start_boundary @@ -2812,7 +2812,7 @@ def make_expected_out(cls): .set_index(SID_FIELD_NAME, append=True) .unstack(SID_FIELD_NAME) .reindex(cls.trading_days) - .stack(SID_FIELD_NAME, dropna=False) + .stack(SID_FIELD_NAME, future_stack=True) ) split_adjusted_at_end_boundary = ( @@ -2867,7 +2867,7 @@ def make_expected_out(cls): .set_index(SID_FIELD_NAME, append=True) .unstack(SID_FIELD_NAME) .reindex(cls.trading_days) - 
.stack(SID_FIELD_NAME, dropna=False) + .stack(SID_FIELD_NAME, future_stack=True) ) split_adjusted_before_start_boundary = split_adjusted_at_start_boundary From 46a8938516fbc04d0dc13df03cf318f239b5319e Mon Sep 17 00:00:00 2001 From: fstp Date: Wed, 28 Aug 2024 08:35:44 +0200 Subject: [PATCH 3/3] Fix formatting errors --- src/zipline/pipeline/loaders/earnings_estimates.py | 3 ++- tests/pipeline/test_international_markets.py | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/zipline/pipeline/loaders/earnings_estimates.py b/src/zipline/pipeline/loaders/earnings_estimates.py index 093f91f52e..f6e076297b 100644 --- a/src/zipline/pipeline/loaders/earnings_estimates.py +++ b/src/zipline/pipeline/loaders/earnings_estimates.py @@ -702,7 +702,8 @@ def get_last_data_per_qtr( ffill_across_cols(last_per_qtr, columns, self.name_map) # Stack quarter and sid into the index. stacked_last_per_qtr = last_per_qtr.stack( - [SID_FIELD_NAME, NORMALIZED_QUARTERS], future_stack=True, + [SID_FIELD_NAME, NORMALIZED_QUARTERS], + future_stack=True, ) # Set date index name for ease of reference stacked_last_per_qtr.index.set_names( diff --git a/tests/pipeline/test_international_markets.py b/tests/pipeline/test_international_markets.py index 3e6af0bc0f..7c96c88907 100644 --- a/tests/pipeline/test_international_markets.py +++ b/tests/pipeline/test_international_markets.py @@ -128,7 +128,10 @@ def init_class_fixtures(cls): bar_data = cls.daily_bar_data[name] df = ( - pd.concat(bar_data, keys=bar_data.keys()).stack(future_stack=True).unstack(0).swaplevel() + pd.concat(bar_data, keys=bar_data.keys()) + .stack(future_stack=True) + .unstack(0) + .swaplevel() ) frames = { field: frame.reset_index(level=0, drop=True)