From fc30421c6f199280bfac4e547537d1f77055d927 Mon Sep 17 00:00:00 2001 From: Filip Boltuzic Date: Mon, 27 Nov 2023 17:01:11 +0100 Subject: [PATCH 1/3] Updating library to be compatible with latest pandas --- spectre/data/dataloader.py | 17 +++++++++++------ spectre/factors/engine.py | 8 ++++---- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/spectre/data/dataloader.py b/spectre/data/dataloader.py index ff6fc82..9135fac 100644 --- a/spectre/data/dataloader.py +++ b/spectre/data/dataloader.py @@ -159,15 +159,20 @@ def load(self, start: Optional[pd.Timestamp] = None, end: Optional[pd.Timestamp] end = index[-1] if index[0] > start: - raise ValueError("`start` time cannot less than earliest time of data: {}." - .format(index[0])) + raise ValueError( + f"`start` time ({start}) cannot be less " \ + f"than earliest time of data: {index[0]}." + ) + if index[-1] < end: - raise ValueError("`end` time cannot greater than latest time of data: {}." - .format(index[-1])) + raise ValueError( + f"`end` time ({end}) cannot be greater " \ + f"than latest time of data: {index[-1]}." + ) - start_loc = index.get_loc(start, 'bfill') + start_loc = index.get_loc(start) backward_loc = max(start_loc - backwards, 0) - end_loc = index.get_loc(end, 'ffill') + end_loc = index.get_loc(end) assert end_loc >= start_loc, 'There is no data between `start` and `end`.' backward_start = index[backward_loc] diff --git a/spectre/factors/engine.py b/spectre/factors/engine.py index 526ba4f..f2a6036 100644 --- a/spectre/factors/engine.py +++ b/spectre/factors/engine.py @@ -100,13 +100,13 @@ def _prepare_tensor(self, start, end, max_backwards): df = self._loader.load(start, end, max_backwards).copy() # If possible, pre-screen if isinstance(self._filter, StaticAssets): - df = df.loc[(slice(None), self._filter.assets), :] + df = df.loc[(slice(None), list(self._filter.assets)), :] if df.shape[0] == 0: raise ValueError("The assets {} specified by StaticAssets filter, was not found in " "DataLoader.".format(self._filter.assets)) # check history data is insufficient df.index = df.index.remove_unused_levels() - history_win = df.index.levels[0].get_loc(start, 'bfill') + history_win = df.index.levels[0].get_loc(start) if history_win < max_backwards: warnings.warn("Historical data seems insufficient. " "{} rows of historical data are required, but only {} rows are obtained. " @@ -353,7 +353,7 @@ def run(self, start: Union[str, pd.Timestamp], end: Union[str, pd.Timestamp], # if any factors delayed, return df also should be delayed if delayed: index = ret.index.levels[0] - start_ind = index.get_loc(start, 'bfill') + start_ind = index.get_loc(start) if (start_ind + 1) >= len(index): raise ValueError('There is no data between start and end.') start = index[start_ind + 1] @@ -369,7 +369,7 @@ def run_raw(self, start: Union[str, pd.Timestamp], end: Union[str, pd.Timestamp] results, shifted_mask, delayed = self._run(start, end, delay_factor) index = self._dataframe.index.levels[0] - start_ind = index.get_loc(start, 'bfill') + start_ind = index.get_loc(start) if delayed: # if any factors delayed, return df also should be delayed start_ind += 1 if start_ind >= len(index): From 317cb9898482edf4941aca1e4ffec40039dbc02a Mon Sep 17 00:00:00 2001 From: Filip Boltuzic Date: Tue, 28 Nov 2023 13:09:59 +0100 Subject: [PATCH 2/3] PR comments --- spectre/data/dataloader.py | 4 ++-- spectre/factors/engine.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/spectre/data/dataloader.py b/spectre/data/dataloader.py index 9135fac..f129435 100644 --- a/spectre/data/dataloader.py +++ b/spectre/data/dataloader.py @@ -170,9 +170,9 @@ def load(self, start: Optional[pd.Timestamp] = None, end: Optional[pd.Timestamp] f"than latest time of data: {index[-1]}." ) - start_loc = index.get_loc(start) + start_loc = index.get_indexer([start], method='bfill')[0] backward_loc = max(start_loc - backwards, 0) - end_loc = index.get_loc(end) + end_loc = index.get_indexer([end], method='ffill')[0] assert end_loc >= start_loc, 'There is no data between `start` and `end`.' backward_start = index[backward_loc] diff --git a/spectre/factors/engine.py b/spectre/factors/engine.py index f2a6036..29fe862 100644 --- a/spectre/factors/engine.py +++ b/spectre/factors/engine.py @@ -106,7 +106,8 @@ def _prepare_tensor(self, start, end, max_backwards): "DataLoader.".format(self._filter.assets)) # check history data is insufficient df.index = df.index.remove_unused_levels() - history_win = df.index.levels[0].get_loc(start) + history_win = df.index.levels[0].get_indexer([start], method='ffill')[0] + print('hehehehehe') if history_win < max_backwards: warnings.warn("Historical data seems insufficient. " "{} rows of historical data are required, but only {} rows are obtained. " @@ -353,7 +354,7 @@ def run(self, start: Union[str, pd.Timestamp], end: Union[str, pd.Timestamp], # if any factors delayed, return df also should be delayed if delayed: index = ret.index.levels[0] - start_ind = index.get_loc(start) + start_ind = index.get_indexer([start], method='bfill')[0] if (start_ind + 1) >= len(index): raise ValueError('There is no data between start and end.') start = index[start_ind + 1] @@ -369,7 +370,7 @@ def run_raw(self, start: Union[str, pd.Timestamp], end: Union[str, pd.Timestamp] results, shifted_mask, delayed = self._run(start, end, delay_factor) index = self._dataframe.index.levels[0] - start_ind = index.get_loc(start) + start_ind = index.get_indexer([start], method='bfill')[0] if delayed: # if any factors delayed, return df also should be delayed start_ind += 1 if start_ind >= len(index): From df561ac6f638ad23ff5cdbe3c91f8188fa2ca4af Mon Sep 17 00:00:00 2001 From: Filip Boltuzic Date: Tue, 28 Nov 2023 13:11:06 +0100 Subject: [PATCH 3/3] PR comments --- spectre/factors/engine.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/spectre/factors/engine.py b/spectre/factors/engine.py index 29fe862..75d88e3 100644 --- a/spectre/factors/engine.py +++ b/spectre/factors/engine.py @@ -106,8 +106,7 @@ def _prepare_tensor(self, start, end, max_backwards): "DataLoader.".format(self._filter.assets)) # check history data is insufficient df.index = df.index.remove_unused_levels() - history_win = df.index.levels[0].get_indexer([start], method='ffill')[0] - print('hehehehehe') + history_win = df.index.levels[0].get_indexer([start], method='bfill')[0] if history_win < max_backwards: warnings.warn("Historical data seems insufficient. " "{} rows of historical data are required, but only {} rows are obtained. "