From a134a075b69b816148dfa8816ac472831b5e7349 Mon Sep 17 00:00:00 2001 From: Matt Dancho Date: Sun, 5 Nov 2023 12:03:12 -0500 Subject: [PATCH] #216 - fix incorrect sorting --- src/pytimetk/feature_engineering/expanding.py | 34 +++++++------------ src/pytimetk/feature_engineering/rolling.py | 34 +++++++------------ 2 files changed, 24 insertions(+), 44 deletions(-) diff --git a/src/pytimetk/feature_engineering/expanding.py b/src/pytimetk/feature_engineering/expanding.py index 1e989f61..6cc44eae 100644 --- a/src/pytimetk/feature_engineering/expanding.py +++ b/src/pytimetk/feature_engineering/expanding.py @@ -522,29 +522,19 @@ def _augment_expanding_polars( expanding_exprs.append(expanding_expr) new_column_names.append(new_column_name) - # Convert Pandas DataFrame to Polars and ensure a consistent row order by resetting the index - df = pl.from_pandas(pandas_df.reset_index()) - - # Evaluate the accumulated expanding expressions and convert back to a Pandas DataFrame - if group_names: - df_new_columns = df \ - .sort(*group_names, date_column) \ - .group_by(group_names) \ - .agg(expanding_exprs) \ - .sort(*group_names) \ - .explode(new_column_names) + # Select the columns + selected_columns = expanding_exprs + + df = pl.DataFrame(pandas_df) + if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy): + out_df = df.group_by(data.grouper.names, maintain_order=True).agg(selected_columns) + out_df = out_df.explode(out_df.columns[1:]) + out_df = out_df.drop(data.grouper.names) + else: # a dataframe + out_df = df.select(selected_columns) - df = pl.concat([df, df_new_columns.drop(group_names)], how="horizontal") \ - .sort('index') \ - .drop('index') \ - .to_pandas() - else: - df = df \ - .sort(date_column) \ - .with_columns(expanding_exprs) \ - .sort('index') \ - .drop('index') \ - .to_pandas() + # Concatenate the DataFrames horizontally + df = pl.concat([df, out_df], how="horizontal").to_pandas() return reduce_memory_usage(df) diff --git a/src/pytimetk/feature_engineering/rolling.py b/src/pytimetk/feature_engineering/rolling.py index e68e1d41..7f778148 100644 --- a/src/pytimetk/feature_engineering/rolling.py +++ b/src/pytimetk/feature_engineering/rolling.py @@ -514,30 +514,20 @@ def _augment_rolling_polars( # Add constructed expressions and new column names to respective lists rolling_exprs.append(rolling_expr) new_column_names.append(new_column_name) - - # Convert Pandas DataFrame to Polars and ensure a consistent row order by resetting the index - df = pl.from_pandas(pandas_df.reset_index()) - # Evaluate the accumulated rolling expressions and convert back to a Pandas DataFrame - if group_names: - df_new_columns = df \ - .sort(*group_names, date_column) \ - .group_by(group_names) \ - .agg(rolling_exprs) \ - .sort(*group_names) \ - .explode(new_column_names) + # Select the columns + selected_columns = rolling_exprs + + df = pl.DataFrame(pandas_df) + if isinstance(data, pd.core.groupby.generic.DataFrameGroupBy): + out_df = df.group_by(data.grouper.names, maintain_order=True).agg(selected_columns) + out_df = out_df.explode(out_df.columns[1:]) + out_df = out_df.drop(data.grouper.names) + else: # a dataframe + out_df = df.select(selected_columns) - df = pl.concat([df, df_new_columns.drop(group_names)], how="horizontal") \ - .sort('index') \ - .drop('index') \ - .to_pandas() - else: - df = df \ - .sort(date_column) \ - .with_columns(rolling_exprs) \ - .sort('index') \ - .drop('index') \ - .to_pandas() + # Concatenate the DataFrames horizontally + df = pl.concat([df, out_df], how="horizontal").to_pandas() return reduce_memory_usage(df)