Skip to content

Commit

Permalink
Fix weekly date range bug for deepar (#154)
Browse files Browse the repository at this point in the history
* Fix weekly date range bug for deepar

* Fix import
  • Loading branch information
es94129 authored Nov 15, 2024
1 parent 0c71e51 commit de71cd8
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 1 deletion.
7 changes: 7 additions & 0 deletions runtime/databricks/automl_runtime/forecast/deepar/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ def set_index_and_fill_missing_time_steps(df: pd.DataFrame, time_col: str,
multi-series - dictionary of transformed dataframes, each key is the (concatenated) id of the time series
"""
total_min, total_max = df[time_col].min(), df[time_col].max()

# We need to adjust the frequency for pd.date_range if it is weekly,
# otherwise it would always be "W-SUN"
if frequency.upper() == "W":
weekday_name = total_min.strftime("%a").upper() # e.g., "FRI"
frequency = f"W-{weekday_name}"

new_index_full = pd.date_range(total_min, total_max, freq=frequency)

if id_cols is not None:
Expand Down
2 changes: 1 addition & 1 deletion runtime/databricks/automl_runtime/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
# limitations under the License.
#

__version__ = "0.2.20.4" # pragma: no cover
__version__ = "0.2.20.5.dev0" # pragma: no cover
35 changes: 35 additions & 0 deletions runtime/tests/automl_runtime/forecast/deepar/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,38 @@ def test_multi_series_multi_id_cols_filled(self):
expected_first_df = expected_first_df.set_index(time_col).rename_axis(None).asfreq("D")

pd.testing.assert_frame_equal(transformed_df_dict["1-1"], expected_first_df)

def test_single_series_week_day_index(self):
    """Check that a plain "W" frequency keeps the weekday of the input data.

    The input series is sampled every Friday with two weeks removed. The
    function under test is called with the un-anchored "W" alias and is
    expected to return a frame indexed on Fridays ("W-FRI"), with NaN in
    the rows for the two removed weeks.
    """
    target_col = "sales"
    time_col = "date"
    num_weeks = 10

    # 2020-01-03 is the first Friday of 2020; "W-FRI" anchors every
    # generated timestamp on a Friday.
    base_dates = pd.date_range(
        start="2020-01-03",
        periods=num_weeks,
        freq="W-FRI",
    )

    base_df = pd.DataFrame({
        time_col: base_dates,
        target_col: range(num_weeks),
    })

    # Remove the rows at positions 3 and 4 to simulate missing weeks.
    dropped_df = base_df.drop([3, 4]).reset_index(drop=True)

    # Deliberately pass "W" **without** a weekday anchor.
    transformed_df = set_index_and_fill_missing_time_steps(
        dropped_df,
        time_col,
        "W",
    )

    # Build the expected frame. The target column is cast to float64
    # *before* inserting NaN: assigning NaN into an int64 column relies on
    # implicit upcasting, which newer pandas versions deprecate.
    # astype() returns a new frame, so base_df is left untouched.
    expected_df = base_df.astype({target_col: "float64"})
    expected_df.loc[[3, 4], target_col] = float("nan")
    expected_df = expected_df.set_index(time_col).rename_axis(None).asfreq("W-FRI")

    pd.testing.assert_frame_equal(transformed_df, expected_df)

0 comments on commit de71cd8

Please sign in to comment.