Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix weekly date range bug for deepar #154

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions runtime/databricks/automl_runtime/forecast/deepar/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ def set_index_and_fill_missing_time_steps(df: pd.DataFrame, time_col: str,
multi-series - dictionary of transformed dataframes, each key is the (concatenated) id of the time series
"""
total_min, total_max = df[time_col].min(), df[time_col].max()

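# pd.date_range treats a bare weekly frequency "W" as "W-SUN", so anchor the
# frequency to the weekday of the earliest timestamp in the data instead;
# otherwise the generated index would always fall on Sundays.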
if frequency.upper() == "W":
weekday_name = total_min.strftime("%a").upper() # e.g., "FRI"
frequency = f"W-{weekday_name}"

new_index_full = pd.date_range(total_min, total_max, freq=frequency)

if id_cols is not None:
Expand Down
2 changes: 1 addition & 1 deletion runtime/databricks/automl_runtime/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
# limitations under the License.
#

__version__ = "0.2.20.4" # pragma: no cover
__version__ = "0.2.20.5.dev0" # pragma: no cover
35 changes: 35 additions & 0 deletions runtime/tests/automl_runtime/forecast/deepar/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,38 @@ def test_multi_series_multi_id_cols_filled(self):
expected_first_df = expected_first_df.set_index(time_col).rename_axis(None).asfreq("D")

pd.testing.assert_frame_equal(transformed_df_dict["1-1"], expected_first_df)

def test_single_series_week_day_index(self):
    """Check that a bare "W" frequency follows the weekday found in the data.

    The input series is sampled on Fridays with two rows removed. Calling
    set_index_and_fill_missing_time_steps with the unanchored frequency "W"
    is expected to return a frame indexed on a "W-FRI" grid in which the
    removed rows are present again with NaN targets.
    """
    target_col = "sales"
    time_col = "date"
    num_weeks = 10
    # Row labels (0-based) removed from the input and expected back as NaN.
    missing_rows = [3, 4]

    # 2020-01-03 is the first Friday of 2020; "W-FRI" anchors each step on a Friday.
    base_dates = pd.date_range(start="2020-01-03", periods=num_weeks, freq="W-FRI")
    base_df = pd.DataFrame({
        time_col: base_dates,
        target_col: range(num_weeks),
    })

    # Input with gaps: the rows labelled 3 and 4 are missing.
    dropped_df = base_df.drop(missing_rows).reset_index(drop=True)

    # Weekly frequency is passed **without** naming the weekday.
    transformed_df = set_index_and_fill_missing_time_steps(
        dropped_df,
        time_col,
        "W",
    )

    expected_df = base_df.copy()
    # Cast to float before writing NaN: assigning NaN into an int64 column
    # relies on silent upcasting, which newer pandas versions deprecate.
    expected_df[target_col] = expected_df[target_col].astype("float64")
    expected_df.loc[missing_rows, target_col] = float("nan")
    expected_df = expected_df.set_index(time_col).rename_axis(None).asfreq("W-FRI")

    pd.testing.assert_frame_equal(transformed_df, expected_df)
Loading