Skip to content

Commit

Permalink
more tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
polinaeterna committed Dec 23, 2024
1 parent bdec2e4 commit f9ffe82
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 7 deletions.
14 changes: 14 additions & 0 deletions services/worker/tests/fixtures/statistics_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1716,6 +1716,19 @@ def null_column(n_samples: int) -> list[None]:
datetime.strptime("2024-01-10 00:00:00", "%Y-%m-%d %H:%M:%S"),
datetime.strptime("2024-01-11 00:00:00", "%Y-%m-%d %H:%M:%S"),
],
"datetime_string": [
"2024-01-01 00:00:00",
"2024-01-02 00:00:00",
"2024-01-03 00:00:00",
"2024-01-04 00:00:00",
"2024-01-05 00:00:00",
"2024-01-06 00:00:00",
"2024-01-07 00:00:00",
"2024-01-08 00:00:00",
"2024-01-09 00:00:00",
"2024-01-10 00:00:00",
"2024-01-11 00:00:00",
],
"datetime_tz": [
datetime.strptime("2024-01-01 00:00:00+0200", "%Y-%m-%d %H:%M:%S%z"),
datetime.strptime("2024-01-02 00:00:00+0200", "%Y-%m-%d %H:%M:%S%z"),
Expand Down Expand Up @@ -1747,6 +1760,7 @@ def null_column(n_samples: int) -> list[None]:
features=Features(
{
"datetime": Value("timestamp[s]"),
"datetime_string": Value("string"),
"datetime_tz": Value("timestamp[s, tz=+02:00]"),
"datetime_null": Value("timestamp[s]"),
"datetime_all_null": Value("timestamp[s]"),
Expand Down
21 changes: 14 additions & 7 deletions services/worker/tests/test_statistics_utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -513,7 +513,7 @@ def count_expected_statistics_for_datetime_column(column: pd.Series, column_name

# compute std
seconds_in_day = 24 * 60 * 60
if column_name in ["datetime", "datetime_tz"]:
if column_name in ["datetime", "datetime_string", "datetime_tz"]:
timedeltas = pd.Series(range(0, 11 * seconds_in_day, seconds_in_day))
hist = [2, 1, 1, 1, 1, 1, 1, 1, 1, 1]
elif column_name == "datetime_null":
Expand Down Expand Up @@ -542,19 +542,26 @@ def count_expected_statistics_for_datetime_column(column: pd.Series, column_name

@pytest.mark.parametrize(
"column_name",
["datetime", "datetime_tz", "datetime_null", "datetime_all_null"],
["datetime", "datetime_string", "datetime_tz", "datetime_null", "datetime_all_null"],
)
def test_datetime_statistics(
column_name: str,
datasets: Mapping[str, Dataset],
) -> None:
data = datasets["datetime_statistics"].to_pandas()
expected = count_expected_statistics_for_datetime_column(data[column_name], column_name)
computed = DatetimeColumn.compute_statistics(
data=pl.from_pandas(data),
column_name=column_name,
n_samples=len(data[column_name]),
)
if column_name == "datetime_string":
computed = StringColumn.compute_statistics(
data=pl.from_pandas(data),
column_name=column_name,
n_samples=len(data[column_name]),
)
else:
computed = DatetimeColumn.compute_statistics(
data=pl.from_pandas(data),
column_name=column_name,
n_samples=len(data[column_name]),
)
computed_std, expected_std = computed.pop("std"), expected.pop("std")
if computed_std:
assert computed_std.split(".")[0] == expected_std.split(".")[0] # check with precision up to seconds
Expand Down

0 comments on commit f9ffe82

Please sign in to comment.