From bdec2e475b0bfc4c3a47ba3a6a1b2ca17cfa4d1d Mon Sep 17 00:00:00 2001 From: polinaeterna Date: Mon, 23 Dec 2024 13:29:13 +0100 Subject: [PATCH] fix --- services/worker/src/worker/statistics_utils.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/services/worker/src/worker/statistics_utils.py b/services/worker/src/worker/statistics_utils.py index 28d340faa..d53597fff 100644 --- a/services/worker/src/worker/statistics_utils.py +++ b/services/worker/src/worker/statistics_utils.py @@ -504,6 +504,14 @@ def _compute_statistics( ) -> Union[CategoricalStatisticsItem, NumericalStatisticsItem, DatetimeStatisticsItem]: nan_count, nan_proportion = nan_count_proportion(data, column_name, n_samples) n_unique = data[column_name].n_unique() + if cls.is_datetime(data, column_name): + datetime_stats: DatetimeStatisticsItem = DatetimeColumn.compute_statistics( + data.select(pl.col(column_name).cast(pl.Datetime)), + column_name=column_name, + n_samples=n_samples, + ) + return datetime_stats + if cls.is_class(n_unique, n_samples): labels2counts: dict[str, int] = value_counts(data, column_name) if nan_count != n_samples else {} logging.debug(f"{n_unique=} {nan_count=} {nan_proportion=} {labels2counts=}") @@ -517,13 +525,6 @@ def _compute_statistics( n_unique=len(labels2counts), frequencies=labels2counts, ) - if cls.is_datetime(data, column_name): - datetime_stats: DatetimeStatisticsItem = DatetimeColumn.compute_statistics( - data.select(pl.col(column_name).cast(pl.Datetime)), - column_name=column_name, - n_samples=n_samples, - ) - return datetime_stats lengths_column_name = f"{column_name}_len" lengths_df = cls.compute_transformed_data(data, column_name, transformed_column_name=lengths_column_name) @@ -536,7 +537,7 @@ def compute_and_prepare_response(self, data: pl.DataFrame) -> StatisticsPerColum stats = self.compute_statistics(data, column_name=self.name, n_samples=self.n_samples) if "frequencies" in stats: string_type = ColumnType.STRING_LABEL - elif isinstance(stats["histogram"], DatetimeHistogram): # type: ignore + elif isinstance(stats["histogram"]["bin_edges"][0], str): string_type = ColumnType.DATETIME else: string_type = ColumnType.STRING_TEXT