Skip to content

Commit

Permalink
fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
polinaeterna committed Dec 23, 2024
1 parent d91d365 commit bdec2e4
Showing 1 changed file with 9 additions and 8 deletions.
17 changes: 9 additions & 8 deletions services/worker/src/worker/statistics_utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -504,6 +504,14 @@ def _compute_statistics(
) -> Union[CategoricalStatisticsItem, NumericalStatisticsItem, DatetimeStatisticsItem]:
nan_count, nan_proportion = nan_count_proportion(data, column_name, n_samples)
n_unique = data[column_name].n_unique()
+ if cls.is_datetime(data, column_name):
+ datetime_stats: DatetimeStatisticsItem = DatetimeColumn.compute_statistics(
+ data.select(pl.col(column_name).cast(pl.Datetime)),
+ column_name=column_name,
+ n_samples=n_samples,
+ )
+ return datetime_stats
+
if cls.is_class(n_unique, n_samples):
labels2counts: dict[str, int] = value_counts(data, column_name) if nan_count != n_samples else {}
logging.debug(f"{n_unique=} {nan_count=} {nan_proportion=} {labels2counts=}")
Expand All @@ -517,13 +525,6 @@ def _compute_statistics(
n_unique=len(labels2counts),
frequencies=labels2counts,
)
- if cls.is_datetime(data, column_name):
- datetime_stats: DatetimeStatisticsItem = DatetimeColumn.compute_statistics(
- data.select(pl.col(column_name).cast(pl.Datetime)),
- column_name=column_name,
- n_samples=n_samples,
- )
- return datetime_stats

lengths_column_name = f"{column_name}_len"
lengths_df = cls.compute_transformed_data(data, column_name, transformed_column_name=lengths_column_name)
Expand All @@ -536,7 +537,7 @@ def compute_and_prepare_response(self, data: pl.DataFrame) -> StatisticsPerColum
stats = self.compute_statistics(data, column_name=self.name, n_samples=self.n_samples)
if "frequencies" in stats:
string_type = ColumnType.STRING_LABEL
- elif isinstance(stats["histogram"], DatetimeHistogram): # type: ignore
+ elif isinstance(stats["histogram"]["bin_edges"][0], str):
string_type = ColumnType.DATETIME
else:
string_type = ColumnType.STRING_TEXT
Expand Down

0 comments on commit bdec2e4

Please sign in to comment.