From 282364e426b3c07e1a3e79b1b40a281fdf102f89 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com> Date: Tue, 3 Dec 2024 18:23:30 +0100 Subject: [PATCH] Fix excluded binary column in rows (#3108) * fix excluded binary column in rows * add truncated binary for foursquare/fsq-os-places * style --- services/rows/src/rows/routes/rows.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/services/rows/src/rows/routes/rows.py b/services/rows/src/rows/routes/rows.py index 14187118b..f1e6768c3 100644 --- a/services/rows/src/rows/routes/rows.py +++ b/services/rows/src/rows/routes/rows.py @@ -4,6 +4,7 @@ import logging from typing import Literal, Optional, Union +from datasets import Features from datasets.table import cast_table_to_features from fsspec.implementations.http import HTTPFileSystem from libapi.authentication import auth_check @@ -96,13 +97,16 @@ async def rows_endpoint(request: Request) -> Response: with StepProfiler(method="rows_endpoint", step="query the rows"): try: truncated_columns: list[str] = [] - if dataset == "Major-TOM/Core-S2L2A": + if dataset == "Major-TOM/Core-S2L2A" or dataset == "foursquare/fsq-os-places": pa_table, truncated_columns = rows_index.query_truncated_binary( offset=offset, length=length ) else: pa_table = rows_index.query(offset=offset, length=length) - pa_table = cast_table_to_features(pa_table, rows_index.parquet_index.features) + features = Features( + {col: rows_index.parquet_index.features[col] for col in pa_table.column_names} + ) + pa_table = cast_table_to_features(pa_table, features) except TooBigRows as err: raise TooBigContentError(str(err)) from None with StepProfiler(method="rows_endpoint", step="transform to a list"):