Skip to content

Commit

Permalink
mention skipped columns
Browse files: browse the repository at this point in the history
  • Loading branch information
lhoestq committed Nov 17, 2023
1 parent 0abae3f commit 53e9c86
Showing 1 changed file with 13 additions and 8 deletions.
21 changes: 13 additions & 8 deletions services/api/src/api/routes/croissant.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -73,14 +73,6 @@ def get_croissant_from_dataset_infos(dataset: str, infos: list[Mapping[str, Any]
"includes": f"{config}/*/*.parquet",
}
)
record_set.append(
{
"@type": "ml:RecordSet",
"name": config,
"description": f"'{config}' subset{' (first 5GB)' if partial else ''}",
"field": fields,
}
)
skipped_columns = []
for column, feature in features.items():
if isinstance(feature, Value) and feature.dtype in HF_TO_CRROISSANT_VALUE_TYPE:
Expand Down Expand Up @@ -109,6 +101,19 @@ def get_croissant_from_dataset_infos(dataset: str, infos: list[Mapping[str, Any]
)
else:
skipped_columns.append(column)
description = f"'{config}' subset"
if partial:
description += " (first 5GB)"
if skipped_columns:
description += f" ({len(skipped_columns)} skipped columns: {', '.join(skipped_columns)})"
record_set.append(
{
"@type": "ml:RecordSet",
"name": config,
"description": description,
"field": fields,
}
)
return {
"@context": {
"@language": "en",
Expand Down

0 comments on commit 53e9c86

Please sign in to comment.