From e97b4b24971659f78b5601e47c0f0534f852f292 Mon Sep 17 00:00:00 2001 From: sergiopaniego Date: Sat, 16 Nov 2024 16:23:33 +0100 Subject: [PATCH 01/10] Updated ClassLabel output in features.py --- src/datasets/features/features.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/datasets/features/features.py b/src/datasets/features/features.py index 34622cd94d9..2b798f9f43e 100644 --- a/src/datasets/features/features.py +++ b/src/datasets/features/features.py @@ -966,10 +966,10 @@ class ClassLabel: Example: ```py - >>> from datasets import Features + >>> from datasets import Features, ClassLabel >>> features = Features({'label': ClassLabel(num_classes=3, names=['bad', 'ok', 'good'])}) >>> features - {'label': ClassLabel(num_classes=3, names=['bad', 'ok', 'good'], id=None)} + {'label': ClassLabel(names=['bad', 'ok', 'good'], id=None)} ``` """ @@ -1156,7 +1156,7 @@ class Sequence: >>> from datasets import Features, Sequence, Value, ClassLabel >>> features = Features({'post': Sequence(feature={'text': Value(dtype='string'), 'upvotes': Value(dtype='int32'), 'label': ClassLabel(num_classes=2, names=['hot', 'cold'])})}) >>> features - {'post': Sequence(feature={'text': Value(dtype='string', id=None), 'upvotes': Value(dtype='int32', id=None), 'label': ClassLabel(num_classes=2, names=['hot', 'cold'], id=None)}, length=-1, id=None)} + {'post': Sequence(feature={'text': Value(dtype='string', id=None), 'upvotes': Value(dtype='int32', id=None), 'label': ClassLabel(names=['hot', 'cold'], id=None)}, length=-1, id=None)} ``` """ @@ -2109,7 +2109,7 @@ def copy(self) -> "Features": >>> ds = load_dataset("rotten_tomatoes", split="train") >>> copy_of_features = ds.features.copy() >>> copy_of_features - {'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos'], id=None), 'text': Value(dtype='string', id=None)} ``` """ From 01ef081c44afa6b1539af0e0ff2979a718dec885 Mon Sep 17 00:00:00 2001 From: 
sergiopaniego Date: Sat, 16 Nov 2024 16:35:31 +0100 Subject: [PATCH 02/10] Updated ClassLabel output in load.py --- src/datasets/load.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datasets/load.py b/src/datasets/load.py index ebdafafcd5f..1048322bb29 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -1839,7 +1839,7 @@ def load_dataset_builder( >>> from datasets import load_dataset_builder >>> ds_builder = load_dataset_builder('rotten_tomatoes') >>> ds_builder.info.features - {'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos'], id=None), 'text': Value(dtype='string', id=None)} ``` """ From d33aa46b7b83dd91f33c7f5ac7c7e14124130ff0 Mon Sep 17 00:00:00 2001 From: sergiopaniego Date: Sat, 16 Nov 2024 16:37:28 +0100 Subject: [PATCH 03/10] Updated ClassLabel output in iterable_dataset.py --- src/datasets/iterable_dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/datasets/iterable_dataset.py b/src/datasets/iterable_dataset.py index e38244383e2..cc91120d241 100644 --- a/src/datasets/iterable_dataset.py +++ b/src/datasets/iterable_dataset.py @@ -2951,17 +2951,17 @@ def cast( Example: ```py - >>> from datasets import load_dataset + >>> from datasets import load_dataset, ClassLabel, Value >>> ds = load_dataset("rotten_tomatoes", split="train", streaming=True) >>> ds.features - {'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos'], id=None), 'text': Value(dtype='string', id=None)} >>> new_features = ds.features.copy() >>> new_features["label"] = ClassLabel(names=["bad", "good"]) >>> new_features["text"] = Value("large_string") >>> ds = ds.cast(new_features) >>> ds.features - {'label': ClassLabel(num_classes=2, names=['bad', 'good'], id=None), + {'label': ClassLabel(names=['bad', 'good'], id=None), 'text': Value(dtype='large_string', id=None)} ``` """ From 
d1f6f7db53540d1b8fa974abaa2dd5e07884e57e Mon Sep 17 00:00:00 2001 From: sergiopaniego Date: Sat, 16 Nov 2024 16:47:09 +0100 Subject: [PATCH 04/10] Updated ClassLabel output in dataset_dict.py --- src/datasets/dataset_dict.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/datasets/dataset_dict.py b/src/datasets/dataset_dict.py index 40ca0cd7312..3ee81925ed1 100644 --- a/src/datasets/dataset_dict.py +++ b/src/datasets/dataset_dict.py @@ -275,17 +275,17 @@ def cast(self, features: Features) -> "DatasetDict": Example: ```py - >>> from datasets import load_dataset + >>> from datasets import load_dataset, ClassLabel, Value >>> ds = load_dataset("rotten_tomatoes") >>> ds["train"].features - {'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos'], id=None) 'text': Value(dtype='string', id=None)} >>> new_features = ds["train"].features.copy() >>> new_features['label'] = ClassLabel(names=['bad', 'good']) >>> new_features['text'] = Value('large_string') >>> ds = ds.cast(new_features) >>> ds["train"].features - {'label': ClassLabel(num_classes=2, names=['bad', 'good'], id=None), + {'label': ClassLabel(names=['bad', 'good'], id=None), 'text': Value(dtype='large_string', id=None)} ``` """ @@ -307,14 +307,14 @@ def cast_column(self, column: str, feature) -> "DatasetDict": Example: ```py - >>> from datasets import load_dataset + >>> from datasets import load_dataset, ClassLabel >>> ds = load_dataset("rotten_tomatoes") >>> ds["train"].features - {'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos'], id=None), 'text': Value(dtype='string', id=None)} >>> ds = ds.cast_column('label', ClassLabel(names=['bad', 'good'])) >>> ds["train"].features - {'label': ClassLabel(num_classes=2, names=['bad', 'good'], id=None), + {'label': ClassLabel(names=['bad', 'good'], id=None), 'text': Value(dtype='string', id=None)} ``` """ @@ -2201,14 
+2201,14 @@ def cast_column(self, column: str, feature: FeatureType) -> "IterableDatasetDict Example: ```py - >>> from datasets import load_dataset + >>> from datasets import load_dataset, ClassLabel >>> ds = load_dataset("rotten_tomatoes", streaming=True) >>> ds["train"].features - {'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos'], id=None), 'text': Value(dtype='string', id=None)} >>> ds = ds.cast_column('label', ClassLabel(names=['bad', 'good'])) >>> ds["train"].features - {'label': ClassLabel(num_classes=2, names=['bad', 'good'], id=None), + {'label': ClassLabel(names=['bad', 'good'], id=None), 'text': Value(dtype='string', id=None)} ``` """ @@ -2240,14 +2240,14 @@ def cast( >>> from datasets import load_dataset >>> ds = load_dataset("rotten_tomatoes", streaming=True) >>> ds["train"].features - {'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos'], id=None), 'text': Value(dtype='string', id=None)} >>> new_features = ds["train"].features.copy() >>> new_features['label'] = ClassLabel(names=['bad', 'good']) >>> new_features['text'] = Value('large_string') >>> ds = ds.cast(new_features) >>> ds["train"].features - {'label': ClassLabel(num_classes=2, names=['bad', 'good'], id=None), + {'label': ClassLabel(names=['bad', 'good'], id=None), 'text': Value(dtype='large_string', id=None)} ``` """ From c4c63b8b65b69b68ee4ffb110d76e558b78c9a1b Mon Sep 17 00:00:00 2001 From: sergiopaniego Date: Sat, 16 Nov 2024 16:57:01 +0100 Subject: [PATCH 05/10] Updated ClassLabel output in builder.py --- src/datasets/builder.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/datasets/builder.py b/src/datasets/builder.py index c3eee41c6e0..a0d5c100cbc 100644 --- a/src/datasets/builder.py +++ b/src/datasets/builder.py @@ -510,9 +510,9 @@ def get_all_exported_dataset_infos(cls) -> DatasetInfosDict: ```py >>> from datasets import 
load_dataset_builder - >>> ds_builder = load_dataset_builder('rotten_tomatoes') + >>> ds_builder = load_dataset_builder('vivos') >>> ds_builder.get_all_exported_dataset_infos() - {'default': DatasetInfo(description="Movie Review Dataset.\nThis is a dataset of containing 5,331 positive and 5,331 negative processed\nsentences from Rotten Tomatoes movie reviews. This data was first used in Bo\nPang and Lillian Lee, ``Seeing stars: Exploiting class relationships for\nsentiment categorization with respect to rating scales.'', Proceedings of the\nACL, 2005.\n", citation='@InProceedings{Pang+Lee:05a,\n author = {Bo Pang and Lillian Lee},\n title = {Seeing stars: Exploiting class relationships for sentiment\n categorization with respect to rating scales},\n booktitle = {Proceedings of the ACL},\n year = 2005\n}\n', homepage='http://www.cs.cornell.edu/people/pabo/movie-review-data/', license='', features={'text': Value(dtype='string', id=None), 'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None)}, post_processed=None, supervised_keys=SupervisedKeysData(input='', output=''), builder_name='rotten_tomatoes_movie_review', config_name='default', version=1.0.0, splits={'train': SplitInfo(name='train', num_bytes=1074810, num_examples=8530, dataset_name='rotten_tomatoes_movie_review'), 'validation': SplitInfo(name='validation', num_bytes=134679, num_examples=1066, dataset_name='rotten_tomatoes_movie_review'), 'test': SplitInfo(name='test', num_bytes=135972, num_examples=1066, dataset_name='rotten_tomatoes_movie_review')}, download_checksums={'https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz': {'num_bytes': 487770, 'checksum': 'a05befe52aafda71d458d188a1c54506a998b1308613ba76bbda2e5029409ce9'}}, download_size=487770, post_processing_size=None, dataset_size=1345461, size_in_bytes=1833231)} + {'default': DatasetInfo(description='', citation='', homepage='', license='', features={'speaker_id': Value(dtype='string', id=None), 
'path': Value(dtype='string', id=None), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'sentence': Value(dtype='string', id=None)}, post_processed=None, supervised_keys=None, builder_name=None, dataset_name=None, config_name='default', version=None, splits={'train': SplitInfo(name='train', num_bytes=1722002133, num_examples=11660, shard_lengths=None, dataset_name=None), 'test': SplitInfo(name='test', num_bytes=86120227, num_examples=760, shard_lengths=None, dataset_name=None)}, download_checksums=None, download_size=1475540500, post_processing_size=None, dataset_size=1808122360, size_in_bytes=None)} ``` """ return DatasetInfosDict.from_directory(cls.get_imported_module_dir()) @@ -526,8 +526,7 @@ def get_exported_dataset_info(self) -> DatasetInfo: >>> from datasets import load_dataset_builder >>> ds_builder = load_dataset_builder('rotten_tomatoes') >>> ds_builder.get_exported_dataset_info() - DatasetInfo(description="Movie Review Dataset.\nThis is a dataset of containing 5,331 positive and 5,331 negative processed\nsentences from Rotten Tomatoes movie reviews. 
This data was first used in Bo\nPang and Lillian Lee, ``Seeing stars: Exploiting class relationships for\nsentiment categorization with respect to rating scales.'', Proceedings of the\nACL, 2005.\n", citation='@InProceedings{Pang+Lee:05a,\n author = {Bo Pang and Lillian Lee},\n title = {Seeing stars: Exploiting class relationships for sentiment\n categorization with respect to rating scales},\n booktitle = {Proceedings of the ACL},\n year = 2005\n}\n', homepage='http://www.cs.cornell.edu/people/pabo/movie-review-data/', license='', features={'text': Value(dtype='string', id=None), 'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None)}, post_processed=None, supervised_keys=SupervisedKeysData(input='', output=''), builder_name='rotten_tomatoes_movie_review', config_name='default', version=1.0.0, splits={'train': SplitInfo(name='train', num_bytes=1074810, num_examples=8530, dataset_name='rotten_tomatoes_movie_review'), 'validation': SplitInfo(name='validation', num_bytes=134679, num_examples=1066, dataset_name='rotten_tomatoes_movie_review'), 'test': SplitInfo(name='test', num_bytes=135972, num_examples=1066, dataset_name='rotten_tomatoes_movie_review')}, download_checksums={'https://storage.googleapis.com/seldon-datasets/sentence_polarity_v1/rt-polaritydata.tar.gz': {'num_bytes': 487770, 'checksum': 'a05befe52aafda71d458d188a1c54506a998b1308613ba76bbda2e5029409ce9'}}, download_size=487770, post_processing_size=None, dataset_size=1345461, size_in_bytes=1833231) - ``` + DatasetInfo(description='', citation='', homepage='', license='', features={'speaker_id': Value(dtype='string', id=None), 'path': Value(dtype='string', id=None), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'sentence': Value(dtype='string', id=None)}, post_processed=None, supervised_keys=None, builder_name=None, dataset_name=None, config_name='default', version=None, splits={'train': SplitInfo(name='train', num_bytes=1722002133, num_examples=11660, 
shard_lengths=None, dataset_name=None), 'test': SplitInfo(name='test', num_bytes=86120227, num_examples=760, shard_lengths=None, dataset_name=None)}, download_checksums=None, download_size=1475540500, post_processing_size=None, dataset_size=1808122360, size_in_bytes=None) """ return self.get_all_exported_dataset_infos().get(self.config.name, DatasetInfo()) From d1d790ebd19ff30a1a9d902e8217a78466c81939 Mon Sep 17 00:00:00 2001 From: sergiopaniego Date: Sat, 16 Nov 2024 17:01:40 +0100 Subject: [PATCH 06/10] Updated ClassLabel output in arrow_dataset.py --- src/datasets/arrow_dataset.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index 57f3024e53b..21f32dd3e79 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -2023,14 +2023,14 @@ def cast( >>> from datasets import load_dataset, ClassLabel, Value >>> ds = load_dataset("rotten_tomatoes", split="validation") >>> ds.features - {'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos'], id=None), 'text': Value(dtype='string', id=None)} >>> new_features = ds.features.copy() >>> new_features['label'] = ClassLabel(names=['bad', 'good']) >>> new_features['text'] = Value('large_string') >>> ds = ds.cast(new_features) >>> ds.features - {'label': ClassLabel(num_classes=2, names=['bad', 'good'], id=None), + {'label': ClassLabel(names=['bad', 'good'], id=None), 'text': Value(dtype='large_string', id=None)} ``` """ @@ -2078,14 +2078,14 @@ def cast_column(self, column: str, feature: FeatureType, new_fingerprint: Option Example: ```py - >>> from datasets import load_dataset + >>> from datasets import load_dataset, ClassLabel >>> ds = load_dataset("rotten_tomatoes", split="validation") >>> ds.features - {'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos'], id=None), 'text': Value(dtype='string', 
id=None)} >>> ds = ds.cast_column('label', ClassLabel(names=['bad', 'good'])) >>> ds.features - {'label': ClassLabel(num_classes=2, names=['bad', 'good'], id=None), + {'label': ClassLabel(names=['bad', 'good'], id=None), 'text': Value(dtype='string', id=None)} ``` """ From 8a66b60ee6cf647f2d9a93a34df9376b71f487a1 Mon Sep 17 00:00:00 2001 From: sergiopaniego Date: Sat, 16 Nov 2024 17:14:23 +0100 Subject: [PATCH 07/10] Updated docs --- docs/source/about_dataset_features.mdx | 2 +- docs/source/load_hub.mdx | 2 +- docs/source/loading.mdx | 2 +- docs/source/process.mdx | 4 ++-- docs/source/stream.mdx | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/source/about_dataset_features.mdx b/docs/source/about_dataset_features.mdx index 12a85477645..f9b93fa9cb8 100644 --- a/docs/source/about_dataset_features.mdx +++ b/docs/source/about_dataset_features.mdx @@ -11,7 +11,7 @@ Let's have a look at the features of the MRPC dataset from the GLUE benchmark: >>> dataset = load_dataset('glue', 'mrpc', split='train') >>> dataset.features {'idx': Value(dtype='int32', id=None), - 'label': ClassLabel(num_classes=2, names=['not_equivalent', 'equivalent'], names_file=None, id=None), + 'label': ClassLabel(names=['not_equivalent', 'equivalent'], id=None), 'sentence1': Value(dtype='string', id=None), 'sentence2': Value(dtype='string', id=None), } diff --git a/docs/source/load_hub.mdx b/docs/source/load_hub.mdx index 93a27611fd7..d2c71754bc6 100644 --- a/docs/source/load_hub.mdx +++ b/docs/source/load_hub.mdx @@ -20,7 +20,7 @@ Movie Review Dataset. 
This is a dataset of containing 5,331 positive and 5,331 n # Inspect dataset features >>> ds_builder.info.features -{'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None), +{'label': ClassLabel(names=['neg', 'pos'], id=None), 'text': Value(dtype='string', id=None)} ``` diff --git a/docs/source/loading.mdx b/docs/source/loading.mdx index 49feeba419d..e30037e3cc7 100644 --- a/docs/source/loading.mdx +++ b/docs/source/loading.mdx @@ -435,7 +435,7 @@ Now when you look at your dataset features, you can see it uses the custom label ```py >>> dataset['train'].features {'text': Value(dtype='string', id=None), -'label': ClassLabel(num_classes=6, names=['sadness', 'joy', 'love', 'anger', 'fear', 'surprise'], names_file=None, id=None)} +'label': ClassLabel(names=['sadness', 'joy', 'love', 'anger', 'fear', 'surprise'], id=None)} ``` ## (Legacy) Local loading script diff --git a/docs/source/process.mdx b/docs/source/process.mdx index 38989613ef3..198b7509456 100644 --- a/docs/source/process.mdx +++ b/docs/source/process.mdx @@ -225,7 +225,7 @@ The [`~Dataset.cast`] function transforms the feature type of one or more column >>> dataset.features {'sentence1': Value(dtype='string', id=None), 'sentence2': Value(dtype='string', id=None), -'label': ClassLabel(num_classes=2, names=['not_equivalent', 'equivalent'], names_file=None, id=None), +'label': ClassLabel(names=['not_equivalent', 'equivalent'], id=None), 'idx': Value(dtype='int32', id=None)} >>> from datasets import ClassLabel, Value @@ -236,7 +236,7 @@ The [`~Dataset.cast`] function transforms the feature type of one or more column >>> dataset.features {'sentence1': Value(dtype='string', id=None), 'sentence2': Value(dtype='string', id=None), -'label': ClassLabel(num_classes=2, names=['negative', 'positive'], names_file=None, id=None), +'label': ClassLabel(names=['negative', 'positive'], id=None), 'idx': Value(dtype='int64', id=None)} ``` diff --git a/docs/source/stream.mdx b/docs/source/stream.mdx index 
0be393ce4a8..f17899aa438 100644 --- a/docs/source/stream.mdx +++ b/docs/source/stream.mdx @@ -229,7 +229,7 @@ When you need to remove one or more columns, give [`IterableDataset.remove_colum >>> dataset.features {'sentence1': Value(dtype='string', id=None), 'sentence2': Value(dtype='string', id=None), -'label': ClassLabel(num_classes=2, names=['not_equivalent', 'equivalent'], names_file=None, id=None), +'label': ClassLabel(names=['not_equivalent', 'equivalent'], id=None), 'idx': Value(dtype='int32', id=None)} >>> from datasets import ClassLabel, Value @@ -240,7 +240,7 @@ When you need to remove one or more columns, give [`IterableDataset.remove_colum >>> dataset.features {'sentence1': Value(dtype='string', id=None), 'sentence2': Value(dtype='string', id=None), -'label': ClassLabel(num_classes=2, names=['negative', 'positive'], names_file=None, id=None), +'label': ClassLabel(names=['negative', 'positive'], id=None), 'idx': Value(dtype='int64', id=None)} ``` From a2f86b9ae8d31b9e21c4ab216c27d9abff7b51e5 Mon Sep 17 00:00:00 2001 From: sergiopaniego Date: Sat, 16 Nov 2024 17:15:46 +0100 Subject: [PATCH 08/10] Added missing comma --- src/datasets/dataset_dict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datasets/dataset_dict.py b/src/datasets/dataset_dict.py index 3ee81925ed1..0720788d1dd 100644 --- a/src/datasets/dataset_dict.py +++ b/src/datasets/dataset_dict.py @@ -278,7 +278,7 @@ def cast(self, features: Features) -> "DatasetDict": >>> from datasets import load_dataset, ClassLabel, Value >>> ds = load_dataset("rotten_tomatoes") >>> ds["train"].features - {'label': ClassLabel(names=['neg', 'pos'], id=None) + {'label': ClassLabel(names=['neg', 'pos'], id=None), 'text': Value(dtype='string', id=None)} >>> new_features = ds["train"].features.copy() >>> new_features['label'] = ClassLabel(names=['bad', 'good']) From 5cf3977145b38864d78c142d4155d7905ef433a9 Mon Sep 17 00:00:00 2001 From: sergiopaniego Date: Mon, 18 Nov 2024 18:16:27 +0100 
Subject: [PATCH 09/10] Updated python code --- src/datasets/builder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/datasets/builder.py b/src/datasets/builder.py index a0d5c100cbc..3fef61d4e0a 100644 --- a/src/datasets/builder.py +++ b/src/datasets/builder.py @@ -527,6 +527,7 @@ def get_exported_dataset_info(self) -> DatasetInfo: >>> ds_builder = load_dataset_builder('rotten_tomatoes') >>> ds_builder.get_exported_dataset_info() DatasetInfo(description='', citation='', homepage='', license='', features={'speaker_id': Value(dtype='string', id=None), 'path': Value(dtype='string', id=None), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'sentence': Value(dtype='string', id=None)}, post_processed=None, supervised_keys=None, builder_name=None, dataset_name=None, config_name='default', version=None, splits={'train': SplitInfo(name='train', num_bytes=1722002133, num_examples=11660, shard_lengths=None, dataset_name=None), 'test': SplitInfo(name='test', num_bytes=86120227, num_examples=760, shard_lengths=None, dataset_name=None)}, download_checksums=None, download_size=1475540500, post_processing_size=None, dataset_size=1808122360, size_in_bytes=None) + ``` """ return self.get_all_exported_dataset_infos().get(self.config.name, DatasetInfo()) From 3cffbfd893ff519fc513e76b0848359db1e30c89 Mon Sep 17 00:00:00 2001 From: Sergio Paniego Blanco Date: Mon, 18 Nov 2024 19:03:58 +0100 Subject: [PATCH 10/10] Update src/datasets/builder.py Co-authored-by: Steven Liu <59462357+stevhliu@users.noreply.github.com> --- src/datasets/builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/datasets/builder.py b/src/datasets/builder.py index 3fef61d4e0a..1200749538e 100644 --- a/src/datasets/builder.py +++ b/src/datasets/builder.py @@ -512,7 +512,7 @@ def get_all_exported_dataset_infos(cls) -> DatasetInfosDict: >>> from datasets import load_dataset_builder >>> ds_builder = load_dataset_builder('vivos') >>> 
ds_builder.get_all_exported_dataset_infos() - {'default': DatasetInfo(description='', citation='', homepage='', license='', features={'speaker_id': Value(dtype='string', id=None), 'path': Value(dtype='string', id=None), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'sentence': Value(dtype='string', id=None)}, post_processed=None, supervised_keys=None, builder_name=None, dataset_name=None, config_name='default', version=None, splits={'train': SplitInfo(name='train', num_bytes=1722002133, num_examples=11660, shard_lengths=None, dataset_name=None), 'test': SplitInfo(name='test', num_bytes=86120227, num_examples=760, shard_lengths=None, dataset_name=None)}, download_checksums=None, download_size=1475540500, post_processing_size=None, dataset_size=1808122360, size_in_bytes=None)} + {'default': DatasetInfo(description='', citation='', homepage='', license='', features={'speaker_id': Value(dtype='string', id=None), 'path': Value(dtype='string', id=None), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'sentence': Value(dtype='string', id=None)}, post_processed=None, supervised_keys=None, builder_name=None, dataset_name=None, config_name='default', version=None, splits={'train': SplitInfo(name='train', num_bytes=1722002133, num_examples=11660, shard_lengths=None, dataset_name=None), 'test': SplitInfo(name='test', num_bytes=86120227, num_examples=760, shard_lengths=None, dataset_name=None)}, download_checksums=None, download_size=1475540500, post_processing_size=None, dataset_size=1808122360, size_in_bytes=None)} ``` """ return DatasetInfosDict.from_directory(cls.get_imported_module_dir())