diff --git a/llmfoundry/eval/datasets/in_context_learning_evaluation.py b/llmfoundry/eval/datasets/in_context_learning_evaluation.py index 2fd68e0bf5..b2fc1a943e 100644 --- a/llmfoundry/eval/datasets/in_context_learning_evaluation.py +++ b/llmfoundry/eval/datasets/in_context_learning_evaluation.py @@ -1493,6 +1493,8 @@ def build_icl_dataloader( 3. set the `split_batch` function if necessary """ # Add named parameters to kwargs + if kwargs is None: + kwargs = {} kwargs.update({ 'dataset_uri': dataset_uri, 'tokenizer': tokenizer, diff --git a/llmfoundry/registry.py b/llmfoundry/registry.py index 3f754e1952..6f6b2a2d57 100644 --- a/llmfoundry/registry.py +++ b/llmfoundry/registry.py @@ -212,7 +212,10 @@ icl_datasets = create_registry( 'llmfoundry', 'icl_datasets', - # TODO: Change type to InContextLearningDataset + # TODO: Change type from Dataset to + # llmfoundry.eval.InContextLearningDataset. + # Using ICL dataset here introduces a circular import dependency between + # the registry and eval packages right now, thus needs some refactoring. generic_type=Type[Dataset], entry_points=True, description=_icl_datasets_description, diff --git a/tests/eval/test_in_context_learning_datasets.py b/tests/eval/test_in_context_learning_datasets.py index c17f3a135c..3182005b58 100644 --- a/tests/eval/test_in_context_learning_datasets.py +++ b/tests/eval/test_in_context_learning_datasets.py @@ -2272,7 +2272,7 @@ def test_qa_task_evaluation( 'pad_tok_id': tokenizer.eos_token_id, 'num_fewshot': num_fewshot, 'prompt_string': '', - 'example_delimite': '\n', + 'example_delimiter': '\n', 'continuation_delimiter': ': ', } ) diff --git a/tests/test_registry.py b/tests/test_registry.py index 87881450d4..3bdf5a800f 100644 --- a/tests/test_registry.py +++ b/tests/test_registry.py @@ -42,6 +42,7 @@ def test_expected_registries_exist(): 'attention_classes', 'attention_implementations', 'fcs', + 'icl_datasets', } assert existing_registries == expected_registry_names