Skip to content

Commit

Permalink
error
Browse files Browse the repository at this point in the history
  • Loading branch information
j316chuck committed Nov 21, 2023
1 parent 918fa9b commit 8551c64
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions tests/test_dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,15 +311,17 @@ def test_finetuning_dataloader(decoder_only_format: bool,
@pytest.mark.parametrize('dataset_size', [4, 8])
@pytest.mark.parametrize('device_batch_size', [2, 4])
@pytest.mark.parametrize('drop_last', [True, False])
@pytest.mark.parametrize('invalid_dataset', [True, False])
def test_finetuning_dataloader_small_data(dataset_size: int,
device_batch_size: int,
drop_last: bool):
drop_last: bool,
invalid_dataset: bool):
tokenizer_name = 'gpt2'
max_seq_len = 2048
tiny_dataset_folder_path = os.path.join(os.getcwd(), 'test-ift-data-small')
tiny_dataset_path = os.path.join(tiny_dataset_folder_path, 'train.jsonl')
if dist.get_global_rank() == 0:
make_tiny_ft_dataset(path=tiny_dataset_path, size=dataset_size)
make_tiny_ft_dataset(path=tiny_dataset_path, size=dataset_size, add_bad_data_error=invalid_dataset)

cfg = {
'name': 'finetuning',
Expand Down Expand Up @@ -353,6 +355,8 @@ def test_finetuning_dataloader_small_data(dataset_size: int,
error_context = contextlib.nullcontext()
if (dist.get_world_size() * device_batch_size > dataset_size) and drop_last:
error_context = pytest.raises(ValueError, match='Your dataset')
if (invalid_dataset):
error_context = pytest.raises(ValueError, match='Unable to tokenize example')

with error_context:
_ = build_finetuning_dataloader(cfg, tokenizer, device_batch_size)
Expand Down

0 comments on commit 8551c64

Please sign in to comment.