Skip to content

Commit

Permalink
fix tests
Browse files Browse the repository at this point in the history
  • Loading branch information
eitanturok committed Sep 26, 2024
1 parent e7b812d commit 50d5610
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 13 deletions.
2 changes: 1 addition & 1 deletion llmfoundry/command_utils/data_prep/convert_dataset_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def build_hf_dataset(
no_wrap (bool): if concatenating, whether to wrap text across `max_length` boundaries
tokenizer (PreTrainedTokenizerBase): if mode is CONCAT_TOKENS, the tokenizer to use
data_subset (str): Referred to as "name" in HuggingFace datasets.load_dataset.
- Typically "all" (The Pile) or "en" (c4).
+ Typically "all" (The Pile) or "en" (allenai/c4).
Returns:
An IterableDataset.
Expand Down
9 changes: 5 additions & 4 deletions tests/a_scripts/eval/test_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,16 +157,17 @@ def test_loader_eval(
print(inmemorylogger.data.keys())

# Checks for first eval dataloader
- assert 'metrics/eval/c4/LanguageCrossEntropy' in inmemorylogger.data.keys()
+ assert 'metrics/eval/allenai/c4/LanguageCrossEntropy' in inmemorylogger.data.keys(
+ )
  assert isinstance(
- inmemorylogger.data['metrics/eval/c4/LanguageCrossEntropy'],
+ inmemorylogger.data['metrics/eval/allenai/c4/LanguageCrossEntropy'],
  list,
  )
  assert len(
- inmemorylogger.data['metrics/eval/c4/LanguageCrossEntropy'][-1],
+ inmemorylogger.data['metrics/eval/allenai/c4/LanguageCrossEntropy'][-1],
  ) > 0
  assert isinstance(
- inmemorylogger.data['metrics/eval/c4/LanguageCrossEntropy'][-1],
+ inmemorylogger.data['metrics/eval/allenai/c4/LanguageCrossEntropy'][-1],
  tuple,
  )

Expand Down
18 changes: 10 additions & 8 deletions tests/a_scripts/train/test_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,16 +154,17 @@ def test_train_multi_eval(tmp_path: pathlib.Path):
assert isinstance(inmemorylogger, InMemoryLogger)

# Checks for first eval dataloader
- assert 'metrics/eval/c4/LanguageCrossEntropy' in inmemorylogger.data.keys()
+ assert 'metrics/eval/allenai/c4/LanguageCrossEntropy' in inmemorylogger.data.keys(
+ )
  assert isinstance(
- inmemorylogger.data['metrics/eval/c4/LanguageCrossEntropy'],
+ inmemorylogger.data['metrics/eval/allenai/c4/LanguageCrossEntropy'],
  list,
  )
  assert len(
- inmemorylogger.data['metrics/eval/c4/LanguageCrossEntropy'][-1],
+ inmemorylogger.data['metrics/eval/allenai/c4/LanguageCrossEntropy'][-1],
  ) > 0
  assert isinstance(
- inmemorylogger.data['metrics/eval/c4/LanguageCrossEntropy'][-1],
+ inmemorylogger.data['metrics/eval/allenai/c4/LanguageCrossEntropy'][-1],
  tuple,
  )

Expand Down Expand Up @@ -226,15 +227,16 @@ def test_eval_metrics_with_no_train_metrics(tmp_path: pathlib.Path):
0] # pyright: ignore [reportGeneralTypeIssues]
assert isinstance(inmemorylogger, InMemoryLogger)

- assert 'metrics/eval/c4/LanguageCrossEntropy' in inmemorylogger.data.keys()
+ assert 'metrics/eval/allenai/c4/LanguageCrossEntropy' in inmemorylogger.data.keys(
+ )
  assert isinstance(
- inmemorylogger.data['metrics/eval/c4/LanguageCrossEntropy'],
+ inmemorylogger.data['metrics/eval/allenai/c4/LanguageCrossEntropy'],
  list,
  )
  assert len(
- inmemorylogger.data['metrics/eval/c4/LanguageCrossEntropy'][-1],
+ inmemorylogger.data['metrics/eval/allenai/c4/LanguageCrossEntropy'][-1],
  ) > 0
  assert isinstance(
- inmemorylogger.data['metrics/eval/c4/LanguageCrossEntropy'][-1],
+ inmemorylogger.data['metrics/eval/allenai/c4/LanguageCrossEntropy'][-1],
  tuple,
  )

0 comments on commit 50d5610

Please sign in to comment.