Skip to content

Commit

Permalink
Add test
Browse files Browse the repository at this point in the history
  • Loading branch information
irenedea committed Oct 22, 2024
1 parent 7898e8c commit 2adfa9b
Showing 1 changed file with 19 additions and 1 deletion.
20 changes: 19 additions & 1 deletion tests/data/test_dataset.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,33 @@
# Copyright 2022 MosaicML LLM Foundry authors
# SPDX-License-Identifier: Apache-2.0
import os
from contextlib import nullcontext
from typing import Optional
from unittest import mock

import pytest

from llmfoundry.data.finetuning.tasks import dataset_constructor
from llmfoundry.data.finetuning.tasks import (
_get_num_processes,
dataset_constructor,
)
from llmfoundry.utils.exceptions import DatasetTooSmallError


def test_get_num_processes():
    """Check `_get_num_processes` against the `MAX_NUM_PROC` env variable.

    `os.cpu_count` is patched to report 16 in every scenario:

    * `MAX_NUM_PROC=4`  -> 4 is expected.
    * `MAX_NUM_PROC=32` -> 8 is expected.
    * `MAX_NUM_PROC` unset -> 8 is expected.
    """
    with mock.patch.dict(os.environ, {'MAX_NUM_PROC': '4'}):
        with mock.patch('os.cpu_count', return_value=16):
            assert _get_num_processes() == 4

    with mock.patch.dict(os.environ, {'MAX_NUM_PROC': '32'}):
        with mock.patch('os.cpu_count', return_value=16):
            assert _get_num_processes() == 8

    # Patching with an empty dict does not remove a MAX_NUM_PROC inherited
    # from the real environment, which would make this scenario depend on
    # where the test runs. Drop the variable explicitly; patch.dict restores
    # the original environment when the block exits.
    with mock.patch.dict(os.environ):
        os.environ.pop('MAX_NUM_PROC', None)
        with mock.patch('os.cpu_count', return_value=16):
            assert _get_num_processes() == 8


@pytest.mark.parametrize('num_canonical_nodes', [None, 8, 2])
def test_finetuning_streaming_dataset_too_small(
num_canonical_nodes: Optional[int],
Expand Down

0 comments on commit 2adfa9b

Please sign in to comment.