[tests] make TestDeepSpeedModelZoo device-agnostic (huggingface#31402)
* fix

* use accelerator device count

* ci fix
faaany authored Jun 17, 2024
1 parent 7977f20 commit 9454f43
Showing 2 changed files with 9 additions and 4 deletions.
src/transformers/testing_utils.py (4 additions, 0 deletions)
@@ -2432,6 +2432,10 @@ def _device_agnostic_dispatch(device: str, dispatch_table: Dict[str, Callable],
     BACKEND_MANUAL_SEED = {"cuda": torch.cuda.manual_seed, "cpu": torch.manual_seed, "default": torch.manual_seed}
     BACKEND_EMPTY_CACHE = {"cuda": torch.cuda.empty_cache, "cpu": None, "default": None}
     BACKEND_DEVICE_COUNT = {"cuda": torch.cuda.device_count, "cpu": lambda: 0, "default": lambda: 1}
+else:
+    BACKEND_MANUAL_SEED = {"default": None}
+    BACKEND_EMPTY_CACHE = {"default": None}
+    BACKEND_DEVICE_COUNT = {"default": lambda: 0}


 def backend_manual_seed(device: str, seed: int):
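For readers skimming the diff, the tables above are device-to-callable dispatch tables. The sketch below shows how such a table can be consumed; the helper name `_dispatch` is illustrative only and is an assumption about the general shape of the library's `_device_agnostic_dispatch`, not its exact code.

from typing import Callable, Dict, Optional

def _dispatch(device: str, dispatch_table: Dict[str, Optional[Callable]], *args):
    # Look up the backend function for this device, falling back to "default".
    fn = dispatch_table.get(device, dispatch_table["default"])
    if fn is None:  # e.g. BACKEND_EMPTY_CACHE maps "cpu" to None: nothing to do
        return None
    return fn(*args)

# With torch unavailable, the new `else:` branch guarantees the tables exist and
# carry only a "default" entry, so a device-count query degrades to 0 instead of
# failing on an undefined name.
print(_dispatch("cpu", {"default": lambda: 0}))  # -> 0, mirroring the torch-less fallback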
tests/deepspeed/test_model_zoo.py (5 additions, 4 deletions)
@@ -23,12 +23,13 @@
 from transformers import is_torch_available
 from transformers.testing_utils import (
     TestCasePlus,
+    backend_device_count,
     execute_subprocess_async,
-    get_gpu_count,
     get_tests_dir,
     require_deepspeed,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
+    torch_device,
 )
 from transformers.trainer_utils import set_seed

@@ -143,7 +144,7 @@ def get_launcher(distributed=False):
     # - it won't be able to handle that
     # 2. for now testing with just 2 gpus max (since some quality tests may give different
     # results with mode gpus because we use very little data)
-    num_gpus = min(2, get_gpu_count()) if distributed else 1
+    num_gpus = min(2, backend_device_count(torch_device)) if distributed else 1
     master_port = os.environ.get("DS_TEST_PORT", DEFAULT_MASTER_PORT)
     return f"deepspeed --num_nodes 1 --num_gpus {num_gpus} --master_port {master_port}".split()
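As a usage note, swapping get_gpu_count() for backend_device_count(torch_device) makes the launcher sizing work on whichever accelerator backend the test session resolves torch_device to, not only CUDA. A small standalone sketch follows; the launcher variable is illustrative and not taken from the test file.

from transformers.testing_utils import backend_device_count, torch_device

# Cap at 2 processes, matching the test's policy, but count devices through the
# device-agnostic helper instead of torch.cuda.device_count().
num_processes = min(2, backend_device_count(torch_device))
launcher = f"deepspeed --num_nodes 1 --num_gpus {num_processes}".split()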

@@ -326,7 +327,7 @@ def parameterized_custom_name_func(func, param_num, param):

 @slow
 @require_deepspeed
-@require_torch_gpu
+@require_torch_accelerator
 class TestDeepSpeedModelZoo(TestCasePlus):
     """This class is for testing via an external script - can do multiple gpus"""

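For context, @require_torch_accelerator skips a test unless some torch accelerator is available, instead of requiring CUDA specifically. A hypothetical test class using the same decorator stack could look like the sketch below; the class and test names are made up for illustration.

from transformers.testing_utils import (
    TestCasePlus,
    backend_device_count,
    require_deepspeed,
    require_torch_accelerator,
    slow,
    torch_device,
)

@slow
@require_deepspeed
@require_torch_accelerator
class MyDeviceAgnosticSmokeTest(TestCasePlus):
    def test_sees_at_least_one_device(self):
        # Passes on CUDA or any other accelerator backend the suite resolves.
        self.assertGreaterEqual(backend_device_count(torch_device), 1)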
