From 498e756456a4548863537a276a4291d97c4b3414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E8=81=82=E9=9D=96=E5=85=A5?= Date: Wed, 25 Oct 2023 03:46:35 +0000 Subject: [PATCH] check port availability only in main launcher for deepspeed/torchrun --- src/accelerate/utils/launch.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/accelerate/utils/launch.py b/src/accelerate/utils/launch.py index cd4a9eb2188..9abed25defa 100644 --- a/src/accelerate/utils/launch.py +++ b/src/accelerate/utils/launch.py @@ -128,7 +128,8 @@ def prepare_multi_gpu_env(args: argparse.Namespace) -> Dict[str, str]: if main_process_port is None: main_process_port = 29500 - if is_port_in_use(main_process_port): + need_port_check = num_machines <= 1 or int(args.machine_rank) == 0 + if need_port_check and is_port_in_use(main_process_port): raise ConnectionError( f"Tried to launch distributed communication on port `{main_process_port}`, but another process is utilizing it. " "Please specify a different port (such as using the `----main_process_port` flag or specifying a different `main_process_port` in your config file)"