From 235c563c0f1648e9a6d048a15146b9b34bd106b2 Mon Sep 17 00:00:00 2001
From: Sebastian Hoffmann <shoffmann.git@gmail.com>
Date: Thu, 28 Mar 2024 17:19:16 +0100
Subject: [PATCH] fix: use gpu/nccl even when running without slurm

---
 dmlcloud/util/distributed.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/dmlcloud/util/distributed.py b/dmlcloud/util/distributed.py
index b76d211..4db2e65 100644
--- a/dmlcloud/util/distributed.py
+++ b/dmlcloud/util/distributed.py
@@ -79,14 +79,17 @@ def print_worker(msg, barrier=True, flush=True):
         dist.barrier()
 
 
-def init_process_group_dummy():
+def init_process_group_dummy(**kwargs):
     """
     Initializes the process group with a single process.
     Uses HashStore under the hood. Useful for applications that
     only run on a single gpu.
     """
+    backend = kwargs.pop('backend', None)  # pop, not get: backend is passed explicitly below, so it must not remain in **kwargs
+    if backend is None:
+        backend = 'cpu:gloo,cuda:nccl' if dist.is_nccl_available() else 'gloo'
     store = dist.HashStore()
-    dist.init_process_group(store=store, rank=0, world_size=1, backend='gloo')
+    dist.init_process_group(store=store, rank=0, world_size=1, backend=backend, **kwargs)
 
 
 def init_process_group_MPI(ip_idx=0, port=None, **kwargs):