Add warning for problematic libraries #2151

Merged: 5 commits, Nov 17, 2023
Changes from 4 commits
13 changes: 12 additions & 1 deletion src/accelerate/launchers.py
@@ -19,7 +19,7 @@
import torch

from .state import AcceleratorState, PartialState
-from .utils import PrecisionType, PrepareForLaunch, is_mps_available, patch_environment
+from .utils import PrecisionType, PrepareForLaunch, are_libraries_initialized, is_mps_available, patch_environment


def test_launch():
@@ -142,6 +142,17 @@ def train(*args):
"inside your training function. Restart your notebook and make sure no cells initializes an "
"`Accelerator`."
)
+# Check for specific libraries known to initialize CUDA that users constantly use
+problematic_imports = are_libraries_initialized("bitsandbytes")
+if len(problematic_imports) > 0:
+    err = (
+        "Could not start distributed process. Libraries known to initialize CUDA upon import have been "
+        "imported already. Please keep these imports inside your training function to try and help with this:"
+    )
+    for lib_name in problematic_imports:
+        err += f"\n\t* `{lib_name}`"
+    raise RuntimeError(err)

# torch.distributed will expect a few environment variable to be here. We set the ones common to each
# process here (the other ones will be set be the launcher).
with patch_environment(
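To make the intent of this guard concrete, here is a minimal illustrative sketch of the situation it now catches, assuming `bitsandbytes` is installed and initializes CUDA on import (the training function is made up for the example):

```python
# Illustration only: importing bitsandbytes at notebook top level initializes CUDA in the
# parent process, which breaks the workers that notebook_launcher later forks/spawns.
import bitsandbytes  # noqa: F401  (assumed installed)

from accelerate import notebook_launcher


def training_function():
    from accelerate import Accelerator

    accelerator = Accelerator()
    accelerator.print(accelerator.state)


# With this change the launcher raises a RuntimeError naming `bitsandbytes` up front,
# instead of failing later with an opaque CUDA re-initialization error.
notebook_launcher(training_function, args=(), num_processes=2)
```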
33 changes: 25 additions & 8 deletions src/accelerate/test_utils/scripts/test_notebook.py
@@ -1,17 +1,34 @@
# Test file to ensure that in general certain situational setups for notebooks work.
-import argparse
+import os
+
+from pytest import raises

from accelerate import PartialState, notebook_launcher
+from accelerate.test_utils import require_bnb
+from accelerate.utils import is_bnb_available
+
+
+def basic_function():
+    # Just prints the PartialState
+    print(f"PartialState:\n{PartialState()}")

-parser = argparse.ArgumentParser()
-parser.add_argument("--num_processes", type=int, default=1)
-args = parser.parse_args()

+NUM_PROCESSES = int(os.environ.get("ACCELERATE_NUM_PROCESSES", 1))

-def function():
-    print(f"PartialState:\n{PartialState()}")
+
+def test_can_initialize():
+    notebook_launcher(basic_function, (), num_processes=NUM_PROCESSES)
+
+
+@require_bnb
+def test_problematic_imports():
+    with raises(RuntimeError, match="Please keep these imports"):
+        notebook_launcher(basic_function, (), num_processes=NUM_PROCESSES)

-if __name__ == "__main__":
-    notebook_launcher(function, num_processes=int(args.num_processes))
+def main():
+    print("Test basic notebook can be ran")
+    test_can_initialize()
+    if is_bnb_available():
+        print("Test problematic imports (bnb)")
+        test_problematic_imports()
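The recommended counterpart to the failing sketch shown earlier looks roughly like this; the lazy import inside the training function is an illustration, not part of this PR:

```python
from accelerate import notebook_launcher


def training_function():
    # Keep CUDA-initializing imports inside the training function so each spawned
    # worker performs the import itself, after the processes have been created.
    import bitsandbytes as bnb  # noqa: F401  (assumed installed)

    from accelerate import Accelerator

    accelerator = Accelerator()
    accelerator.print("workers initialized cleanly")


notebook_launcher(training_function, args=(), num_processes=2)
```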
6 changes: 4 additions & 2 deletions src/accelerate/test_utils/testing.py
@@ -423,13 +423,15 @@ class SubprocessCallException(Exception):
pass


-def run_command(command: List[str], return_stdout=False):
+def run_command(command: List[str], return_stdout=False, env=None):
"""
Runs `command` with `subprocess.check_output` and will potentially return the `stdout`. Will also properly capture
if an error occured while running `command`
"""
+if env is None:
+    env = os.environ.copy()
try:
-output = subprocess.check_output(command, stderr=subprocess.STDOUT)
+output = subprocess.check_output(command, stderr=subprocess.STDOUT, env=env)
if return_stdout:
if hasattr(output, "decode"):
output = output.decode("utf-8")
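A brief usage sketch of the extended helper follows; the command and the environment variable chosen here are placeholders, not part of the PR:

```python
import os

from accelerate.test_utils.testing import run_command

# Passing an explicit copy keeps per-test environment tweaks from leaking elsewhere.
env = os.environ.copy()
env["OMP_NUM_THREADS"] = "1"

stdout = run_command(["python", "-c", "print('ok')"], return_stdout=True, env=env)
print(stdout)
```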
8 changes: 7 additions & 1 deletion src/accelerate/utils/__init__.py
@@ -37,7 +37,13 @@
TensorInformation,
TorchDynamoPlugin,
)
-from .environment import get_int_from_env, parse_choice_from_env, parse_flag_from_env, str_to_bool
+from .environment import (
+    are_libraries_initialized,
+    get_int_from_env,
+    parse_choice_from_env,
+    parse_flag_from_env,
+    str_to_bool,
+)
from .imports import (
get_ccl_version,
is_4bit_bnb_available,
9 changes: 9 additions & 0 deletions src/accelerate/utils/environment.py
@@ -13,6 +13,8 @@
# limitations under the License.

import os
+import sys
+from typing import List


def str_to_bool(value) -> int:
@@ -48,3 +50,10 @@ def parse_flag_from_env(key, default=False):
def parse_choice_from_env(key, default="no"):
value = os.environ.get(key, str(default))
return value


+def are_libraries_initialized(*library_names: str) -> List[str]:
+    """
+    Checks if any of `library_names` are imported in the environment. Will return any names that are.
+    """
+    return [lib_name for lib_name in library_names if lib_name in sys.modules]
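A small usage sketch of the new helper, assuming a build of `accelerate` that includes this change; `json` stands in for any module that happens to be imported already:

```python
import json  # noqa: F401  -- any module already present in sys.modules is reported

from accelerate.utils import are_libraries_initialized

# Returns the subset of the given names that have already been imported in this process.
print(are_libraries_initialized("json", "bitsandbytes"))
# -> ["json"] if bitsandbytes has not been imported yet
```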
23 changes: 10 additions & 13 deletions tests/test_multigpu.py
@@ -21,7 +21,8 @@
import accelerate
from accelerate import Accelerator
from accelerate.big_modeling import dispatch_model
-from accelerate.test_utils import assert_exception, execute_subprocess_async, require_multi_gpu, skip
+from accelerate.test_utils import assert_exception, execute_subprocess_async, require_multi_gpu
+from accelerate.test_utils.testing import run_command
from accelerate.utils import patch_environment


@@ -33,6 +34,9 @@ def setUp(self):
mod_file.split(os.path.sep)[:-1] + ["scripts", "test_distributed_data_loop.py"]
)
self.operation_file_path = os.path.sep.join(mod_file.split(os.path.sep)[:-1] + ["scripts", "test_ops.py"])
+self.notebook_launcher_path = os.path.sep.join(
+    mod_file.split(os.path.sep)[:-1] + ["scripts", "test_notebook.py"]
+)

@require_multi_gpu
def test_multi_gpu(self):
@@ -66,23 +70,16 @@ def test_distributed_data_loop(self):
with patch_environment(omp_num_threads=1, cuda_visible_devices="0,1"):
execute_subprocess_async(cmd, env=os.environ.copy())

-# Need to see why this test raises forking issues when ran as a suite
-@skip

Comment from the PR author on lines -69 to -70: essentially the `notebook_launcher` doesn't play well with async subprocesses, so executing the script as a plain, non-async subprocess makes this test work reliably.

@require_multi_gpu
def test_notebook_launcher(self):
    """
-    This test checks that the `notebook_launcher` will be able to intialize
-    a `PartialState` without issue
+    This test checks a variety of situations and scenarios
+    with the `notebook_launcher`
    """
-    cmd = [
-        "python",
-        "-m",
-        "accelerate.test_utils.scripts.test_notebook",
-        "--num_processes",
-        str(torch.cuda.device_count()),
-    ]
+    cmd = ["torchrun", f"--nproc_per_node={torch.cuda.device_count()}", self.notebook_launcher_path]
+    print(f"Running {cmd}")
    with patch_environment(omp_num_threads=1):
-        execute_subprocess_async(cmd, env=os.environ.copy())
+        run_command(cmd, env=os.environ.copy())


if __name__ == "__main__":
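As a footnote to the author's comment above, a rough sketch of the synchronous execution path now used, under the assumption that avoiding an asyncio event loop in the parent process is what keeps the launcher's own process handling reliable:

```python
import os
import subprocess


def run_sync(cmd, env=None):
    # A plain blocking subprocess call in the spirit of run_command: no event loop in the
    # parent process, so notebook_launcher's forked/spawned workers are left undisturbed.
    if env is None:
        env = os.environ.copy()
    return subprocess.check_output(cmd, stderr=subprocess.STDOUT, env=env)
```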