Tests: improve CUDA support detection #985

Merged
19 changes: 19 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,19 @@
import pytest
import torch


def pytest_runtest_call(item):
try:
item.runtest()
except AssertionError as ae:
if str(ae) == "Torch not compiled with CUDA enabled":
pytest.skip("Torch not compiled with CUDA enabled")
raise


@pytest.fixture(scope="session")
def requires_cuda() -> bool:
cuda_available = torch.cuda.is_available()
if not cuda_available:
pytest.skip("CUDA is required")
return cuda_available
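
The hook and fixture above are what make the per-test GPU guards removed in the files below unnecessary. A test can request the session-scoped requires_cuda fixture (picked up automatically from conftest.py, no import needed) to be skipped up front when CUDA is unavailable. A minimal usage sketch, assuming a hypothetical test named test_needs_gpu:

import torch


def test_needs_gpu(requires_cuda):
    # The fixture has already called pytest.skip("CUDA is required")
    # when torch.cuda.is_available() returned False, so CUDA ops are safe here.
    x = torch.randn(4, 4, device="cuda")
    assert x.is_cuda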
4 changes: 0 additions & 4 deletions tests/test_autograd.py
@@ -40,7 +40,6 @@
ids=names,
)
def test_matmul(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose):
if not torch.cuda.is_available(): pytest.skip('No GPU found.')
if dim2 > 0:
dim2 = dim2 - (dim2 % 16)
dim3 = dim3 - (dim3 % 16)
@@ -307,7 +306,6 @@ def test_matmullt(
has_fp16_weights,
has_bias
):
if not torch.cuda.is_available(): pytest.skip('No GPU found.')
dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
dimB = (dim3, dim4) if not transpose[1] else (dim4, dim3)
outlier_dim = torch.randint(0, dimA[1], size=(dimA[1] // 8,), device="cuda")
@@ -461,7 +459,6 @@ def test_matmullt(
values = list(product(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_type))
str_values = list(product(dim1, dim2, dim3, dim4, str_funcs, dtype, req_grad_str, str_transpose, has_bias, compress_statistics, quant_type))
names = ["dim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}_has_bias_{}_compress_statistics_{}_quant_type_{}".format(*vals) for vals in str_values]
@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
@pytest.mark.parametrize( "dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_type", values, ids=names)
def test_matmul_4bit( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_type):
dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
@@ -551,7 +548,6 @@ def test_matmul_4bit( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose,
values = list(product(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose))
str_values = list(product(dim1, dim2, dim3, dim4, str_funcs, dtype, req_grad_str, str_transpose))
names = ["dim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}".format(*vals) for vals in str_values]
@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
@pytest.mark.parametrize( "dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose", values, ids=names)
def test_matmul_fp8( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose):
dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
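Tests that do not take the fixture, like the ones above, instead rely on the pytest_runtest_call hook: on a PyTorch build compiled without CUDA, the first CUDA operation inside the test raises the exact AssertionError the hook looks for, and the test is skipped at runtime rather than failing. A minimal illustration of the failure mode the hook intercepts (variable names are hypothetical):

import torch

x = torch.randn(3, 3)
# On a CPU-only build this raises AssertionError("Torch not compiled with CUDA enabled"),
# which pytest_runtest_call in conftest.py converts into pytest.skip.
y = x.cuda()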
4 changes: 2 additions & 2 deletions tests/test_cuda_setup_evaluator.py
@@ -5,12 +5,12 @@

# hardcoded test. Not good, but a sanity check for now
# TODO: improve this
def test_manual_override():
def test_manual_override(requires_cuda):
manual_cuda_path = str(Path('/mmfs1/home/dettmers/data/local/cuda-12.2'))

pytorch_version = torch.version.cuda.replace('.', '')

assert pytorch_version != 122
assert pytorch_version != 122 # TODO: this will never be true...

os.environ['CUDA_HOME']='{manual_cuda_path}'
os.environ['BNB_CUDA_VERSION']='122'
7 changes: 4 additions & 3 deletions tests/test_functional.py
@@ -617,7 +617,10 @@ def test_nvidia_transform(dim1, dim2, dim3, dims, dtype, orderA, orderOut, trans
return
if dtype == torch.int32 and out_order != "col32":
return
func = F.get_transform_func(dtype, orderA, orderOut, transpose)
try:
func = F.get_transform_func(dtype, orderA, orderOut, transpose)
except ValueError as ve:
pytest.skip(str(ve)) # skip if not supported

if dims == 2:
A = torch.randint(-128, 127, size=(dim1, dim2), device="cuda").to(dtype)
@@ -2278,7 +2281,6 @@ def test_fp4_quant(dtype):
assert relerr.item() < 0.28


@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
@pytest.mark.parametrize("quant_type", ['fp4', 'nf4'])
def test_4bit_compressed_stats(quant_type):
for blocksize in [128, 64]:
@@ -2317,7 +2319,6 @@ def test_4bit_compressed_stats(quant_type):



@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
#@pytest.mark.parametrize("quant_type", ['fp4', 'nf4'])
@pytest.mark.parametrize("quant_type", ['nf4'])
def test_bench_4bit_dequant(quant_type):
2 changes: 1 addition & 1 deletion tests/test_generation.py
@@ -79,7 +79,7 @@ def model_and_tokenizer(request):
@pytest.mark.parametrize("DQ", [True, False], ids=['DQ_True', 'DQ_False'])
@pytest.mark.parametrize("inference_kernel", [True, False], ids=['inference_kernel_True', 'inference_kernel_False'])
#@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32], ids=['fp16', 'bf16', 'fp32'])
def test_pi(model_and_tokenizer, inference_kernel, DQ):
def test_pi(requires_cuda, model_and_tokenizer, inference_kernel, DQ):
print('')
dtype = torch.float16

1 change: 0 additions & 1 deletion tests/test_linear4bit.py
@@ -15,7 +15,6 @@
'float32': torch.float32
}

@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
@pytest.mark.parametrize(
"quant_type, compress_statistics, bias, quant_storage",
list(product(["nf4", "fp4"], [False, True], [False, True], ['uint8', 'float16', 'bfloat16', 'float32'])),
2 changes: 0 additions & 2 deletions tests/test_linear8bitlt.py
@@ -33,7 +33,6 @@ def test_layout_exact_match():
assert torch.all(torch.eq(restored_x, x))


@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
def test_linear_no_igemmlt():
linear = torch.nn.Linear(1024, 3072)
x = torch.randn(3, 1024, dtype=torch.half)
@@ -68,7 +67,6 @@ def test_linear_no_igemmlt():
assert linear_custom.state.CxB is None


@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
@pytest.mark.parametrize("has_fp16_weights, serialize_before_forward, deserialize_before_cuda, force_no_igemmlt",
list(product([False, True], [False, True], [False, True], [False, True])))
def test_linear_serialization(has_fp16_weights, serialize_before_forward, deserialize_before_cuda, force_no_igemmlt):
1 change: 0 additions & 1 deletion tests/test_modules.py
@@ -520,7 +520,6 @@ def test_linear_kbit_fp32_bias(module):
modules.append(lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.float16))
modules.append(lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.bfloat16))
names = ['Int8Lt', '4bit', 'FP4', 'NF4', 'FP4+C', 'NF4+C', 'NF4+fp32', 'NF4+fp16', 'NF4+bf16']
@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
@pytest.mark.parametrize("module", modules, ids=names)
def test_kbit_backprop(module):
b = 17