Tests: improve CUDA support detection #985

Merged
19 changes: 19 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,19 @@
import pytest
import torch


def pytest_runtest_call(item):
try:
item.runtest()
except AssertionError as ae:
if str(ae) == "Torch not compiled with CUDA enabled":
pytest.skip("Torch not compiled with CUDA enabled")
raise


@pytest.fixture(scope="session")
def requires_cuda() -> bool:
cuda_available = torch.cuda.is_available()
if not cuda_available:
pytest.skip("CUDA is required")
return cuda_available
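
The hook and fixture above are what make the per-test GPU guards removed in the files below unnecessary. A test can request the session-scoped requires_cuda fixture (picked up automatically from conftest.py, no import needed) to be skipped up front when CUDA is unavailable. A minimal usage sketch, assuming a hypothetical test named test_needs_gpu:

import torch


def test_needs_gpu(requires_cuda):
    # The fixture has already called pytest.skip("CUDA is required")
    # when torch.cuda.is_available() returned False, so CUDA ops are safe here.
    x = torch.randn(4, 4, device="cuda")
    assert x.is_cuda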
4 changes: 0 additions & 4 deletions tests/test_autograd.py
@@ -40,7 +40,6 @@
ids=names,
)
def test_matmul(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose):
if not torch.cuda.is_available(): pytest.skip('No GPU found.')
if dim2 > 0:
dim2 = dim2 - (dim2 % 16)
dim3 = dim3 - (dim3 % 16)
@@ -307,7 +306,6 @@ def test_matmullt(
has_fp16_weights,
has_bias
):
if not torch.cuda.is_available(): pytest.skip('No GPU found.')
dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
dimB = (dim3, dim4) if not transpose[1] else (dim4, dim3)
outlier_dim = torch.randint(0, dimA[1], size=(dimA[1] // 8,), device="cuda")
@@ -461,7 +459,6 @@ def test_matmullt(
values = list(product(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_type))
str_values = list(product(dim1, dim2, dim3, dim4, str_funcs, dtype, req_grad_str, str_transpose, has_bias, compress_statistics, quant_type))
names = ["dim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}_has_bias_{}_compress_statistics_{}_quant_type_{}".format(*vals) for vals in str_values]
@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
@pytest.mark.parametrize( "dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_type", values, ids=names)
def test_matmul_4bit( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose, has_bias, compress_statistics, quant_type):
dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
@@ -551,7 +548,6 @@ def test_matmul_4bit( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose,
values = list(product(dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose))
str_values = list(product(dim1, dim2, dim3, dim4, str_funcs, dtype, req_grad_str, str_transpose))
names = ["dim1_{}_dim2_{}_dim3_{}_dim4_{}_func_{}_dtype_{}_requires_grad_{}_transpose_{}".format(*vals) for vals in str_values]
@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
@pytest.mark.parametrize( "dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose", values, ids=names)
def test_matmul_fp8( dim1, dim2, dim3, dim4, funcs, dtype, req_grad, transpose):
dimA = (dim2, dim3) if not transpose[0] else (dim3, dim2)
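Tests that do not take the fixture, like the ones above, instead rely on the pytest_runtest_call hook: on a PyTorch build compiled without CUDA, the first CUDA operation inside the test raises the exact AssertionError the hook looks for, and the test is skipped at runtime rather than failing. A minimal illustration of the failure mode the hook intercepts (variable names are hypothetical):

import torch

x = torch.randn(3, 3)
# On a CPU-only build this raises AssertionError("Torch not compiled with CUDA enabled"),
# which pytest_runtest_call in conftest.py converts into pytest.skip.
y = x.cuda()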
4 changes: 2 additions & 2 deletions tests/test_cuda_setup_evaluator.py
@@ -5,12 +5,12 @@

# hardcoded test. Not good, but a sanity check for now
# TODO: improve this
def test_manual_override():
def test_manual_override(requires_cuda):
manual_cuda_path = str(Path('/mmfs1/home/dettmers/data/local/cuda-12.2'))

pytorch_version = torch.version.cuda.replace('.', '')

assert pytorch_version != 122
assert pytorch_version != 122 # TODO: this will never be true...

os.environ['CUDA_HOME']='{manual_cuda_path}'
os.environ['BNB_CUDA_VERSION']='122'
7 changes: 4 additions & 3 deletions tests/test_functional.py
@@ -617,7 +617,10 @@ def test_nvidia_transform(dim1, dim2, dim3, dims, dtype, orderA, orderOut, trans
return
if dtype == torch.int32 and out_order != "col32":
return
func = F.get_transform_func(dtype, orderA, orderOut, transpose)
try:
func = F.get_transform_func(dtype, orderA, orderOut, transpose)
except ValueError as ve:
pytest.skip(str(ve)) # skip if not supported

if dims == 2:
A = torch.randint(-128, 127, size=(dim1, dim2), device="cuda").to(dtype)
@@ -2278,7 +2281,6 @@ def test_fp4_quant(dtype):
assert relerr.item() < 0.28


@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
@pytest.mark.parametrize("quant_type", ['fp4', 'nf4'])
def test_4bit_compressed_stats(quant_type):
for blocksize in [128, 64]:
@@ -2317,7 +2319,6 @@ def test_4bit_compressed_stats(quant_type):



@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
#@pytest.mark.parametrize("quant_type", ['fp4', 'nf4'])
@pytest.mark.parametrize("quant_type", ['nf4'])
def test_bench_4bit_dequant(quant_type):
2 changes: 1 addition & 1 deletion tests/test_generation.py
@@ -79,7 +79,7 @@ def model_and_tokenizer(request):
@pytest.mark.parametrize("DQ", [True, False], ids=['DQ_True', 'DQ_False'])
@pytest.mark.parametrize("inference_kernel", [True, False], ids=['inference_kernel_True', 'inference_kernel_False'])
#@pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32], ids=['fp16', 'bf16', 'fp32'])
def test_pi(model_and_tokenizer, inference_kernel, DQ):
def test_pi(requires_cuda, model_and_tokenizer, inference_kernel, DQ):
print('')
dtype = torch.float16

1 change: 0 additions & 1 deletion tests/test_linear4bit.py
@@ -15,7 +15,6 @@
'float32': torch.float32
}

@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
@pytest.mark.parametrize(
"quant_type, compress_statistics, bias, quant_storage",
list(product(["nf4", "fp4"], [False, True], [False, True], ['uint8', 'float16', 'bfloat16', 'float32'])),
2 changes: 0 additions & 2 deletions tests/test_linear8bitlt.py
@@ -33,7 +33,6 @@ def test_layout_exact_match():
assert torch.all(torch.eq(restored_x, x))


@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
def test_linear_no_igemmlt():
linear = torch.nn.Linear(1024, 3072)
x = torch.randn(3, 1024, dtype=torch.half)
@@ -68,7 +67,6 @@ def test_linear_no_igemmlt():
assert linear_custom.state.CxB is None


@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
@pytest.mark.parametrize("has_fp16_weights, serialize_before_forward, deserialize_before_cuda, force_no_igemmlt",
list(product([False, True], [False, True], [False, True], [False, True])))
def test_linear_serialization(has_fp16_weights, serialize_before_forward, deserialize_before_cuda, force_no_igemmlt):
1 change: 0 additions & 1 deletion tests/test_modules.py
@@ -520,7 +520,6 @@ def test_linear_kbit_fp32_bias(module):
modules.append(lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.float16))
modules.append(lambda d1, d2: bnb.nn.LinearFP4(d1, d2, compute_dtype=torch.bfloat16))
names = ['Int8Lt', '4bit', 'FP4', 'NF4', 'FP4+C', 'NF4+C', 'NF4+fp32', 'NF4+fp16', 'NF4+bf16']
@pytest.mark.skipif(not torch.cuda.is_available(), reason="this test requires a GPU")
@pytest.mark.parametrize("module", modules, ids=names)
def test_kbit_backprop(module):
b = 17