From 4d84b625a2e3dbbae7168d356b6138580833f4a2 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Wed, 31 Jan 2024 09:31:56 +0200 Subject: [PATCH] Mark slow and benchmark tests as such (allows `-k "not benchmark"`) --- pytest.ini | 5 ++++- tests/test_functional.py | 6 ++++++ tests/test_generation.py | 1 + tests/test_optim.py | 2 ++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index 9902b98fa..ac6d72e63 100644 --- a/pytest.ini +++ b/pytest.ini @@ -7,4 +7,7 @@ addopts = -rP log_cli = True log_cli_level = INFO -log_file = logs/pytest.log \ No newline at end of file +log_file = logs/pytest.log +markers = + benchmark: mark test as benchmark + slow: mark test as slow diff --git a/tests/test_functional.py b/tests/test_functional.py index 3cbf8dfd1..f4b8fca51 100644 --- a/tests/test_functional.py +++ b/tests/test_functional.py @@ -708,6 +708,7 @@ def test_igemmlt_half(dim1, dim2, dim3, dim4, dims): pytest.param(2, 512, 12 * 1024, 4 * 12 * 1024, id="batch=2, seq=512, model=12k, hidden=48k"), ], ) +@pytest.mark.benchmark def test_bench_8bit_training(batch, seq, model, hidden): formatB = F.get_special_format_str() A = torch.randn(batch, seq, model, device="cuda").half() @@ -1118,6 +1119,7 @@ def test_igemmlt_row_scale(dim1, dim4, inner): ], ) @pytest.mark.skip("Row scale has some bugs for ampere") +@pytest.mark.benchmark def test_row_scale_bench(dim1, dim4, inner): formatB = F.get_special_format_str() err1, err2, err3 = [], [], [] @@ -1274,6 +1276,7 @@ def test_spmm_coo(dim1, dim2, transposed_B): assert_all_approx_close(out1, out2, rtol=0.01, atol=3.0e-2, count=30) +@pytest.mark.benchmark def test_spmm_bench(): batch = 2 model = 1024 * 1 @@ -1581,6 +1584,7 @@ def test_spmm_coo_dequant(dim1, dim2, dtype): ("batch", "seq", "model", "hidden"), [pytest.param(1, 1, 6656, 4*6656, id="batch=1, seq=1, model=6656, hidden=26k")], ) +@pytest.mark.benchmark def test_bench_matmul(batch, seq, model, hidden): iters = 1000 formatB = F.get_special_format_str() @@ -2005,6 +2009,7 @@ def test_kbit_quantile_estimation(): assert err < 0.035 +@pytest.mark.benchmark def test_bench_dequantization(): a = torch.rand(1024, 1024, device='cuda').half() code =F.create_fp8_map(True, 3, 0, 4).cuda() @@ -2100,6 +2105,7 @@ def test_4bit_compressed_stats(quant_type): #@pytest.mark.parametrize("quant_type", ['fp4', 'nf4']) @pytest.mark.parametrize("quant_type", ['nf4']) +@pytest.mark.benchmark def test_bench_4bit_dequant(quant_type): blocksize = 256 a = torch.rand(1024*12*4, 1024*12, device='cuda').half() diff --git a/tests/test_generation.py b/tests/test_generation.py index 9e82cb7a4..9ed30cd2a 100644 --- a/tests/test_generation.py +++ b/tests/test_generation.py @@ -72,6 +72,7 @@ def model_and_tokenizer(request): @pytest.mark.parametrize("DQ", TRUE_FALSE, ids=id_formatter("dq")) @pytest.mark.parametrize("inference_kernel", TRUE_FALSE, ids=id_formatter("inference_kernel")) @pytest.mark.parametrize("dtype", [torch.float16], ids=describe_dtype) +@pytest.mark.slow def test_pi(requires_cuda, model_and_tokenizer, inference_kernel, DQ, dtype): fixture_config, model, tokenizer = model_and_tokenizer diff --git a/tests/test_optim.py b/tests/test_optim.py index 6e17c181d..e379c424a 100644 --- a/tests/test_optim.py +++ b/tests/test_optim.py @@ -469,6 +469,7 @@ def test_adam_percentile_clipping(dim1, dim2, gtype, optim_bits): @pytest.mark.parametrize("dim2", [4096], ids=id_formatter("dim2")) @pytest.mark.parametrize("gtype", [torch.float32, torch.float16], ids=describe_dtype) @pytest.mark.parametrize("optim_name", optimizer_names_benchmark, ids=id_formatter("opt")) +@pytest.mark.benchmark def test_benchmark_blockwise(dim1, dim2, gtype, optim_name): if dim1 == 1 and dim2 == 1: return @@ -498,6 +499,7 @@ def test_benchmark_blockwise(dim1, dim2, gtype, optim_name): @pytest.mark.parametrize("gtype", [torch.float16], ids=describe_dtype) @pytest.mark.parametrize("optim_name", ['paged_adamw'], ids=id_formatter("optim_name")) @pytest.mark.parametrize("mode", ['bnb'], ids=id_formatter("mode")) +@pytest.mark.benchmark def test_stream_optimizer_bench(dim1, gtype, optim_name, mode): layers1 = torch.nn.Sequential(*torch.nn.ModuleList([torch.nn.Linear(dim1, dim1) for i in range(10)])) layers1 = layers1.to(gtype)