This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit 5f01ec2: skip lm head test
robertgshaw2-neuralmagic committed Jul 7, 2024 (1 parent: 7a34cb7)

Showing 2 changed files with 6 additions and 0 deletions.
1 change: 1 addition & 0 deletions in tests/distributed/test_pipeline_parallel.py

@@ -7,6 +7,7 @@
import ray

from tests.nm_utils.utils_skip import should_skip_test_group

from ..utils import VLLM_PATH, RemoteOpenAIServer

if should_skip_test_group(group_name="TEST_DISTRIBUTED"):
…
5 changes: 5 additions & 0 deletions in tests/quantization/test_lm_head.py

@@ -7,12 +7,17 @@
 import pytest
 import torch

+from tests.nm_utils.utils_skip import should_skip_test_group
 from vllm.model_executor.layers.linear import UnquantizedLinearMethod
 from vllm.model_executor.layers.quantization.gptq import GPTQLinearMethod
 from vllm.model_executor.layers.quantization.gptq_marlin import (
     GPTQMarlinLinearMethod)
 from vllm.model_executor.layers.quantization.marlin import MarlinLinearMethod

+if should_skip_test_group(group_name="TEST_QUANTIZATION"):
+    pytest.skip("TEST_QUANTIZATION=DISABLE, skipping quantization test group",
+                allow_module_level=True)
+
 PROMPT = "On the surface of Mars, we found"

 MODELS_QUANT = [(
…
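Both files gate an entire test module through the shared should_skip_test_group helper from tests.nm_utils.utils_skip. That helper's implementation is not part of this commit, so the following is only a minimal sketch of how it plausibly works, inferred from the skip message ("TEST_QUANTIZATION=DISABLE"): it assumes each test group is toggled by an environment variable named after the group.

```python
# Hypothetical sketch of tests/nm_utils/utils_skip.py; the real helper is not
# shown in this commit. Assumes a test group is disabled by setting an
# environment variable named after the group, e.g. TEST_QUANTIZATION=DISABLE.
import os


def should_skip_test_group(group_name: str) -> bool:
    """Return True when the group's environment variable is set to DISABLE."""
    return os.environ.get(group_name, "ENABLE") == "DISABLE"
```

Under that assumption, running `TEST_QUANTIZATION=DISABLE pytest tests/quantization` would hit the module-level pytest.skip(..., allow_module_level=True) added above and skip every test in the file.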

1 comment on commit 5f01ec2

@github-actions

smaller_is_better

All rows are "VLLM Serving - Dense" runs with sparsity None and benchmark_serving {"nr-qps-pair_": "300,1", "dataset": "sharegpt"}, on NVIDIA L4 x 1, vllm_version 0.5.1, python_version 3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0], torch_version 2.3.0+cu121. Values below are rounded to two decimals.

| Benchmark suite | Current: 5f01ec2 | Previous: f6f2554 | Ratio |
| --- | --- | --- | --- |
| mean_ttft_ms, facebook/opt-350m, max-model-len 2048 | 23.90 ms | 24.57 ms | 0.97 |
| mean_tpot_ms, facebook/opt-350m, max-model-len 2048 | 6.15 ms | 6.12 ms | 1.01 |
| mean_ttft_ms, meta-llama/Meta-Llama-3-8B-Instruct, max-model-len 4096 | 180.42 ms | 187.94 ms | 0.96 |
| mean_tpot_ms, meta-llama/Meta-Llama-3-8B-Instruct, max-model-len 4096 | 84.91 ms | 85.69 ms | 0.99 |

This comment was automatically generated by a workflow using github-action-benchmark.
