This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit b4eec34

format

robertgshaw2-neuralmagic committed Jul 1, 2024
1 parent: 9a4be7f

Showing 6 changed files with 6 additions and 8 deletions.
tests/distributed/test_multimodal_broadcast.py (1 change: 0 additions & 1 deletion)

@@ -22,7 +22,6 @@
     pytest.skip("TEST_DISTRIBUTED=DISABLE, skipping distributed test group",
                 allow_module_level=True)
 
-
 model = os.environ["TEST_DIST_MODEL"]
 
 if model.startswith("llava-hf/llava"):
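Every file in this commit gates test collection on the same helper. A hypothetical sketch of `should_skip_test_group`, inferred from the `TEST_DISTRIBUTED=DISABLE` convention visible in the skip messages; the real implementation in `tests/nm_utils/utils_skip.py` may differ:

    import os

    def should_skip_test_group(group_name: str) -> bool:
        # Assumption: a test group is disabled when the environment variable
        # named after it is set to "DISABLE", e.g. TEST_DISTRIBUTED=DISABLE.
        return os.environ.get(group_name, "ENABLE") == "DISABLE"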
tests/distributed/test_parallel_state.py (4 changes: 2 additions & 2 deletions)

@@ -3,15 +3,15 @@
 import pytest
 import torch
 
+from tests.nm_utils.utils_skip import should_skip_test_group
 from vllm.distributed.parallel_state import (_split_tensor_dict,
                                              _update_nested_dict)
 
-from tests.nm_utils.utils_skip import should_skip_test_group
-
 if should_skip_test_group(group_name="TEST_DISTRIBUTED"):
     pytest.skip("TEST_DISTRIBUTED=DISABLE, skipping distributed test group",
                 allow_module_level=True)
 
+
 def test_split_tensor_dict():
     test_dict = {
         "key_a": "a",
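The import moves in this commit match isort's default ordering: import blocks are merged and alphabetized, so `tests....` sorts before `vllm....`. A minimal sketch of reproducing the reordering, assuming isort 5 is installed; the repository's actual format tooling may pass additional configuration:

    # Sketch: isort merges adjacent import blocks and sorts them.
    import isort

    messy = (
        "from vllm.distributed.parallel_state import _split_tensor_dict\n"
        "\n"
        "from tests.nm_utils.utils_skip import should_skip_test_group\n"
    )
    print(isort.code(messy))
    # Expected to resemble:
    # from tests.nm_utils.utils_skip import should_skip_test_group
    # from vllm.distributed.parallel_state import _split_tensor_dict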
tests/models/test_compressed_tensors.py (3 changes: 1 addition & 2 deletions)

@@ -7,9 +7,8 @@
 
 import pytest
 
-from tests.quantization.utils import is_quant_method_supported
-
 from tests.nm_utils.utils_skip import should_skip_test_group
+from tests.quantization.utils import is_quant_method_supported
 
 from .utils import check_logprobs_close
 
tests/spec_decode/e2e/test_integration_dist_tp2.py (3 changes: 1 addition & 2 deletions)

@@ -5,9 +5,8 @@
 import pytest
 import torch
 
-from vllm.utils import is_hip
-
 from tests.nm_utils.utils_skip import should_skip_test_group
+from vllm.utils import is_hip
 
 from .conftest import run_greedy_equality_correctness_test
 
tests/tokenization/test_get_eos.py (2 changes: 1 addition & 1 deletion)

@@ -9,11 +9,11 @@
 from vllm.transformers_utils.config import try_get_generation_config
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
-
 if should_skip_test_group(group_name="TEST_TOKENIZATION"):
     pytest.skip("TEST_TOKENIZATION=DISABLE, skipping tokenization test group",
                 allow_module_level=True)
 
+
 def test_get_llama3_eos_token():
     model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
 
tests/worker/test_model_input.py (1 change: 1 addition & 0 deletions)

@@ -17,6 +17,7 @@
     pytest.skip("TEST_WORKER=DISABLE, skipping worker test group",
                 allow_module_level=True)
 
+
 class MockAttentionBackend(AttentionBackend):
 
     @staticmethod

1 comment on commit b4eec34

@github-actions

smaller_is_better

Benchmark suite: VLLM Serving - Dense (benchmark_serving, dataset sharegpt, nr-qps-pair 300,1, sparsity None)
Environment: NVIDIA L4 x 1, vllm 0.5.1, Python 3.10.12, torch 2.3.0+cu121
Commits: Current b4eec34, Previous 569c905

Metric        Model                                 max-model-len   Current      Previous     Ratio
mean_ttft_ms  meta-llama/Meta-Llama-3-8B-Instruct   4096            183.752 ms   183.749 ms   1.00
mean_tpot_ms  meta-llama/Meta-Llama-3-8B-Instruct   4096            83.385 ms    83.873 ms    0.99
mean_ttft_ms  facebook/opt-350m                     2048            23.611 ms    24.655 ms    0.96
mean_tpot_ms  facebook/opt-350m                     2048            5.957 ms     6.001 ms     0.99
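The Ratio column is consistent with current divided by previous; since the suite is smaller_is_better, a ratio below 1.00 means the new commit is faster. A quick check against the first row:

    # Assumption: Ratio = current / previous (smaller_is_better,
    # so < 1.00 is an improvement).
    current, previous = 183.7518851866662, 183.7486813564707
    print(f"{current / previous:.2f}")  # 1.00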

This comment was automatically generated by a workflow using github-action-benchmark.
