This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit b4eec34

format

robertgshaw2-neuralmagic committed Jul 1, 2024
1 parent: 9a4be7f

Showing 6 changed files with 6 additions and 8 deletions.
tests/distributed/test_multimodal_broadcast.py (1 change: 0 additions & 1 deletion)

@@ -22,7 +22,6 @@
     pytest.skip("TEST_DISTRIBUTED=DISABLE, skipping distributed test group",
                 allow_module_level=True)
 
-
 model = os.environ["TEST_DIST_MODEL"]
 
 if model.startswith("llava-hf/llava"):
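Every file in this commit gates test collection on the same helper. A hypothetical sketch of `should_skip_test_group`, inferred from the `TEST_DISTRIBUTED=DISABLE` convention visible in the skip messages; the real implementation in `tests/nm_utils/utils_skip.py` may differ:

    import os

    def should_skip_test_group(group_name: str) -> bool:
        # Assumption: a test group is disabled when the environment variable
        # named after it is set to "DISABLE", e.g. TEST_DISTRIBUTED=DISABLE.
        return os.environ.get(group_name, "ENABLE") == "DISABLE"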
tests/distributed/test_parallel_state.py (4 changes: 2 additions & 2 deletions)

@@ -3,15 +3,15 @@
 import pytest
 import torch
 
+from tests.nm_utils.utils_skip import should_skip_test_group
 from vllm.distributed.parallel_state import (_split_tensor_dict,
                                              _update_nested_dict)
 
-from tests.nm_utils.utils_skip import should_skip_test_group
-
 if should_skip_test_group(group_name="TEST_DISTRIBUTED"):
     pytest.skip("TEST_DISTRIBUTED=DISABLE, skipping distributed test group",
                 allow_module_level=True)
 
+
 def test_split_tensor_dict():
     test_dict = {
         "key_a": "a",
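The import moves in this commit match isort's default ordering: import blocks are merged and alphabetized, so `tests....` sorts before `vllm....`. A minimal sketch of reproducing the reordering, assuming isort 5 is installed; the repository's actual format tooling may pass additional configuration:

    # Sketch: isort merges adjacent import blocks and sorts them.
    import isort

    messy = (
        "from vllm.distributed.parallel_state import _split_tensor_dict\n"
        "\n"
        "from tests.nm_utils.utils_skip import should_skip_test_group\n"
    )
    print(isort.code(messy))
    # Expected to resemble:
    # from tests.nm_utils.utils_skip import should_skip_test_group
    # from vllm.distributed.parallel_state import _split_tensor_dict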
tests/models/test_compressed_tensors.py (3 changes: 1 addition & 2 deletions)

@@ -7,9 +7,8 @@
 
 import pytest
 
-from tests.quantization.utils import is_quant_method_supported
-
 from tests.nm_utils.utils_skip import should_skip_test_group
+from tests.quantization.utils import is_quant_method_supported
 
 from .utils import check_logprobs_close
 
tests/spec_decode/e2e/test_integration_dist_tp2.py (3 changes: 1 addition & 2 deletions)

@@ -5,9 +5,8 @@
 import pytest
 import torch
 
-from vllm.utils import is_hip
-
 from tests.nm_utils.utils_skip import should_skip_test_group
+from vllm.utils import is_hip
 
 from .conftest import run_greedy_equality_correctness_test
 
tests/tokenization/test_get_eos.py (2 changes: 1 addition & 1 deletion)

@@ -9,11 +9,11 @@
 from vllm.transformers_utils.config import try_get_generation_config
 from vllm.transformers_utils.tokenizer import get_tokenizer
 
-
 if should_skip_test_group(group_name="TEST_TOKENIZATION"):
     pytest.skip("TEST_TOKENIZATION=DISABLE, skipping tokenization test group",
                 allow_module_level=True)
 
+
 def test_get_llama3_eos_token():
     model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
 
tests/worker/test_model_input.py (1 change: 1 addition & 0 deletions)

@@ -17,6 +17,7 @@
     pytest.skip("TEST_WORKER=DISABLE, skipping worker test group",
                 allow_module_level=True)
 
+
 class MockAttentionBackend(AttentionBackend):
 
     @staticmethod

1 comment on commit b4eec34

@github-actions

smaller_is_better

Benchmark suite: VLLM Serving - Dense (benchmark_serving, dataset sharegpt, nr-qps-pair 300,1, sparsity None)
Environment: NVIDIA L4 x 1, vllm 0.5.1, Python 3.10.12, torch 2.3.0+cu121
Commits: Current b4eec34, Previous 569c905

Metric        Model                                 max-model-len   Current      Previous     Ratio
mean_ttft_ms  meta-llama/Meta-Llama-3-8B-Instruct   4096            183.752 ms   183.749 ms   1.00
mean_tpot_ms  meta-llama/Meta-Llama-3-8B-Instruct   4096            83.385 ms    83.873 ms    0.99
mean_ttft_ms  facebook/opt-350m                     2048            23.611 ms    24.655 ms    0.96
mean_tpot_ms  facebook/opt-350m                     2048            5.957 ms     6.001 ms     0.99
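The Ratio column is consistent with current divided by previous; since the suite is smaller_is_better, a ratio below 1.00 means the new commit is faster. A quick check against the first row:

    # Assumption: Ratio = current / previous (smaller_is_better,
    # so < 1.00 is an improvement).
    current, previous = 183.7518851866662, 183.7486813564707
    print(f"{current / previous:.2f}")  # 1.00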

This comment was automatically generated by a workflow using github-action-benchmark.
