This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit 5f01ec2: skip lm head test
robertgshaw2-neuralmagic committed Jul 7, 2024 (1 parent: 7a34cb7)

Showing 2 changed files with 6 additions and 0 deletions.
1 change: 1 addition & 0 deletions in tests/distributed/test_pipeline_parallel.py

@@ -7,6 +7,7 @@
import ray

from tests.nm_utils.utils_skip import should_skip_test_group

from ..utils import VLLM_PATH, RemoteOpenAIServer

if should_skip_test_group(group_name="TEST_DISTRIBUTED"):
…
5 changes: 5 additions & 0 deletions in tests/quantization/test_lm_head.py

@@ -7,12 +7,17 @@
 import pytest
 import torch

+from tests.nm_utils.utils_skip import should_skip_test_group
 from vllm.model_executor.layers.linear import UnquantizedLinearMethod
 from vllm.model_executor.layers.quantization.gptq import GPTQLinearMethod
 from vllm.model_executor.layers.quantization.gptq_marlin import (
     GPTQMarlinLinearMethod)
 from vllm.model_executor.layers.quantization.marlin import MarlinLinearMethod

+if should_skip_test_group(group_name="TEST_QUANTIZATION"):
+    pytest.skip("TEST_QUANTIZATION=DISABLE, skipping quantization test group",
+                allow_module_level=True)
+
 PROMPT = "On the surface of Mars, we found"

 MODELS_QUANT = [(
…
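Both files gate an entire test module through the shared should_skip_test_group helper from tests.nm_utils.utils_skip. That helper's implementation is not part of this commit, so the following is only a minimal sketch of how it plausibly works, inferred from the skip message ("TEST_QUANTIZATION=DISABLE"): it assumes each test group is toggled by an environment variable named after the group.

```python
# Hypothetical sketch of tests/nm_utils/utils_skip.py; the real helper is not
# shown in this commit. Assumes a test group is disabled by setting an
# environment variable named after the group, e.g. TEST_QUANTIZATION=DISABLE.
import os


def should_skip_test_group(group_name: str) -> bool:
    """Return True when the group's environment variable is set to DISABLE."""
    return os.environ.get(group_name, "ENABLE") == "DISABLE"
```

Under that assumption, running `TEST_QUANTIZATION=DISABLE pytest tests/quantization` would hit the module-level pytest.skip(..., allow_module_level=True) added above and skip every test in the file.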

1 comment on commit 5f01ec2

@github-actions

smaller_is_better

All rows are "VLLM Serving - Dense" runs with sparsity None and benchmark_serving {"nr-qps-pair_": "300,1", "dataset": "sharegpt"}, on NVIDIA L4 x 1, vllm_version 0.5.1, python_version 3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0], torch_version 2.3.0+cu121. Values below are rounded to two decimals.

| Benchmark suite | Current: 5f01ec2 | Previous: f6f2554 | Ratio |
| --- | --- | --- | --- |
| mean_ttft_ms, facebook/opt-350m, max-model-len 2048 | 23.90 ms | 24.57 ms | 0.97 |
| mean_tpot_ms, facebook/opt-350m, max-model-len 2048 | 6.15 ms | 6.12 ms | 1.01 |
| mean_ttft_ms, meta-llama/Meta-Llama-3-8B-Instruct, max-model-len 4096 | 180.42 ms | 187.94 ms | 0.96 |
| mean_tpot_ms, meta-llama/Meta-Llama-3-8B-Instruct, max-model-len 4096 | 84.91 ms | 85.69 ms | 0.99 |

This comment was automatically generated by a workflow using github-action-benchmark.
