
Commit

format
ElizaWszola committed Jul 4, 2024
1 parent a0d7f77 commit f629593
Showing 3 changed files with 5 additions and 1 deletion.
2 changes: 2 additions & 0 deletions tests/kernels/test_moe.py
@@ -38,6 +38,7 @@ def torch_moe(a, w1, w2, score, topk):
return (out.view(B, -1, w2.shape[1]) *
topk_weight.view(B, -1, 1).to(out.dtype)).sum(dim=1)


def torch_moe_single(a, w, score, topk):
B, D = a.shape
a = a.view(B, -1, D).repeat(1, topk, 1).reshape(-1, D)
@@ -51,6 +52,7 @@ def torch_moe_single(a, w, score, topk):
out[mask] = a[mask] @ w[i].transpose(0, 1)
return (out.view(B, -1, w.shape[1])).sum(dim=1)


@pytest.mark.parametrize("m", [1024 * 128, 512, 222, 33, 1])
@pytest.mark.parametrize("n", [2048, 256, 1024])
@pytest.mark.parametrize("k", [128, 511, 1024])
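Only fragments of the torch_moe_single reference helper are visible in the hunks above. For context, here is a self-contained sketch of the naive top-k MoE computation it checks against: route each token to its top-k experts, apply each expert's weight matrix, and sum the k outputs (consistent with the unweighted sum in the visible return line). The softmax over the router scores and the demo shapes are assumptions, since most of the function body is collapsed in the diff.

```python
import torch


def naive_moe_single(a: torch.Tensor, w: torch.Tensor, score: torch.Tensor,
                     topk: int) -> torch.Tensor:
    """Naive MoE reference: top-k routing, per-expert matmul, unweighted sum."""
    B, D = a.shape
    # Duplicate each token once per selected expert: (B * topk, D).
    a = a.view(B, -1, D).repeat(1, topk, 1).reshape(-1, D)
    out = torch.zeros(B * topk, w.shape[1], dtype=a.dtype, device=a.device)
    # Pick the top-k experts for each token from the (B, num_experts) scores.
    score = torch.softmax(score, dim=-1)
    _, topk_ids = torch.topk(score, topk)
    topk_ids = topk_ids.view(-1)
    # Apply each expert's (N, D) weight to the tokens routed to it.
    for i in range(w.shape[0]):
        mask = topk_ids == i
        if mask.any():
            out[mask] = a[mask] @ w[i].transpose(0, 1)
    # Sum the k expert outputs per token: (B, N).
    return out.view(B, -1, w.shape[1]).sum(dim=1)


if __name__ == "__main__":
    B, D, N, E, topk = 4, 8, 16, 6, 2
    a, w, score = torch.randn(B, D), torch.randn(E, N, D), torch.randn(B, E)
    print(naive_moe_single(a, w, score, topk).shape)  # torch.Size([4, 16])
```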
2 changes: 1 addition & 1 deletion vllm/model_executor/layers/fused_moe/__init__.py
@@ -1,6 +1,6 @@
  from vllm.model_executor.layers.fused_moe.fused_moe import (
      fused_experts, fused_marlin_moe, fused_moe, fused_topk,
-     get_config_file_name, single_marlin_moe, grouped_topk)
+     get_config_file_name, grouped_topk, single_marlin_moe)

__all__ = [
"fused_moe",
2 changes: 2 additions & 0 deletions vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -367,6 +367,7 @@ def fused_topk(
topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True)
return topk_weights, topk_ids


# This is used by the Deepseek-V2 model
def grouped_topk(
hidden_states: torch.Tensor,
@@ -412,6 +413,7 @@ def get_expert_config(w1: torch.Tensor, w2: torch.Tensor, topk: int, M: int,
return get_default_config(M, E, N, w1.shape[2], topk,
"float8" if use_fp8 else None)


def fused_experts(hidden_states: torch.Tensor,
w1: torch.Tensor,
w2: torch.Tensor,
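The second hunk in this file adds spacing around grouped_topk, which the inline comment ties to the DeepSeek-V2 model. As background, one common formulation of grouped top-k routing is sketched below; the function name, its parameters, and the max-per-group scoring are illustrative assumptions, not the actual implementation in fused_moe.py.

```python
import torch


def grouped_topk_sketch(gating_output: torch.Tensor, topk: int,
                        num_expert_group: int, topk_group: int):
    """Pick top-k experts per token, restricted to the best expert groups."""
    scores = torch.softmax(gating_output, dim=-1)        # (tokens, experts)
    num_tokens = scores.shape[0]
    # Score each group by its strongest expert: (tokens, num_expert_group).
    group_scores = scores.view(num_tokens, num_expert_group,
                               -1).max(dim=-1).values
    # Keep only the best `topk_group` groups per token; mask out the rest.
    group_idx = torch.topk(group_scores, topk_group, dim=-1).indices
    group_mask = torch.zeros_like(group_scores)
    group_mask.scatter_(1, group_idx, 1.0)
    experts_per_group = scores.shape[1] // num_expert_group
    score_mask = group_mask.unsqueeze(-1).expand(
        num_tokens, num_expert_group, experts_per_group).reshape(num_tokens, -1)
    masked_scores = scores.masked_fill(score_mask == 0, 0.0)
    # Ordinary top-k over the surviving experts, renormalized to sum to 1.
    topk_weights, topk_ids = torch.topk(masked_scores, topk, dim=-1)
    topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True)
    return topk_weights, topk_ids


# Example: 4 tokens, 8 experts in 4 groups of 2; route each token to
# 2 experts drawn from its 2 highest-scoring groups.
weights, ids = grouped_topk_sketch(torch.randn(4, 8), topk=2,
                                   num_expert_group=4, topk_group=2)
print(weights.shape, ids.shape)  # torch.Size([4, 2]) torch.Size([4, 2])
```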

1 comment on commit f629593

@github-actions

smaller_is_better

All rows: VLLM Serving - Dense, benchmark_serving with nr-qps-pair_ "300,1" on the sharegpt dataset, sparsity None; GPU: NVIDIA L4 x 1; vllm_version 0.5.1; python_version 3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]; torch_version 2.3.0+cu121.

| Benchmark | Model (max-model-len) | Current: f629593 | Previous: f6f2554 | Ratio |
| --- | --- | --- | --- | --- |
| mean_ttft_ms | meta-llama/Meta-Llama-3-8B-Instruct (4096) | 183.48948831311037 ms | 187.90222024999898 ms | 0.98 |
| mean_tpot_ms | meta-llama/Meta-Llama-3-8B-Instruct (4096) | 84.226856744941 ms | 83.68341362953504 ms | 1.01 |
| mean_ttft_ms | facebook/opt-350m (2048) | 25.00361027327017 ms | 24.958321440002084 ms | 1.00 |
| mean_tpot_ms | facebook/opt-350m (2048) | 6.221973479174864 ms | 6.288577137086927 ms | 0.99 |

This comment was automatically generated by a workflow using github-action-benchmark.
