use v1.0.0 tag for nm-actions

neuralmagic · Jul 8, 2024 · 87d5b1c · 87d5b1c · github-actions · Jul 8, 2024
1 parent 537957c
commit 87d5b1c
Show file tree

Hide file tree

Showing 2 changed files with 3 additions and 6 deletions.
diff --git a/.github/workflows/nm-build.yml b/.github/workflows/nm-build.yml
@@ -108,7 +108,7 @@ jobs:
 
             - name: set python
               id: set_python
-              uses: neuralmagic/nm-actions/actions/set-python@main
+              uses: neuralmagic/nm-actions/actions/set-python@v1.0.0
               with:
                 python: ${{ inputs.python }}
                 venv: ${{ env.VENV_BASE }}

diff --git a/.github/workflows/nm-test.yml b/.github/workflows/nm-test.yml
@@ -72,10 +72,7 @@ jobs:
                 python-version: ${{ inputs.python }}
 
             - name: install automation components
-              run: |
-                sudo apt-get update --fix-missing
-                sudo apt-get install -y git-all
-                sudo apt-get install -y curl
+              uses: neuralmagic/nm-actions/actions/install-automation-components@v1.0.0
 
             - name: checkout
               id: checkout
@@ -94,7 +91,7 @@ jobs:
                 nvcc_threads: 0
 
             - name: install testmo
-              uses: neuralmagic/nm-actions/actions/install-testmo@main
+              uses: neuralmagic/nm-actions/actions/install-testmo@v1.0.0
 
             - name: create testmo run
               id: create_testmo_run
Benchmark suite	Current: `87d5b1c`	Previous: `f6f2554`	Ratio
`{"name": "mean_ttft_ms", "description": "VLLM Serving - Dense\nmodel - facebook/opt-350m\nmax-model-len - 2048\nsparsity - None\nbenchmark_serving {\n \"nr-qps-pair_\": \"300,1\",\n \"dataset\": \"sharegpt\"\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`24.499484919998622` ms	`24.363126653334653` ms	`1.01`
`{"name": "mean_tpot_ms", "description": "VLLM Serving - Dense\nmodel - facebook/opt-350m\nmax-model-len - 2048\nsparsity - None\nbenchmark_serving {\n \"nr-qps-pair_\": \"300,1\",\n \"dataset\": \"sharegpt\"\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`6.062676599480929` ms	`6.047910136160147` ms	`1.00`
`{"name": "mean_ttft_ms", "description": "VLLM Serving - Dense\nmodel - meta-llama/Meta-Llama-3-8B-Instruct\nmax-model-len - 4096\nsparsity - None\nbenchmark_serving {\n \"nr-qps-pair_\": \"300,1\",\n \"dataset\": \"sharegpt\"\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`188.70118089333272` ms	`184.3794278033306` ms	`1.02`
`{"name": "mean_tpot_ms", "description": "VLLM Serving - Dense\nmodel - meta-llama/Meta-Llama-3-8B-Instruct\nmax-model-len - 4096\nsparsity - None\nbenchmark_serving {\n \"nr-qps-pair_\": \"300,1\",\n \"dataset\": \"sharegpt\"\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"}`	`84.71225885242751` ms	`85.039470859161` ms	`1.00`