Skip to content
This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

Commit

Permalink
remove strip part
Browse files: browse the repository at this point in the history
  • Loading branch information
dhuangnm authored and dhuangnm committed Jun 26, 2024
1 parent ddacf2c commit b79c0ac
Showing 1 changed file with 1 addition and 14 deletions.
15 changes: 1 addition & 14 deletions .github/actions/nm-build-vllm/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,27 +30,14 @@ runs:
SUCCESS=0
python setup.py bdist_wheel --dist-dir=dist --py-limited-api=cp38 || SUCCESS=$?
echo "whl_status=${SUCCESS}" >> "$GITHUB_OUTPUT"
ls -alh dist
WHL_FILEPATH=$(find dist -type f -iname "*linux_x86_64.whl")
echo "whl: ${WHL_FILEPATH}"
# strip wheel
if [ ! $(command -v strip) ]; then
sudo apt install -y binutils
fi
if [ ! $(command -v file) ]; then
sudo apt install -y file
fi
python -m wheel unpack ${WHL_FILEPATH} -d stripped-package
find stripped-package/ -type f -name '*.so' -exec strip {} \;
find stripped-package/ -type f -name '*.so' -exec file {} \;
STRIPPED_PACKAGE=`ls stripped-package/`
rm -rf ${WHL_FILEPATH}
python -m wheel pack stripped-package/${STRIPPED_PACKAGE} -d dist/
RENAME=$(echo ${WHL_FILEPATH} | sed -e 's/linux_x86_64/manylinux_2_17_x86_64/')
echo "rename: ${RENAME}"
mv ${WHL_FILEPATH} ${RENAME}
WHL=$(basename ${RENAME})
echo "whl=${WHL}" >> "$GITHUB_OUTPUT"
ls -alh dist
if [ ${SUCCESS} -ne 0 ]; then
exit 1
fi
Expand Down

2 comments on commit b79c0ac

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bigger_is_better

Benchmark suite Current: b79c0ac Previous: 93b1bb9 Ratio
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 2.4692018525090735 prompts/s 2.4872798902675823 prompts/s 1.01
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 948.1735113634842 tokens/s 955.1154778627516 tokens/s 1.01

This comment was automatically generated by workflow using github-action-benchmark.

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bigger_is_better

Benchmark suite Current: b79c0ac Previous: 93b1bb9 Ratio
{"name": "request_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 2.4872332276127915 prompts/s 2.4872798902675823 prompts/s 1.00
{"name": "token_throughput", "description": "VLLM Engine throughput - synthetic\nmodel - NousResearch/Llama-2-7b-chat-hf\nmax_model_len - 4096\nbenchmark_throughput {\n \"use-all-available-gpus_\": \"\",\n \"input-len\": 256,\n \"output-len\": 128,\n \"num-prompts\": 1000\n}", "gpu_description": "NVIDIA L4 x 1", "vllm_version": "0.5.1", "python_version": "3.10.12 (main, Jun 7 2023, 13:43:11) [GCC 11.3.0]", "torch_version": "2.3.0+cu121"} 955.097559403312 tokens/s 955.1154778627516 tokens/s 1.00

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.