Commit
Update the build script to use vLLM 0.3.3 (#1637)
Qing Lan authored Mar 19, 2024
1 parent 39bc296 commit 286b7dd
Showing 2 changed files with 3 additions and 26 deletions.
27 changes: 2 additions & 25 deletions .github/workflows/lmi-dist-deps-build.yml
@@ -60,25 +60,6 @@ jobs:
cd flash-attention-v2
pip wheel . --no-deps
cp flash_attn-*.whl ../build_artifacts
- - name: Build FlashAttn V1
- run: |
- . ./venv/bin/activate
- git clone codecommit::us-east-1://flash-attention-v1
- cd flash-attention-v1
- pip wheel . --no-deps
- cd csrc/layer_norm && pip wheel . --no-deps
- cd ../rotary && pip wheel . --no-deps
- cd ../../
- cp flash_attn*.whl ../build_artifacts
- cp csrc/layer_norm/*.whl ../build_artifacts
- cp csrc/rotary/*.whl ../build_artifacts
- - name: Build vllm 0.1.1
- run: |
- . ./venv/bin/activate
- git clone codecommit::us-east-1://lmi_vllm
- cd lmi_vllm
- pip wheel . --no-deps
- cp lmi_vllm-*.whl ../build_artifacts
- name: Build awq kernels
run: |
. ./venv/bin/activate
@@ -87,10 +68,10 @@ jobs:
cd llm-awq/awq/kernels && git checkout 8baf5dd9c3bfe8bdc5987f52ae4dffde7471346f
pip wheel . --no-deps
cp awq*.whl ../../../build_artifacts
- - name: Build vllm 0.3.2 speculative decoding
+ - name: Build vllm 0.3.3 speculative decoding
run: |
. ./venv/bin/activate
- git clone https://github.com/ymwangg/vllm -b specdec_v0.3.2
+ git clone https://github.com/ymwangg/vllm -b specdec_v0.3.3
cd vllm
export TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0+PTX"
export VLLM_INSTALL_PUNICA_KERNELS=1
@@ -118,11 +99,7 @@ jobs:
name: build-artifacts
- name: upload to S3
run: |
- aws s3 cp flash_attn_1*.whl s3://djl-ai-staging/publish/flash_attn/cu121-pt212/
aws s3 cp flash_attn-2*.whl s3://djl-ai-staging/publish/flash_attn/cu121-pt212/
- aws s3 cp dropout_layer_norm*.whl s3://djl-ai-staging/publish/flash_attn/cu121-pt212/
- aws s3 cp rotary_emb*.whl s3://djl-ai-staging/publish/flash_attn/cu121-pt212/
- aws s3 cp lmi_vllm*.whl s3://djl-ai-staging/publish/lmi_vllm/cu121-pt212/
aws s3 cp vllm*.whl s3://djl-ai-staging/publish/vllm/cu121-pt212/
aws s3 cp awq*.whl s3://djl-ai-staging/publish/awq/cu121-pt212/
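The tail of the vllm 0.3.3 step is collapsed in the hunk above. For reference, a minimal local sketch of the same build, assuming the step finishes with the same pip wheel / build_artifacts pattern used by the other steps in this workflow:

# Sketch only; the final two commands are assumptions based on the surrounding steps.
. ./venv/bin/activate
git clone https://github.com/ymwangg/vllm -b specdec_v0.3.3
cd vllm
export TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 8.9 9.0+PTX"  # GPU architectures to compile for
export VLLM_INSTALL_PUNICA_KERNELS=1                    # also build the LoRA (punica) kernels
pip wheel . --no-deps                                    # assumed: build the wheel without pulling deps
cp vllm-*.whl ../build_artifacts                         # assumed: stage the wheel for upload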
2 changes: 1 addition & 1 deletion serving/docker/deepspeed.Dockerfile
@@ -31,7 +31,7 @@ ARG datasets_version=2.17.1
ARG deepspeed_version=nightly
ARG deepspeed_wheel="https://publish.djl.ai/deepspeed/deepspeed-${deepspeed_version}-cp310-cp310-linux_x86_64.whl"
# LMI-Dist Deps
ARG vllm_wheel="https://publish.djl.ai/vllm/cu121-pt212/vllm-0.3.2-cp310-cp310-linux_x86_64.whl"
ARG vllm_wheel="https://publish.djl.ai/vllm/cu121-pt212/vllm-0.3.3-cp310-cp310-linux_x86_64.whl"
ARG flash_attn_wheel="https://publish.djl.ai/flash_attn/flash_attn_1-1.0.9-cp310-cp310-linux_x86_64.whl"
ARG dropout_layer_norm_wheel="https://publish.djl.ai/flash_attn/dropout_layer_norm-0.1-cp310-cp310-linux_x86_64.whl"
ARG rotary_emb_wheel="https://publish.djl.ai/flash_attn/rotary_emb-0.1-cp310-cp310-linux_x86_64.whl"
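As a quick sanity check outside the image build, the new wheel URL can be installed directly. This is a sketch, not part of the commit, and assumes a Python 3.10 environment with CUDA 12.1 and PyTorch 2.1.2 to match the cu121-pt212 wheel tag:

# Install the prebuilt wheel referenced by the vllm_wheel ARG and confirm the version.
pip install https://publish.djl.ai/vllm/cu121-pt212/vllm-0.3.3-cp310-cp310-linux_x86_64.whl
python -c "import vllm; print(vllm.__version__)"  # expected output: 0.3.3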
