Skip to content

Large model integration tests #1252

Large model integration tests

Large model integration tests #1252

# Integration tests for the djl-serving large-model containers.
# Triggered manually (optionally narrowed to one test group) and once a day.
name: Large model integration tests

on:
  # Manual run; both inputs are optional and default to the empty string.
  workflow_dispatch:
    inputs:
      djl-version:
        description: "The released version of DJL"
        required: false
        default: ""
      run_test:
        description: "Run only the tests you need [ hf, trtllm, scheduler, lmi-dist, vllm, vllm-lora, lmi-dist-lora ]"
        required: false
        default: ""
  # Daily run at minute 0 of hour 15.
  schedule:
    - cron: "0 15 * * *"

jobs:
# Requests a runner registration token from the GitHub API and hands it to
# start_instance.sh, three times over. The job outputs expose the
# `action_g6_instance_id` output of each step so that stop-runners can use them
# (presumably start_instance.sh is what sets that step output - TODO confirm).
create-runners:
  runs-on: [self-hosted, scheduler]
  steps:
    - name: Create new G6 instance
      id: create_gpu
      # An explicit `shell: bash` runs the script with `-eo pipefail`; with the
      # default shell a failing `curl --fail` inside the pipeline is masked and
      # an empty token would be passed on to start_instance.sh.
      shell: bash
      run: |
        cd /home/ubuntu/djl_benchmark_script/scripts
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq -er '.token' )
        ./start_instance.sh action_g6 "$token" djl-serving
    - name: Create new G6 instance
      id: create_gpu2
      # See the first step for why the shell is set explicitly.
      shell: bash
      run: |
        cd /home/ubuntu/djl_benchmark_script/scripts
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq -er '.token' )
        ./start_instance.sh action_g6 "$token" djl-serving
    - name: Create new G6 instance
      id: create_gpu3
      # See the first step for why the shell is set explicitly.
      shell: bash
      run: |
        cd /home/ubuntu/djl_benchmark_script/scripts
        token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
          https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
          --fail \
          | jq -er '.token' )
        ./start_instance.sh action_g6 "$token" djl-serving
  outputs:
    gpu_instance_id_1: ${{ steps.create_gpu.outputs.action_g6_instance_id }}
    gpu_instance_id_2: ${{ steps.create_gpu2.outputs.action_g6_instance_id }}
    gpu_instance_id_3: ${{ steps.create_gpu3.outputs.action_g6_instance_id }}
# Hugging Face handler tests. Runs when `run_test` is empty or "hf".
# Every test step follows the same cycle: prepare the model, launch the
# container, run the client against it, then remove all containers.
hf-handler-test:
  if: contains(fromJson('["", "hf"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g6 ]
  timeout-minutes: 60
  needs: create-runners
  strategy:
    matrix:
      arch: [ lmi ]
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests "numpy<2" huggingface_hub
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh ${{ matrix.arch }} ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test gpt-neo
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py huggingface gpt-neo-2.7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py huggingface gpt-neo-2.7b
        docker rm -f $(docker ps -aq)
    - name: Test bloom-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py huggingface bloom-7b1
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py huggingface bloom-7b1
        docker rm -f $(docker ps -aq)
    - name: Test LLAMA-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py huggingface llama-2-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py huggingface llama-2-7b
        docker rm -f $(docker ps -aq)
    - name: Test GPTJ-6B
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py huggingface gpt-j-6b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py huggingface gpt-j-6b
        docker rm -f $(docker ps -aq)
    - name: Test gpt4all-lora
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py huggingface gpt4all-lora
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py huggingface gpt4all-lora
        docker rm -f $(docker ps -aq)
    # The two streaming tests write a docker_env file before launching and
    # delete it again once the client has finished.
    - name: Test streaming bigscience/bloom-3b
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=1,2" > docker_env
        python3 llm/prepare.py huggingface bigscience/bloom-3b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py huggingface bigscience/bloom-3b
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: Test streaming t5-large
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=1" > docker_env
        python3 llm/prepare.py huggingface t5-large
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py huggingface t5-large
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    - name: Upload test logs
      # Steps default to running only on success; always() keeps the logs
      # available for failed runs, which is when they are needed.
      if: always()
      # v3 of this action has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: hf-handler-${{ matrix.arch }}-logs
        path: tests/integration/logs/
# TensorRT-LLM handler tests, part 1. Runs when `run_test` is empty or "trtllm".
# Each test writes CUDA_VISIBLE_DEVICES into docker_env, prepares the model,
# launches the container, runs the client, then removes docker_env and all
# containers.
trt-llm-handler-test:
  if: contains(fromJson('["", "trtllm"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g6 ]
  timeout-minutes: 120
  needs: create-runners
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests "numpy<2" huggingface_hub
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh tensorrt-llm ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: llama2-13b HF model with tp=4
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
        python3 llm/prepare.py trtllm llama2-13b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
          serve
        python3 llm/client.py trtllm llama2-13b
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    # TODO (maybe): model is compiled for g5, needs recompile for g6
    # - name: falcon-7b triton repo with tp=1
    #   working-directory: tests/integration
    #   run: |
    #     rm -rf models
    #     echo -en "CUDA_VISIBLE_DEVICES=0" > docker_env
    #     python3 llm/prepare.py trtllm falcon-7b
    #     ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
    #       serve
    #     python3 llm/client.py trtllm falcon-7b
    #     rm -rf docker_env
    #     docker rm -f $(docker ps -aq)
    - name: internlm-7b HF model with tp=4
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
        python3 llm/prepare.py trtllm internlm-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
          serve
        python3 llm/client.py trtllm internlm-7b
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: baichuan2-13b HF model with tp=4
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
        python3 llm/prepare.py trtllm baichuan2-13b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
          serve
        python3 llm/client.py trtllm baichuan2-13b
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: chatglm3-6b HF model with tp=4
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
        python3 llm/prepare.py trtllm chatglm3-6b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
          serve
        python3 llm/client.py trtllm chatglm3-6b
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: GPT2 HF model with tp=4
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
        python3 llm/prepare.py trtllm gpt2
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
          serve
        python3 llm/client.py trtllm gpt2
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: SantaCoder HF model with tp=4
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
        python3 llm/prepare.py trtllm santacoder
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
          serve
        python3 llm/client.py trtllm santacoder
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    - name: Upload test logs
      # Steps default to running only on success; always() keeps the logs
      # available for failed runs, which is when they are needed.
      if: always()
      # v3 of this action has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: trtllm-handler-logs
        path: tests/integration/logs/
# TensorRT-LLM handler tests, part 2. Runs when `run_test` is empty or "trtllm".
# Same per-test cycle as part 1; the smoothquant test launches with the
# `trtllm-sq` argument instead of `trtllm`.
trt-llm-handler-test-2:
  if: contains(fromJson('["", "trtllm"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g6 ]
  timeout-minutes: 120
  needs: create-runners
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests "numpy<2" huggingface_hub
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh tensorrt-llm ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: llama2-7b HF model with tp=4 and smoothquant
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
        python3 llm/prepare.py trtllm llama2-7b-smoothquant
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm-sq \
          serve
        python3 llm/client.py trtllm llama2-7b-smoothquant
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: mistral-7b HF model with tp=4
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
        python3 llm/prepare.py trtllm mistral-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
          serve
        python3 llm/client.py trtllm mistral-7b
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: gpt-j-6b HF model with tp=1
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=0" > docker_env
        python3 llm/prepare.py trtllm gpt-j-6b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
          serve
        python3 llm/client.py trtllm gpt-j-6b
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: qwen-7b HF model with tp=4
      working-directory: tests/integration
      run: |
        rm -rf models
        echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
        python3 llm/prepare.py trtllm qwen-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
          serve
        python3 llm/client.py trtllm qwen-7b
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    # TODO (maybe): model is compiled for g5, needs recompile for g6
    # - name: flan-t5-xxl pre-compiled model with python backend
    #   working-directory: tests/integration
    #   run: |
    #     rm -rf models
    #     echo -en "CUDA_VISIBLE_DEVICES=0,1,2,3" > docker_env
    #     python3 llm/prepare.py trtllm flan-t5-xxl
    #     ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
    #       serve
    #     python3 llm/client.py trtllm-python flan-t5-xxl
    #     rm -rf docker_env
    #     docker rm -f $(docker ps -aq)
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    - name: Upload test logs
      # Steps default to running only on success; always() keeps the logs
      # available for failed runs, which is when they are needed.
      if: always()
      # v3 of this action has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: trtllm-handler-quantization-logs
        path: tests/integration/logs/
# Rolling-batch scheduler tests driven by rb_client.py (gpt2, bloom-560m).
# Runs when `run_test` is empty or "scheduler". Downloads the awscurl binary
# and creates an `outputs` directory up front; both are removed at the end.
scheduler-single-gpu-test:
  if: contains(fromJson('["", "scheduler"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g6 ]
  timeout-minutes: 60
  needs: create-runners
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install awscurl
      working-directory: tests/integration
      # mkdir -p: do not fail if the directory is already present.
      run: |
        curl -OL https://github.com/frankfliu/junkyard/releases/download/v0.2.2/awscurl
        chmod +x awscurl
        mkdir -p outputs
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh lmi ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test gpt2
      working-directory: tests/integration
      run: |
        # Correctness test
        rm -rf models
        python3 llm/prepare.py rolling_batch_scheduler gpt2
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 rb_client.py correctness gpt2
        docker rm -f $(docker ps -aq)
    - name: Test bloom-560m
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py rolling_batch_scheduler bloom-560m
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 rb_client.py scheduler_single_gpu bloom-560m
        docker rm -f $(docker ps -aq)
    - name: Print outputs
      working-directory: tests/integration
      run: for file in outputs/*; do if [ -f "$file" ]; then echo "Contents of $file:"; cat "$file"; echo; fi; done
    - name: Cleanup
      working-directory: tests/integration
      run: |
        rm -rf outputs
        rm awscurl
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      # rm -f: awscurl may be missing (failure before the download, or after
      # the Cleanup step). A plain `rm` would abort this script before the
      # container cleanup and the log dump below.
      run: |
        for file in outputs/*; do if [ -f "$file" ]; then echo "Contents of $file:"; cat "$file"; echo; fi; done
        rm -rf outputs && rm -rf models
        rm -f awscurl
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    - name: Upload test logs
      # Steps default to running only on success; always() keeps the logs
      # available for failed runs, which is when they are needed.
      if: always()
      # v3 of this action has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: rb-single-gpu-logs
        path: tests/integration/logs/
# Rolling-batch scheduler test driven by rb_client.py (gpt-j-6b).
# Runs when `run_test` is empty or "scheduler". Downloads the awscurl binary
# and creates an `outputs` directory up front; both are removed at the end.
scheduler-multi-gpu-test:
  if: contains(fromJson('["", "scheduler"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g6 ]
  timeout-minutes: 60
  needs: create-runners
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install awscurl
      working-directory: tests/integration
      # mkdir -p: do not fail if the directory is already present.
      run: |
        curl -OL https://github.com/frankfliu/junkyard/releases/download/v0.2.2/awscurl
        chmod +x awscurl
        mkdir -p outputs
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh lmi ${{ github.event.inputs.djl-version }}
    - name: Download models and dockers
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test gptj-6b
      working-directory: tests/integration
      run: |
        # Concurrent requests test
        rm -rf models
        python3 llm/prepare.py rolling_batch_scheduler gpt-j-6b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 rb_client.py scheduler_multi_gpu gpt-j-6b
        docker rm -f $(docker ps -aq)
    - name: Print outputs
      working-directory: tests/integration
      run: for file in outputs/*; do if [ -f "$file" ]; then echo "Contents of $file:"; cat "$file"; echo; fi; done
    - name: Cleanup
      working-directory: tests/integration
      run: |
        rm -rf models && rm -rf outputs
        rm awscurl
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      # rm -f: awscurl may be missing (failure before the download, or after
      # the Cleanup step). A plain `rm` would abort this script before the
      # container cleanup and the log dump below.
      run: |
        for file in outputs/*; do if [ -f "$file" ]; then echo "Contents of $file:"; cat "$file"; echo; fi; done
        rm -rf outputs && rm -rf models
        rm -f awscurl
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    - name: Upload test logs
      # Steps default to running only on success; always() keeps the logs
      # available for failed runs, which is when they are needed.
      if: always()
      # v3 of this action has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: rb-multi-gpu-logs
        path: tests/integration/logs/
# lmi-dist tests, part 1. Runs when `run_test` is empty or "lmi-dist".
# Most tests prepare a model directory and launch with `serve -m ...`; the gpt2
# test instead configures the container purely through docker_env ("nocode").
lmi-dist-test-1:
  if: contains(fromJson('["", "lmi-dist"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g6 ]
  timeout-minutes: 60
  needs: create-runners
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests "numpy<2" huggingface_hub
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh lmi ${{ github.event.inputs.djl-version }}
    - name: Download docker
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test gpt-neox-20b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist gpt-neox-20b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py lmi_dist gpt-neox-20b
        docker rm -f $(docker ps -aq)
    - name: Test falcon-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist falcon-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py lmi_dist falcon-7b
        docker rm -f $(docker ps -aq)
    - name: Test falcon2-11b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist falcon-11b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py lmi_dist falcon-11b
        docker rm -f $(docker ps -aq)
    - name: Test flan-t5-xxl
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist flan-t5-xxl
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py lmi_dist flan-t5-xxl
        docker rm -f $(docker ps -aq)
    - name: Test gpt2
      working-directory: tests/integration
      # docker_env is deleted after the client run, as every other step in this
      # workflow that writes it does; otherwise the gpt2 settings would still be
      # on disk when the next test launches its container (presumably
      # launch_container.sh picks the file up when present - TODO confirm).
      run: |
        rm -rf models
        echo -en "SERVING_LOAD_MODELS=test::MPI=/opt/ml/model\nOPTION_MAX_ROLLING_BATCH_SIZE=2\nOPTION_OUTPUT_FORMATTER=jsonlines\nOPTION_TENSOR_PARALLEL_DEGREE=1\nOPTION_MODEL_ID=gpt2\nOPTION_TASK=text-generation\nOPTION_ROLLING_BATCH=lmi-dist" > docker_env
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG nocode lmi
        python3 llm/client.py lmi_dist gpt2
        rm -rf docker_env
        docker rm -f $(docker ps -aq)
    - name: Test mpt-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist mpt-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py lmi_dist mpt-7b
        docker rm -f $(docker ps -aq)
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    - name: Upload test logs
      # Steps default to running only on success; always() keeps the logs
      # available for failed runs, which is when they are needed.
      if: always()
      # v3 of this action has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: lmi-dist-logs-1
        path: tests/integration/logs/
# lmi-dist tests, part 2. Runs when `run_test` is empty or "lmi-dist".
# Every test prepares the model, launches the lmi container, runs the client
# against it, then removes all containers.
lmi-dist-test-2:
  if: contains(fromJson('["", "lmi-dist"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g6 ]
  timeout-minutes: 60
  needs: create-runners
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      # huggingface_hub added so this job installs the same packages as the
      # other jobs that run llm/prepare.py and llm/client.py.
      run: pip3 install requests "numpy<2" huggingface_hub
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh lmi ${{ github.event.inputs.djl-version }}
    - name: Download docker
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test octocoder
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist octocoder
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py lmi_dist octocoder
        docker rm -f $(docker ps -aq)
    - name: Test speculative-llama-13b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist speculative-llama-13b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py lmi_dist speculative-llama-13b
        docker rm -f $(docker ps -aq)
    - name: Test starcoder2-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist starcoder2-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py lmi_dist starcoder2-7b
        docker rm -f $(docker ps -aq)
    - name: Test gemma-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist gemma-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py lmi_dist gemma-7b
        docker rm -f $(docker ps -aq)
    - name: Test llama2-13b-gptq
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist llama2-13b-gptq
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py lmi_dist llama2-13b-gptq
        docker rm -f $(docker ps -aq)
    - name: Test Mistral-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist mistral-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py lmi_dist mistral-7b
        docker rm -f $(docker ps -aq)
    - name: Test llama2-7b-32k
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist llama2-7b-32k
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py lmi_dist llama2-7b-32k
        docker rm -f $(docker ps -aq)
    - name: Test mistral-7b-128k-awq
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist mistral-7b-128k-awq
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py lmi_dist mistral-7b-128k-awq
        docker rm -f $(docker ps -aq)
    - name: Test llama2-7b-chat
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist llama2-7b-chat
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py lmi_dist_chat llama2-7b-chat
        docker rm -f $(docker ps -aq)
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    - name: Upload test logs
      # Steps default to running only on success; always() keeps the logs
      # available for failed runs, which is when they are needed.
      if: always()
      # v3 of this action has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: lmi-dist-logs-2
        path: tests/integration/logs/
# vLLM tests. Runs when `run_test` is empty or "vllm".
# Every test prepares the model, launches the lmi container, runs the client
# against it, then removes all containers.
vllm-test:
  if: contains(fromJson('["", "vllm"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g6 ]
  timeout-minutes: 60
  needs: create-runners
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests "numpy<2" huggingface_hub
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh lmi ${{ github.event.inputs.djl-version }}
    - name: Download docker
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test llama2-13b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py vllm llama2-13b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py vllm llama2-13b
        docker rm -f $(docker ps -aq)
    - name: Test llama2-13b awq
      working-directory: tests/integration
      # NOTE(review): prepare.py is given llama2-13b-awq but client.py is given
      # llama2-13b - confirm the client is meant to reuse the non-awq spec.
      run: |
        rm -rf models
        python3 llm/prepare.py vllm llama2-13b-awq
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py vllm llama2-13b
        docker rm -f $(docker ps -aq)
    - name: Test gpt-neox-20b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py vllm gpt-neox-20b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py vllm gpt-neox-20b
        docker rm -f $(docker ps -aq)
    - name: Test Mistral-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py vllm mistral-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py vllm mistral-7b
        docker rm -f $(docker ps -aq)
    - name: Test phi-2
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py vllm phi-2
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py vllm phi-2
        docker rm -f $(docker ps -aq)
    - name: Test starcoder2-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py vllm starcoder2-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py vllm starcoder2-7b
        docker rm -f $(docker ps -aq)
    - name: Test gemma-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py vllm gemma-7b
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py vllm gemma-7b
        docker rm -f $(docker ps -aq)
    - name: Test llama2-7b-chat
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py vllm llama2-7b-chat
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve -m test=file:/opt/ml/model/test/
        python3 llm/client.py vllm_chat llama2-7b-chat
        docker rm -f $(docker ps -aq)
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    - name: Upload test logs
      # Steps default to running only on success; always() keeps the logs
      # available for failed runs, which is when they are needed.
      if: always()
      # v3 of this action has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: vllm-logs
        path: tests/integration/logs/
# vLLM LoRA adapter tests. Runs when `run_test` is empty or "vllm-lora".
# Every test prepares the model, launches the lmi container with `serve`, runs
# the `vllm_adapters` client against it, then removes all containers.
vllm-lora-test:
  if: contains(fromJson('["", "vllm-lora"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g6 ]
  timeout-minutes: 60
  needs: create-runners
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests "numpy<2" huggingface_hub
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh lmi ${{ github.event.inputs.djl-version }}
    - name: Download docker
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test vllm unmerged lora - llama7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py vllm llama-7b-unmerged-lora
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py vllm_adapters llama-7b-unmerged-lora
        docker rm -f $(docker ps -aq)
    - name: Test vllm unmerged lora overflow - llama7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py vllm llama-7b-unmerged-lora-overflow
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py vllm_adapters llama-7b-unmerged-lora-overflow
        docker rm -f $(docker ps -aq)
    - name: Test vllm lora awq - llama2-13b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py vllm llama2-13b-awq-unmerged-lora
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py vllm_adapters llama2-13b-awq-unmerged-lora
        docker rm -f $(docker ps -aq)
    - name: Test vllm lora - mistral-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py vllm mistral-7b-unmerged-lora
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py vllm_adapters mistral-7b-unmerged-lora
        docker rm -f $(docker ps -aq)
    - name: Test vllm lora awq - mistral-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py vllm mistral-7b-awq-unmerged-lora
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py vllm_adapters mistral-7b-awq-unmerged-lora
        docker rm -f $(docker ps -aq)
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    - name: Upload test logs
      # Steps default to running only on success; always() keeps the logs
      # available for failed runs, which is when they are needed.
      if: always()
      # v3 of this action has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: vllm-lora-logs
        path: tests/integration/logs/
# lmi-dist LoRA adapter tests. Runs when `run_test` is empty or "lmi-dist-lora".
# Every test prepares the model, launches the lmi container with `serve`, runs
# the `lmi_dist_adapters` client against it, then removes all containers.
lmi-dist-lora-test:
  if: contains(fromJson('["", "lmi-dist-lora"]'), github.event.inputs.run_test)
  runs-on: [ self-hosted, g6 ]
  timeout-minutes: 60
  needs: create-runners
  steps:
    - uses: actions/checkout@v4
    - name: Clean env
      run: |
        yes | docker system prune -a --volumes
        sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
        echo "wait dpkg lock..."
        while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
    - name: Set up Python3
      uses: actions/setup-python@v5
      with:
        python-version: '3.10.x'
    - name: Install pip dependencies
      run: pip3 install requests "numpy<2" huggingface_hub
    - name: Build container name
      run: ./serving/docker/scripts/docker_name_builder.sh lmi ${{ github.event.inputs.djl-version }}
    - name: Download docker
      working-directory: tests/integration
      run: |
        docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
    - name: Test lmi-dist unmerged lora - llama7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist llama-7b-unmerged-lora
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py lmi_dist_adapters llama-7b-unmerged-lora
        docker rm -f $(docker ps -aq)
    - name: Test lmi-dist unmerged lora overflow - llama7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist llama-7b-unmerged-lora-overflow
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py lmi_dist_adapters llama-7b-unmerged-lora-overflow
        docker rm -f $(docker ps -aq)
    - name: Test lmi-dist lora awq - llama2-13b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist llama2-13b-awq-unmerged-lora
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py lmi_dist_adapters llama2-13b-awq-unmerged-lora
        docker rm -f $(docker ps -aq)
    - name: Test lmi-dist lora - mistral-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist mistral-7b-unmerged-lora
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py lmi_dist_adapters mistral-7b-unmerged-lora
        docker rm -f $(docker ps -aq)
    - name: Test lmi-dist lora awq - mistral-7b
      working-directory: tests/integration
      run: |
        rm -rf models
        python3 llm/prepare.py lmi_dist mistral-7b-awq-unmerged-lora
        ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
          serve
        python3 llm/client.py lmi_dist_adapters mistral-7b-awq-unmerged-lora
        docker rm -f $(docker ps -aq)
    - name: On fail step
      if: ${{ failure() }}
      working-directory: tests/integration
      run: |
        docker rm -f $(docker ps -aq) || true
        cat logs/serving.log
    - name: Upload test logs
      # Steps default to running only on success; always() keeps the logs
      # available for failed runs, which is when they are needed.
      if: always()
      # v3 of this action has been retired by GitHub; v4 is the supported line.
      uses: actions/upload-artifact@v4
      with:
        name: lmi-dist-lora-logs
        path: tests/integration/logs/
# Runs after every other job, whatever their outcome, and passes each instance
# id recorded by create-runners to stop_instance.sh.
stop-runners:
  if: always()
  runs-on: [ self-hosted, scheduler ]
  needs: [ create-runners, hf-handler-test, trt-llm-handler-test, trt-llm-handler-test-2, scheduler-single-gpu-test, scheduler-multi-gpu-test, lmi-dist-test-1, lmi-dist-test-2, vllm-test, vllm-lora-test, lmi-dist-lora-test]
  steps:
    - name: Stop all instances
      # Every instance is attempted even if an earlier stop fails: the script
      # runs with `-e`, so a bare failing call would otherwise abort the step
      # and leave the remaining instances running. Ids that are empty (the
      # matching create step produced no output) are skipped. The step still
      # ends with a failure status if any stop call failed.
      run: |
        cd /home/ubuntu/djl_benchmark_script/scripts
        status=0
        for instance_id in \
          "${{ needs.create-runners.outputs.gpu_instance_id_1 }}" \
          "${{ needs.create-runners.outputs.gpu_instance_id_2 }}" \
          "${{ needs.create-runners.outputs.gpu_instance_id_3 }}"; do
          if [ -z "$instance_id" ]; then
            echo "No instance id recorded, nothing to stop"
            continue
          fi
          ./stop_instance.sh "$instance_id" || status=1
        done
        exit "$status"