diff --git a/.github/workflows/lmi-no-code.yml b/.github/workflows/lmi-no-code.yml
index fc7f72b3d..55f17d178 100644
--- a/.github/workflows/lmi-no-code.yml
+++ b/.github/workflows/lmi-no-code.yml
@@ -113,7 +113,20 @@ jobs:
           ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
           serve
           python3 llm/client.py no_code falcon-40b
-          docker rm -f $(docker ps -aq)
+          docker rm -f $(docker ps -aq)
+      - name: DBRX lmi container
+        working-directory: tests/integration
+        if: ${{ matrix.container == 'deepspeed' }}
+        run: |
+          rm -rf models
+          echo -e "HF_MODEL_ID=s3://djl-llm/dbrx-instruct/" > docker_env
+          echo -e "HF_MODEL_TRUST_REMOTE_CODE=true" >> docker_env
+          echo -e "MODEL_LOADING_TIMEOUT=3600" >> docker_env
+          echo -e "OPTION_GPU_MEMORY_UTILIZATION=0.95" >> docker_env
+          ./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
+          serve
+          python3 llm/client.py no_code dbrx
+          docker rm -f $(docker ps -aq)
       - name: On fail step
         if: ${{ failure() }}
         working-directory: tests/integration
@@ -210,7 +223,7 @@ jobs:
           serve
           python3 llm/client.py no_code phi-2
           docker rm -f $(docker ps -aq)
-      - name: Baichun lmi container
+      - name: Baichuan lmi container
         working-directory: tests/integration
         run: |
           rm -rf models
diff --git a/tests/integration/llm/client.py b/tests/integration/llm/client.py
index c466f466e..d818b9938 100644
--- a/tests/integration/llm/client.py
+++ b/tests/integration/llm/client.py
@@ -703,6 +703,11 @@ def get_model_name():
         "batch_size": [1, 4],
         "seq_length": [256],
         "tokenizer": "tiiuae/falcon-40b",
+    },
+    "dbrx": {
+        "max_memory_per_gpu": [40.0],
+        "batch_size": [1, 4],
+        "seq_length": [256],
     }
 }
 
diff --git a/wlm/src/main/java/ai/djl/serving/wlm/LmiConfigRecommender.java b/wlm/src/main/java/ai/djl/serving/wlm/LmiConfigRecommender.java
index af6bd9d9c..d7eb38092 100644
--- a/wlm/src/main/java/ai/djl/serving/wlm/LmiConfigRecommender.java
+++ b/wlm/src/main/java/ai/djl/serving/wlm/LmiConfigRecommender.java
@@ -49,7 +49,8 @@ public final class LmiConfigRecommender {
                     Map.entry("phi", "vllm"),
                     Map.entry("qwen", "vllm"),
                     Map.entry("qwen2", "vllm"),
-                    Map.entry("stablelm", "vllm"));
+                    Map.entry("stablelm", "vllm"),
+                    Map.entry("dbrx", "lmi-dist"));
 
     private static final Set<String> OPTIMIZED_TASK_ARCHITECTURES =
             Set.of("ForCausalLM", "LMHeadModel", "ForConditionalGeneration");