[lcnc] support dbrx model
siddvenk committed Mar 29, 2024
1 parent 3e948d8 commit baee2ab
Showing 3 changed files with 22 additions and 3 deletions.
17 changes: 15 additions & 2 deletions .github/workflows/lmi-no-code.yml
@@ -113,7 +113,20 @@ jobs:
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
serve
python3 llm/client.py no_code falcon-40b
docker rm -f $(docker ps -aq)
docker rm -f $(docker ps -aq)
- name: DBRX lmi container
working-directory: tests/integration
if: ${{ matrix.container == 'deepspeed' }}
run: |
rm -rf models
echo -e "HF_MODEL_ID=s3://djl-llm/dbrx-instruct/" > docker_env
echo -e "HF_MODEL_TRUST_REMOTE_CODE=true" >> docker_env
echo -e "MODEL_LOADING_TIMEOUT=3600" >> docker_env
echo -e "OPTION_GPU_MEMORY_UTILIZATION=0.95" >> docker_env
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models ${{ matrix.container }} \
serve
python3 llm/client.py no_code dbrx
docker rm -f $(docker ps -aq)
- name: On fail step
if: ${{ failure() }}
working-directory: tests/integration
@@ -210,7 +223,7 @@ jobs:
serve
python3 llm/client.py no_code phi-2
docker rm -f $(docker ps -aq)
- name: Baichun lmi container
- name: Baichuan lmi container
working-directory: tests/integration
run: |
rm -rf models
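For context on the docker_env entries written by the new DBRX step: LMI containers generally translate environment variables with an OPTION_ prefix into option.* keys for serving.properties, so OPTION_GPU_MEMORY_UTILIZATION corresponds to option.gpu_memory_utilization. The sketch below illustrates that naming convention in Python; it is an assumption about the convention, not code from this repository.

```python
import os

def env_to_serving_options(environ=None):
    """Collect OPTION_* environment variables as serving.properties-style keys.

    Illustrative only -- the real translation happens inside the LMI container,
    not in this repository's test code.
    """
    environ = dict(os.environ) if environ is None else environ
    options = {}
    for key, value in environ.items():
        if key.startswith("OPTION_"):
            options["option." + key[len("OPTION_"):].lower()] = value
    return options

# With the docker_env from the DBRX step above:
# OPTION_GPU_MEMORY_UTILIZATION=0.95 -> {"option.gpu_memory_utilization": "0.95"}
```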
5 changes: 5 additions & 0 deletions tests/integration/llm/client.py
@@ -703,6 +703,11 @@ def get_model_name():
"batch_size": [1, 4],
"seq_length": [256],
"tokenizer": "tiiuae/falcon-40b",
},
"dbrx": {
"max_memory_per_gpu": [40.0],
"batch_size": [1, 4],
"seq_length": [256],
}
}
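For orientation, the new "dbrx" entry follows the same shape as the other model specs in client.py (note it omits the "tokenizer" key that falcon-40b sets). A minimal sketch of how such a spec might drive the no-code test loop is below; the endpoint, payload shape, and helper names are assumptions for illustration, not the actual llm/client.py logic.

```python
import requests

dbrx_spec = {
    "max_memory_per_gpu": [40.0],  # per-GPU memory ceiling checked by the test (GiB)
    "batch_size": [1, 4],
    "seq_length": [256],
}

def run_spec(spec, endpoint="http://127.0.0.1:8080/invocations"):
    # Sweep every batch_size/seq_length combination in the spec.
    for batch_size in spec["batch_size"]:
        for seq_length in spec["seq_length"]:
            payload = {
                "inputs": ["Example prompt for the served model"] * batch_size,
                "parameters": {"max_new_tokens": seq_length},
            }
            resp = requests.post(endpoint, json=payload)
            resp.raise_for_status()

# run_spec(dbrx_spec)  # would exercise the served model much like the CI step does
```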

@@ -49,7 +49,8 @@ public final class LmiConfigRecommender {
Map.entry("phi", "vllm"),
Map.entry("qwen", "vllm"),
Map.entry("qwen2", "vllm"),
Map.entry("stablelm", "vllm"));
Map.entry("stablelm", "vllm"),
Map.entry("dbrx", "lmi-dist"));

private static final Set<String> OPTIMIZED_TASK_ARCHITECTURES =
Set.of("ForCausalLM", "LMHeadModel", "ForConditionalGeneration");
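The map change above routes the dbrx model type to the lmi-dist rolling-batch backend, whereas the neighboring entries point to vllm. A rough sketch of how such a lookup is typically consulted, written in Python for consistency with the test snippets above (the real implementation is the Java LmiConfigRecommender class), might look like this; the map name, function name, and fallback are assumptions.

```python
# Illustrative mirror of the model-type -> backend entries shown in the diff;
# not the actual Java field or lookup logic.
MODEL_TO_ROLLING_BATCH = {
    "phi": "vllm",
    "qwen": "vllm",
    "qwen2": "vllm",
    "stablelm": "vllm",
    "dbrx": "lmi-dist",  # new in this commit
}

def recommend_rolling_batch(model_type: str, default: str = "auto") -> str:
    """Return the recommended rolling-batch engine for a Hugging Face model_type."""
    return MODEL_TO_ROLLING_BATCH.get(model_type, default)

assert recommend_rolling_batch("dbrx") == "lmi-dist"
```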
