feat: remove port specification for inference containers (#307)
* first commit

* refactor config.example.toml
jorgeantonio21 authored Dec 26, 2024
1 parent 818cbf9 commit 3d683a9
Showing 2 changed files with 1 addition and 13 deletions.
config.example.toml (2 changes: 1 addition & 1 deletion)
@@ -1,5 +1,5 @@
 [atoma_service]
-chat_completions_service_url = "http://chat-completions:80" # Internal Docker network URL
+chat_completions_service_url = "http://chat-completions:8000" # Internal Docker network URL
 embeddings_service_url = "http://embeddings:80"
 image_generations_service_url = "http://image-generations:80"
 # List of models to be used by the service, the current value here is just a placeholder, please change it to the models you want to deploy
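With the host port mappings removed (see the docker-compose.yaml hunks below), the atoma service reaches each backend by its container name over the internal Compose network, so each URL must use the port the container itself listens on: 8000 for the vLLM OpenAI server, 80 for the TEI and mistral.rs containers in this file. A sketch of the resulting block, where only the annotations are additions (the placeholder model list is omitted):

[atoma_service]
# vLLM (container_name: chat-completions) serves its OpenAI-compatible API on 8000
chat_completions_service_url = "http://chat-completions:8000"
# text-embeddings-inference (container_name: embeddings) serves on 80
embeddings_service_url = "http://embeddings:80"
# mistral.rs (container_name: image-generations) serves on 80
image_generations_service_url = "http://image-generations:80"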
docker-compose.yaml (12 changes: 0 additions & 12 deletions)
@@ -126,8 +126,6 @@ services:
     container_name: chat-completions
     profiles: [chat_completions_vllm]
     image: vllm/vllm-openai:v0.6.5
-    ports:
-      - "127.0.0.1:${CHAT_COMPLETIONS_SERVER_PORT}:8000"
     ipc: host
     command: ${VLLM_ENGINE_ARGS}

@@ -138,8 +136,6 @@ services:
     build:
       context: https://github.com/atoma-network/vllm.git#main
       dockerfile: Dockerfile.cpu
-    ports:
-      - "127.0.0.1:${CHAT_COMPLETIONS_SERVER_PORT}:8000"
     command: --model ${CHAT_COMPLETIONS_MODEL} --max-model-len ${CHAT_COMPLETIONS_MAX_MODEL_LEN}
 
   vllm-rocm:
@@ -149,8 +145,6 @@ services:
     build:
       context: https://github.com/atoma-network/vllm.git#main
       dockerfile: Dockerfile.rocm
-    ports:
-      - "127.0.0.1:${CHAT_COMPLETIONS_SERVER_PORT}:8000"
     command: --model ${CHAT_COMPLETIONS_MODEL} --max-model-len ${CHAT_COMPLETIONS_MAX_MODEL_LEN} --tensor-parallel-size ${VLLM_TENSOR_PARALLEL_SIZE}
 
   mistralrs-cpu:
@@ -160,26 +154,20 @@ services:
     build:
       context: https://github.com/EricLBuehler/mistral.rs.git
       dockerfile: Dockerfile
-    ports:
-      - "127.0.0.1:${CHAT_COMPLETIONS_SERVER_PORT}:80"
     command: plain -m ${CHAT_COMPLETIONS_MODEL}
 
   tei:
     <<: *inference-service-cuda
     container_name: embeddings
     profiles: [embeddings_tei]
     image: ${TEI_IMAGE}
-    ports:
-      - "127.0.0.1:${EMBEDDINGS_SERVER_PORT}:80"
     command: --model-id ${EMBEDDINGS_MODEL} --huggingface-hub-cache /root/.cache/huggingface/hub
 
   mistralrs:
     <<: *inference-service-cuda
     container_name: image-generations
     profiles: [image_generations_mistralrs]
     image: ${MISTRALRS_IMAGE}
-    ports:
-      - "127.0.0.1:${IMAGE_GENERATIONS_SERVER_PORT}:80"
     command: diffusion-plain -m ${IMAGE_GENERATIONS_MODEL} --arch ${IMAGE_GENERATIONS_ARCHITECTURE}
 
 networks:
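With these mappings gone, none of the inference containers publish a port on the host; they are reachable only from other containers on the Compose network (e.g. http://chat-completions:8000), and the CHAT_COMPLETIONS_SERVER_PORT, EMBEDDINGS_SERVER_PORT, and IMAGE_GENERATIONS_SERVER_PORT variables used by the removed mappings are, as far as these hunks show, no longer needed. If host access is still wanted for local debugging, one option is a Compose override file instead of editing the tracked docker-compose.yaml. A minimal sketch, assuming the CUDA vLLM service key is named vllm (the key sits above the visible hunk, so the name is an assumption):

# docker-compose.override.yml (hypothetical; not part of this commit)
# Re-publishes the vLLM OpenAI port on localhost for local testing only.
services:
  vllm:  # assumed key; use the actual CUDA vLLM service name from docker-compose.yaml
    ports:
      - "127.0.0.1:${CHAT_COMPLETIONS_SERVER_PORT:-8000}:8000"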
