feat: remove port specification for inference containers #307

Merged · 2 commits · Dec 26, 2024
2 changes: 1 addition & 1 deletion config.example.toml
@@ -1,5 +1,5 @@
 [atoma_service]
-chat_completions_service_url = "http://chat-completions:80" # Internal Docker network URL
+chat_completions_service_url = "http://chat-completions:8000" # Internal Docker network URL
 embeddings_service_url = "http://embeddings:80"
 image_generations_service_url = "http://image-generations:80"
 # List of models to be used by the service, the current value here is just a placeholder, please change it to the models you want to deploy
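The new URL targets vLLM's default in-container port (8000) directly: containers on a shared Compose network resolve one another by service name and connect straight to the container port, so no host port mapping is involved. A minimal sketch of the pattern, where the atoma-node service name, placeholder image, and wget probe are illustrative assumptions rather than anything taken from this PR:

services:
  atoma-node:                 # hypothetical consumer of the config above
    image: alpine             # placeholder image, only to demonstrate connectivity
    command: wget -qO- http://chat-completions:8000/v1/models
    depends_on: [chat-completions]
  chat-completions:
    image: vllm/vllm-openai:v0.6.5
    command: --model ${CHAT_COMPLETIONS_MODEL}   # vLLM's OpenAI-compatible server listens on 8000 by default

Nothing here is published on the host; the request from atoma-node to chat-completions stays entirely on the Docker network.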
12 changes: 0 additions & 12 deletions docker-compose.yaml
@@ -126,8 +126,6 @@ services:
     container_name: chat-completions
     profiles: [chat_completions_vllm]
     image: vllm/vllm-openai:v0.6.5
-    ports:
-      - "127.0.0.1:${CHAT_COMPLETIONS_SERVER_PORT}:8000"
     ipc: host
     command: ${VLLM_ENGINE_ARGS}

@@ -138,8 +136,6 @@ services:
     build:
       context: https://github.com/atoma-network/vllm.git#main
       dockerfile: Dockerfile.cpu
-    ports:
-      - "127.0.0.1:${CHAT_COMPLETIONS_SERVER_PORT}:8000"
     command: --model ${CHAT_COMPLETIONS_MODEL} --max-model-len ${CHAT_COMPLETIONS_MAX_MODEL_LEN}

   vllm-rocm:
@@ -149,8 +145,6 @@ services:
     build:
       context: https://github.com/atoma-network/vllm.git#main
       dockerfile: Dockerfile.rocm
-    ports:
-      - "127.0.0.1:${CHAT_COMPLETIONS_SERVER_PORT}:8000"
     command: --model ${CHAT_COMPLETIONS_MODEL} --max-model-len ${CHAT_COMPLETIONS_MAX_MODEL_LEN} --tensor-parallel-size ${VLLM_TENSOR_PARALLEL_SIZE}

   mistralrs-cpu:
@@ -160,26 +154,20 @@ services:
     build:
       context: https://github.com/EricLBuehler/mistral.rs.git
       dockerfile: Dockerfile
-    ports:
-      - "127.0.0.1:${CHAT_COMPLETIONS_SERVER_PORT}:80"
     command: plain -m ${CHAT_COMPLETIONS_MODEL}

   tei:
     <<: *inference-service-cuda
     container_name: embeddings
     profiles: [embeddings_tei]
     image: ${TEI_IMAGE}
-    ports:
-      - "127.0.0.1:${EMBEDDINGS_SERVER_PORT}:80"
     command: --model-id ${EMBEDDINGS_MODEL} --huggingface-hub-cache /root/.cache/huggingface/hub

   mistralrs:
     <<: *inference-service-cuda
     container_name: image-generations
     profiles: [image_generations_mistralrs]
     image: ${MISTRALRS_IMAGE}
-    ports:
-      - "127.0.0.1:${IMAGE_GENERATIONS_SERVER_PORT}:80"
     command: diffusion-plain -m ${IMAGE_GENERATIONS_MODEL} --arch ${IMAGE_GENERATIONS_ARCHITECTURE}

 networks:
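With the host mappings removed, the inference engines are reachable only from other containers on the Compose network. If host access is still wanted (for example, to probe vLLM while debugging), Compose's standard override mechanism can restore a binding locally without editing the tracked file. A minimal sketch, assuming the CUDA vLLM service key is vllm; the key itself sits above the visible hunk, so that name is an assumption:

# docker-compose.override.yaml — local-only sketch, not part of this PR
services:
  vllm:                      # assumed service key for the chat-completions container
    ports:
      - "127.0.0.1:${CHAT_COMPLETIONS_SERVER_PORT}:8000"   # re-publish vLLM's port to localhost only

Compose merges docker-compose.override.yaml automatically on docker compose up, so the binding can stay out of version control while the shared file keeps all ports closed.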