# docker-compose.prod.yml
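#
# Top-level keys prefixed with "x-" are Compose extension fields: Compose
# ignores them, so they exist only to define the YAML anchors (&default-app,
# &deploy) that the services below merge in via "<<: *default-app" and
# "<<: *deploy". The deploy: keys make this a Docker Swarm stack; assuming a
# base docker-compose.yml alongside this override, deployment would look
# something like:
#
#   docker stack deploy -c docker-compose.yml -c docker-compose.prod.yml radis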
x-app: &default-app
  image: radis_prod:latest
  volumes:
    - web_data:/var/www/web
    - ${SSL_CERT_FILE:?}:/etc/web/ssl/cert.pem
    - ${SSL_KEY_FILE:?}:/etc/web/ssl/key.pem
  environment:
    DJANGO_EMAIL_URL: ${DJANGO_EMAIL_URL:?}
    DJANGO_SETTINGS_MODULE: "radis.settings.production"
    DJANGO_STATIC_ROOT: "/var/www/web/static/"
    POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?}
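
# Default Swarm deploy policy merged into every service below: a single
# replica that is restarted up to three times on failure.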
x-deploy: &deploy
  replicas: 1
  restart_policy:
    condition: on-failure
    max_attempts: 3

services:
  # The manage.py commands below must not run inside the web containers in
  # production because the web service may have multiple replicas. Instead,
  # they run exactly once in this init service, and the web containers wait
  # for it to finish (signaled by ok_server listening on port 8000) before
  # they start serving.
  init:
    <<: *default-app
    hostname: init.local
    command: >
      bash -c "
      wait-for-it -s postgres.local:5432 -t 120 &&
      ./manage.py migrate &&
      ./manage.py collectstatic --no-input &&
      ./manage.py create_superuser &&
      wait-for-it -s llamacpp.local:8080 -t 60 &&
      ./manage.py ok_server --host 0.0.0.0 --port 8000
      "
    deploy:
      <<: *deploy
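
  # Public-facing Django application, served by Daphne (an ASGI server).
  # Daphne listens for plain HTTP on port 80 and terminates TLS itself on
  # port 443, using the certificate and key mounted in via SSL_CERT_FILE and
  # SSL_KEY_FILE. Runs with three replicas, overriding the default of one.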
  web:
    <<: *default-app
    build:
      target: production
    ports:
      - "${WEB_HTTP_PORT:-80}:80"
      - "${WEB_HTTPS_PORT:-443}:443"
    command: >
      bash -c "
      wait-for-it -s init.local:8000 -t 300 &&
      echo 'Starting web server ...' &&
      daphne -b 0.0.0.0 -p 80 \\
      -e ssl:443:privateKey=/etc/web/ssl/key.pem:certKey=/etc/web/ssl/cert.pem \\
      radis.asgi:application
      "
    deploy:
      <<: *deploy
      replicas: 3
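
  # Background task workers. Both run the same Django management command but
  # consume different queues, so long-running LLM jobs cannot starve the
  # default queue.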
  default_worker:
    <<: *default-app
    command: >
      bash -c "
      wait-for-it -s postgres.local:5432 -t 60 &&
      ./manage.py bg_worker -q default
      "
    deploy:
      <<: *deploy
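
  # Dedicated worker for the "llm" queue.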
  llm_worker:
    <<: *default-app
    command: >
      bash -c "
      wait-for-it -s postgres.local:5432 -t 60 &&
      ./manage.py bg_worker -q llm
      "
    deploy:
      <<: *deploy
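
  # The postgres service itself (image, data volume, hostname postgres.local)
  # is presumably defined in the base compose file; this production override
  # only injects the database password and the shared deploy policy.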
  postgres:
    environment:
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?}
    deploy:
      <<: *deploy
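
  # CUDA build of the llama.cpp HTTP server. On first start it downloads the
  # model from LLM_MODEL_URL into the LLAMA_CACHE directory, which is backed
  # by the models_data volume so the download survives container restarts.
  # --gpu-layers 99 offloads (up to) all model layers to the GPU; --ctx-size
  # sets the context window to 4096 tokens.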
  llamacpp_gpu:
    image: ghcr.io/ggerganov/llama.cpp:server-cuda
    hostname: llamacpp.local
    environment:
      HTTP_PROXY: ${HTTP_PROXY:-}
      HTTPS_PROXY: ${HTTPS_PROXY:-}
      LLAMA_CACHE: "/models"
      NO_PROXY: ${NO_PROXY:-}
    volumes:
      - models_data:/models
    entrypoint: >
      bash -c "
      /llama-server \\
      --model-url ${LLM_MODEL_URL} \\
      --host 0.0.0.0 \\
      --port 8080 \\
      --ctx-size 4096 \\
      --gpu-layers 99
      "
    deploy:
      <<: *deploy
      resources:
        reservations:
          # Reserve one discrete GPU per replica via Swarm generic resources;
          # the node must advertise a "gpu" resource, see
          # https://gist.github.com/medihack/6a6d24dc6376939e1919f32409c2119f
          generic_resources:
            - discrete_resource_spec:
                kind: "gpu"
                value: 1
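
# Named volumes: web_data holds the static files collected by the init job and
# served by the web replicas; models_data caches downloaded LLM model files.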
volumes:
  web_data:
  models_data: