From 3e0327111fda03084fca25723d8eb6b8ab562d50 Mon Sep 17 00:00:00 2001 From: Sylvain <35365065+sanderegg@users.noreply.github.com> Date: Wed, 4 Nov 2020 21:25:12 +0100 Subject: [PATCH] Bugfix/sidecar starts as cpu (#1932) * correctly read in the FORCE_START_CPU_MODE environ * check return value of container --- services/docker-compose.yml | 1 + .../simcore_service_sidecar/celery_configurator.py | 2 +- .../sidecar/src/simcore_service_sidecar/config.py | 6 +++--- .../sidecar/src/simcore_service_sidecar/utils.py | 11 ++++++++--- .../sidecar/tests/unit/test_celery_configurator.py | 12 +++++++++--- 5 files changed, 22 insertions(+), 10 deletions(-) diff --git a/services/docker-compose.yml b/services/docker-compose.yml index d95d24f7b01..2ee1236bda5 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -162,6 +162,7 @@ services: - REGISTRY_PW=${REGISTRY_PW} - SWARM_STACK_NAME=${SWARM_STACK_NAME:-simcore} - SIDECAR_LOGLEVEL=${LOG_LEVEL:-WARNING} + - START_AS_MODE_CPU=${SIDECAR_FORCE_CPU_NODE:-0} networks: - computational_services_subnet diff --git a/services/sidecar/src/simcore_service_sidecar/celery_configurator.py b/services/sidecar/src/simcore_service_sidecar/celery_configurator.py index 176dabef2c0..eec96e9a537 100644 --- a/services/sidecar/src/simcore_service_sidecar/celery_configurator.py +++ b/services/sidecar/src/simcore_service_sidecar/celery_configurator.py @@ -65,7 +65,7 @@ def define_celery_task(app: Celery, name: str) -> None: def configure_node(bootmode: BootMode) -> Celery: - log.info("Initializing celery app...") + log.info("Initializing celery app in %s...", bootmode) app = Celery( f"sidecar.{str(bootmode.name).lower()}.{config.SIDECAR_HOST_HOSTNAME_PATH.read_text()}", broker=config.CELERY_CONFIG.broker_url, diff --git a/services/sidecar/src/simcore_service_sidecar/config.py b/services/sidecar/src/simcore_service_sidecar/config.py index 46d92beebde..16ea9ab3ad9 100644 --- a/services/sidecar/src/simcore_service_sidecar/config.py +++ b/services/sidecar/src/simcore_service_sidecar/config.py @@ -1,8 +1,8 @@ import logging import multiprocessing import os +from distutils.util import strtobool from pathlib import Path -from typing import Optional from models_library.settings.celery import CeleryConfig @@ -66,8 +66,8 @@ logging.getLogger("sqlalchemy.pool").setLevel(SIDECAR_LOGLEVEL) # sidecar celery starting mode overwrite -FORCE_START_CPU_MODE: Optional[str] = os.environ.get("START_AS_MODE_CPU") -FORCE_START_GPU_MODE: Optional[str] = os.environ.get("START_AS_MODE_GPU") +FORCE_START_CPU_MODE: bool = strtobool(os.environ.get("START_AS_MODE_CPU", "false")) +FORCE_START_GPU_MODE: bool = strtobool(os.environ.get("START_AS_MODE_GPU", "false")) # if a node has this amount of CPUs it will be a candidate an MPI candidate TARGET_MPI_NODE_CPU_COUNT: int = int(os.environ.get("TARGET_MPI_NODE_CPU_COUNT", "-1")) diff --git a/services/sidecar/src/simcore_service_sidecar/utils.py b/services/sidecar/src/simcore_service_sidecar/utils.py index 2a9ab40a0de..e7bc9c55f2b 100644 --- a/services/sidecar/src/simcore_service_sidecar/utils.py +++ b/services/sidecar/src/simcore_service_sidecar/utils.py @@ -89,13 +89,18 @@ async def async_is_gpu_node() -> bool: "AttachStderr": False, "Tty": False, "OpenStdin": False, - "HostConfig": {"Init": True, "AutoRemove": True}, + "HostConfig": { + "Init": True, + "AutoRemove": True, + }, # NOTE: The Init parameter shows a weird behavior: no exception thrown when the container fails } try: - await docker.containers.run( + container = await docker.containers.run( config=spec_config, name=f"sidecar_{uuid.uuid4()}_test_gpu" ) - return True + + container_data = await container.wait(timeout=30) + return container_data["StatusCode"] == 0 except aiodocker.exceptions.DockerError as err: logger.debug( "is_gpu_node DockerError while check-run %s: %s", spec_config, err diff --git a/services/sidecar/tests/unit/test_celery_configurator.py b/services/sidecar/tests/unit/test_celery_configurator.py index d209734575e..9290c54df6a 100644 --- a/services/sidecar/tests/unit/test_celery_configurator.py +++ b/services/sidecar/tests/unit/test_celery_configurator.py @@ -15,7 +15,13 @@ def _toggle_gpu_mock(mocker, has_gpu: bool) -> None: containers_get = mocker.patch( "aiodocker.containers.DockerContainers.run", return_value=asyncio.Future() ) - containers_get.return_value.set_result("") + + class FakeContainer: + async def wait(self, **kwargs): + return {"StatusCode": 0 if has_gpu else 127} + + containers_get.return_value.set_result(FakeContainer()) + if not has_gpu: containers_get.side_effect = aiodocker.exceptions.DockerError( "MOCK Error", {"message": "this is a mocked exception"} @@ -46,12 +52,12 @@ def mock_node_has_gpu(request, mocker) -> None: @pytest.fixture def force_cpu_mode(monkeypatch): - monkeypatch.setattr(config, "FORCE_START_CPU_MODE", "1", raising=True) + monkeypatch.setattr(config, "FORCE_START_CPU_MODE", True, raising=True) @pytest.fixture def force_gpu_mode(monkeypatch): - monkeypatch.setattr(config, "FORCE_START_GPU_MODE", "1", raising=True) + monkeypatch.setattr(config, "FORCE_START_GPU_MODE", True, raising=True) @pytest.mark.parametrize(