From 2abde51b7782d67ba5b1285327170d36800e74a3 Mon Sep 17 00:00:00 2001 From: Heiru Wu Date: Tue, 15 Oct 2024 17:26:11 +0800 Subject: [PATCH] feat(ray): support shorter downscale config for test models --- instill/helpers/__init__.py | 1 + instill/helpers/ray_config.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/instill/helpers/__init__.py b/instill/helpers/__init__.py index 2be8ecd..928a663 100644 --- a/instill/helpers/__init__.py +++ b/instill/helpers/__init__.py @@ -1,5 +1,6 @@ # pylint: disable=no-name-in-module from instill.helpers.protobufs.ray_pb2 import CallRequest, CallResponse +from instill.helpers.ray_config import InstillDeployable, instill_deployment from instill.helpers.ray_io import ( construct_custom_output, construct_task_chat_output, diff --git a/instill/helpers/ray_config.py b/instill/helpers/ray_config.py index 1aa0bd6..373adb1 100644 --- a/instill/helpers/ray_config.py +++ b/instill/helpers/ray_config.py @@ -31,6 +31,7 @@ class InstillDeployable: def __init__(self, deployable: Deployment) -> None: self._deployment: Deployment = deployable + self._autoscaling_config = DEFAULT_AUTOSCALING_CONFIG.copy() num_of_cpus = os.getenv(ENV_NUM_OF_CPUS) if num_of_cpus is not None and num_of_cpus != "": @@ -39,6 +40,7 @@ def __init__(self, deployable: Deployment) -> None: is_test_model = os.getenv(ENV_IS_TEST_MODEL) if is_test_model is not None and is_test_model.lower() == "true": self._update_num_cpus(float(0.001)) + self._update_downscale_delay(60) memory = os.getenv(ENV_MEMORY) if memory is not None and memory != "": @@ -164,23 +166,24 @@ def _update_custom_resource(self, resource_name: str, ratio=0.001): return self def _update_min_replicas(self, num_replicas: int): - new_autoscaling_config = DEFAULT_AUTOSCALING_CONFIG - new_autoscaling_config["min_replicas"] = num_replicas + self._autoscaling_config["min_replicas"] = num_replicas self._deployment = self._deployment.options( - autoscaling_config=new_autoscaling_config + autoscaling_config=self._autoscaling_config ) return self def _update_max_replicas(self, num_replicas: int): - new_autoscaling_config = DEFAULT_AUTOSCALING_CONFIG - new_autoscaling_config["max_replicas"] = num_replicas + self._autoscaling_config["max_replicas"] = num_replicas self._deployment = self._deployment.options( - autoscaling_config=new_autoscaling_config + autoscaling_config=self._autoscaling_config ) return self + def _update_downscale_delay(self, downscale_delay_s: int): + self._autoscaling_config["downscale_delay_s"] = downscale_delay_s + def get_deployment_handle(self): return self._deployment.bind()