Skip to content

Commit

Permalink
fix replicas query
Browse files Browse the repository at this point in the history
  • Loading branch information
SawyerCzupka committed Nov 4, 2024
1 parent 70d9580 commit ea73f16
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 11 deletions.
5 changes: 5 additions & 0 deletions deployment/vllm/autoscaler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ spec:
containers:
- name: autoscaler
image: ghcr.io/wmgeolab/k8s-autoscaler:latest # Update this to your image
imagePullPolicy: Always
securityContext:
runAsGroup: 50036
runAsUser: 237827
Expand All @@ -38,6 +39,10 @@ spec:
env:
- name: KUBECONFIG
value: /app/.kube/config
- name: UVICORN_LOG_LEVEL
value: "debug"
- name: LOG_LEVEL
value: "debug"
- name: VLLM_SERVICE_HOST
value: "vllm-svc"
- name: VLLM_SERVICE_PORT
Expand Down
4 changes: 2 additions & 2 deletions k8s-autoscaler/k8s_autoscaler/api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ async def health_check(
):
"""Health check endpoint."""
phase = await kube.get_pod_phase()
current_replicas, desired_replicas = await kube.get_replicas()
current_replicas = await kube.get_replicas()
return {
"status": "healthy",
"vllm_status": phase,
"vllm_running": phase == PodPhase.RUNNING,
"current_replicas": current_replicas,
"desired_replicas": desired_replicas,
# "desired_replicas": desired_replicas,
"last_activity": time.strftime(
"%Y-%m-%d %H:%M:%S", time.localtime(state.last_activity)
),
Expand Down
22 changes: 14 additions & 8 deletions k8s-autoscaler/k8s_autoscaler/kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ async def execute(self, cmd: str) -> tuple[bool, str]:
output = stdout.decode().strip() if success else stderr.decode().strip()
if not success:
logger.error(f"kubectl command failed: {output}")

logger.debug(f"Command output: `{output}`")
return success, output
except Exception as e:
logger.error(f"kubectl command failed: {e}")
Expand Down Expand Up @@ -60,27 +62,31 @@ async def scale_deployment(self, replicas: int) -> bool:
logger.error(f"Invalid replica count: {replicas}")
return False

# success, output = await self.execute(
# f"scale deployment {self.settings.vllm_deployment} --replicas={replicas}"
# )

success, output = await self.execute(
f"scale deployment {self.settings.vllm_deployment} --replicas={replicas}"
f"patch deployment {self.settings.vllm_deployment} -p '{{\"spec\":{{\"replicas\":{replicas}}}}}'"
)

if success:
logger.info(f"Successfully scaled deployment to {replicas} replicas")
return success

async def get_replicas(self) -> tuple[int, int]:
"""Get current and desired replica counts."""
async def get_replicas(self) -> int:
"""Get desired replica count."""
cmd = (
f"get deployment {self.settings.vllm_deployment} "
"-o jsonpath='{.status.replicas} {.spec.replicas}'"
"-o jsonpath='{.spec.replicas}'"
)
success, output = await self.execute(cmd)
if success and output:
try:
current, desired = map(int, output.split())
return current, desired
return int(output)
except ValueError:
logger.error(f"Failed to parse replica counts: {output}")
return -1, -1
logger.error(f"Failed to parse replica count: {output}")
return -1

async def deployment_exists(self) -> bool:
"""Check if the vLLM deployment exists."""
Expand Down
2 changes: 1 addition & 1 deletion k8s-autoscaler/k8s_autoscaler/vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ async def monitor_inactivity(self):
if not await self.kube.deployment_exists():
break

current_replicas, _ = await self.kube.get_replicas()
current_replicas = await self.kube.get_replicas()
if (
time.time() - self.state.last_activity
> self.settings.inactivity_timeout
Expand Down

0 comments on commit ea73f16

Please sign in to comment.