From 0887b677d2a6eb3a5f5a06d08e503cbf537d916a Mon Sep 17 00:00:00 2001 From: mhsong Date: Fri, 10 May 2024 21:41:43 +0900 Subject: [PATCH 01/16] Add kubernetes inference deploy api --- .../kubernetes_inference_deploy/.gitignore | 4 + .../kubernetes_inference_deploy/Dockerfile | 13 ++ .../IaC/.gitignore | 1 + .../kubernetes_inference_deploy/IaC/main.tf | 33 ++++ .../IaC/var.tf.sample | 19 ++ .../kubernetes_inference_deploy/main.py | 177 ++++++++++++++++++ .../push_aws_ecr.sh.sample | 7 + 7 files changed, 254 insertions(+) create mode 100644 automation/kubernetes_inference_deploy/.gitignore create mode 100644 automation/kubernetes_inference_deploy/Dockerfile create mode 100644 automation/kubernetes_inference_deploy/IaC/.gitignore create mode 100644 automation/kubernetes_inference_deploy/IaC/main.tf create mode 100644 automation/kubernetes_inference_deploy/IaC/var.tf.sample create mode 100644 automation/kubernetes_inference_deploy/main.py create mode 100644 automation/kubernetes_inference_deploy/push_aws_ecr.sh.sample diff --git a/automation/kubernetes_inference_deploy/.gitignore b/automation/kubernetes_inference_deploy/.gitignore new file mode 100644 index 0000000000..ee1f4c4fb5 --- /dev/null +++ b/automation/kubernetes_inference_deploy/.gitignore @@ -0,0 +1,4 @@ +push_aws_ecr.sh +ecr_login.sh +get_kubeconfig.sh +*test* \ No newline at end of file diff --git a/automation/kubernetes_inference_deploy/Dockerfile b/automation/kubernetes_inference_deploy/Dockerfile new file mode 100644 index 0000000000..d50f453231 --- /dev/null +++ b/automation/kubernetes_inference_deploy/Dockerfile @@ -0,0 +1,13 @@ +FROM public.ecr.aws/lambda/python:3.11 + +RUN pip install awscli requests --no-cache-dir + +COPY main.py ${LAMBDA_TASK_ROOT} + +RUN chmod +x /var/task + +# x86_64 +RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \ + && chmod +x ./kubectl + +CMD ["main.handler"] \ No newline at end of file diff --git 
a/automation/kubernetes_inference_deploy/IaC/.gitignore b/automation/kubernetes_inference_deploy/IaC/.gitignore new file mode 100644 index 0000000000..4ee3ae2ee7 --- /dev/null +++ b/automation/kubernetes_inference_deploy/IaC/.gitignore @@ -0,0 +1 @@ +var.tf \ No newline at end of file diff --git a/automation/kubernetes_inference_deploy/IaC/main.tf b/automation/kubernetes_inference_deploy/IaC/main.tf new file mode 100644 index 0000000000..5dfbfdf561 --- /dev/null +++ b/automation/kubernetes_inference_deploy/IaC/main.tf @@ -0,0 +1,33 @@ +module "kubernetes_inference_deploy" { + source = "github.com/kookmin-sw/capstone-2024-12//IaC/serverless_api_template" + prefix = "kubernetes_inference_deploy" + container_registry = "694448341573.dkr.ecr.ap-northeast-2.amazonaws.com" + container_repository = "kubernetes-inference-deploy" + container_image_tag = "latest" + lambda_ram_size = 256 + attach_s3_policy = true + attach_ec2_policy = true + attach_eks_policy = true + attach_ssm_readonly_policy = true + region_name = var.region + eks_cluster_name = var.eks_cluster_name + db_api_url = var.db_api_url +} + +output "kubernetes_inference_deploy_function_url" { + value = module.kubernetes_inference_deploy.function_url +} + +provider "aws" { + region = var.region + profile = var.awscli_profile +} + +terraform { + backend "s3" { + bucket = "sskai-terraform-state" + key = "kubernetes_inference_deploy/tf.state" + region = "ap-northeast-2" + encrypt = true + } +} diff --git a/automation/kubernetes_inference_deploy/IaC/var.tf.sample b/automation/kubernetes_inference_deploy/IaC/var.tf.sample new file mode 100644 index 0000000000..758c8ab6c2 --- /dev/null +++ b/automation/kubernetes_inference_deploy/IaC/var.tf.sample @@ -0,0 +1,19 @@ +variable "region" { + type = string + default = "ap-northeast-2" +} + +variable "awscli_profile" { + type = string + default = "" +} + +variable "eks_cluster_name" { + type = string + default = "" +} + +variable "db_api_url" { + type = string + default = "" 
+} \ No newline at end of file diff --git a/automation/kubernetes_inference_deploy/main.py b/automation/kubernetes_inference_deploy/main.py new file mode 100644 index 0000000000..22843cec3e --- /dev/null +++ b/automation/kubernetes_inference_deploy/main.py @@ -0,0 +1,177 @@ +import subprocess +import requests +import boto3 +import os +import json + +kubectl = '/var/task/kubectl' +kubeconfig = '/tmp/kubeconfig' + +eks_cluster_name = os.getenv('EKS_CLUSTER_NAME') +region = os.getenv("REGION") +db_api_url = os.getenv("DB_API_URL") +ecr_uri = os.getenv("ECR_URI") + +# get eks cluster kubernetes configuration by aws cli +result_get_kubeconfig = subprocess.run([ + "aws", "eks", "update-kubeconfig", + "--name", eks_cluster_name, + "--region", region, + "--kubeconfig", kubeconfig +]) + +def generate_yaml(user_namespace, endpoint_uid, model_s3_url, node_pool_name, ram_size): + content = f"""--- +apiVersion: v1 +kind: Namespace +metadata: + name: {user_namespace} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + namespace: {user_namespace} + name: deployment-{endpoint_uid} +spec: + selector: + matchLabels: + app.kubernetes.io/name: app-{endpoint_uid} + replicas: 2 + template: + metadata: + labels: + app.kubernetes.io/name: app-{endpoint_uid} + spec: + containers: + - image: {ecr_uri}/kubernetes-inference:latest + imagePullPolicy: Always + name: app-{endpoint_uid} + ports: + - containerPort: 8080 + env: + - name: MODEL_S3_URL + value: {model_s3_url} + resources: + requests: + memory: {ram_size}M + nvidia.com/gpu: 1 + limits: + memory: {ram_size}M + nvidia.com/gpu: 1 + nodeSelector: + karpenter.sh/nodepool: {node_pool_name} +--- +apiVersion: v1 +kind: Service +metadata: + namespace: {user_namespace} + name: service-{endpoint_uid} +spec: + ports: + - port: 8080 + targetPort: 8080 + protocol: TCP + type: ClusterIP + selector: + app.kubernetes.io/name: app-{endpoint_uid} +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + namespace: {user_namespace} + name: 
ingress-{endpoint_uid} + annotations: + alb.ingress.kubernetes.io/scheme: internet-facing + alb.ingress.kubernetes.io/target-type: ip + alb.ingress.kubernetes.io/group.name: "{user_namespace}" +spec: + ingressClassName: alb + rules: + - http: + paths: + - path: /{endpoint_uid} + pathType: Prefix + backend: + service: + name: service-{endpoint_uid} + port: + number: 8080 +""" + + filepath = f"/tmp/{endpoint_uid}.yaml" + with open(filepath, 'w') as f: + f.write(content) + + return filepath + +def apply_yaml(user_namespace, endpoint_uid, model_s3_url, node_pool_name, ram_size): + nodepool_filename = generate_yaml(user_namespace, endpoint_uid, model_s3_url, node_pool_name, ram_size) + result = subprocess.run([ + kubectl, "apply", "-f", nodepool_filename, "--kubeconfig", kubeconfig + ]) + if result != 0: print("create resource returncode != 0") + return result + +def delete_resource(user_namespace, endpoint_uid): + deployment_name = f"deployment-{endpoint_uid}" + service_name = f"service-{endpoint_uid}" + ingress_name = f"ingress-{endpoint_uid}" + ingress_result = subprocess.run([ + kubectl, "-n", user_namespace, "delete", "ingress", ingress_name, "--kubeconfig", kubeconfig + ]) + service_result = subprocess.run([ + kubectl, "-n", user_namespace, "delete", "service", service_name, "--kubeconfig", kubeconfig + ]) + deployment_result = subprocess.run([ + kubectl, "-n", user_namespace, "delete", "deployment", deployment_name, "--kubeconfig", kubeconfig + ]) + result = 0 + if ingress_result != 0 or service_result != 0 or deployment_result != 0: + result = 1 + print("delete resource returncode != 0") + return result + +def handler(event, context): + body = json.loads(event.get("body", "{}")) + # 사용자 지정 값을 어디까지 받아올 것인지? 
+ user_uid = body.get("USER_UID") + endpoint_uid = body.get("ENDPOINT_NAME") + action = body.get("ACTION") + + if action == "create": + model_s3_url = body['model']['s3_url'] + node_pool_name = body['model']['deployment_type'] + ram_size = body['model']['max_used_ram'] + result = apply_yaml(user_uid, endpoint_uid, model_s3_url, node_pool_name, ram_size) + requests.put() + if result == 0: + return { + 'statusCode': 200, + 'body': "complete create inference endpoint" + } + else: + return { + 'statusCode': 500, + 'body': "error with create inference endpoint" + } + elif action == "delete": + result = delete_resource(user_uid, endpoint_uid) + if result == 0: + requests.delete(url=f"{db_api_url}/inferences/{endpoint_uid}") + return { + 'statusCode': 200, + 'body': "complete delete inference deployment" + } + else: + return { + 'statusCode': 500, + 'body': "error with delete inference endpoint" + } + else: + return { + 'statusCode': 500, + 'body': "invalid action" + } + +if __name__ == '__main__': + generate_yaml("5d9b890e-1316-4e25-8f67-829702a24331","828873F0-7F7D-4B5A-9ACE-BFA7B5A3D55A","https://sskai-model-storage.s3.ap-northeast-2.amazonaws.com/5d9b890e-1316-4e25-8f67-829702a24331/model/e45ac127-fb43-44de-9ea2-8091a43a700f/model.zip","nodepool-1",2048) \ No newline at end of file diff --git a/automation/kubernetes_inference_deploy/push_aws_ecr.sh.sample b/automation/kubernetes_inference_deploy/push_aws_ecr.sh.sample new file mode 100644 index 0000000000..28879fceab --- /dev/null +++ b/automation/kubernetes_inference_deploy/push_aws_ecr.sh.sample @@ -0,0 +1,7 @@ +#!/bin/sh + +ECR_URI="" + +aws ecr get-login-password --region ap-northeast-2 | docker login --username AWS --password-stdin $ECR_URI +docker build -t $ECR_URI/kubernetes-inference-deploy:latest . 
+docker push $ECR_URI/kubernetes-inference-deploy:latest \ No newline at end of file From b138c86d74bdc8d7c8bbd839c9d9acc2f70d0de2 Mon Sep 17 00:00:00 2001 From: mhsong Date: Fri, 10 May 2024 22:12:43 +0900 Subject: [PATCH 02/16] add healthcheck --- inference/template_code/kubernetes_app.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/inference/template_code/kubernetes_app.py b/inference/template_code/kubernetes_app.py index 424d8500b1..17758a2a8a 100644 --- a/inference/template_code/kubernetes_app.py +++ b/inference/template_code/kubernetes_app.py @@ -45,6 +45,12 @@ app = FastAPI() +@app.get("/") +async def healthcheck(): + return { + "body": "healthy" + } + @app.post("/") async def inference(data: dict): try: From 602fe5f259347d767746aa6ac35b9591496c273b Mon Sep 17 00:00:00 2001 From: mhsong Date: Fri, 10 May 2024 23:31:45 +0900 Subject: [PATCH 03/16] Update kubernetes_app.py --- inference/template_code/kubernetes_app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inference/template_code/kubernetes_app.py b/inference/template_code/kubernetes_app.py index 17758a2a8a..248fba93ba 100644 --- a/inference/template_code/kubernetes_app.py +++ b/inference/template_code/kubernetes_app.py @@ -51,7 +51,7 @@ async def healthcheck(): "body": "healthy" } -@app.post("/") +@app.post("/{full_path:path}") async def inference(data: dict): try: print(data) From 2be780af21d1583d86cbeafa8d5a1d0616210d82 Mon Sep 17 00:00:00 2001 From: mhsong Date: Fri, 10 May 2024 23:33:39 +0900 Subject: [PATCH 04/16] Update kubernetes-inference-deploy code --- automation/kubernetes_inference_deploy/main.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/automation/kubernetes_inference_deploy/main.py b/automation/kubernetes_inference_deploy/main.py index 22843cec3e..d9fbeedae7 100644 --- a/automation/kubernetes_inference_deploy/main.py +++ b/automation/kubernetes_inference_deploy/main.py @@ -134,8 +134,8 @@ def 
delete_resource(user_namespace, endpoint_uid): def handler(event, context): body = json.loads(event.get("body", "{}")) # 사용자 지정 값을 어디까지 받아올 것인지? - user_uid = body.get("USER_UID") - endpoint_uid = body.get("ENDPOINT_NAME") + user_uid = body.get("USER_UID").lower() + endpoint_uid = body.get("ENDPOINT_NAME").lower() action = body.get("ACTION") if action == "create": @@ -143,7 +143,11 @@ def handler(event, context): node_pool_name = body['model']['deployment_type'] ram_size = body['model']['max_used_ram'] result = apply_yaml(user_uid, endpoint_uid, model_s3_url, node_pool_name, ram_size) - requests.put() + endpoint_url = subprocess.run(f"kubectl get ingress -A | grep ingress-{endpoint_uid} | awk {'print $5'}", capture_output=True, text=True, shell=True).stdout.strip() + update_data = { + "endpiont": endpoint_url + } + requests.put(url=f"{db_api_url}/inferences/{endpoint_uid}", json=update_data) if result == 0: return { 'statusCode': 200, @@ -171,7 +175,4 @@ def handler(event, context): return { 'statusCode': 500, 'body': "invalid action" - } - -if __name__ == '__main__': - generate_yaml("5d9b890e-1316-4e25-8f67-829702a24331","828873F0-7F7D-4B5A-9ACE-BFA7B5A3D55A","https://sskai-model-storage.s3.ap-northeast-2.amazonaws.com/5d9b890e-1316-4e25-8f67-829702a24331/model/e45ac127-fb43-44de-9ea2-8091a43a700f/model.zip","nodepool-1",2048) \ No newline at end of file + } \ No newline at end of file From 4f0f82f1ffbe7377d4b4c696d65da19a394ede8c Mon Sep 17 00:00:00 2001 From: mhsong Date: Fri, 10 May 2024 23:45:58 +0900 Subject: [PATCH 05/16] Fix typo --- automation/kubernetes_inference_deploy/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/automation/kubernetes_inference_deploy/main.py b/automation/kubernetes_inference_deploy/main.py index d9fbeedae7..5ab1dc0be7 100644 --- a/automation/kubernetes_inference_deploy/main.py +++ b/automation/kubernetes_inference_deploy/main.py @@ -134,9 +134,9 @@ def delete_resource(user_namespace, endpoint_uid): def 
handler(event, context): body = json.loads(event.get("body", "{}")) # 사용자 지정 값을 어디까지 받아올 것인지? - user_uid = body.get("USER_UID").lower() - endpoint_uid = body.get("ENDPOINT_NAME").lower() - action = body.get("ACTION") + user_uid = body.get("user").lower() + endpoint_uid = body.get("uid").lower() + action = body.get("action") if action == "create": model_s3_url = body['model']['s3_url'] From 224ea249e1461ef8a65c50d7cd6433f810a2c463 Mon Sep 17 00:00:00 2001 From: mhsong Date: Fri, 10 May 2024 23:46:44 +0900 Subject: [PATCH 06/16] Fix Dockerfile --- automation/kubernetes_inference_deploy/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/automation/kubernetes_inference_deploy/Dockerfile b/automation/kubernetes_inference_deploy/Dockerfile index d50f453231..d7c845cb6f 100644 --- a/automation/kubernetes_inference_deploy/Dockerfile +++ b/automation/kubernetes_inference_deploy/Dockerfile @@ -2,12 +2,12 @@ FROM public.ecr.aws/lambda/python:3.11 RUN pip install awscli requests --no-cache-dir -COPY main.py ${LAMBDA_TASK_ROOT} - -RUN chmod +x /var/task - # x86_64 RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \ && chmod +x ./kubectl +COPY main.py ${LAMBDA_TASK_ROOT} + +RUN chmod +x /var/task + CMD ["main.handler"] \ No newline at end of file From 3eced945488cbbbe8fbed50197e36fb4e1f853b9 Mon Sep 17 00:00:00 2001 From: mhsong Date: Fri, 10 May 2024 23:50:00 +0900 Subject: [PATCH 07/16] Update code --- automation/kubernetes_inference_deploy/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/automation/kubernetes_inference_deploy/main.py b/automation/kubernetes_inference_deploy/main.py index 5ab1dc0be7..e834f7e5d2 100644 --- a/automation/kubernetes_inference_deploy/main.py +++ b/automation/kubernetes_inference_deploy/main.py @@ -105,9 +105,9 @@ def generate_yaml(user_namespace, endpoint_uid, model_s3_url, node_pool_name, ra return filepath def 
apply_yaml(user_namespace, endpoint_uid, model_s3_url, node_pool_name, ram_size): - nodepool_filename = generate_yaml(user_namespace, endpoint_uid, model_s3_url, node_pool_name, ram_size) + filename = generate_yaml(user_namespace, endpoint_uid, model_s3_url, node_pool_name, ram_size) result = subprocess.run([ - kubectl, "apply", "-f", nodepool_filename, "--kubeconfig", kubeconfig + kubectl, "apply", "-f", filename, "--kubeconfig", kubeconfig ]) if result != 0: print("create resource returncode != 0") return result From 074134f805248e133dca210d2f04d2c78ec7ad15 Mon Sep 17 00:00:00 2001 From: mhsong Date: Fri, 10 May 2024 23:52:11 +0900 Subject: [PATCH 08/16] Fix code --- automation/kubernetes_inference_deploy/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automation/kubernetes_inference_deploy/main.py b/automation/kubernetes_inference_deploy/main.py index e834f7e5d2..40595f562d 100644 --- a/automation/kubernetes_inference_deploy/main.py +++ b/automation/kubernetes_inference_deploy/main.py @@ -143,7 +143,7 @@ def handler(event, context): node_pool_name = body['model']['deployment_type'] ram_size = body['model']['max_used_ram'] result = apply_yaml(user_uid, endpoint_uid, model_s3_url, node_pool_name, ram_size) - endpoint_url = subprocess.run(f"kubectl get ingress -A | grep ingress-{endpoint_uid} | awk {'print $5'}", capture_output=True, text=True, shell=True).stdout.strip() + endpoint_url = subprocess.run(f"{kubectl} get ingress -A | grep ingress-{endpoint_uid} | awk {'print $5'}", capture_output=True, text=True, shell=True).stdout.strip() update_data = { "endpiont": endpoint_url } From 3ae6f1e7dfabc68e1213669a5ac327b1b06ee2aa Mon Sep 17 00:00:00 2001 From: mhsong Date: Fri, 10 May 2024 23:55:24 +0900 Subject: [PATCH 09/16] Fix error --- automation/kubernetes_inference_deploy/main.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/automation/kubernetes_inference_deploy/main.py 
b/automation/kubernetes_inference_deploy/main.py index 40595f562d..027d8885ee 100644 --- a/automation/kubernetes_inference_deploy/main.py +++ b/automation/kubernetes_inference_deploy/main.py @@ -109,8 +109,8 @@ def apply_yaml(user_namespace, endpoint_uid, model_s3_url, node_pool_name, ram_s result = subprocess.run([ kubectl, "apply", "-f", filename, "--kubeconfig", kubeconfig ]) - if result != 0: print("create resource returncode != 0") - return result + if result.returncode != 0: print("create resource returncode != 0") + return result.returncode def delete_resource(user_namespace, endpoint_uid): deployment_name = f"deployment-{endpoint_uid}" @@ -126,7 +126,7 @@ def delete_resource(user_namespace, endpoint_uid): kubectl, "-n", user_namespace, "delete", "deployment", deployment_name, "--kubeconfig", kubeconfig ]) result = 0 - if ingress_result != 0 or service_result != 0 or deployment_result != 0: + if ingress_result.returncode != 0 or service_result.returncode != 0 or deployment_result.returncode != 0: result = 1 print("delete resource returncode != 0") return result From 83047eb7b6694b59ce86debbaf2fcdc1b9fe8972 Mon Sep 17 00:00:00 2001 From: mhsong Date: Fri, 10 May 2024 23:57:50 +0900 Subject: [PATCH 10/16] Fix typo (endpiont, endpoint) --- automation/kubernetes_inference_deploy/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automation/kubernetes_inference_deploy/main.py b/automation/kubernetes_inference_deploy/main.py index 027d8885ee..9d84d45f52 100644 --- a/automation/kubernetes_inference_deploy/main.py +++ b/automation/kubernetes_inference_deploy/main.py @@ -145,7 +145,7 @@ def handler(event, context): result = apply_yaml(user_uid, endpoint_uid, model_s3_url, node_pool_name, ram_size) endpoint_url = subprocess.run(f"{kubectl} get ingress -A | grep ingress-{endpoint_uid} | awk {'print $5'}", capture_output=True, text=True, shell=True).stdout.strip() update_data = { - "endpiont": endpoint_url + "endpoint": endpoint_url } 
requests.put(url=f"{db_api_url}/inferences/{endpoint_uid}", json=update_data) if result == 0: From 971ce167ac753fe049468ac257c989670ace5c29 Mon Sep 17 00:00:00 2001 From: mhsong Date: Fri, 10 May 2024 23:59:48 +0900 Subject: [PATCH 11/16] Fix DB Input --- automation/kubernetes_inference_deploy/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/automation/kubernetes_inference_deploy/main.py b/automation/kubernetes_inference_deploy/main.py index 9d84d45f52..2461e55926 100644 --- a/automation/kubernetes_inference_deploy/main.py +++ b/automation/kubernetes_inference_deploy/main.py @@ -145,9 +145,9 @@ def handler(event, context): result = apply_yaml(user_uid, endpoint_uid, model_s3_url, node_pool_name, ram_size) endpoint_url = subprocess.run(f"{kubectl} get ingress -A | grep ingress-{endpoint_uid} | awk {'print $5'}", capture_output=True, text=True, shell=True).stdout.strip() update_data = { - "endpoint": endpoint_url + "endpoint": f"http://endpoint_url/{endpoint_uid}" } - requests.put(url=f"{db_api_url}/inferences/{endpoint_uid}", json=update_data) + response = requests.put(url=f"{db_api_url}/inferences/{endpoint_uid}", json=update_data) if result == 0: return { 'statusCode': 200, From d658681682ec85cc13c4f77a1810014a283c57d5 Mon Sep 17 00:00:00 2001 From: mhsong Date: Sat, 11 May 2024 00:02:40 +0900 Subject: [PATCH 12/16] Fix typo --- automation/kubernetes_inference_deploy/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automation/kubernetes_inference_deploy/main.py b/automation/kubernetes_inference_deploy/main.py index 2461e55926..e70aa90c44 100644 --- a/automation/kubernetes_inference_deploy/main.py +++ b/automation/kubernetes_inference_deploy/main.py @@ -145,7 +145,7 @@ def handler(event, context): result = apply_yaml(user_uid, endpoint_uid, model_s3_url, node_pool_name, ram_size) endpoint_url = subprocess.run(f"{kubectl} get ingress -A | grep ingress-{endpoint_uid} | awk {'print $5'}", capture_output=True, 
text=True, shell=True).stdout.strip() update_data = { - "endpoint": f"http://endpoint_url/{endpoint_uid}" + "endpoint": f"http://{endpoint_url}/{endpoint_uid}" } response = requests.put(url=f"{db_api_url}/inferences/{endpoint_uid}", json=update_data) if result == 0: From 30f92da0ff275a946d918e6203e1eeb033112dfb Mon Sep 17 00:00:00 2001 From: mhsong Date: Sat, 11 May 2024 00:10:34 +0900 Subject: [PATCH 13/16] Fix error --- automation/kubernetes_inference_deploy/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automation/kubernetes_inference_deploy/main.py b/automation/kubernetes_inference_deploy/main.py index e70aa90c44..2a1dd3e61c 100644 --- a/automation/kubernetes_inference_deploy/main.py +++ b/automation/kubernetes_inference_deploy/main.py @@ -143,7 +143,7 @@ def handler(event, context): node_pool_name = body['model']['deployment_type'] ram_size = body['model']['max_used_ram'] result = apply_yaml(user_uid, endpoint_uid, model_s3_url, node_pool_name, ram_size) - endpoint_url = subprocess.run(f"{kubectl} get ingress -A | grep ingress-{endpoint_uid} | awk {'print $5'}", capture_output=True, text=True, shell=True).stdout.strip() + endpoint_url = subprocess.run(f"{kubectl} get ingress -A --kubeconfig {kubeconfig} | grep ingress-{endpoint_uid} | awk {'print $5'}", capture_output=True, text=True, shell=True).stdout.strip() update_data = { "endpoint": f"http://{endpoint_url}/{endpoint_uid}" } From 6fc6ef2005502e5285ff0f5c9d51fcf83559c338 Mon Sep 17 00:00:00 2001 From: mh3ong Date: Sat, 11 May 2024 00:44:27 +0900 Subject: [PATCH 14/16] Fix Lambda Code --- automation/kubernetes_inference_deploy/main.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/automation/kubernetes_inference_deploy/main.py b/automation/kubernetes_inference_deploy/main.py index 2a1dd3e61c..c18ed107fe 100644 --- a/automation/kubernetes_inference_deploy/main.py +++ b/automation/kubernetes_inference_deploy/main.py @@ -143,7 +143,10 @@ def handler(event, 
context): node_pool_name = body['model']['deployment_type'] ram_size = body['model']['max_used_ram'] result = apply_yaml(user_uid, endpoint_uid, model_s3_url, node_pool_name, ram_size) - endpoint_url = subprocess.run(f"{kubectl} get ingress -A --kubeconfig {kubeconfig} | grep ingress-{endpoint_uid} | awk {'print $5'}", capture_output=True, text=True, shell=True).stdout.strip() + + cmd = "{} get ingress -A --kubeconfig {} | grep {}".format(kubectl, kubeconfig, endpoint_uid) + endpoint_url = subprocess.run(cmd, capture_output=True, shell=True).stdout.decode('utf-8').strip().split()[4] + print(f"endpoint_url: {endpoint_url}") update_data = { "endpoint": f"http://{endpoint_url}/{endpoint_uid}" } From a7a53e67c59a27cf67bea96e1d63000320dc68d7 Mon Sep 17 00:00:00 2001 From: mhsong Date: Sat, 11 May 2024 00:55:37 +0900 Subject: [PATCH 15/16] remove boto3 import --- automation/kubernetes_inference_deploy/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/automation/kubernetes_inference_deploy/main.py b/automation/kubernetes_inference_deploy/main.py index c18ed107fe..4dab468a5c 100644 --- a/automation/kubernetes_inference_deploy/main.py +++ b/automation/kubernetes_inference_deploy/main.py @@ -1,6 +1,5 @@ import subprocess import requests -import boto3 import os import json From cb4f321c02173d27cb41ce48c8a8915596bd8d3c Mon Sep 17 00:00:00 2001 From: mhsong Date: Sat, 11 May 2024 00:55:52 +0900 Subject: [PATCH 16/16] remove unused comment --- automation/kubernetes_inference_deploy/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/automation/kubernetes_inference_deploy/main.py b/automation/kubernetes_inference_deploy/main.py index 4dab468a5c..efe5946199 100644 --- a/automation/kubernetes_inference_deploy/main.py +++ b/automation/kubernetes_inference_deploy/main.py @@ -132,7 +132,6 @@ def delete_resource(user_namespace, endpoint_uid): def handler(event, context): body = json.loads(event.get("body", "{}")) - # 사용자 지정 값을 어디까지 받아올 것인지?
user_uid = body.get("user").lower() endpoint_uid = body.get("uid").lower() action = body.get("action")