Skip to content

Commit

Permalink
Merge pull request #55 from kookmin-sw/mhsong-dev
Browse files Browse the repository at this point in the history
Kubernetes Inference 배포 추가
  • Loading branch information
mh3ong authored May 11, 2024
2 parents 408eea9 + cb4f321 commit def9bf0
Show file tree
Hide file tree
Showing 8 changed files with 263 additions and 1 deletion.
4 changes: 4 additions & 0 deletions automation/kubernetes_inference_deploy/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
push_aws_ecr.sh
ecr_login.sh
get_kubeconfig.sh
*test*
13 changes: 13 additions & 0 deletions automation/kubernetes_inference_deploy/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Lambda container image that runs main.handler with the AWS CLI and kubectl available.
FROM public.ecr.aws/lambda/python:3.11

# awscli: main.py shells out to `aws eks update-kubeconfig`.
# requests: main.py calls the DB API with it.
RUN pip install awscli requests --no-cache-dir

# x86_64
# Download the current stable kubectl to the exact path main.py hard-codes
# (/var/task/kubectl) instead of relying on the image's working directory.
# -f makes curl exit non-zero on an HTTP error, so a failed download breaks
# the build rather than saving an error page as the "binary".
RUN curl -fLo /var/task/kubectl "https://dl.k8s.io/release/$(curl -fsSL https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl" \
    && chmod +x /var/task/kubectl

COPY main.py ${LAMBDA_TASK_ROOT}

RUN chmod +x /var/task

CMD ["main.handler"]
1 change: 1 addition & 0 deletions automation/kubernetes_inference_deploy/IaC/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
var.tf
33 changes: 33 additions & 0 deletions automation/kubernetes_inference_deploy/IaC/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Instantiates the shared serverless_api_template module for the
# kubernetes_inference_deploy function.
module "kubernetes_inference_deploy" {
  source = "github.com/kookmin-sw/capstone-2024-12//IaC/serverless_api_template"

  # Naming, container image location and memory size.
  prefix               = "kubernetes_inference_deploy"
  container_registry   = "694448341573.dkr.ecr.ap-northeast-2.amazonaws.com"
  container_repository = "kubernetes-inference-deploy"
  container_image_tag  = "latest"
  lambda_ram_size      = 256

  # Policy toggles; their exact effect is defined inside the template module.
  attach_s3_policy           = true
  attach_ec2_policy          = true
  attach_eks_policy          = true
  attach_ssm_readonly_policy = true

  # Values forwarded from the input variables.
  region_name      = var.region
  eks_cluster_name = var.eks_cluster_name
  db_api_url       = var.db_api_url
}

# Re-exports the function URL reported by the module.
output "kubernetes_inference_deploy_function_url" {
  value = module.kubernetes_inference_deploy.function_url
}

# AWS provider; region and CLI profile both come from input variables.
provider "aws" {
  region  = var.region
  profile = var.awscli_profile
}

# Terraform state is kept in S3 (encrypted) under this stack's own key.
terraform {
  backend "s3" {
    bucket  = "sskai-terraform-state"
    key     = "kubernetes_inference_deploy/tf.state"
    region  = "ap-northeast-2"
    encrypt = true
  }
}
19 changes: 19 additions & 0 deletions automation/kubernetes_inference_deploy/IaC/var.tf.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Sample input variables; fill in the empty defaults before use.

# AWS region, used by the provider and forwarded to the module as region_name.
variable "region" {
  type    = string
  default = "ap-northeast-2"
}

# AWS CLI profile name used by the provider.
variable "awscli_profile" {
  type    = string
  default = ""
}

# Name of the EKS cluster, forwarded to the module.
variable "eks_cluster_name" {
  type    = string
  default = ""
}

# Base URL of the DB API, forwarded to the module.
variable "db_api_url" {
  type    = string
  default = ""
}
179 changes: 179 additions & 0 deletions automation/kubernetes_inference_deploy/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import subprocess
import requests
import os
import json

# Location of the kubectl binary and of the kubeconfig file written below.
kubectl = '/var/task/kubectl'
kubeconfig = '/tmp/kubeconfig'

# Runtime configuration read from environment variables.
eks_cluster_name = os.getenv('EKS_CLUSTER_NAME')
region = os.getenv("REGION")
db_api_url = os.getenv("DB_API_URL")
ecr_uri = os.getenv("ECR_URI")

# Fetch the EKS cluster's Kubernetes configuration with the AWS CLI. This runs
# once at import time and writes the file that every later kubectl call
# receives through --kubeconfig.
result_get_kubeconfig = subprocess.run([
    "aws", "eks", "update-kubeconfig",
    "--name", eks_cluster_name,
    "--region", region,
    "--kubeconfig", kubeconfig
])
# Without a kubeconfig every later kubectl call fails, so make the root cause
# visible in the logs instead of discarding the return code.
if result_get_kubeconfig.returncode != 0:
    print("get kubeconfig returncode != 0")

def generate_yaml(user_namespace, endpoint_uid, model_s3_url, node_pool_name, ram_size):
    """Render the Kubernetes manifest for one inference endpoint and save it.

    The manifest contains four documents: the user's Namespace, a Deployment
    (2 replicas, one GPU each, memory request/limit of ``ram_size`` M), a
    ClusterIP Service on port 8080 and an ALB Ingress routing
    ``/{endpoint_uid}`` to that Service.

    Args:
        user_namespace: Namespace name; also used as the ALB group name.
        endpoint_uid: Identifier embedded in every resource name and in the
            Ingress path.
        model_s3_url: Value passed to the container as ``MODEL_S3_URL``.
        node_pool_name: Value for the ``karpenter.sh/nodepool`` node selector.
        ram_size: Memory amount; rendered with an ``M`` suffix.

    Returns:
        Path of the written manifest, ``/tmp/{endpoint_uid}.yaml``.
    """
    # A JSON string is also a valid YAML double-quoted scalar. Quoting keeps
    # URLs containing ':', '#', quotes or number-like text from being parsed
    # as something other than the string that `env[].value` requires.
    model_s3_url_yaml = json.dumps(str(model_s3_url))

    content = f"""---
apiVersion: v1
kind: Namespace
metadata:
  name: {user_namespace}
---
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: {user_namespace}
  name: deployment-{endpoint_uid}
spec:
  selector:
    matchLabels:
      app.kubernetes.io/name: app-{endpoint_uid}
  replicas: 2
  template:
    metadata:
      labels:
        app.kubernetes.io/name: app-{endpoint_uid}
    spec:
      containers:
      - image: {ecr_uri}/kubernetes-inference:latest
        imagePullPolicy: Always
        name: app-{endpoint_uid}
        ports:
        - containerPort: 8080
        env:
        - name: MODEL_S3_URL
          value: {model_s3_url_yaml}
        resources:
          requests:
            memory: {ram_size}M
            nvidia.com/gpu: 1
          limits:
            memory: {ram_size}M
            nvidia.com/gpu: 1
      nodeSelector:
        karpenter.sh/nodepool: {node_pool_name}
---
apiVersion: v1
kind: Service
metadata:
  namespace: {user_namespace}
  name: service-{endpoint_uid}
spec:
  ports:
  - port: 8080
    targetPort: 8080
    protocol: TCP
  type: ClusterIP
  selector:
    app.kubernetes.io/name: app-{endpoint_uid}
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  namespace: {user_namespace}
  name: ingress-{endpoint_uid}
  annotations:
    alb.ingress.kubernetes.io/scheme: internet-facing
    alb.ingress.kubernetes.io/target-type: ip
    alb.ingress.kubernetes.io/group.name: "{user_namespace}"
spec:
  ingressClassName: alb
  rules:
  - http:
      paths:
      - path: /{endpoint_uid}
        pathType: Prefix
        backend:
          service:
            name: service-{endpoint_uid}
            port:
              number: 8080
"""

    filepath = f"/tmp/{endpoint_uid}.yaml"
    with open(filepath, 'w') as f:
        f.write(content)

    return filepath

def apply_yaml(user_namespace, endpoint_uid, model_s3_url, node_pool_name, ram_size):
    """Generate the endpoint manifest and apply it to the cluster.

    All arguments are forwarded unchanged to ``generate_yaml``.

    Returns:
        The exit code of ``kubectl apply`` (0 on success). A non-zero code is
        also reported on stdout.
    """
    manifest_path = generate_yaml(
        user_namespace, endpoint_uid, model_s3_url, node_pool_name, ram_size
    )
    command = [kubectl, "apply", "-f", manifest_path, "--kubeconfig", kubeconfig]
    exit_code = subprocess.run(command).returncode
    if exit_code != 0:
        print("create resource returncode != 0")
    return exit_code

def delete_resource(user_namespace, endpoint_uid):
    """Delete the endpoint's Ingress, Service and Deployment, in that order.

    Every deletion is attempted even if an earlier one fails.

    Returns:
        0 when all three ``kubectl delete`` calls exit with 0, otherwise 1
        (and a message is printed).
    """
    # Resource names follow the "<kind>-<endpoint_uid>" pattern used at creation.
    exit_codes = [
        subprocess.run([
            kubectl, "-n", user_namespace, "delete", kind, f"{kind}-{endpoint_uid}", "--kubeconfig", kubeconfig
        ]).returncode
        for kind in ("ingress", "service", "deployment")
    ]
    if any(code != 0 for code in exit_codes):
        print("delete resource returncode != 0")
        return 1
    return 0

def _find_ingress_address(endpoint_uid):
    """Return the fifth column of the first ingress row mentioning endpoint_uid, or None."""
    # Arguments are passed as a list (no shell), so a request-supplied
    # endpoint_uid can never be interpreted as shell syntax.
    listing = subprocess.run(
        [kubectl, "get", "ingress", "-A", "--kubeconfig", kubeconfig],
        capture_output=True,
    ).stdout.decode('utf-8')
    for line in listing.splitlines():
        if endpoint_uid in line:
            columns = line.split()
            # NOTE(review): index 4 is presumably the ADDRESS column of
            # kubectl's table output; if the load balancer address is not
            # assigned yet the columns may shift -- confirm.
            if len(columns) > 4:
                return columns[4]
    return None


def handler(event, context):
    """Lambda entry point: create or delete an inference endpoint.

    ``event["body"]`` is a JSON string with the keys ``user``, ``uid`` and
    ``action`` ("create" or "delete"). For "create" it also needs
    ``model.s3_url``, ``model.deployment_type`` and ``model.max_used_ram``.
    ``user`` and ``uid`` are lower-cased before use.

    Returns:
        A dict with ``statusCode`` and ``body``:
        200 on success, 400 when the request body is malformed or misses a
        required field, 500 when a kubectl step fails or the action is unknown.
    """
    bad_request = {
        'statusCode': 400,
        'body': "invalid request body"
    }

    try:
        body = json.loads(event.get("body") or "{}")
    except (TypeError, ValueError):
        return bad_request
    if not isinstance(body, dict):
        return bad_request

    user = body.get("user")
    uid = body.get("uid")
    # Both identifiers end up in resource names; reject missing or empty ones
    # here rather than crashing on `.lower()`.
    if not (isinstance(user, str) and user and isinstance(uid, str) and uid):
        return bad_request
    user_uid = user.lower()
    endpoint_uid = uid.lower()
    action = body.get("action")

    if action == "create":
        try:
            model = body['model']
            model_s3_url = model['s3_url']
            node_pool_name = model['deployment_type']
            ram_size = model['max_used_ram']
        except (KeyError, TypeError):
            return bad_request

        create_failed = {
            'statusCode': 500,
            'body': "error with create inference endpoint"
        }
        result = apply_yaml(user_uid, endpoint_uid, model_s3_url, node_pool_name, ram_size)
        if result != 0:
            # Do not record an endpoint in the DB for resources that were
            # never created.
            return create_failed

        endpoint_url = _find_ingress_address(endpoint_uid)
        if endpoint_url is None:
            print("ingress address not found")
            return create_failed
        print(f"endpoint_url: {endpoint_url}")
        update_data = {
            "endpoint": f"http://{endpoint_url}/{endpoint_uid}"
        }
        requests.put(url=f"{db_api_url}/inferences/{endpoint_uid}", json=update_data)
        return {
            'statusCode': 200,
            'body': "complete create inference endpoint"
        }
    elif action == "delete":
        result = delete_resource(user_uid, endpoint_uid)
        if result == 0:
            requests.delete(url=f"{db_api_url}/inferences/{endpoint_uid}")
            return {
                'statusCode': 200,
                'body': "complete delete inference deployment"
            }
        else:
            return {
                'statusCode': 500,
                'body': "error with delete inference endpoint"
            }
    else:
        return {
            'statusCode': 500,
            'body': "invalid action"
        }
7 changes: 7 additions & 0 deletions automation/kubernetes_inference_deploy/push_aws_ecr.sh.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/sh
# Log in to ECR, build the kubernetes-inference-deploy image and push it.

# Abort on the first failing command so a failed login or build is never
# followed by a push.
set -e

# ECR registry URI; fill in before use.
ECR_URI=""

aws ecr get-login-password --region ap-northeast-2 | docker login --username AWS --password-stdin "$ECR_URI"
docker build -t "$ECR_URI/kubernetes-inference-deploy:latest" .
docker push "$ECR_URI/kubernetes-inference-deploy:latest"
8 changes: 7 additions & 1 deletion inference/template_code/kubernetes_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,13 @@

app = FastAPI()

@app.post("/")
@app.get("/")
async def healthcheck():
    """Answer GET / with a fixed payload so callers can tell the app is up."""
    return {"body": "healthy"}

@app.post("/{full_path:path}")
async def inference(data: dict):
try:
print(data)
Expand Down

0 comments on commit def9bf0

Please sign in to comment.