diff --git a/ai-ml/jupyterhub/addons.tf b/ai-ml/jupyterhub/addons.tf index 5f0bccbd4..864ce10bd 100755 --- a/ai-ml/jupyterhub/addons.tf +++ b/ai-ml/jupyterhub/addons.tf @@ -302,7 +302,7 @@ module "eks_data_addons" { jupyter_single_user_sa_name = kubernetes_service_account_v1.jupyterhub_single_user_sa.metadata[0].name region = var.region })] - version = "3.2.1" + version = "3.2.1" } #--------------------------------------------------------------- diff --git a/ai-ml/trainium-inferentia/addons.tf b/ai-ml/trainium-inferentia/addons.tf index c523bb1ed..6b30dbbff 100644 --- a/ai-ml/trainium-inferentia/addons.tf +++ b/ai-ml/trainium-inferentia/addons.tf @@ -321,9 +321,6 @@ module "eks_data_addons" { - provisionerType: Karpenter - hub.jupyter.org/node-purpose: user taints: - - key: aws.amazon.com/neuroncore - value: "true" - effect: "NoSchedule" - key: aws.amazon.com/neuron value: "true" effect: "NoSchedule" diff --git a/ai-ml/trainium-inferentia/eks.tf b/ai-ml/trainium-inferentia/eks.tf index 9a28b1b92..3518dfae1 100644 --- a/ai-ml/trainium-inferentia/eks.tf +++ b/ai-ml/trainium-inferentia/eks.tf @@ -529,12 +529,7 @@ module "eks" { key = "aws.amazon.com/neuron", value = "true", effect = "NO_SCHEDULE" - }, - { - key = "aws.amazon.com/neuroncore", - value = "true", - effect = "NO_SCHEDULE" - }, + } ] tags = merge(local.tags, { @@ -586,12 +581,7 @@ module "eks" { key = "aws.amazon.com/neuron", value = true, effect = "NO_SCHEDULE" - }, - { - key = "aws.amazon.com/neuroncore", - value = true, - effect = "NO_SCHEDULE" - }, + } ] tags = merge(local.tags, { diff --git a/ai-ml/trainium-inferentia/examples/inference/ray-serve/llama2-inf2/ray-service-llama2.yaml b/ai-ml/trainium-inferentia/examples/inference/ray-serve/llama2-inf2/ray-service-llama2.yaml index 05b409d43..4dda9bb3f 100644 --- a/ai-ml/trainium-inferentia/examples/inference/ray-serve/llama2-inf2/ray-service-llama2.yaml +++ b/ai-ml/trainium-inferentia/examples/inference/ray-serve/llama2-inf2/ray-service-llama2.yaml @@ -1,4 +1,3 @@ ---- apiVersion: v1 kind: Namespace metadata: @@ -14,7 +13,7 @@ spec: serviceUnhealthySecondThreshold: 900 deploymentUnhealthySecondThreshold: 300 serveConfig: - importPath: ray_serve_llama2:entrypoint # Specify the correct path to your Python script + importPath: ray_serve_llama2:entrypoint # Specify the correct path to your Python script runtimeEnv: | env_vars: {"MODEL_ID": "NousResearch/Llama-2-13b-chat-hf"} # Replace with the appropriate model ID @@ -31,79 +30,76 @@ spec: template: spec: containers: - - name: ray-head - image: public.ecr.aws/data-on-eks/ray-serve-inf2-llama2:latest # Image created using the Dockerfile attached in the folder - imagePullPolicy: Always # Ensure the image is always pulled when updated - lifecycle: - preStop: - exec: - command: [ "/bin/sh","-c","ray stop" ] - ports: - - containerPort: 6379 - name: gcs - - containerPort: 8265 - name: dashboard - - containerPort: 10001 - name: client - - containerPort: 8000 - name: serve - volumeMounts: - - mountPath: /tmp/ray - name: ray-logs - resources: - limits: - cpu: 4 - memory: 20Gi - requests: - cpu: 4 - memory: 20Gi + - name: ray-head + image: public.ecr.aws/data-on-eks/ray-serve-inf2-llama2:latest # Image created using the Dockerfile attached in the folder + imagePullPolicy: Always # Ensure the image is always pulled when updated + lifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "ray stop"] + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8000 + name: serve + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + resources: + limits: + cpu: 4 + memory: 20Gi + requests: + cpu: 4 + memory: 20Gi nodeSelector: # This is using Karpenter Nodes with the provisioner label instanceType: mixed-x86 provisionerType: Karpenter workload: rayhead volumes: - - name: ray-logs - emptyDir: {} + - name: ray-logs + emptyDir: {} workerGroupSpecs: - - groupName: inf2-worker-group - replicas: 1 - minReplicas: 1 - maxReplicas: 1 - rayStartParams: {} - template: - spec: - containers: - - name: ray-worker - image: public.ecr.aws/data-on-eks/ray-serve-inf2-llama2:latest - imagePullPolicy: Always # Ensure the image is always pulled when updated - lifecycle: - preStop: - exec: - command: [ "/bin/sh","-c","ray stop" ] - resources: - limits: - cpu: "180" - memory: "700G" - aws.amazon.com/neuron: "12" - requests: - cpu: "180" - memory: "700G" - aws.amazon.com/neuron: "12" - nodeSelector: - instanceType: inferentia-inf2 - provisionerType: Karpenter - tolerations: - - key: "aws.amazon.com/neuroncore" - operator: "Exists" - effect: "NoSchedule" - - key: "aws.amazon.com/neuron" - operator: "Exists" - effect: "NoSchedule" - - key: "hub.jupyter.org/dedicated" - operator: "Equal" - value: "user" - effect: "NoSchedule" + - groupName: inf2-worker-group + replicas: 1 + minReplicas: 1 + maxReplicas: 1 + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: public.ecr.aws/data-on-eks/ray-serve-inf2-llama2:latest + imagePullPolicy: Always # Ensure the image is always pulled when updated + lifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "ray stop"] + resources: + limits: + cpu: "180" + memory: "700G" + aws.amazon.com/neuron: "12" + requests: + cpu: "180" + memory: "700G" + aws.amazon.com/neuron: "12" + nodeSelector: + instanceType: inferentia-inf2 + provisionerType: Karpenter + tolerations: + - key: "aws.amazon.com/neuron" + operator: "Exists" + effect: "NoSchedule" + - key: "hub.jupyter.org/dedicated" + operator: "Equal" + value: "user" + effect: "NoSchedule" --- @@ -117,21 +113,21 @@ metadata: spec: ingressClassName: nginx rules: - - http: - paths: - # Ray Dashboard - - path: /dashboard/(.*) - pathType: ImplementationSpecific - backend: - service: - name: llama2-service - port: - number: 8265 - # Ray Serve - - path: /serve/(.*) - pathType: ImplementationSpecific - backend: - service: - name: llama2-service - port: - number: 8000 + - http: + paths: + # Ray Dashboard + - path: /dashboard/(.*) + pathType: ImplementationSpecific + backend: + service: + name: llama2-service + port: + number: 8265 + # Ray Serve + - path: /serve/(.*) + pathType: ImplementationSpecific + backend: + service: + name: llama2-service + port: + number: 8000 diff --git a/ai-ml/trainium-inferentia/examples/inference/ray-serve/stable-diffusion-inf2/ray-service-stablediffusion.yaml b/ai-ml/trainium-inferentia/examples/inference/ray-serve/stable-diffusion-inf2/ray-service-stablediffusion.yaml index ebde03cb8..2cf5d2eae 100644 --- a/ai-ml/trainium-inferentia/examples/inference/ray-serve/stable-diffusion-inf2/ray-service-stablediffusion.yaml +++ b/ai-ml/trainium-inferentia/examples/inference/ray-serve/stable-diffusion-inf2/ray-service-stablediffusion.yaml @@ -114,9 +114,6 @@ spec: instanceType: inferentia-inf2 provisionerType: Karpenter tolerations: - - key: "aws.amazon.com/neuroncore" - operator: "Exists" - effect: "NoSchedule" - key: "aws.amazon.com/neuron" operator: "Exists" effect: "NoSchedule" diff --git a/website/docs/gen-ai/excalidraw/stable-diffusion-inf2.excalidraw b/website/docs/gen-ai/excalidraw/stable-diffusion-inf2.excalidraw index 9da459865..0f471da55 100644 --- a/website/docs/gen-ai/excalidraw/stable-diffusion-inf2.excalidraw +++ b/website/docs/gen-ai/excalidraw/stable-diffusion-inf2.excalidraw @@ -5458,4 +5458,4 @@ "lastRetrieved": 1708463794770 } } -} \ No newline at end of file +}