fix: Remove unwanted taints for neuron-device-plugin ds for stable diffusion and llama2 models (#479)
awslabs · Mar 29, 2024 · 2a1879c
1 parent 1b9cc4b
commit 2a1879c
Show file tree

Hide file tree

Showing 6 changed files with 86 additions and 106 deletions.
diff --git a/ai-ml/jupyterhub/addons.tf b/ai-ml/jupyterhub/addons.tf
@@ -302,7 +302,7 @@ module "eks_data_addons" {
       jupyter_single_user_sa_name = kubernetes_service_account_v1.jupyterhub_single_user_sa.metadata[0].name
       region                      = var.region
     })]
-    version                     = "3.2.1"
+    version = "3.2.1"
   }
 
   #---------------------------------------------------------------

diff --git a/ai-ml/trainium-inferentia/addons.tf b/ai-ml/trainium-inferentia/addons.tf
@@ -321,9 +321,6 @@ module "eks_data_addons" {
           - provisionerType: Karpenter
           - hub.jupyter.org/node-purpose: user
         taints:
-          - key: aws.amazon.com/neuroncore
-            value: "true"
-            effect: "NoSchedule"
           - key: aws.amazon.com/neuron
             value: "true"
             effect: "NoSchedule"

diff --git a/ai-ml/trainium-inferentia/eks.tf b/ai-ml/trainium-inferentia/eks.tf
@@ -529,12 +529,7 @@ module "eks" {
           key    = "aws.amazon.com/neuron",
           value  = "true",
           effect = "NO_SCHEDULE"
-        },
-        {
-          key    = "aws.amazon.com/neuroncore",
-          value  = "true",
-          effect = "NO_SCHEDULE"
-        },
+        }
       ]
 
       tags = merge(local.tags, {
@@ -586,12 +581,7 @@ module "eks" {
           key    = "aws.amazon.com/neuron",
           value  = true,
           effect = "NO_SCHEDULE"
-        },
-        {
-          key    = "aws.amazon.com/neuroncore",
-          value  = true,
-          effect = "NO_SCHEDULE"
-        },
+        }
       ]
 
       tags = merge(local.tags, {

diff --git a/ai-ml/trainium-inferentia/examples/inference/ray-serve/llama2-inf2/ray-service-llama2.yaml b/ai-ml/trainium-inferentia/examples/inference/ray-serve/llama2-inf2/ray-service-llama2.yaml
@@ -1,4 +1,3 @@
----
 apiVersion: v1
 kind: Namespace
 metadata:
@@ -14,7 +13,7 @@ spec:
   serviceUnhealthySecondThreshold: 900
   deploymentUnhealthySecondThreshold: 300
   serveConfig:
-    importPath: ray_serve_llama2:entrypoint  # Specify the correct path to your Python script
+    importPath: ray_serve_llama2:entrypoint # Specify the correct path to your Python script
     runtimeEnv: |
       env_vars: {"MODEL_ID": "NousResearch/Llama-2-13b-chat-hf"}  # Replace with the appropriate model ID
 
@@ -31,79 +30,76 @@ spec:
       template:
         spec:
           containers:
-            - name: ray-head
-              image: public.ecr.aws/data-on-eks/ray-serve-inf2-llama2:latest # Image created using the Dockerfile attached in the folder
-              imagePullPolicy: Always # Ensure the image is always pulled when updated
-              lifecycle:
-                preStop:
-                  exec:
-                    command: [ "/bin/sh","-c","ray stop" ]
-              ports:
-                - containerPort: 6379
-                  name: gcs
-                - containerPort: 8265
-                  name: dashboard
-                - containerPort: 10001
-                  name: client
-                - containerPort: 8000
-                  name: serve
-              volumeMounts:
-                - mountPath: /tmp/ray
-                  name: ray-logs
-              resources:
-                limits:
-                  cpu: 4
-                  memory: 20Gi
-                requests:
-                  cpu: 4
-                  memory: 20Gi
+          - name: ray-head
+            image: public.ecr.aws/data-on-eks/ray-serve-inf2-llama2:latest # Image created using the Dockerfile attached in the folder
+            imagePullPolicy: Always # Ensure the image is always pulled when updated
+            lifecycle:
+              preStop:
+                exec:
+                  command: ["/bin/sh", "-c", "ray stop"]
+            ports:
+            - containerPort: 6379
+              name: gcs
+            - containerPort: 8265
+              name: dashboard
+            - containerPort: 10001
+              name: client
+            - containerPort: 8000
+              name: serve
+            volumeMounts:
+            - mountPath: /tmp/ray
+              name: ray-logs
+            resources:
+              limits:
+                cpu: 4
+                memory: 20Gi
+              requests:
+                cpu: 4
+                memory: 20Gi
           nodeSelector: # This is using Karpenter Nodes with the provisioner label
             instanceType: mixed-x86
             provisionerType: Karpenter
             workload: rayhead
           volumes:
-            - name: ray-logs
-              emptyDir: {}
+          - name: ray-logs
+            emptyDir: {}
 
     workerGroupSpecs:
-      - groupName: inf2-worker-group
-        replicas: 1
-        minReplicas: 1
-        maxReplicas: 1
-        rayStartParams: {}
-        template:
-          spec:
-            containers:
-              - name: ray-worker
-                image: public.ecr.aws/data-on-eks/ray-serve-inf2-llama2:latest
-                imagePullPolicy: Always # Ensure the image is always pulled when updated
-                lifecycle:
-                  preStop:
-                    exec:
-                      command: [ "/bin/sh","-c","ray stop" ]
-                resources:
-                  limits:
-                    cpu: "180"
-                    memory: "700G"
-                    aws.amazon.com/neuron: "12"
-                  requests:
-                    cpu: "180"
-                    memory: "700G"
-                    aws.amazon.com/neuron: "12"
-            nodeSelector:
-              instanceType: inferentia-inf2
-              provisionerType: Karpenter
-            tolerations:
-              - key: "aws.amazon.com/neuroncore"
-                operator: "Exists"
-                effect: "NoSchedule"
-              - key: "aws.amazon.com/neuron"
-                operator: "Exists"
-                effect: "NoSchedule"
-              - key: "hub.jupyter.org/dedicated"
-                operator: "Equal"
-                value: "user"
-                effect: "NoSchedule"
+    - groupName: inf2-worker-group
+      replicas: 1
+      minReplicas: 1
+      maxReplicas: 1
+      rayStartParams: {}
+      template:
+        spec:
+          containers:
+          - name: ray-worker
+            image: public.ecr.aws/data-on-eks/ray-serve-inf2-llama2:latest
+            imagePullPolicy: Always # Ensure the image is always pulled when updated
+            lifecycle:
+              preStop:
+                exec:
+                  command: ["/bin/sh", "-c", "ray stop"]
+            resources:
+              limits:
+                cpu: "180"
+                memory: "700G"
+                aws.amazon.com/neuron: "12"
+              requests:
+                cpu: "180"
+                memory: "700G"
+                aws.amazon.com/neuron: "12"
+          nodeSelector:
+            instanceType: inferentia-inf2
+            provisionerType: Karpenter
+          tolerations:
+          - key: "aws.amazon.com/neuron"
+            operator: "Exists"
+            effect: "NoSchedule"
+          - key: "hub.jupyter.org/dedicated"
+            operator: "Equal"
+            value: "user"
+            effect: "NoSchedule"
 
 
 ---
@@ -117,21 +113,21 @@ metadata:
 spec:
   ingressClassName: nginx
   rules:
-    - http:
-        paths:
-          # Ray Dashboard
-          - path: /dashboard/(.*)
-            pathType: ImplementationSpecific
-            backend:
-              service:
-                name: llama2-service
-                port:
-                  number: 8265
-          # Ray Serve
-          - path: /serve/(.*)
-            pathType: ImplementationSpecific
-            backend:
-              service:
-                name: llama2-service
-                port:
-                  number: 8000
+  - http:
+      paths:
+      # Ray Dashboard
+      - path: /dashboard/(.*)
+        pathType: ImplementationSpecific
+        backend:
+          service:
+            name: llama2-service
+            port:
+              number: 8265
+      # Ray Serve
+      - path: /serve/(.*)
+        pathType: ImplementationSpecific
+        backend:
+          service:
+            name: llama2-service
+            port:
+              number: 8000
diff --git a/...entia/examples/inference/ray-serve/stable-diffusion-inf2/ray-service-stablediffusion.yaml b/...entia/examples/inference/ray-serve/stable-diffusion-inf2/ray-service-stablediffusion.yaml
@@ -114,9 +114,6 @@ spec:
             instanceType: inferentia-inf2
             provisionerType: Karpenter
           tolerations:
-          - key: "aws.amazon.com/neuroncore"
-            operator: "Exists"
-            effect: "NoSchedule"
           - key: "aws.amazon.com/neuron"
             operator: "Exists"
             effect: "NoSchedule"

diff --git a/website/docs/gen-ai/excalidraw/stable-diffusion-inf2.excalidraw b/website/docs/gen-ai/excalidraw/stable-diffusion-inf2.excalidraw
@@ -5458,4 +5458,4 @@
       "lastRetrieved": 1708463794770
     }
   }
-}
+}
-Original file line number
+Diff line change
@@ Expand Up / @@ -5458,4 +5458,4 @@ @@
           "lastRetrieved": 1708463794770
         }
       }
-    }
+    }