feat: Add sample blueprint to run stable diffusion model on inferentia2 using rayserve (#406)

Co-authored-by: Vara Bonthu <[email protected]>
ratnopamc and vara-bonthu authored Feb 13, 2024
1 parent 1fd8438 commit 8d3515d
Showing 24 changed files with 861 additions and 73 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -49,3 +49,5 @@ site

# Checks
.tfsec

examples/gradio-ui/*
2 changes: 1 addition & 1 deletion ai-ml/mlflow/helm-values/ingress-nginx-values.yaml
@@ -5,7 +5,7 @@ controller:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http
-service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing # Private Load Balancer can only be accessed within the VPC
+service.beta.kubernetes.io/aws-load-balancer-scheme: internal # Private Load Balancer can only be accessed within the VPC
targetPorts:
http: http
https: http
67 changes: 54 additions & 13 deletions ai-ml/trainium-inferentia/addons.tf
@@ -15,7 +15,7 @@ resource "kubernetes_annotations" "disable_gp2" {
depends_on = [module.eks.eks_cluster_id]
}

resource "kubernetes_storage_class" "default_gp3" {
resource "kubernetes_storage_class_v1" "default_gp3" {
metadata {
name = "gp3"
annotations = {
@@ -138,6 +138,7 @@ module "eks_blueprints_addons" {
}
}
karpenter = {
chart_version = "v0.34.0"
repository_username = data.aws_ecrpublic_authorization_token.token.user_name
repository_password = data.aws_ecrpublic_authorization_token.token.password
}
@@ -199,13 +200,15 @@ module "eks_blueprints_addons" {
kube_prometheus_stack = {
values = [
var.enable_amazon_prometheus ? templatefile("${path.module}/helm-values/kube-prometheus-amp-enable.yaml", {
+storage_class_type = kubernetes_storage_class_v1.default_gp3.id
region = local.region
amp_sa = local.amp_ingest_service_account
amp_irsa = module.amp_ingest_irsa[0].iam_role_arn
amp_remotewrite_url = "https://aps-workspaces.${local.region}.amazonaws.com/workspaces/${aws_prometheus_workspace.amp[0].id}/api/v1/remote_write"
amp_url = "https://aps-workspaces.${local.region}.amazonaws.com/workspaces/${aws_prometheus_workspace.amp[0].id}"
-storage_class_type = kubernetes_storage_class.default_gp3.id
-}) : templatefile("${path.module}/helm-values/kube-prometheus.yaml", {})
+}) : templatefile("${path.module}/helm-values/kube-prometheus.yaml", {
+storage_class_type = kubernetes_storage_class_v1.default_gp3.id
+})
]
chart_version = "48.1.1"
set_sensitive = [
@@ -248,14 +251,15 @@
#---------------------------------------------------------------
module "eks_data_addons" {
source = "aws-ia/eks-data-addons/aws"
-version = "~> 1.2.9" # ensure to update this to the latest/desired version
+version = "~> 1.30" # ensure to update this to the latest/desired version

oidc_provider_arn = module.eks.oidc_provider_arn

enable_aws_neuron_device_plugin = true
enable_aws_efa_k8s_device_plugin = true
#---------------------------------------
-# Volcano Scheduler for TorchX
+# Volcano Scheduler for TorchX used in BERT-Large distributed training example
+# Volcano is also a default scheduler for KubeRay Operator
#---------------------------------------
enable_volcano = true

@@ -274,15 +278,24 @@ module "eks_data_addons" {
]
}

-enable_jupyterhub = true
+#---------------------------------------
+# JupyterHub Addon
+#---------------------------------------
+enable_jupyterhub = var.enable_jupyterhub
jupyterhub_helm_config = {
values = [
templatefile("${path.module}/helm-values/jupyterhub-values.yaml", {
-jupyter_single_user_sa_name = kubernetes_service_account_v1.jupyterhub_single_user_sa.metadata[0].name
+jupyter_single_user_sa_name = "${module.eks.cluster_name}-jupyterhub-single-user"
})
]
}

#---------------------------------------
# Deploying Karpenter resources(Nodepool and NodeClass) with Helm Chart
#---------------------------------------
enable_karpenter_resources = true
# We use index 2 to select the subnet in AZ1 with the 100.x CIDR:
# module.vpc.private_subnets = [AZ1_10.x, AZ2_10.x, AZ1_100.x, AZ2_100.x]
karpenter_resources_helm_config = {
inferentia-inf2 = {
values = [
@@ -292,13 +305,20 @@ module "eks_data_addons" {
ec2NodeClass:
karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]}
subnetSelectorTerms:
-id: ${module.vpc.private_subnets[3]}
+id: ${module.vpc.private_subnets[2]}
securityGroupSelectorTerms:
tags:
Name: ${module.eks.cluster_name}-node
blockDevice:
deviceName: /dev/xvda
volumeSize: 500Gi
volumeType: gp3
encrypted: true
deleteOnTermination: true
nodePool:
labels:
-- provisioner: inferentia-inf2
+- instanceType: inferentia-inf2
+- provisionerType: Karpenter
- hub.jupyter.org/node-purpose: user
taints:
- key: aws.amazon.com/neuroncore
@@ -324,6 +344,13 @@ module "eks_data_addons" {
- key: "karpenter.sh/capacity-type"
operator: In
values: ["spot", "on-demand"]
limits:
cpu: 1000
disruption:
consolidationPolicy: WhenEmpty
consolidateAfter: 30s
expireAfter: 720h
weight: 100
EOT
]
}
@@ -334,13 +361,20 @@ module "eks_data_addons" {
ec2NodeClass:
karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]}
subnetSelectorTerms:
-id: ${module.vpc.private_subnets[3]}
+id: ${module.vpc.private_subnets[2]}
securityGroupSelectorTerms:
tags:
Name: ${module.eks.cluster_name}-node
blockDevice:
deviceName: /dev/xvda
volumeSize: 200Gi
volumeType: gp3
encrypted: true
deleteOnTermination: true
nodePool:
labels:
-- provisioner: default
+- instanceType: mixed-x86
+- provisionerType: Karpenter
- workload: rayhead
requirements:
- key: "karpenter.k8s.aws/instance-family"
@@ -355,6 +389,13 @@ module "eks_data_addons" {
- key: "karpenter.sh/capacity-type"
operator: In
values: ["spot", "on-demand"]
limits:
cpu: 1000
disruption:
consolidationPolicy: WhenEmpty
consolidateAfter: 30s
expireAfter: 720h
weight: 100
EOT
]
}
@@ -395,7 +436,7 @@ resource "random_password" "grafana" {

#tfsec:ignore:aws-ssm-secret-use-customer-key
resource "aws_secretsmanager_secret" "grafana" {
-name = "${local.name}-oss-grafana"
+name_prefix = "${local.name}-oss-grafana"
recovery_window_in_days = 0 # Set to zero for this example to force delete during Terraform destroy
}

@@ -420,7 +461,7 @@ module "s3_bucket" {
# MPI Operator for distributed training on Trainium
#---------------------------------------------------------------
data "http" "mpi_operator_yaml" {
-url = "https://raw.githubusercontent.com/kubeflow/mpi-operator/${var.mpi_operator_version}/deploy/v2beta1/mpi-operator.yaml"
+url = "https://raw.githubusercontent.com/kubeflow/mpi-operator/v0.4.0/deploy/v2beta1/mpi-operator.yaml"
}

data "kubectl_file_documents" "mpi_operator_yaml" {
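Note: the Karpenter `subnetSelectorTerms` above and the `inf2` node groups below both assume the VPC module returns its private subnets in a fixed order, with index 2 being the AZ1 subnet on the secondary 100.x CIDR. A quick way to confirm that ordering against your own state (a sketch, run from the blueprint's Terraform directory):

```bash
# Map each private subnet ID to its CIDR block; the entry at index 2
# should be the AZ1 subnet with the 100.x CIDR range.
echo 'zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks)' \
  | terraform console
```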
56 changes: 37 additions & 19 deletions ai-ml/trainium-inferentia/eks.tf
@@ -121,6 +121,7 @@ module "eks" {
labels = {
WorkerType = "ON_DEMAND"
NodeGroupType = "core"
workload = "rayhead"
}

tags = merge(local.tags, {
@@ -488,16 +489,14 @@ module "eks" {
inf2-24xl-ng = {
name = "inf2-24xl-ng"
description = "inf2 24xl node group for ML inference workloads"
-# The code filters the private subnets based on their CIDR blocks and selects the subnet ID if the CIDR block starts with "100." Otherwise, it assigns a null value.
-# The element(compact([...]), 0) expression ensures that only the first non-null value is included in the resulting list of subnet IDs.
-subnet_ids = [element(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) :
-substr(cidr_block, 0, 4) == "100." ? subnet_id : null]), 0)
-]
+# We use index 2 to select the subnet in AZ1 with the 100.x CIDR:
+# module.vpc.private_subnets = [AZ1_10.x, AZ2_10.x, AZ1_100.x, AZ2_100.x]
+subnet_ids = [module.vpc.private_subnets[2]]

# aws ssm get-parameters --names /aws/service/eks/optimized-ami/1.27/amazon-linux-2-gpu/recommended/image_id --region us-west-2
# ami_id = "ami-0e0deb7ae582f6fe9" # Use this to pass custom AMI ID and ignore ami_type
ami_type = "AL2_x86_64_GPU"
-capacity_type = "SPOT"
+capacity_type = "ON_DEMAND" # Use SPOT for Spot instances
instance_types = ["inf2.24xlarge"]

pre_bootstrap_user_data = <<-EOT
@@ -511,41 +510,50 @@ module "eks" {
desired_size = var.inf2_24xl_desired_size

labels = {
-instance-type = "inf2"
-provisioner = "cluster-autoscaler"
+instanceType = "inf2-24xl"
+provisionerType = "cluster-autoscaler"
}

block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
volume_size = 500
volume_type = "gp3"
}
}
}

taints = [
{
key = "aws.amazon.com/neuron",
-value = true,
+value = "true",
effect = "NO_SCHEDULE"
},
{
key = "aws.amazon.com/neuroncore",
-value = true,
+value = "true",
effect = "NO_SCHEDULE"
},
]

tags = merge(local.tags, {
-Name = "inf2-ng1",
+Name = "inf2-24xl-ng",
"karpenter.sh/discovery" = local.name
})
}

inf2-48xl-ng = {
name = "inf2-48xl-ng"
description = "inf2 48x large node group for ML inference workloads"
-# The code filters the private subnets based on their CIDR blocks and selects the subnet ID if the CIDR block starts with "100." Otherwise, it assigns a null value.
-# The element(compact([...]), 0) expression ensures that only the first non-null value is included in the resulting list of subnet IDs.
-subnet_ids = [element(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) :
-substr(cidr_block, 0, 4) == "100." ? subnet_id : null]), 0)
-]
+# We use index 2 to select the subnet in AZ1 with the 100.x CIDR:
+# module.vpc.private_subnets = [AZ1_10.x, AZ2_10.x, AZ1_100.x, AZ2_100.x]
+subnet_ids = [module.vpc.private_subnets[2]]

# aws ssm get-parameters --names /aws/service/eks/optimized-ami/1.27/amazon-linux-2-gpu/recommended/image_id --region us-west-2
# ami_id = "ami-0e0deb7ae582f6fe9" # Use this to pass custom AMI ID and ignore ami_type
ami_type = "AL2_x86_64_GPU"
-capacity_type = "SPOT"
+capacity_type = "ON_DEMAND" # Use SPOT for Spot instances
instance_types = ["inf2.48xlarge"]

pre_bootstrap_user_data = <<-EOT
@@ -554,13 +562,23 @@ module "eks" {
export PATH=/opt/aws/neuron/bin:$PATH
EOT

block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
volume_size = 500
volume_type = "gp3"
}
}
}

min_size = var.inf2_48xl_min_size
max_size = 2
desired_size = var.inf2_48xl_desired_size

labels = {
-instance-type = "inf2-48xl"
-provisioner = "cluster-autoscaler"
+instanceType = "inf2-48xl"
+provisionerType = "cluster-autoscaler"
}

taints = [
54 changes: 54 additions & 0 deletions ai-ml/trainium-inferentia/examples/gradio-ui/README.md
@@ -0,0 +1,54 @@
# Steps to Deploy Gradio on Your Mac

## Prerequisites
Deploy the `trainium-inferentia` blueprint by following this [link](https://awslabs.github.io/data-on-eks/docs/blueprints/ai-ml/trainium).

## Step 1: Port-forward to the StableDiffusion Ray Service
First, set up a port forward to the StableDiffusion Ray Service using kubectl:

```bash
kubectl -n stablediffusion port-forward svc/stablediffusion-service 8000:8000
```
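Optionally, smoke-test the endpoint before starting the UI. The Gradio app included in this commit issues `GET /imagine` with a `prompt` query parameter and expects image bytes back, so a quick check through the port forward (assuming the Ray service is healthy) looks like this:

```bash
# Request a test image through the port forward and save it locally.
curl -G "http://localhost:8000/imagine" \
  --data-urlencode "prompt=an astronaut riding a horse" \
  --output test-image.png
```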

## Step 2: Deploy Gradio WebUI Locally

### 2.1. Create a Virtual Environment
Create a virtual environment for the Gradio application:

```bash
cd ai-ml/trainium-inferentia/examples/gradio-ui
python3 -m venv .venv
source .venv/bin/activate
```
### 2.2. Install the Gradio WebUI App

Install the Gradio WebUI app dependencies with pip:

```bash
pip install gradio requests
```

### 2.3. Invoke the WebUI
Run the Gradio WebUI using the following command:

NOTE: `gradio-app-stablediffusion.py` points at the port-forward URL, e.g. `service_name = "http://localhost:8000"`.

```bash
python gradio-app-stablediffusion.py
```

You should see output similar to the following:
```text
Running on local URL: http://127.0.0.1:7860
To create a public link, set `share=True` in `launch()`.
```

### 2.4. Access the WebUI from Your Browser
Open your web browser and access the Gradio WebUI by navigating to the following URL:

http://127.0.0.1:7860

![gradio-sd](gradio-app-stable-diffusion-xl.png)

You should now be able to interact with the Gradio application from your local machine.
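If you exposed the RayServe application through a load balancer instead of a port forward, point `service_name` in `gradio-app-stablediffusion.py` at the load balancer's DNS name (see the commented-out example at the top of that script) and skip Step 1. When you are done experimenting, stop the port forward with `Ctrl+C` and leave the virtual environment:

```bash
deactivate
```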
Binary file added: ai-ml/trainium-inferentia/examples/gradio-ui/gradio-app-stable-diffusion-xl.png (image preview not available)
33 changes: 33 additions & 0 deletions ai-ml/trainium-inferentia/examples/gradio-ui/gradio-app-stablediffusion.py
@@ -0,0 +1,33 @@
import gradio as gr
import requests
from PIL import Image
from io import BytesIO

# Constants for the model endpoint and service name
model_endpoint = "/imagine"
# service_name = "http://<REPLACE_ME_WITH_ELB_DNS_NAME>/serve"
service_name = "http://localhost:8000"  # Replace with your actual service URL


# Function to generate an image from a text prompt
def generate_image(prompt):
    # Build the URL for the inference request
    url = f"{service_name}{model_endpoint}"

    try:
        # Send the request to the model service
        response = requests.get(url, params={"prompt": prompt}, timeout=180)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return Image.open(BytesIO(response.content))

    except requests.exceptions.RequestException as e:
        # Handle request exceptions (e.g., connection errors)
        return f"AI: Error: {str(e)}"


# Define the Gradio interface
demo = gr.Interface(fn=generate_image,
                    inputs=[gr.Textbox(label="Enter the Prompt")],
                    outputs=gr.Image(type="pil")).launch(debug=True)