feat: Add sample blueprint to run stable diffusion model on inferentia2 using rayserve (#406)

Co-authored-by: Vara Bonthu <[email protected]>
ratnopamc and vara-bonthu authored Feb 13, 2024
1 parent 1fd8438 commit 8d3515d
Showing 24 changed files with 861 additions and 73 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -49,3 +49,5 @@ site

# Checks
.tfsec

examples/gradio-ui/*
2 changes: 1 addition & 1 deletion ai-ml/mlflow/helm-values/ingress-nginx-values.yaml
@@ -5,7 +5,7 @@ controller:
service.beta.kubernetes.io/aws-load-balancer-type: external
service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip
service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http
-service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing # Private Load Balancer can only be accessed within the VPC
+service.beta.kubernetes.io/aws-load-balancer-scheme: internal # Private Load Balancer can only be accessed within the VPC
targetPorts:
http: http
https: http
67 changes: 54 additions & 13 deletions ai-ml/trainium-inferentia/addons.tf
@@ -15,7 +15,7 @@ resource "kubernetes_annotations" "disable_gp2" {
depends_on = [module.eks.eks_cluster_id]
}

resource "kubernetes_storage_class" "default_gp3" {
resource "kubernetes_storage_class_v1" "default_gp3" {
metadata {
name = "gp3"
annotations = {
@@ -138,6 +138,7 @@ module "eks_blueprints_addons" {
}
}
karpenter = {
chart_version = "v0.34.0"
repository_username = data.aws_ecrpublic_authorization_token.token.user_name
repository_password = data.aws_ecrpublic_authorization_token.token.password
}
@@ -199,13 +200,15 @@ module "eks_blueprints_addons" {
kube_prometheus_stack = {
values = [
var.enable_amazon_prometheus ? templatefile("${path.module}/helm-values/kube-prometheus-amp-enable.yaml", {
+storage_class_type = kubernetes_storage_class_v1.default_gp3.id
region = local.region
amp_sa = local.amp_ingest_service_account
amp_irsa = module.amp_ingest_irsa[0].iam_role_arn
amp_remotewrite_url = "https://aps-workspaces.${local.region}.amazonaws.com/workspaces/${aws_prometheus_workspace.amp[0].id}/api/v1/remote_write"
amp_url = "https://aps-workspaces.${local.region}.amazonaws.com/workspaces/${aws_prometheus_workspace.amp[0].id}"
-storage_class_type = kubernetes_storage_class.default_gp3.id
-}) : templatefile("${path.module}/helm-values/kube-prometheus.yaml", {})
+}) : templatefile("${path.module}/helm-values/kube-prometheus.yaml", {
+storage_class_type = kubernetes_storage_class_v1.default_gp3.id
+})
]
chart_version = "48.1.1"
set_sensitive = [
@@ -248,14 +251,15 @@
#---------------------------------------------------------------
module "eks_data_addons" {
source = "aws-ia/eks-data-addons/aws"
-version = "~> 1.2.9" # ensure to update this to the latest/desired version
+version = "~> 1.30" # ensure to update this to the latest/desired version

oidc_provider_arn = module.eks.oidc_provider_arn

enable_aws_neuron_device_plugin = true
enable_aws_efa_k8s_device_plugin = true
#---------------------------------------
-# Volcano Scheduler for TorchX
+# Volcano Scheduler for TorchX used in BERT-Large distributed training example
+# Volcano is also a default scheduler for KubeRay Operator
#---------------------------------------
enable_volcano = true

@@ -274,15 +278,24 @@ module "eks_data_addons" {
]
}

-enable_jupyterhub = true
+#---------------------------------------
+# JupyterHub Addon
+#---------------------------------------
+enable_jupyterhub = var.enable_jupyterhub
jupyterhub_helm_config = {
values = [
templatefile("${path.module}/helm-values/jupyterhub-values.yaml", {
-jupyter_single_user_sa_name = kubernetes_service_account_v1.jupyterhub_single_user_sa.metadata[0].name
+jupyter_single_user_sa_name = "${module.eks.cluster_name}-jupyterhub-single-user"
})
]
}

#---------------------------------------
# Deploying Karpenter resources(Nodepool and NodeClass) with Helm Chart
#---------------------------------------
enable_karpenter_resources = true
# We use index 2 to select the subnet in AZ1 with the 100.x CIDR:
# module.vpc.private_subnets = [AZ1_10.x, AZ2_10.x, AZ1_100.x, AZ2_100.x]
karpenter_resources_helm_config = {
inferentia-inf2 = {
values = [
@@ -292,13 +305,20 @@ module "eks_data_addons" {
ec2NodeClass:
karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]}
subnetSelectorTerms:
-id: ${module.vpc.private_subnets[3]}
+id: ${module.vpc.private_subnets[2]}
securityGroupSelectorTerms:
tags:
Name: ${module.eks.cluster_name}-node
blockDevice:
deviceName: /dev/xvda
volumeSize: 500Gi
volumeType: gp3
encrypted: true
deleteOnTermination: true
nodePool:
labels:
-- provisioner: inferentia-inf2
+- instanceType: inferentia-inf2
+- provisionerType: Karpenter
- hub.jupyter.org/node-purpose: user
taints:
- key: aws.amazon.com/neuroncore
@@ -324,6 +344,13 @@ module "eks_data_addons" {
- key: "karpenter.sh/capacity-type"
operator: In
values: ["spot", "on-demand"]
limits:
cpu: 1000
disruption:
consolidationPolicy: WhenEmpty
consolidateAfter: 30s
expireAfter: 720h
weight: 100
EOT
]
}
@@ -334,13 +361,20 @@ module "eks_data_addons" {
ec2NodeClass:
karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]}
subnetSelectorTerms:
-id: ${module.vpc.private_subnets[3]}
+id: ${module.vpc.private_subnets[2]}
securityGroupSelectorTerms:
tags:
Name: ${module.eks.cluster_name}-node
blockDevice:
deviceName: /dev/xvda
volumeSize: 200Gi
volumeType: gp3
encrypted: true
deleteOnTermination: true
nodePool:
labels:
-- provisioner: default
+- instanceType: mixed-x86
+- provisionerType: Karpenter
- workload: rayhead
requirements:
- key: "karpenter.k8s.aws/instance-family"
@@ -355,6 +389,13 @@ module "eks_data_addons" {
- key: "karpenter.sh/capacity-type"
operator: In
values: ["spot", "on-demand"]
limits:
cpu: 1000
disruption:
consolidationPolicy: WhenEmpty
consolidateAfter: 30s
expireAfter: 720h
weight: 100
EOT
]
}
@@ -395,7 +436,7 @@ resource "random_password" "grafana" {

#tfsec:ignore:aws-ssm-secret-use-customer-key
resource "aws_secretsmanager_secret" "grafana" {
-name = "${local.name}-oss-grafana"
+name_prefix = "${local.name}-oss-grafana"
recovery_window_in_days = 0 # Set to zero for this example to force delete during Terraform destroy
}

@@ -420,7 +461,7 @@ module "s3_bucket" {
# MPI Operator for distributed training on Trainium
#---------------------------------------------------------------
data "http" "mpi_operator_yaml" {
-url = "https://raw.githubusercontent.com/kubeflow/mpi-operator/${var.mpi_operator_version}/deploy/v2beta1/mpi-operator.yaml"
+url = "https://raw.githubusercontent.com/kubeflow/mpi-operator/v0.4.0/deploy/v2beta1/mpi-operator.yaml"
}

data "kubectl_file_documents" "mpi_operator_yaml" {
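Note: the Karpenter `subnetSelectorTerms` above and the `inf2` node groups below both assume the VPC module returns its private subnets in a fixed order, with index 2 being the AZ1 subnet on the secondary 100.x CIDR. A quick way to confirm that ordering against your own state (a sketch, run from the blueprint's Terraform directory):

```bash
# Map each private subnet ID to its CIDR block; the entry at index 2
# should be the AZ1 subnet with the 100.x CIDR range.
echo 'zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks)' \
  | terraform console
```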
56 changes: 37 additions & 19 deletions ai-ml/trainium-inferentia/eks.tf
@@ -121,6 +121,7 @@ module "eks" {
labels = {
WorkerType = "ON_DEMAND"
NodeGroupType = "core"
workload = "rayhead"
}

tags = merge(local.tags, {
@@ -488,16 +489,14 @@ module "eks" {
inf2-24xl-ng = {
name = "inf2-24xl-ng"
description = "inf2 24xl node group for ML inference workloads"
-# The code filters the private subnets based on their CIDR blocks and selects the subnet ID if the CIDR block starts with "100." Otherwise, it assigns a null value.
-# The element(compact([...]), 0) expression ensures that only the first non-null value is included in the resulting list of subnet IDs.
-subnet_ids = [element(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) :
-substr(cidr_block, 0, 4) == "100." ? subnet_id : null]), 0)
-]
+# We use index 2 to select the subnet in AZ1 with the 100.x CIDR:
+# module.vpc.private_subnets = [AZ1_10.x, AZ2_10.x, AZ1_100.x, AZ2_100.x]
+subnet_ids = [module.vpc.private_subnets[2]]

# aws ssm get-parameters --names /aws/service/eks/optimized-ami/1.27/amazon-linux-2-gpu/recommended/image_id --region us-west-2
# ami_id = "ami-0e0deb7ae582f6fe9" # Use this to pass custom AMI ID and ignore ami_type
ami_type = "AL2_x86_64_GPU"
-capacity_type = "SPOT"
+capacity_type = "ON_DEMAND" # Use SPOT for Spot instances
instance_types = ["inf2.24xlarge"]

pre_bootstrap_user_data = <<-EOT
@@ -511,41 +510,50 @@ module "eks" {
desired_size = var.inf2_24xl_desired_size

labels = {
-instance-type = "inf2"
-provisioner = "cluster-autoscaler"
+instanceType = "inf2-24xl"
+provisionerType = "cluster-autoscaler"
}

block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
volume_size = 500
volume_type = "gp3"
}
}
}

taints = [
{
key = "aws.amazon.com/neuron",
-value = true,
+value = "true",
effect = "NO_SCHEDULE"
},
{
key = "aws.amazon.com/neuroncore",
-value = true,
+value = "true",
effect = "NO_SCHEDULE"
},
]

tags = merge(local.tags, {
-Name = "inf2-ng1",
+Name = "inf2-24xl-ng",
"karpenter.sh/discovery" = local.name
})
}

inf2-48xl-ng = {
name = "inf2-48xl-ng"
description = "inf2 48x large node group for ML inference workloads"
-# The code filters the private subnets based on their CIDR blocks and selects the subnet ID if the CIDR block starts with "100." Otherwise, it assigns a null value.
-# The element(compact([...]), 0) expression ensures that only the first non-null value is included in the resulting list of subnet IDs.
-subnet_ids = [element(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) :
-substr(cidr_block, 0, 4) == "100." ? subnet_id : null]), 0)
-]
+# We use index 2 to select the subnet in AZ1 with the 100.x CIDR:
+# module.vpc.private_subnets = [AZ1_10.x, AZ2_10.x, AZ1_100.x, AZ2_100.x]
+subnet_ids = [module.vpc.private_subnets[2]]

# aws ssm get-parameters --names /aws/service/eks/optimized-ami/1.27/amazon-linux-2-gpu/recommended/image_id --region us-west-2
# ami_id = "ami-0e0deb7ae582f6fe9" # Use this to pass custom AMI ID and ignore ami_type
ami_type = "AL2_x86_64_GPU"
-capacity_type = "SPOT"
+capacity_type = "ON_DEMAND" # Use SPOT for Spot instances
instance_types = ["inf2.48xlarge"]

pre_bootstrap_user_data = <<-EOT
@@ -554,13 +562,23 @@ module "eks" {
export PATH=/opt/aws/neuron/bin:$PATH
EOT

block_device_mappings = {
xvda = {
device_name = "/dev/xvda"
ebs = {
volume_size = 500
volume_type = "gp3"
}
}
}

min_size = var.inf2_48xl_min_size
max_size = 2
desired_size = var.inf2_48xl_desired_size

labels = {
-instance-type = "inf2-48xl"
-provisioner = "cluster-autoscaler"
+instanceType = "inf2-48xl"
+provisionerType = "cluster-autoscaler"
}

taints = [
54 changes: 54 additions & 0 deletions ai-ml/trainium-inferentia/examples/gradio-ui/README.md
@@ -0,0 +1,54 @@
# Steps to Deploy Gradio on Your Mac

## Prerequisites
Deploy the `trainium-inferentia` blueprint by following this [link](https://awslabs.github.io/data-on-eks/docs/blueprints/ai-ml/trainium).

## Step 1: Port-forward to the StableDiffusion Ray Service
First, set up a port forward to the StableDiffusion Ray Service using kubectl:

```bash
kubectl -n stablediffusion port-forward svc/stablediffusion-service 8000:8000
```
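Optionally, smoke-test the endpoint before starting the UI. The Gradio app included in this commit issues `GET /imagine` with a `prompt` query parameter and expects image bytes back, so a quick check through the port forward (assuming the Ray service is healthy) looks like this:

```bash
# Request a test image through the port forward and save it locally.
curl -G "http://localhost:8000/imagine" \
  --data-urlencode "prompt=an astronaut riding a horse" \
  --output test-image.png
```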

## Step 2: Deploy Gradio WebUI Locally

### 2.1. Create a Virtual Environment
Create a virtual environment for the Gradio application:

```bash
cd ai-ml/trainium-inferentia/examples/gradio-ui
python3 -m venv .venv
source .venv/bin/activate
```
### 2.2. Install the Gradio WebUI App

Install the Gradio WebUI app dependencies with pip:

```bash
pip install gradio requests
```

### 2.3. Invoke the WebUI
Run the Gradio WebUI using the following command:

NOTE: `gradio-app-stablediffusion.py` points at the port-forward URL, e.g. `service_name = "http://localhost:8000"`.

```bash
python gradio-app-stablediffusion.py
```

You should see output similar to the following:
```text
Running on local URL: http://127.0.0.1:7860
To create a public link, set `share=True` in `launch()`.
```

### 2.4. Access the WebUI from Your Browser
Open your web browser and access the Gradio WebUI by navigating to the following URL:

http://127.0.0.1:7860

![gradio-sd](gradio-app-stable-diffusion-xl.png)

You should now be able to interact with the Gradio application from your local machine.
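If you exposed the RayServe application through a load balancer instead of a port forward, point `service_name` in `gradio-app-stablediffusion.py` at the load balancer's DNS name (see the commented-out example at the top of that script) and skip Step 1. When you are done experimenting, stop the port forward with `Ctrl+C` and leave the virtual environment:

```bash
deactivate
```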
Binary file added: ai-ml/trainium-inferentia/examples/gradio-ui/gradio-app-stable-diffusion-xl.png (image preview not available)
33 changes: 33 additions & 0 deletions ai-ml/trainium-inferentia/examples/gradio-ui/gradio-app-stablediffusion.py
@@ -0,0 +1,33 @@
import gradio as gr
import requests
from PIL import Image
from io import BytesIO

# Constants for the model endpoint and service name
model_endpoint = "/imagine"
# service_name = "http://<REPLACE_ME_WITH_ELB_DNS_NAME>/serve"
service_name = "http://localhost:8000"  # Replace with your actual service URL


# Function to generate an image from a text prompt
def generate_image(prompt):
    # Build the URL for the inference request
    url = f"{service_name}{model_endpoint}"

    try:
        # Send the request to the model service
        response = requests.get(url, params={"prompt": prompt}, timeout=180)
        response.raise_for_status()  # Raise an exception for HTTP errors
        return Image.open(BytesIO(response.content))

    except requests.exceptions.RequestException as e:
        # Handle request exceptions (e.g., connection errors)
        return f"AI: Error: {str(e)}"


# Define the Gradio interface
demo = gr.Interface(fn=generate_image,
                    inputs=[gr.Textbox(label="Enter the Prompt")],
                    outputs=gr.Image(type="pil")).launch(debug=True)