diff --git a/.gitignore b/.gitignore index 2f1732ce9..5a15f96df 100755 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,5 @@ site # Checks .tfsec + +examples/gradio-ui/* diff --git a/ai-ml/mlflow/helm-values/ingress-nginx-values.yaml b/ai-ml/mlflow/helm-values/ingress-nginx-values.yaml index 22e48c7f9..c8b1a5d74 100644 --- a/ai-ml/mlflow/helm-values/ingress-nginx-values.yaml +++ b/ai-ml/mlflow/helm-values/ingress-nginx-values.yaml @@ -5,7 +5,7 @@ controller: service.beta.kubernetes.io/aws-load-balancer-type: external service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http - service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing # Private Load Balancer can only be accessed within the VPC + service.beta.kubernetes.io/aws-load-balancer-scheme: internal # Private Load Balancer can only be accessed within the VPC targetPorts: http: http https: http diff --git a/ai-ml/trainium-inferentia/addons.tf b/ai-ml/trainium-inferentia/addons.tf index ccffe0c8d..6bf894f84 100644 --- a/ai-ml/trainium-inferentia/addons.tf +++ b/ai-ml/trainium-inferentia/addons.tf @@ -15,7 +15,7 @@ resource "kubernetes_annotations" "disable_gp2" { depends_on = [module.eks.eks_cluster_id] } -resource "kubernetes_storage_class" "default_gp3" { +resource "kubernetes_storage_class_v1" "default_gp3" { metadata { name = "gp3" annotations = { @@ -138,6 +138,7 @@ module "eks_blueprints_addons" { } } karpenter = { + chart_version = "v0.34.0" repository_username = data.aws_ecrpublic_authorization_token.token.user_name repository_password = data.aws_ecrpublic_authorization_token.token.password } @@ -199,13 +200,15 @@ module "eks_blueprints_addons" { kube_prometheus_stack = { values = [ var.enable_amazon_prometheus ? 
templatefile("${path.module}/helm-values/kube-prometheus-amp-enable.yaml", { + storage_class_type = kubernetes_storage_class_v1.default_gp3.id region = local.region amp_sa = local.amp_ingest_service_account amp_irsa = module.amp_ingest_irsa[0].iam_role_arn amp_remotewrite_url = "https://aps-workspaces.${local.region}.amazonaws.com/workspaces/${aws_prometheus_workspace.amp[0].id}/api/v1/remote_write" amp_url = "https://aps-workspaces.${local.region}.amazonaws.com/workspaces/${aws_prometheus_workspace.amp[0].id}" - storage_class_type = kubernetes_storage_class.default_gp3.id - }) : templatefile("${path.module}/helm-values/kube-prometheus.yaml", {}) + }) : templatefile("${path.module}/helm-values/kube-prometheus.yaml", { + storage_class_type = kubernetes_storage_class_v1.default_gp3.id + }) ] chart_version = "48.1.1" set_sensitive = [ @@ -248,14 +251,15 @@ module "eks_blueprints_addons" { #--------------------------------------------------------------- module "eks_data_addons" { source = "aws-ia/eks-data-addons/aws" - version = "~> 1.2.9" # ensure to update this to the latest/desired version + version = "~> 1.30" # ensure to update this to the latest/desired version oidc_provider_arn = module.eks.oidc_provider_arn enable_aws_neuron_device_plugin = true enable_aws_efa_k8s_device_plugin = true #--------------------------------------- - # Volcano Scheduler for TorchX + # Volcano Scheduler for TorchX used in BERT-Large distributed training example + # Volcano is also a default scheduler for KubeRay Operator #--------------------------------------- enable_volcano = true @@ -274,15 +278,24 @@ module "eks_data_addons" { ] } - enable_jupyterhub = true + #--------------------------------------- + # JupyterHub Addon + #--------------------------------------- + enable_jupyterhub = var.enable_jupyterhub jupyterhub_helm_config = { values = [ templatefile("${path.module}/helm-values/jupyterhub-values.yaml", { - jupyter_single_user_sa_name = kubernetes_service_account_v1.jupyterhub_single_user_sa.metadata[0].name + jupyter_single_user_sa_name = "${module.eks.cluster_name}-jupyterhub-single-user" }) ] } + + #--------------------------------------- + # Deploying Karpenter resources(Nodepool and NodeClass) with Helm Chart + #--------------------------------------- enable_karpenter_resources = true + # We use index 2 to select the subnet in AZ1 with the 100.x CIDR: + # module.vpc.private_subnets = [AZ1_10.x, AZ2_10.x, AZ1_100.x, AZ2_100.x] karpenter_resources_helm_config = { inferentia-inf2 = { values = [ @@ -292,13 +305,20 @@ module "eks_data_addons" { ec2NodeClass: karpenterRole: ${split("/", module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]} subnetSelectorTerms: - id: ${module.vpc.private_subnets[3]} + id: ${module.vpc.private_subnets[2]} securityGroupSelectorTerms: tags: Name: ${module.eks.cluster_name}-node + blockDevice: + deviceName: /dev/xvda + volumeSize: 500Gi + volumeType: gp3 + encrypted: true + deleteOnTermination: true nodePool: labels: - - provisioner: inferentia-inf2 + - instanceType: inferentia-inf2 + - provisionerType: Karpenter - hub.jupyter.org/node-purpose: user taints: - key: aws.amazon.com/neuroncore @@ -324,6 +344,13 @@ module "eks_data_addons" { - key: "karpenter.sh/capacity-type" operator: In values: ["spot", "on-demand"] + limits: + cpu: 1000 + disruption: + consolidationPolicy: WhenEmpty + consolidateAfter: 30s + expireAfter: 720h + weight: 100 EOT ] } @@ -334,13 +361,20 @@ module "eks_data_addons" { ec2NodeClass: karpenterRole: ${split("/", 
module.eks_blueprints_addons.karpenter.node_iam_role_arn)[1]} subnetSelectorTerms: - id: ${module.vpc.private_subnets[3]} + id: ${module.vpc.private_subnets[2]} securityGroupSelectorTerms: tags: Name: ${module.eks.cluster_name}-node + blockDevice: + deviceName: /dev/xvda + volumeSize: 200Gi + volumeType: gp3 + encrypted: true + deleteOnTermination: true nodePool: labels: - - provisioner: default + - instanceType: mixed-x86 + - provisionerType: Karpenter - workload: rayhead requirements: - key: "karpenter.k8s.aws/instance-family" @@ -355,6 +389,13 @@ module "eks_data_addons" { - key: "karpenter.sh/capacity-type" operator: In values: ["spot", "on-demand"] + limits: + cpu: 1000 + disruption: + consolidationPolicy: WhenEmpty + consolidateAfter: 30s + expireAfter: 720h + weight: 100 EOT ] } @@ -395,7 +436,7 @@ resource "random_password" "grafana" { #tfsec:ignore:aws-ssm-secret-use-customer-key resource "aws_secretsmanager_secret" "grafana" { - name = "${local.name}-oss-grafana" + name_prefix = "${local.name}-oss-grafana" recovery_window_in_days = 0 # Set to zero for this example to force delete during Terraform destroy } @@ -420,7 +461,7 @@ module "s3_bucket" { # MPI Operator for distributed training on Trainium #--------------------------------------------------------------- data "http" "mpi_operator_yaml" { - url = "https://raw.githubusercontent.com/kubeflow/mpi-operator/${var.mpi_operator_version}/deploy/v2beta1/mpi-operator.yaml" + url = "https://raw.githubusercontent.com/kubeflow/mpi-operator/v0.4.0/deploy/v2beta1/mpi-operator.yaml" } data "kubectl_file_documents" "mpi_operator_yaml" { diff --git a/ai-ml/trainium-inferentia/eks.tf b/ai-ml/trainium-inferentia/eks.tf index 621ca8fc8..0aede3240 100644 --- a/ai-ml/trainium-inferentia/eks.tf +++ b/ai-ml/trainium-inferentia/eks.tf @@ -121,6 +121,7 @@ module "eks" { labels = { WorkerType = "ON_DEMAND" NodeGroupType = "core" + workload = "rayhead" } tags = merge(local.tags, { @@ -488,16 +489,14 @@ module "eks" { inf2-24xl-ng = { name = "inf2-24xl-ng" description = "inf2 24xl node group for ML inference workloads" - # The code filters the private subnets based on their CIDR blocks and selects the subnet ID if the CIDR block starts with "100." Otherwise, it assigns a null value. - # The element(compact([...]), 0) expression ensures that only the first non-null value is included in the resulting list of subnet IDs. - subnet_ids = [element(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : - substr(cidr_block, 0, 4) == "100." ? 
subnet_id : null]), 0) - ] + # We use index 2 to select the subnet in AZ1 with the 100.x CIDR: + # module.vpc.private_subnets = [AZ1_10.x, AZ2_10.x, AZ1_100.x, AZ2_100.x] + subnet_ids = [module.vpc.private_subnets[2]] # aws ssm get-parameters --names /aws/service/eks/optimized-ami/1.27/amazon-linux-2-gpu/recommended/image_id --region us-west-2 # ami_id = "ami-0e0deb7ae582f6fe9" # Use this to pass custom AMI ID and ignore ami_type ami_type = "AL2_x86_64_GPU" - capacity_type = "SPOT" + capacity_type = "ON_DEMAND" # Use SPOT for Spot instances instance_types = ["inf2.24xlarge"] pre_bootstrap_user_data = <<-EOT @@ -511,41 +510,50 @@ module "eks" { desired_size = var.inf2_24xl_desired_size labels = { - instance-type = "inf2" - provisioner = "cluster-autoscaler" + instanceType = "inf2-24xl" + provisionerType = "cluster-autoscaler" + } + + block_device_mappings = { + xvda = { + device_name = "/dev/xvda" + ebs = { + volume_size = 500 + volume_type = "gp3" + } + } } taints = [ { key = "aws.amazon.com/neuron", - value = true, + value = "true", effect = "NO_SCHEDULE" }, { key = "aws.amazon.com/neuroncore", - value = true, + value = "true", effect = "NO_SCHEDULE" }, ] tags = merge(local.tags, { - Name = "inf2-ng1", + Name = "inf2-24xl-ng", "karpenter.sh/discovery" = local.name }) } + inf2-48xl-ng = { name = "inf2-48xl-ng" description = "inf2 48x large node group for ML inference workloads" - # The code filters the private subnets based on their CIDR blocks and selects the subnet ID if the CIDR block starts with "100." Otherwise, it assigns a null value. - # The element(compact([...]), 0) expression ensures that only the first non-null value is included in the resulting list of subnet IDs. - subnet_ids = [element(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : - substr(cidr_block, 0, 4) == "100." ? 
subnet_id : null]), 0) - ] + # We use index 2 to select the subnet in AZ1 with the 100.x CIDR: + # module.vpc.private_subnets = [AZ1_10.x, AZ2_10.x, AZ1_100.x, AZ2_100.x] + subnet_ids = [module.vpc.private_subnets[2]] # aws ssm get-parameters --names /aws/service/eks/optimized-ami/1.27/amazon-linux-2-gpu/recommended/image_id --region us-west-2 # ami_id = "ami-0e0deb7ae582f6fe9" # Use this to pass custom AMI ID and ignore ami_type ami_type = "AL2_x86_64_GPU" - capacity_type = "SPOT" + capacity_type = "ON_DEMAND" # Use SPOT for Spot instances instance_types = ["inf2.48xlarge"] pre_bootstrap_user_data = <<-EOT @@ -554,13 +562,23 @@ module "eks" { export PATH=/opt/aws/neuron/bin:$PATH EOT + block_device_mappings = { + xvda = { + device_name = "/dev/xvda" + ebs = { + volume_size = 500 + volume_type = "gp3" + } + } + } + min_size = var.inf2_48xl_min_size max_size = 2 desired_size = var.inf2_48xl_desired_size labels = { - instance-type = "inf2-48xl" - provisioner = "cluster-autoscaler" + instanceType = "inf2-48xl" + provisionerType = "cluster-autoscaler" } taints = [ diff --git a/ai-ml/trainium-inferentia/examples/gradio-ui/README-StableDiffusion.md b/ai-ml/trainium-inferentia/examples/gradio-ui/README-StableDiffusion.md new file mode 100644 index 000000000..1189a337a --- /dev/null +++ b/ai-ml/trainium-inferentia/examples/gradio-ui/README-StableDiffusion.md @@ -0,0 +1,54 @@ +# Steps to Deploy Gradio on Your Mac + +## Pre-requisites +Deploy the `trainium-inferentia` blueprint using this [link](https://awslabs.github.io/data-on-eks/docs/blueprints/ai-ml/trainium) + +## Step 1: Execute Port Forward to the StableDiffusion Ray Service +First, execute a port forward to the StableDiffusion Ray Service using kubectl: + +```bash +kubectl -n stablediffusion port-forward svc/stablediffusion-service 8000:8000 +``` + +## Step 2: Deploy Gradio WebUI Locally + +### 2.1. Create a Virtual Environment +Create a virtual environment for the Gradio application: + +```bash +cd ai-ml/trainium-inferentia/examples/gradio-ui +python3 -m venv .venv +source .venv/bin/activate +``` +### 2.2. Install Gradio WebUI app + +Install all the Gradio WebUI app dependencies with pip + +```bash +pip install gradio requests +``` + +### 2.3. Invoke the WebUI +Run the Gradio WebUI using the following command: + +NOTE: `gradio-app-stablediffusion.py` refers to the port forward url. e.g., `service_name = "http://localhost:8000" ` + +```bash +python gradio-app-stablediffusion.py +``` + +You should see output similar to the following: +```text +Running on local URL: http://127.0.0.1:7860 + +To create a public link, set `share=True` in `launch()`. +``` + +### 2.4. Access the WebUI from Your Browser +Open your web browser and access the Gradio WebUI by navigating to the following URL: + +http://127.0.0.1:7860 + +![gradio-sd](gradio-app-stable-diffusion-xl.png) + +You should now be able to interact with the Gradio application from your local machine. 
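Before launching the WebUI (or if the UI returns errors), it can help to hit the port-forwarded Ray Serve endpoint directly. This is a minimal sketch that assumes the `/imagine` GET route used by `gradio-app-stablediffusion.py` and the port-forward from Step 1:

```bash
# Quick sanity check against the port-forwarded service from Step 1.
# Writes the generated image to test.png; expects the /imagine GET route
# served by the StableDiffusion Ray Service.
curl -G "http://localhost:8000/imagine" \
  --data-urlencode "prompt=a watercolor painting of a lighthouse at sunrise" \
  --output test.png
```

If this returns a valid PNG, the Gradio app should work against the same URL.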
diff --git a/ai-ml/trainium-inferentia/examples/gradio-ui/gradio-app-stable-diffusion-xl.png b/ai-ml/trainium-inferentia/examples/gradio-ui/gradio-app-stable-diffusion-xl.png new file mode 100644 index 000000000..9576241fe Binary files /dev/null and b/ai-ml/trainium-inferentia/examples/gradio-ui/gradio-app-stable-diffusion-xl.png differ diff --git a/ai-ml/trainium-inferentia/examples/gradio-ui/gradio-app-stablediffusion.py b/ai-ml/trainium-inferentia/examples/gradio-ui/gradio-app-stablediffusion.py new file mode 100644 index 000000000..5fa377158 --- /dev/null +++ b/ai-ml/trainium-inferentia/examples/gradio-ui/gradio-app-stablediffusion.py @@ -0,0 +1,33 @@ +import gradio as gr +import requests +import json +from PIL import Image +from io import BytesIO + +# Constants for model endpoint and service name +model_endpoint = "/imagine" +# service_name = "http:///serve" +service_name = "http://localhost:8000" # Replace with your actual service name + + +# Function to generate image based on prompt +def generate_image(prompt): + + # Create the URL for the inference + url = f"{service_name}{model_endpoint}" + + try: + # Send the request to the model service + response = requests.get(url, params={"prompt": prompt}, timeout=180) + response.raise_for_status() # Raise an exception for HTTP errors + i = Image.open(BytesIO(response.content)) + return i + + except requests.exceptions.RequestException as e: + # Handle any request exceptions (e.g., connection errors) + return f"AI: Error: {str(e)}" + +# Define the Gradio PromptInterface +demo = gr.Interface(fn=generate_image, + inputs = [gr.Textbox(label="Enter the Prompt")], + outputs = gr.Image(type='pil')).launch(debug='True') diff --git a/ai-ml/trainium-inferentia/examples/ray-serve/llama2-inf2/ray-service-llama2.yaml b/ai-ml/trainium-inferentia/examples/ray-serve/llama2-inf2/ray-service-llama2.yaml index e751c2e4c..05b409d43 100644 --- a/ai-ml/trainium-inferentia/examples/ray-serve/llama2-inf2/ray-service-llama2.yaml +++ b/ai-ml/trainium-inferentia/examples/ray-serve/llama2-inf2/ray-service-llama2.yaml @@ -57,8 +57,9 @@ spec: requests: cpu: 4 memory: 20Gi - nodeSelector: - provisioner: default + nodeSelector: # This is using Karpenter Nodes with the provisioner label + instanceType: mixed-x86 + provisionerType: Karpenter workload: rayhead volumes: - name: ray-logs @@ -90,14 +91,15 @@ spec: memory: "700G" aws.amazon.com/neuron: "12" nodeSelector: - provisioner: inferentia-inf2 + instanceType: inferentia-inf2 + provisionerType: Karpenter tolerations: - - key: aws.amazon.com/neuroncore - operator: Exists - effect: NoSchedule - - key: aws.amazon.com/neuron - operator: Exists - effect: NoSchedule + - key: "aws.amazon.com/neuroncore" + operator: "Exists" + effect: "NoSchedule" + - key: "aws.amazon.com/neuron" + operator: "Exists" + effect: "NoSchedule" - key: "hub.jupyter.org/dedicated" operator: "Equal" value: "user" diff --git a/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2/Dockerfile b/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2/Dockerfile new file mode 100644 index 000000000..cf51d2fb1 --- /dev/null +++ b/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2/Dockerfile @@ -0,0 +1,37 @@ +FROM rayproject/ray:2.7.1-py310 + +# Maintainer label +LABEL maintainer="DoEKS" + +# Set environment variables to non-interactive (this prevents some prompts) +ENV DEBIAN_FRONTEND=non-interactive + +# Switch to root to add Neuron repo and install necessary packages +USER root + +# Set up the Neuron repository and 
install Neuron packages +RUN . /etc/os-release && \ + sudo echo "deb https://apt.repos.neuron.amazonaws.com ${VERSION_CODENAME} main" > /etc/apt/sources.list.d/neuron.list && \ + sudo wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add - && \ + sudo apt-get update -y && \ + sudo apt-get install aws-neuronx-collectives=2.* aws-neuronx-runtime-lib=2.* aws-neuronx-tools=2.* -y && \ + sudo apt-get clean + +# Switch back to a non-root user for the subsequent commands +USER $USER + +# Set pip repository pointing to the Neuron repository and install required Python packages +RUN pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com && \ + pip install neuronx-cc==2.11.0.34 torch-neuronx==1.13.1.1.12.0 torchvision transformers-neuronx sentencepiece transformers && \ + pip install optimum[neuronx] && \ + pip install optimum[diffusers] && \ + pip install optimum-neuron==0.0.13 diffusers==0.21.4 transformers==4.36.0 && \ + pip install starlette==0.34.0 + + +# Add Neuron path to PATH +ENV PATH /opt/aws/neuron/bin:$PATH + +WORKDIR /serve_app + +COPY ray_serve_stablediffusion.py /serve_app/ray_serve_stablediffusion.py diff --git a/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2/README.md b/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2/README.md new file mode 100644 index 000000000..fd072838d --- /dev/null +++ b/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2/README.md @@ -0,0 +1,42 @@ +# How to deploy StableDiffusion on Inferentia2 and EKS + +## Pre-requisites +Deploy the `trainium-inferentia` blueprint using this [link](https://awslabs.github.io/data-on-eks/docs/blueprints/ai-ml/trainium) + +## Step 1: Deploy RayServe Cluster + +To deploy the RayServe cluster with `StableDiffusion` LLM on `Inf2.24xlarge` instance, run the following command: + +**IMPORTANT NOTE: RAY MODEL DEPLOYMENT CAN TAKE UPTO 8 TO 10 MINS** + +```bash +cd data-on-eks/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2 +kubectl apply -f ray-service-stablediffusion.yaml +``` + +This will deploy a RayServe cluster with a `inf2.24xlarge` instances. The `Stable-Diffusion-xl` LLM will be loaded on the instance and will be available to serve inference requests. + +Once the RayServe cluster is deployed, you can start sending inference requests to it. To do this, you can use the following steps: + +Get the NLB DNS Name address of the RayServe cluster. You can do this by running the following command: + +```bash +kubectl get ingress -n stablediffusion +``` + +Now, you can access the Ray Dashboard from the URL Below + + http:///dashboard/#/serve + +## Step 2: To Test the StableDiffusion Model + +To test the StableDiffusion-xl model, you can use the following command with a prompt added at the end of the URL to generate an image. 
+ +This uses the GET method to get the response: + +`http:///serve/imagine?prompt=an astronaut is dancing on green grass, sunlit` + + +You will see an output like this in your browser: + +![generated_image](stable-diffusion-xl-prompt_3.png) diff --git a/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2/ray-service-stablediffusion.yaml b/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2/ray-service-stablediffusion.yaml new file mode 100644 index 000000000..42d1e2f9a --- /dev/null +++ b/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2/ray-service-stablediffusion.yaml @@ -0,0 +1,137 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: stablediffusion + +--- +apiVersion: ray.io/v1alpha1 +kind: RayService +metadata: + name: stablediffusion-service + namespace: stablediffusion +spec: + serviceUnhealthySecondThreshold: 900 + deploymentUnhealthySecondThreshold: 300 + serveConfig: + importPath: ray_serve_stablediffusion:entrypoint # Specify the correct path to your Python script + runtimeEnv: | + env_vars: {"MODEL_ID": "aws-neuron/stable-diffusion-xl-base-1-0-1024x1024"} # Replace with the appropriate model ID + + rayClusterConfig: + rayVersion: '2.7.1' + headGroupSpec: + serviceType: NodePort + headService: + metadata: + name: stablediffusion-service + namespace: stablediffusion + rayStartParams: + dashboard-host: '0.0.0.0' + template: + spec: + containers: + - name: ray-head + image: public.ecr.aws/data-on-eks/ray2.7.1-py310-stablediffusion-neuron:latest # Image created using the Dockerfile attached in the folder + imagePullPolicy: Always # Ensure the image is always pulled when updated + lifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "ray stop"] + ports: + - containerPort: 6379 + name: gcs + - containerPort: 8265 + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8000 + name: serve + volumeMounts: + - mountPath: /tmp/ray + name: ray-logs + resources: + limits: + cpu: "2" + memory: "20G" + requests: + cpu: "2" + memory: "20G" + nodeSelector: + instanceType: mixed-x86 + provisionerType: Karpenter + workload: rayhead + volumes: + - name: ray-logs + emptyDir: {} + + workerGroupSpecs: + - groupName: inf2-worker-group + replicas: 1 + minReplicas: 1 + maxReplicas: 1 + rayStartParams: {} + template: + spec: + containers: + - name: ray-worker + image: public.ecr.aws/data-on-eks/ray2.7.1-py310-stablediffusion-neuron:latest # Image created using the Dockerfile attached in the folder + imagePullPolicy: Always # Ensure the image is always pulled when updated + lifecycle: + preStop: + exec: + command: ["/bin/sh", "-c", "ray stop"] + # We are using 2 Neuron cores per HTTP request hence this configuration handles 6 requests per second + resources: + limits: + cpu: "90" # All vCPUs of inf2.24xlarge; 6vCPU daemonset overhead + memory: "360G" # All memory of inf2.24xlarge; 24G for daemonset overhead + aws.amazon.com/neuron: "6" # All Neuron cores of inf2.24xlarge + requests: + cpu: "45" # Half of vCPUs for reservation for inf2.24xlarge; leaving 3 vCPUs for daemonset overhead + memory: "180G" # Half of memory for reservation for inf2.24xlarge; leaving 12G for daemonset overhead + # Set maximum neuron core available to the instance to acocomodate multiple requests to leverage all the neuron cores + # You cannot run multiple pods of the model on the same instance unless you shard the model + aws.amazon.com/neuron: "6" # All Neuron cores of inf2.24xlarge + nodeSelector: + instanceType: inferentia-inf2 + provisionerType: 
Karpenter + tolerations: + - key: "aws.amazon.com/neuroncore" + operator: "Exists" + effect: "NoSchedule" + - key: "aws.amazon.com/neuron" + operator: "Exists" + effect: "NoSchedule" + - key: "hub.jupyter.org/dedicated" + operator: "Equal" + value: "user" + effect: "NoSchedule" +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: stablediffusion-ingress + namespace: stablediffusion + annotations: + nginx.ingress.kubernetes.io/rewrite-target: "/$1" +spec: + ingressClassName: nginx + rules: + - http: + paths: + # Ray Dashboard + - path: /dashboard/(.*) + pathType: ImplementationSpecific + backend: + service: + name: stablediffusion-service + port: + number: 8265 + # Ray Serve + - path: /serve/(.*) + pathType: ImplementationSpecific + backend: + service: + name: stablediffusion-service + port: + number: 8000 diff --git a/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2/ray_serve_stablediffusion.py b/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2/ray_serve_stablediffusion.py new file mode 100644 index 000000000..c312d92c7 --- /dev/null +++ b/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2/ray_serve_stablediffusion.py @@ -0,0 +1,55 @@ +from io import BytesIO +from fastapi import FastAPI +from fastapi.responses import Response +import os +import base64 + +from ray import serve + +app = FastAPI() + +neuron_cores = 2 + +@serve.deployment(num_replicas=1, route_prefix="/") +@serve.ingress(app) +class APIIngress: + def __init__(self, diffusion_model_handle) -> None: + self.handle = diffusion_model_handle + + @app.get( + "/imagine", + responses={200: {"content": {"image/png": {}}}}, + response_class=Response, + ) + async def generate(self, prompt: str): + + image_ref = await self.handle.generate.remote(prompt) + image = await image_ref + file_stream = BytesIO() + image.save(file_stream, "PNG") + return Response(content=file_stream.getvalue(), media_type="image/png") + + +@serve.deployment( + ray_actor_options={ + "resources": {"neuron_cores": neuron_cores}, + "runtime_env": {"env_vars": {"NEURON_CC_FLAGS": "-O1"}}, + }, + autoscaling_config={"min_replicas": 1, "max_replicas": 1}, +) +class StableDiffusionV2: + def __init__(self): + from optimum.neuron import NeuronStableDiffusionXLPipeline + + compiled_model_id = "aws-neuron/stable-diffusion-xl-base-1-0-1024x1024" + + # To avoid saving the model locally, we can use the pre-compiled model directly from HF + self.pipe = NeuronStableDiffusionXLPipeline.from_pretrained(compiled_model_id, device_ids=[0, 1]) + + async def generate(self, prompt: str): + + assert len(prompt), "prompt parameter cannot be empty" + image = self.pipe(prompt).images[0] + return image + +entrypoint = APIIngress.bind(StableDiffusionV2.bind()) diff --git a/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2/stable-diffusion-xl-prompt_3.png b/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2/stable-diffusion-xl-prompt_3.png new file mode 100644 index 000000000..e0da1f3e6 Binary files /dev/null and b/ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2/stable-diffusion-xl-prompt_3.png differ diff --git a/ai-ml/trainium-inferentia/helm-values/ingress-nginx-values.yaml b/ai-ml/trainium-inferentia/helm-values/ingress-nginx-values.yaml index 22e48c7f9..10ae9bfc2 100644 --- a/ai-ml/trainium-inferentia/helm-values/ingress-nginx-values.yaml +++ b/ai-ml/trainium-inferentia/helm-values/ingress-nginx-values.yaml @@ -2,10 +2,10 @@ controller: service: externalTrafficPolicy: 
"Local" annotations: - service.beta.kubernetes.io/aws-load-balancer-type: external + service.beta.kubernetes.io/aws-load-balancer-type: external # nlb-ip or external service.beta.kubernetes.io/aws-load-balancer-nlb-target-type: ip service.beta.kubernetes.io/aws-load-balancer-backend-protocol: http - service.beta.kubernetes.io/aws-load-balancer-scheme: internet-facing # Private Load Balancer can only be accessed within the VPC + service.beta.kubernetes.io/aws-load-balancer-scheme: internal # Valid values are internal, internet-facing targetPorts: http: http https: http diff --git a/ai-ml/trainium-inferentia/install.sh b/ai-ml/trainium-inferentia/install.sh index b87db5117..7cf09a27e 100755 --- a/ai-ml/trainium-inferentia/install.sh +++ b/ai-ml/trainium-inferentia/install.sh @@ -1,9 +1,29 @@ + +#-------------------------------------------------------------- +# Llama2 Distributed Training pre-requisites +#-------------------------------------------------------------- +# export TF_VAR_enable_mpi_operator=true +# export TF_VAR_trn1_32xl_min_size=4 +# export TF_VAR_trn1_32xl_desired_size=4 +#-------------------------------------------------------------- + +#-------------------------------------------------------------- +# BERT-large Distributed Training pre-requisites +#-------------------------------------------------------------- +# export TF_VAR_enable_volcano=true +# export TF_VAR_trn1_32xl_min_size=2 +# export TF_VAR_trn1_32xl_desired_size=2 +#-------------------------------------------------------------- + #!/bin/bash echo "Initializing ..." + terraform init || echo "\"terraform init\" failed" +#------------------------------------------------------------------------- # List of Terraform modules to apply in sequence +#------------------------------------------------------------------------- targets=( "module.vpc" "module.eks" @@ -31,3 +51,5 @@ else echo "FAILED: Terraform apply of all modules failed" exit 1 fi + +##------------------------------------------------------------------------- diff --git a/ai-ml/trainium-inferentia/jupyterhub.tf b/ai-ml/trainium-inferentia/jupyterhub.tf index 916c3dfb2..8d2754597 100644 --- a/ai-ml/trainium-inferentia/jupyterhub.tf +++ b/ai-ml/trainium-inferentia/jupyterhub.tf @@ -1,13 +1,17 @@ #----------------------------------------------------------------------------------------- # JupyterHub Single User IRSA, maybe that block could be incorporated in add-on registry #----------------------------------------------------------------------------------------- -resource "kubernetes_namespace" "jupyterhub" { +resource "kubernetes_namespace_v1" "jupyterhub" { + count = var.enable_jupyterhub ? 1 : 0 + metadata { name = "jupyterhub" } } module "jupyterhub_single_user_irsa" { + count = var.enable_jupyterhub ? 1 : 0 + source = "terraform-aws-modules/iam/aws//modules/iam-role-for-service-accounts-eks" role_name = "${module.eks.cluster_name}-jupyterhub-single-user-sa" @@ -19,28 +23,32 @@ module "jupyterhub_single_user_irsa" { oidc_providers = { main = { provider_arn = module.eks.oidc_provider_arn - namespace_service_accounts = ["${kubernetes_namespace.jupyterhub.metadata[0].name}:jupyterhub-single-user"] + namespace_service_accounts = ["${kubernetes_namespace_v1.jupyterhub[0].metadata[0].name}:jupyterhub-single-user"] } } } resource "kubernetes_service_account_v1" "jupyterhub_single_user_sa" { + count = var.enable_jupyterhub ? 
1 : 0 + metadata { name = "${module.eks.cluster_name}-jupyterhub-single-user" - namespace = kubernetes_namespace.jupyterhub.metadata[0].name - annotations = { "eks.amazonaws.com/role-arn" : module.jupyterhub_single_user_irsa.iam_role_arn } + namespace = kubernetes_namespace_v1.jupyterhub[0].metadata[0].name + annotations = { "eks.amazonaws.com/role-arn" : module.jupyterhub_single_user_irsa[0].iam_role_arn } } automount_service_account_token = true } resource "kubernetes_secret_v1" "jupyterhub_single_user" { + count = var.enable_jupyterhub ? 1 : 0 + metadata { name = "${module.eks.cluster_name}-jupyterhub-single-user-secret" - namespace = kubernetes_namespace.jupyterhub.metadata[0].name + namespace = kubernetes_namespace_v1.jupyterhub[0].metadata[0].name annotations = { - "kubernetes.io/service-account.name" = kubernetes_service_account_v1.jupyterhub_single_user_sa.metadata[0].name - "kubernetes.io/service-account.namespace" = kubernetes_namespace.jupyterhub.metadata[0].name + "kubernetes.io/service-account.name" = kubernetes_service_account_v1.jupyterhub_single_user_sa[0].metadata[0].name + "kubernetes.io/service-account.namespace" = kubernetes_namespace_v1.jupyterhub[0].metadata[0].name } } @@ -52,6 +60,8 @@ resource "kubernetes_secret_v1" "jupyterhub_single_user" { # This will be replaced with Dynamic EFS provision using EFS CSI Driver #--------------------------------------------------------------- resource "aws_efs_file_system" "efs" { + count = var.enable_jupyterhub ? 1 : 0 + creation_token = "efs-jupyter-single-user" encrypted = true @@ -59,14 +69,16 @@ resource "aws_efs_file_system" "efs" { } resource "aws_efs_mount_target" "efs_mt" { - count = length(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : substr(cidr_block, 0, 4) == "100." ? subnet_id : null])) + count = var.enable_jupyterhub ? 1 : 0 - file_system_id = aws_efs_file_system.efs.id - subnet_id = element(compact([for subnet_id, cidr_block in zipmap(module.vpc.private_subnets, module.vpc.private_subnets_cidr_blocks) : substr(cidr_block, 0, 4) == "100." ? subnet_id : null]), count.index) - security_groups = [aws_security_group.efs.id] + file_system_id = aws_efs_file_system.efs[0].id + subnet_id = module.vpc.private_subnets[2] + security_groups = [aws_security_group.efs[0].id] } resource "aws_security_group" "efs" { + count = var.enable_jupyterhub ? 1 : 0 + name = "${local.name}-efs" description = "Allow inbound NFS traffic from private subnets of the VPC" vpc_id = module.vpc.vpc_id @@ -83,6 +95,8 @@ resource "aws_security_group" "efs" { } resource "kubectl_manifest" "pv" { + count = var.enable_jupyterhub ? 1 : 0 + yaml_body = < --region +``` + +## Error: creating CloudWatch Logs Log Group + +Terraform cannot create a CloudWatch Logs log group because it already exists in your AWS account. + +``` +╷ +│ Error: creating CloudWatch Logs Log Group (/aws/eks/trainium-inferentia/cluster): operation error CloudWatch Logs: CreateLogGroup, https response error StatusCode: 400, RequestID: 5c34c47a-72c6-44b2-a345-925824f24d38, ResourceAlreadyExistsException: The specified log group already exists +│ +│ with module.eks.aws_cloudwatch_log_group.this[0], +│ on .terraform/modules/eks/main.tf line 106, in resource "aws_cloudwatch_log_group" "this": +│ 106: resource "aws_cloudwatch_log_group" "this" { + +``` + +**Solution:** + +Delete the existing log group by updating log group name and the region. 
+ +```sh +aws logs delete-log-group --log-group-name --region +``` diff --git a/website/docs/gen-ai/inference/StableDiffusion.md b/website/docs/gen-ai/inference/StableDiffusion.md index f26334b04..c48e08bc7 100644 --- a/website/docs/gen-ai/inference/StableDiffusion.md +++ b/website/docs/gen-ai/inference/StableDiffusion.md @@ -1,12 +1,294 @@ --- -title: Stable Diffusion on GPUs +title: Stable Diffusion on Inferentia sidebar_position: 2 --- +import CollapsibleContent from '../../../src/components/CollapsibleContent'; :::info -COMING SOON +This example blueprint deploys a `stable-diffusion-xl-base-1-0` model on Inferentia2 instance running as a worker node in an EKS cluster. The model is served using `RayServe`. -Please note that this section is currently a work in progress and will serve as a comprehensive collection of resources for running data and ML workloads on EKS. +::: + +# Deploying Stable Diffusion XL Base Model with Inferentia, Ray Serve and Gradio +Welcome to the comprehensive guide on deploying the [Stable Diffusion XL Base](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) model on Amazon Elastic Kubernetes Service (EKS) using [Ray Serve](https://docs.ray.io/en/latest/serve/index.html). +In this tutorial, you will not only learn how to harness the power of Stable Diffusion models, but also gain insights into the intricacies of deploying large language models (LLMs) efficiently, particularly on [trn1/inf2](https://aws.amazon.com/machine-learning/neuron/) (powered by AWS Trainium and Inferentia) instances, such as `inf2.24xlarge` and `inf2.48xlarge`, +which are optimized for deploying and scaling large language models. + +### What is Stable Diffusion? +Stable Diffusion is a text-to-image model for creating stunning art within seconds. It is one of the largest and most powerful LLMs available today. It is primarily used to generate detailed images conditioned on text descriptions, though it can also be applied to other tasks such as inpainting, outpainting, and generating image-to-image translations guided by a text prompt. + +#### Stable Diffusion XL(SDXL) +SDXL is a latent diffusion model for text-to-image synthesis. Compared to previous versions of Stable Diffusion, SDXL uses pipelines for latent diffusion and noise reduction. SDXL also improves the quality of generated images compared to prior Stable Diffusion models by using a times larger UNet. The increase of model parameters is mainly due to more attention blocks and a larger cross-attention context as SDXL uses a second text encoder. + +SDXL has been designed with multiple novel conditioning schemes and trained on multiple aspect ratios. It also uses a refinement model which is used to improve the visual fidelity of samples generated by SDXL using a post-hoc image-to-image technique. + +This process results in a highly capable and fine-tuned language model that we will guide you to deploy and utilize effectively on **Amazon EKS** with **Ray Serve**. + +## Inference on Trn1/Inf2 Instances: Unlocking the Full Potential of Stable Diffusion LLMs +**Stable Diffusion XL** can be deployed on a variety of hardware platforms, each with its own set of advantages. However, when it comes to maximizing the efficiency, scalability, and cost-effectiveness of Stable Diffusion models, [AWS Trn1/Inf2 instances](https://aws.amazon.com/ec2/instance-types/inf2/) shine as the optimal choice. 
+ +**Scalability and Availability** +One of the key challenges in deploying large language models (`LLMs`) like StableDiffusion XL is the scalability and availability of suitable hardware. Traditional `GPU` instances often face scarcity due to high demand, making it challenging to provision and scale resources effectively. +In contrast, `Trn1/Inf2` instances, such as `trn1.32xlarge`, `trn1n.32xlarge`, `inf2.24xlarge` and `inf2.48xlarge`, are purpose built for high-performance deep learning (DL) training and inference of generative AI models, including LLMs. They offer both scalability and availability, ensuring that you can deploy and scale your `Stable-diffusion-xl` models as needed, without resource bottlenecks or delays. + +**Cost Optimization:** +Running LLMs on traditional GPU instances can be cost-prohibitive, especially given the scarcity of GPUs and their competitive pricing. +**Trn1/Inf2** instances provide a cost-effective alternative. By offering dedicated hardware optimized for AI and machine learning tasks, Trn1/Inf2 instances allow you to achieve top-notch performance at a fraction of the cost. +This cost optimization enables you to allocate your budget efficiently, making LLM deployment accessible and sustainable. + +**Performance Boost** +While Stable-Diffusion-xl can achieve high-performance inference on GPUs, Neuron accelerators take performance to the next level. Neuron accelerators are purpose-built for machine learning workloads, providing hardware acceleration that significantly enhances Stable-diffusion's inference speeds. This translates to faster response times and improved user experiences when deploying Stable-Diffusion-xl on Trn1/Inf2 instances. + +### Example usecase +A digital art company wants to deploy Stable-diffusion-xl powered image generator to help generate possible art based on prompts. Using a selection of textual prompts, users can create artwork, graphics and logos in a wide variety of styles. The image generator can be used to predict or fine-tune the art and can result in significant time saving in product iteration cycle. Company has a large customer base and wants the model to be scalable at high load. The company needs to design an infrastructure that can handle the high volume of requests and provide a fast response time. + +The company can use Inferentia2 instances to scale its Stable diffusion image generator efficiently. Inferentia2 instances are specialized hardware accelerators for machine learning tasks. They can provide up to 20x better performance and up to 7x lower cost than GPUs for machine learning workloads. + +The company can also use Ray Serve to horizontally scale its Stable diffusion image generator. Ray Serve is a distributed framework for serving machine learning models. It can automatically scale your models up or down based on demand. + +To scale its Stable diffusion image generator, the company can deploy multiple Inferentia2 instances and use Ray Serve to distribute the traffic across the instances. This will allow the company to handle a high volume of requests and provide a fast response time. + +## Solution Architecture +In this section, we will delve into the architecture of our solution, which combines Stable diffusion xl model, [Ray Serve](https://docs.ray.io/en/latest/serve/index.html) and [Inferentia2](https://aws.amazon.com/ec2/instance-types/inf2/) on Amazon EKS. 
+
+![Sdxl-inf2](img/excali-draw-sdxl-inf2.png)
+
+## Deploying the Solution
+To get started with deploying `stable-diffusion-xl-base-1-0` on [Amazon EKS](https://aws.amazon.com/eks/), we will cover the necessary prerequisites and guide you through the deployment process step by step.
+This includes setting up the infrastructure, deploying the **Ray cluster**, and creating the [Gradio](https://www.gradio.app/) WebUI app.
+
+<CollapsibleContent header={<h2>Prerequisites</h2>}>
+Before we begin, ensure you have all the prerequisites in place to make the deployment process smooth and hassle-free.
+Ensure that you have installed the following tools on your machine.
+
+1. [aws cli](https://docs.aws.amazon.com/cli/latest/userguide/install-cliv2.html)
+2. [kubectl](https://Kubernetes.io/docs/tasks/tools/)
+3. [terraform](https://learn.hashicorp.com/tutorials/terraform/install-cli)
+
+### Deploy
+
+Clone the repository
+
+```bash
+git clone https://github.com/awslabs/data-on-eks.git
+```
+
+Navigate into one of the example directories and run the `install.sh` script
+
+**Important Note:** Ensure that you update the region in the `variables.tf` file before deploying the blueprint.
+Additionally, confirm that your local region setting matches the specified region to prevent any discrepancies.
+For example, set `export AWS_DEFAULT_REGION="<REGION>"` to the desired region:
+
+```bash
+cd data-on-eks/ai-ml/trainium-inferentia/ && chmod +x install.sh
+./install.sh
+```
+
+### Verify the resources
+
+Verify the Amazon EKS Cluster
+
+```bash
+aws eks --region us-west-2 describe-cluster --name trainium-inferentia
+```
+
+```bash
+# Creates k8s config file to authenticate with EKS
+aws eks --region us-west-2 update-kubeconfig --name trainium-inferentia
+
+kubectl get nodes # Output shows the EKS Managed Node group nodes
+```
+
+</CollapsibleContent>
+
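Optionally, you can also confirm that the add-ons this blueprint enables (the AWS Neuron device plugin, Karpenter, and the NGINX ingress controller) are running before deploying the Ray service. A minimal sketch; the component names below are assumptions based on the default Helm releases and may differ in your deployment:

```bash
# AWS Neuron device plugin DaemonSet (exposes aws.amazon.com/neuron resources to the scheduler)
kubectl get daemonset -A | grep -i neuron

# Karpenter controller (provisions the inf2 and x86 nodes on demand)
kubectl get pods -A | grep -i karpenter

# NGINX ingress controller (fronts the Ray dashboard and Serve endpoints)
kubectl get pods -A | grep -i ingress-nginx
```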
+## Deploying the Ray Cluster with Stable Diffusion XL Model
+
+Once the `Trainium on EKS` Cluster is deployed, you can proceed to use `kubectl` to deploy the `ray-service-stablediffusion.yaml`.
+
+In this step, we will deploy the Ray Serve cluster, which comprises one `Head Pod` on `x86 CPU` instances and `Ray workers` on `inf2.24xlarge` instances, both autoscaled by [Karpenter](https://karpenter.sh/).
+
+Let's take a closer look at the key files used in this deployment and understand their functionality before proceeding:
+
+- **ray_serve_stablediffusion.py:**
+This script uses FastAPI, Ray Serve, and the [Hugging Face Optimum Neuron](https://github.com/huggingface/optimum-neuron) library to create an efficient text-to-image generator using the [Neuronx model for stable-diffusion-xl-base-1.0](https://huggingface.co/aws-neuron/stable-diffusion-xl-base-1-0-1024x1024) model.
+
+For this example blueprint, we are using a model that has already been compiled to run on AWS Neuron. You can use any Stable Diffusion model of your choice, provided you compile it for AWS Neuron before running inference on it.
+
+- **ray-service-stablediffusion.yaml:**
+This Ray Serve YAML file serves as a Kubernetes configuration for deploying the Ray Serve service, facilitating efficient image generation using the `stable-diffusion-xl-base-1.0` model.
+It defines a Kubernetes namespace named `stablediffusion` to isolate resources. Within the configuration, the `RayService` specification, named `stablediffusion-service`, is created and hosted within the `stablediffusion` namespace. The `RayService` specification leverages the Python script `ray_serve_stablediffusion.py` (copied into the Docker image built from the Dockerfile in the same folder) to create the Ray Serve service.
+The Docker image used in this example is publicly available on Amazon Elastic Container Registry (ECR) for ease of deployment.
+Users can also modify the Dockerfile to suit their specific requirements and push it to their own ECR repository, referencing it in the YAML file.
+
+### Deploy the Stable-Diffusion-xl-base-1-0 Model
+
+**Ensure the cluster is configured locally**
+```bash
+aws eks --region us-west-2 update-kubeconfig --name trainium-inferentia
+```
+
+**Deploy RayServe Cluster**
+
+```bash
+cd ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2
+kubectl apply -f ray-service-stablediffusion.yaml
+```
+
+Verify the deployment by running the following commands:
+
+:::info
+
+The deployment process may take up to 10 minutes. The Head Pod is expected to be ready within 2 to 3 minutes, while the Ray Serve worker pod may take up to 10 minutes for image retrieval and model deployment from Hugging Face.
+
+:::
+
+```text
+$ kubectl get po -n stablediffusion -w
+
+NAME READY STATUS RESTARTS AGE
+service-raycluster-gc7gb-worker-inf2-worker-group-k2kf2 0/1 Init:0/1 0 7s
+stablediffusion-service-raycluster-gc7gb-head-6fqvv 1/1 Running 0 7s
+
+service-raycluster-gc7gb-worker-inf2-worker-group-k2kf2 0/1 PodInitializing 0 9s
+service-raycluster-gc7gb-worker-inf2-worker-group-k2kf2 1/1 Running 0 10s
+stablediffusion-service-raycluster-gc7gb-head-6fqvv 1/1 Running 0 53s
+service-raycluster-gc7gb-worker-inf2-worker-group-k2kf2 1/1 Running 0 53s
+```
+
+Also check the service and ingress resources that were created:
+
+```text
+kubectl get svc -n stablediffusion
+
+NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
+stablediffusion-service NodePort 172.20.175.61 <none> 6379:32190/TCP,8265:32375/TCP,10001:32117/TCP,8000:30770/TCP,52365:30334/TCP,8080:30094/TCP 16h
+stablediffusion-service-head-svc NodePort 172.20.193.225 <none> 6379:32228/TCP,8265:30215/TCP,10001:30767/TCP,8000:31482/TCP,52365:30170/TCP,8080:31584/TCP 16h
+stablediffusion-service-serve-svc NodePort 172.20.15.224 <none> 8000:30982/TCP 16h
+
+
+$ kubectl get ingress -n stablediffusion
+
+NAME CLASS HOSTS ADDRESS PORTS AGE
+stablediffusion-ingress nginx * k8s-ingressn-ingressn-7f3f4b475b-1b8966c0b8f4d3da.elb.us-west-2.amazonaws.com 80 16h
+```
+
+Now, you can access the Ray Dashboard from the Load Balancer URL below:
+
+ http://\<NLB_DNS_NAME\>/dashboard/#/serve
+
+If you don't have access to a public Load Balancer, you can use port-forwarding and browse the Ray Dashboard using localhost with the following command:
+
+```bash
+kubectl port-forward svc/stablediffusion-service 8265:8265 -n stablediffusion
+
+# Open the link in the browser
+http://localhost:8265/
+
+```
+
+From this webpage, you will be able to monitor the progress of the model deployment, as shown in the image below:
+
+![Ray Dashboard](img/ray-dashboard-sdxl.png)
+
+### To Test the Stable Diffusion XL Model
+Once you see that the model deployment is in the `running` state, you can start sending prompts to the Stable Diffusion XL model.
+
+You can use the following URL with a prompt added as the query string at the end.
+
+ http://\<NLB_DNS_NAME\>/serve/imagine?prompt=an astronaut is dancing on green grass, sunlit
+
+You will see an output like this in your browser:
+
+![Prompt Output](img/stable-diffusion-xl-prompt_3.png)
+
+## Deploying the Gradio WebUI App
+Discover how to create a user-friendly web interface using [Gradio](https://www.gradio.app/) that integrates seamlessly with deployed models.
+
+Let's deploy the Gradio app locally on your machine to interact with the Stable Diffusion XL model deployed using RayServe.
+
+:::info
+
+The Gradio app interacts with the locally exposed service created solely for the demonstration. Alternatively, you can deploy the Gradio app on EKS as a Pod with Ingress and Load Balancer for wider accessibility.
+:::
+
+### Execute Port Forward to the stablediffusion Ray Service
+First, execute a port forward to the stablediffusion Ray Service using kubectl:
+
+```bash
+kubectl port-forward svc/stablediffusion-service 8000:8000 -n stablediffusion
+```
+
+### Deploy Gradio WebUI Locally
+
+#### Create a Virtual Environment
+Create a Python virtual environment on your machine for the Gradio application:
+
+```bash
+cd ai-ml/trainium-inferentia/examples/gradio-ui
+python3 -m venv .venv
+source .venv/bin/activate
+```
+
+#### Install the Gradio Image Generator app
+Install all the Gradio WebUI app dependencies with pip:
+
+```bash
+pip install gradio requests
+```
+
+#### Invoke the WebUI
+Run the Gradio WebUI using the following command:
+
+NOTE: `gradio-app-stablediffusion.py` points to the port-forward URL, e.g., `service_name = "http://localhost:8000"`
+
+```bash
+python gradio-app-stablediffusion.py
+```
+
+You should see output similar to the following:
+
+```text
+Running on local URL: http://127.0.0.1:7860
+
+To create a public link, set `share=True` in `launch()`.
+```
+
+#### Access the WebUI from Your Browser
+Open your web browser and access the Gradio WebUI by navigating to the following URL:
+
+http://127.0.0.1:7860
+
+You should now be able to interact with the Gradio application from your local machine.
+
+![Gradio Output](img/stable-diffusion-xl-gradio.png)
+
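If you prefer to drive the endpoint from a script rather than the WebUI, the same `/imagine` route that the Gradio app calls can be invoked directly with `requests`. The snippet below is a minimal sketch that assumes the port-forward from the previous step is still running on `localhost:8000`:

```python
import requests

# Assumes the same port-forward as above (localhost:8000) and the GET /imagine
# endpoint defined in ray_serve_stablediffusion.py; adjust if your setup differs.
SERVICE_URL = "http://localhost:8000"

def generate_image(prompt: str, output_path: str = "output.png") -> str:
    # The Ray Serve app returns the generated image as PNG bytes
    response = requests.get(
        f"{SERVICE_URL}/imagine",
        params={"prompt": prompt},
        timeout=180,
    )
    response.raise_for_status()
    with open(output_path, "wb") as f:
        f.write(response.content)
    return output_path

if __name__ == "__main__":
    print(generate_image("an astronaut is dancing on green grass, sunlit"))
```

This mirrors what `gradio-app-stablediffusion.py` does under the hood: a GET request with the prompt as a query parameter, returning the generated PNG bytes.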
+## Conclusion
+In conclusion, you have successfully deployed the **Stable-diffusion-xl-base** model on EKS with Ray Serve and created a prompt-based web UI using Gradio.
+This opens up exciting possibilities for prompt-based image generation and image prediction development.
+
+In summary, when it comes to deploying and scaling Stable Diffusion models, AWS Trn1/Inf2 instances offer a compelling advantage.
+They provide the scalability, cost optimization, and performance boost needed to make running large models efficient and accessible, all while overcoming the challenges associated with the scarcity of GPUs.
+Whether you're building text-to-image generators, image-to-image generators or any other generative AI solution, Trn1/Inf2 instances empower you to harness the full potential of Stable Diffusion models on the AWS cloud.
+
+## Cleanup
+Finally, we'll provide instructions for cleaning up and deprovisioning the resources when they are no longer needed.
+
+**Step 1:** Cancel the execution of the `python gradio-app-stablediffusion.py` process.
+
+**Step 2:** Delete the Ray Cluster
+
+```bash
+cd ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2
+kubectl delete -f ray-service-stablediffusion.yaml
+```
+
+**Step 3:** Clean up the EKS Cluster
+This script will clean up the environment using the `-target` option to ensure all the resources are deleted in the correct order.
+
+```bash
+export AWS_DEFAULT_REGION="<DEPLOYED_EKS_CLUSTER_REGION>"
+cd data-on-eks/ai-ml/trainium-inferentia/ && chmod +x cleanup.sh
+./cleanup.sh
+```
diff --git a/website/docs/gen-ai/inference/img/excali-draw-sdxl-inf2.png b/website/docs/gen-ai/inference/img/excali-draw-sdxl-inf2.png
new file mode 100644
index 000000000..bef179b2a
Binary files /dev/null and b/website/docs/gen-ai/inference/img/excali-draw-sdxl-inf2.png differ
diff --git a/website/docs/gen-ai/inference/img/ray-dashboard-sdxl.png b/website/docs/gen-ai/inference/img/ray-dashboard-sdxl.png
new file mode 100644
index 000000000..a2cca535b
Binary files /dev/null and b/website/docs/gen-ai/inference/img/ray-dashboard-sdxl.png differ
diff --git a/website/docs/gen-ai/inference/img/stable-diffusion-xl-gradio.png b/website/docs/gen-ai/inference/img/stable-diffusion-xl-gradio.png
new file mode 100644
index 000000000..9576241fe
Binary files /dev/null and b/website/docs/gen-ai/inference/img/stable-diffusion-xl-gradio.png differ
diff --git a/website/docs/gen-ai/inference/img/stable-diffusion-xl-prompt_3.png b/website/docs/gen-ai/inference/img/stable-diffusion-xl-prompt_3.png
new file mode 100644
index 000000000..e0da1f3e6
Binary files /dev/null and b/website/docs/gen-ai/inference/img/stable-diffusion-xl-prompt_3.png differ
diff --git a/website/docs/gen-ai/training/Llama2.md b/website/docs/gen-ai/training/Llama2.md
index 6cb0090eb..adde58305 100644
--- a/website/docs/gen-ai/training/Llama2.md
+++ b/website/docs/gen-ai/training/Llama2.md
@@ -357,4 +357,3 @@ To remove the resources created using this solution, run the cleanup script:
 cd data-on-eks/ai-ml/trainium-inferentia
 ./cleanup.sh
 ```
-