feat: Deploy Gradio UI App as a k8s deployment and service for stable…

… diffusion inference (#461)
awslabs · Mar 5, 2024 · 5f96cb3 · 5f96cb3
1 parent a38b7ba
commit 5f96cb3
Show file tree

Hide file tree

Showing 18 changed files with 102 additions and 101 deletions.
diff --git a/ai-ml/trainium-inferentia/examples/gradio-ui/README-StableDiffusion.md b/ai-ml/trainium-inferentia/examples/gradio-ui/README-StableDiffusion.md
diff --git a/ai-ml/trainium-inferentia/examples/gradio-ui/gradio-app-stable-diffusion-xl.png b/ai-ml/trainium-inferentia/examples/gradio-ui/gradio-app-stable-diffusion-xl.png
diff --git a/...examples/ray-serve/llama2-inf2/Dockerfile → ...nference/ray-serve/llama2-inf2/Dockerfile b/...examples/ray-serve/llama2-inf2/Dockerfile → ...nference/ray-serve/llama2-inf2/Dockerfile
diff --git a/.../examples/ray-serve/llama2-inf2/README.md → ...inference/ray-serve/llama2-inf2/README.md b/.../examples/ray-serve/llama2-inf2/README.md → ...inference/ray-serve/llama2-inf2/README.md
diff --git a/...serve/llama2-inf2/ray-service-llama2.yaml → ...serve/llama2-inf2/ray-service-llama2.yaml b/...serve/llama2-inf2/ray-service-llama2.yaml → ...serve/llama2-inf2/ray-service-llama2.yaml
diff --git a/...ray-serve/llama2-inf2/ray_serve_llama2.py → ...ray-serve/llama2-inf2/ray_serve_llama2.py b/...ray-serve/llama2-inf2/ray_serve_llama2.py → ...ray-serve/llama2-inf2/ray_serve_llama2.py
diff --git a/...ay-serve/stable-diffusion-inf2/Dockerfile → ...ay-serve/stable-diffusion-inf2/Dockerfile b/...ay-serve/stable-diffusion-inf2/Dockerfile → ...ay-serve/stable-diffusion-inf2/Dockerfile
diff --git a/...ray-serve/stable-diffusion-inf2/README.md → ...ray-serve/stable-diffusion-inf2/README.md b/...ray-serve/stable-diffusion-inf2/README.md → ...ray-serve/stable-diffusion-inf2/README.md
diff --git a/...ainium-inferentia/examples/inference/ray-serve/stable-diffusion-inf2/gradio-ui/Dockerfile b/...ainium-inferentia/examples/inference/ray-serve/stable-diffusion-inf2/gradio-ui/Dockerfile
@@ -0,0 +1,13 @@
+# Use Python base image
+FROM --platform=linux/amd64 python:3.9-slim
+
+# Set working directory in the container
+WORKDIR /app
+
+# Copy the Python script into the container
+COPY gradio-app-stablediffusion.py /app/gradio-app-stablediffusion.py
+
+RUN pip install --no-cache-dir gradio requests Pillow
+
+# Command to run the Python script
+ENTRYPOINT ["python", "gradio-app-stablediffusion.py"]
diff --git a/...s/gradio-ui/gradio-app-stablediffusion.py → ...2/gradio-ui/gradio-app-stablediffusion.py b/...s/gradio-ui/gradio-app-stablediffusion.py → ...2/gradio-ui/gradio-app-stablediffusion.py
@@ -1,14 +1,12 @@
 import gradio as gr
 import requests
-import json
+import os
 from PIL import Image
 from io import BytesIO
 
 # Constants for model endpoint and service name
-model_endpoint = "/imagine"
-# service_name = "http://<REPLACE_ME_WITH_ELB_DNS_NAME>/serve"
-service_name = "http://localhost:8000"  # Replace with your actual service name
-
+model_endpoint = os.environ.get("MODEL_ENDPOINT", "/imagine")
+service_name = os.environ.get("SERVICE_NAME", "http://localhost:8000")
 
 # Function to generate image based on prompt
 def generate_image(prompt):
@@ -25,9 +23,10 @@ def generate_image(prompt):
 
     except requests.exceptions.RequestException as e:
         # Handle any request exceptions (e.g., connection errors)
-        return f"AI: Error: {str(e)}"
+        # return f"AI: Error: {str(e)}"
+        return Image.new('RGB', (100, 100), color='red')
 
 # Define the Gradio PromptInterface
 demo = gr.Interface(fn=generate_image,
                     inputs = [gr.Textbox(label="Enter the Prompt")],
-                    outputs = gr.Image(type='pil')).launch(debug='True')
+                    outputs = gr.Image(type='pil')).launch(server_name="0.0.0.0")
diff --git a/...nferentia/examples/inference/ray-serve/stable-diffusion-inf2/gradio-ui/gradio-deploy.yaml b/...nferentia/examples/inference/ray-serve/stable-diffusion-inf2/gradio-ui/gradio-deploy.yaml
@@ -0,0 +1,56 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: gradio
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: gradio-deployment
+  namespace: gradio
+  labels:
+    app: gradio
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: gradio
+  template:
+    metadata:
+      labels:
+        app: gradio
+    spec:
+      containers:
+      - name: gradio
+        image: public.ecr.aws/data-on-eks/gradio-app:sd-v1.0
+        imagePullPolicy: IfNotPresent
+        ports:
+        - containerPort: 7860
+        resources:
+          requests:
+            cpu: "512m"
+            memory: "2048Mi"
+          limits:
+            cpu: "1"
+            memory: "4096Mi"
+        env:
+        - name: MODEL_ENDPOINT
+          value: "/imagine"
+        # Note: the service name is hardcoded to match the Stable Diffusion service created by this blueprint. If the RayServe deployment's service name, namespace, or port changes, update the SERVICE_NAME value below accordingly.
+        - name: SERVICE_NAME
+          value: "http://stablediffusion-service.stablediffusion.svc.cluster.local:8000"
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: gradio-service
+  namespace: gradio
+spec:
+  selector:
+    app: gradio
+  ports:
+  - name: http
+    protocol: TCP
+    port: 7860
+    targetPort: 7860
+  type: ClusterIP
diff --git a/...ion-inf2/ray-service-stablediffusion.yaml → ...ion-inf2/ray-service-stablediffusion.yaml b/...ion-inf2/ray-service-stablediffusion.yaml → ...ion-inf2/ray-service-stablediffusion.yaml
diff --git a/...ffusion-inf2/ray_serve_stablediffusion.py → ...ffusion-inf2/ray_serve_stablediffusion.py b/...ffusion-inf2/ray_serve_stablediffusion.py → ...ffusion-inf2/ray_serve_stablediffusion.py
diff --git a/...ion-inf2/stable-diffusion-xl-prompt_3.png → ...ion-inf2/stable-diffusion-xl-prompt_3.png b/...ion-inf2/stable-diffusion-xl-prompt_3.png → ...ion-inf2/stable-diffusion-xl-prompt_3.png
diff --git a/analytics/terraform/datahub-on-eks/README.md b/analytics/terraform/datahub-on-eks/README.md
@@ -28,8 +28,7 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/
 | <a name="module_eks"></a> [eks](#module\_eks) | terraform-aws-modules/eks/aws | ~> 19.15 |
 | <a name="module_eks_blueprints_addons"></a> [eks\_blueprints\_addons](#module\_eks\_blueprints\_addons) | aws-ia/eks-blueprints-addons/aws | ~> 1.2 |
 | <a name="module_vpc"></a> [vpc](#module\_vpc) | terraform-aws-modules/vpc/aws | ~> 5.0 |
-| <a name="module_vpc_endpoints"></a> [vpc\_endpoints](#module\_vpc\_endpoints) | terraform-aws-modules/vpc/aws//modules/vpc-endpoints | ~> 5.0 |
-| <a name="module_vpc_endpoints_sg"></a> [vpc\_endpoints\_sg](#module\_vpc\_endpoints\_sg) | terraform-aws-modules/security-group/aws | ~> 5.0 |
+| <a name="module_vpc_endpoints"></a> [vpc\_endpoints](#module\_vpc\_endpoints) | terraform-aws-modules/vpc/aws//modules/vpc-endpoints | ~> 5.1 |
 
 ## Resources
 
@@ -43,15 +42,17 @@ Checkout the [documentation website](https://awslabs.github.io/data-on-eks/docs/
 
 | Name | Description | Type | Default | Required |
 |------|-------------|------|---------|:--------:|
-| <a name="input_create_iam_service_linked_role_es"></a> [create\_iam\_service\_linked\_role\_es](#input\_create\_iam\_service\_linked\_role\_es) | Whether to create `AWSServiceRoleForAmazonOpensearchService` service-linked role. Set it to `false` if the role already exists | `bool` | `true` | no |
+| <a name="input_create_vpc"></a> [create\_vpc](#input\_create\_vpc) | Create VPC | `bool` | `true` | no |
 | <a name="input_eks_cluster_version"></a> [eks\_cluster\_version](#input\_eks\_cluster\_version) | EKS Cluster version | `string` | `"1.26"` | no |
 | <a name="input_enable_vpc_endpoints"></a> [enable\_vpc\_endpoints](#input\_enable\_vpc\_endpoints) | Enable VPC Endpoints | `bool` | `false` | no |
 | <a name="input_name"></a> [name](#input\_name) | Name of the VPC and EKS Cluster | `string` | `"datahub-on-eks"` | no |
+| <a name="input_private_subnet_ids"></a> [private\_subnet\_ids](#input\_private\_subnet\_ids) | Ids for existing private subnets - needed when create\_vpc set to false | `list(string)` | `[]` | no |
 | <a name="input_private_subnets"></a> [private\_subnets](#input\_private\_subnets) | Private Subnets CIDRs. 32766 Subnet1 and 16382 Subnet2 IPs per Subnet | `list(string)` | <pre>[<br>  "10.1.0.0/17",<br>  "10.1.128.0/18"<br>]</pre> | no |
 | <a name="input_public_subnets"></a> [public\_subnets](#input\_public\_subnets) | Public Subnets CIDRs. 62 IPs per Subnet | `list(string)` | <pre>[<br>  "10.1.255.128/26",<br>  "10.1.255.192/26"<br>]</pre> | no |
 | <a name="input_region"></a> [region](#input\_region) | Region | `string` | `"us-west-2"` | no |
 | <a name="input_tags"></a> [tags](#input\_tags) | Default tags | `map(string)` | `{}` | no |
-| <a name="input_vpc_cidr"></a> [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR | `string` | `"10.1.0.0/16"` | no |
+| <a name="input_vpc_cidr"></a> [vpc\_cidr](#input\_vpc\_cidr) | VPC CIDR - must change to match the cidr of the existing VPC if create\_vpc set to false | `string` | `"10.1.0.0/16"` | no |
+| <a name="input_vpc_id"></a> [vpc\_id](#input\_vpc\_id) | VPC Id for the existing vpc - needed when create\_vpc set to false | `string` | `""` | no |
 
 ## Outputs
 

diff --git a/analytics/terraform/datahub-on-eks/providers.tf b/analytics/terraform/datahub-on-eks/providers.tf
@@ -15,4 +15,3 @@ provider "helm" {
     token                  = data.aws_eks_cluster_auth.this.token
   }
 }
-
diff --git a/website/docs/gen-ai/inference/Llama2.md b/website/docs/gen-ai/inference/Llama2.md
@@ -154,7 +154,7 @@ aws eks --region us-west-2 update-kubeconfig --name trainium-inferentia
 **Deploy RayServe Cluster**
 
 ```bash
-cd ai-ml/trainium-inferentia/examples/ray-serve/llama2-inf2
+cd ai-ml/trainium-inferentia/examples/inference/ray-serve/llama2-inf2
 kubectl apply -f ray-service-llama2.yaml
 ```
 

diff --git a/website/docs/gen-ai/inference/StableDiffusion.md b/website/docs/gen-ai/inference/StableDiffusion.md
@@ -131,7 +131,7 @@ aws eks --region us-west-2 update-kubeconfig --name trainium-inferentia
 **Deploy RayServe Cluster**
 
 ```bash
-cd ai-ml/trainium-inferentia/examples/ray-serve/stable-diffusion-inf2
+cd ai-ml/trainium-inferentia/examples/inference/ray-serve/stable-diffusion-inf2
 kubectl apply -f ray-service-stablediffusion.yaml
 ```
 
@@ -192,11 +192,12 @@ From this webpage, you will be able to monitor the progress of Model deployment,
 ![Ray Dashboard](img/ray-dashboard-sdxl.png)
 
 ### To Test the Stable Diffusion XL Model
-Once you see the status of the model deployment is in `running` state then you can start using Llama-2-chat.
+
+Once you've verified that the Stable Diffusion model deployment status has switched to a `running` state in the Ray Dashboard, you're all set to start leveraging the model. This change in status signifies that the Stable Diffusion model is now fully functional and prepared to handle your image generation requests based on textual descriptions.
 
 You can use the following URL with a query added at the end of the URL.
 
-    http://\<NLB_DNS_NAME\>/serve/serve/imagine?prompt=an astronaut is dancing on green grass, sunlit
+    http://\<NLB_DNS_NAME\>/serve/imagine?prompt=an astronaut is dancing on green grass, sunlit
 
 You will see an output like this in your browser:
 
@@ -205,66 +206,52 @@ You will see an output like this in your browser:
 ## Deploying the Gradio WebUI App
 Discover how to create a user-friendly web interface using [Gradio](https://www.gradio.app/) that integrates seamlessly with deployed models.
 
-Let's deploy Gradio app locally on your machine to interact with the Stable Diffusion XL model deployed using RayServe.
+Let's move forward with setting up the Gradio app as a Kubernetes deployment, utilizing a Docker container. This setup will enable interaction with the Stable Diffusion XL model, which is deployed using RayServe.
 
 :::info
 
-The Gradio app interacts with the locally exposed service created solely for the demonstration. Alternatively, you can deploy the Gradio app on EKS as a Pod with Ingress and Load Balancer for wider accessibility.
+The Gradio UI application is containerized and the container image is stored in the [data-on-eks](https://gallery.ecr.aws/data-on-eks/gradio-app) public repository. The Gradio app container internally points to the `stablediffusion-service` that's running on port 8000.
 
 :::
 
-### Execute Port Forward to the stablediffusion Ray Service
-First, execute a port forward to the stablediffusion Ray Service using kubectl:
+### Deploy the Gradio Pod as Deployment
 
-```bash
-kubectl port-forward svc/stablediffusion-service 8000:8000 -n stablediffusion
-```
-
-### Deploy Gradio WebUI Locally
-
-#### Create a Virtual Environment
-Create a Python virtual environment in your machine for the Gradio application:
+First, deploy the Gradio app as a Deployment on EKS using kubectl:
 
 ```bash
-cd ai-ml/trainium-inferentia/examples/gradio-ui
-python3 -m venv .venv
-source .venv/bin/activate
+cd gradio-ui
+kubectl apply -f gradio-deploy.yaml
 ```
 
-#### Install Gradio Image Generator app
-Install all the Gradio WebUI app dependencies with pip
+This should create a Deployment and a Service in namespace `gradio`. Check the status of the resources.
 
 ```bash
-pip install gradio requests
+kubectl -n gradio get all
+NAME                                     READY   STATUS    RESTARTS   AGE
+pod/gradio-deployment-59cfbffdf5-q745z   1/1     Running   0          143m
+
+NAME                     TYPE        CLUSTER-IP       EXTERNAL-IP   PORT(S)    AGE
+service/gradio-service   ClusterIP   172.20.245.153   <none>        7860/TCP   3d12h
 ```
 
 #### Invoke the WebUI
-Run the Gradio WebUI using the following command:
 
-NOTE: `gradio-app-stablediffusion.py` refers to the port forward url. e.g., `service_name = "http://localhost:8000" `
+Execute a port forward to the `gradio-service` Service using kubectl:
 
 ```bash
-python gradio-app-stablediffusion.py
+kubectl -n gradio port-forward service/gradio-service 8080:7860
 ```
 
-You should see output similar to the following:
-
-```text
-Running on local URL:  http://127.0.0.1:7860
-
-To create a public link, set `share=True` in `launch()`.
-```
-
-#### 2.4. Access the WebUI from Your Browser
 Open your web browser and access the Gradio WebUI by navigating to the following URL:
 
-http://127.0.0.1:7860
+http://localhost:8080
 
 You should now be able to interact with the Gradio application from your local machine.
 
 ![Gradio Output](img/stable-diffusion-xl-gradio.png)
 
 ## Conclusion
+
 In conclusion, you will have successfully deployed the **Stable-diffusion-xl-base** model on EKS with Ray Serve and created a prompt based web UI using Gradio.
 This opens up exciting possibilities for natural language processing and prompt based image generator and image predictor development.
Original file line number	Diff line number	Diff line change
Expand Up		@@ -15,4 +15,3 @@ provider "helm" {
		token = data.aws_eks_cluster_auth.this.token
		}
		}