refactor: Upgrade ray version for stable diffusion inference (#438)
ratnopamc authored Feb 20, 2024
1 parent 7906ac3 commit b16c4c5
Showing 5 changed files with 58 additions and 27 deletions.
2 changes: 1 addition & 1 deletion ai-ml/trainium-inferentia/addons.tf
@@ -268,7 +268,7 @@ module "eks_data_addons" {
#---------------------------------------
enable_kuberay_operator = true
kuberay_operator_helm_config = {
version = "1.0.0-rc.0"
version = "1.0.0"
# Enabling Volcano as Batch scheduler for KubeRay Operator
values = [
<<-EOT
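Outside of this blueprint, the equivalent manual upgrade is a Helm chart bump; a minimal sketch, assuming the upstream `kuberay-helm` chart repository (the blueprint itself applies the version through the `eks_data_addons` Terraform module shown above):

```sh
# Hypothetical manual equivalent of the Terraform change above:
# move the KubeRay operator from the 1.0.0-rc.0 pre-release to the 1.0.0 GA chart.
helm repo add kuberay https://ray-project.github.io/kuberay-helm/
helm repo update
helm upgrade --install kuberay-operator kuberay/kuberay-operator \
  --version 1.0.0 --namespace kuberay-operator --create-namespace
```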
@@ -1,4 +1,5 @@
FROM rayproject/ray:2.7.1-py310
# https://hub.docker.com/layers/rayproject/ray/2.9.0-py310/images/sha256-846cda01841c6c11610292aba8f190d49cc54844d1d578b307678cab5076ef98?context=explore
FROM rayproject/ray:2.9.0-py310

# Maintainer label
LABEL maintainer="DoEKS"
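After bumping the base image to Ray 2.9.0, the application image has to be rebuilt and pushed before the manifests below can pull it; a minimal sketch, assuming the Dockerfile's directory as the build context and push access to the target registry:

```sh
# Rebuild against rayproject/ray:2.9.0-py310 and tag with the pinned v1.0
# tag referenced by the RayService manifest (instead of the old :latest).
docker build -t public.ecr.aws/data-on-eks/ray2.9.0-py310-stablediffusion-neuron:v1.0 .
docker push public.ecr.aws/data-on-eks/ray2.9.0-py310-stablediffusion-neuron:v1.0
```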
@@ -4,21 +4,41 @@ metadata:
name: stablediffusion

---
apiVersion: ray.io/v1alpha1
apiVersion: ray.io/v1
kind: RayService
metadata:
name: stablediffusion-service
namespace: stablediffusion
spec:
serviceUnhealthySecondThreshold: 900
deploymentUnhealthySecondThreshold: 300
serveConfig:
importPath: ray_serve_stablediffusion:entrypoint # Specify the correct path to your Python script
runtimeEnv: |
env_vars: {"MODEL_ID": "aws-neuron/stable-diffusion-xl-base-1-0-1024x1024"} # Replace with the appropriate model ID
serveConfigV2: |
applications:
- name: stable-diffusion-deployment
import_path: "ray_serve_stablediffusion:entrypoint"
route_prefix: "/"
runtime_env:
env_vars:
MODEL_ID: "aws-neuron/stable-diffusion-xl-base-1-0-1024x1024"
NEURON_CC_FLAGS: "-O1"
deployments:
- name: stable-diffusion-v2
autoscaling_config:
metrics_interval_s: 0.2
min_replicas: 8
max_replicas: 12
look_back_period_s: 2
downscale_delay_s: 30
upscale_delay_s: 2
target_num_ongoing_requests_per_replica: 1
graceful_shutdown_timeout_s: 5
max_concurrent_queries: 100
ray_actor_options:
num_cpus: 10
resources: {"neuron_cores": 2}
rayClusterConfig:
rayVersion: '2.7.1'
rayVersion: '2.9.0'
enableInTreeAutoscaling: true
headGroupSpec:
serviceType: NodePort
headService:
@@ -31,15 +51,15 @@ spec:
spec:
containers:
- name: ray-head
image: public.ecr.aws/data-on-eks/ray2.7.1-py310-stablediffusion-neuron:latest # Image created using the Dockerfile attached in the folder
image: public.ecr.aws/data-on-eks/ray2.9.0-py310-stablediffusion-neuron:v1.0
imagePullPolicy: Always # Ensure the image is always pulled when updated
lifecycle:
preStop:
exec:
command: ["/bin/sh", "-c", "ray stop"]
ports:
- containerPort: 6379
name: gcs
name: gcs-server
- containerPort: 8265
name: dashboard
- containerPort: 10001
@@ -66,15 +86,15 @@ spec:

workerGroupSpecs:
- groupName: inf2-worker-group
replicas: 1
minReplicas: 1
maxReplicas: 1
# replicas: 1
minReplicas: 2
maxReplicas: 8
rayStartParams: {}
template:
spec:
containers:
- name: ray-worker
image: public.ecr.aws/data-on-eks/ray2.7.1-py310-stablediffusion-neuron:latest # Image created using the Dockerfile attached in the folder
image: public.ecr.aws/data-on-eks/ray2.9.0-py310-stablediffusion-neuron:v1.0
imagePullPolicy: Always # Ensure the image is always pulled when updated
lifecycle:
preStop:
@@ -87,10 +107,8 @@ spec:
memory: "360G" # All memory of inf2.24xlarge; 24G for daemonset overhead
aws.amazon.com/neuron: "6" # All Neuron cores of inf2.24xlarge
requests:
cpu: "45" # Half of vCPUs for reservation for inf2.24xlarge; leaving 3 vCPUs for daemonset overhead
memory: "180G" # Half of memory for reservation for inf2.24xlarge; leaving 12G for daemonset overhead
# Set maximum neuron core available to the instance to acocomodate multiple requests to leverage all the neuron cores
# You cannot run multiple pods of the model on the same instance unless you shard the model
cpu: "90" # All vCPUs of inf2.24xlarge; 6vCPU daemonset overhead
memory: "360G" # All memory of inf2.24xlarge; 24G for daemonset overhead
aws.amazon.com/neuron: "6" # All Neuron cores of inf2.24xlarge
nodeSelector:
instanceType: inferentia-inf2
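With `enableInTreeAutoscaling: true` and the worker group now bounded by `minReplicas: 2` and `maxReplicas: 8`, the Ray autoscaler adds inf2 workers as Serve replicas request `neuron_cores`. A quick way to watch the rollout after applying the manifest; the resource and namespace names come from the manifest above, and `kubectl get rayservice` relies on the KubeRay CRD installed by the operator:

```sh
# Watch the RayService reconcile and the worker pods scale between the
# new minReplicas: 2 and maxReplicas: 8 bounds.
kubectl get rayservice stablediffusion-service -n stablediffusion
kubectl get pods -n stablediffusion -w
```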
@@ -10,7 +10,7 @@

neuron_cores = 2

@serve.deployment(num_replicas=1, route_prefix="/")
@serve.deployment(name="stable-diffusion-api", num_replicas=1, route_prefix="/")
@serve.ingress(app)
class APIIngress:
def __init__(self, diffusion_model_handle) -> None:
@@ -22,20 +22,17 @@ def __init__(self, diffusion_model_handle) -> None:
response_class=Response,
)
async def generate(self, prompt: str):

image_ref = await self.handle.generate.remote(prompt)
image = await image_ref
image = await self.handle.generate.remote(prompt)
file_stream = BytesIO()
image.save(file_stream, "PNG")
return Response(content=file_stream.getvalue(), media_type="image/png")


@serve.deployment(
@serve.deployment(name="stable-diffusion-v2",
autoscaling_config={"min_replicas": 0, "max_replicas": 6},
ray_actor_options={
"resources": {"neuron_cores": neuron_cores},
"runtime_env": {"env_vars": {"NEURON_CC_FLAGS": "-O1"}},
},
autoscaling_config={"min_replicas": 1, "max_replicas": 1},
)
class StableDiffusionV2:
def __init__(self):
@@ -47,7 +44,6 @@ def __init__(self):
self.pipe = NeuronStableDiffusionXLPipeline.from_pretrained(compiled_model_id, device_ids=[0, 1])

async def generate(self, prompt: str):

assert len(prompt), "prompt parameter cannot be empty"
image = self.pipe(prompt).images[0]
return image
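Once the Serve application reports healthy, the endpoint can be smoke-tested. A hedged sketch: the `imagine` route, the Serve service name, and port 8000 are assumptions (the ingress route is not visible in this hunk), so adjust them to whatever the application actually exposes:

```sh
# Port-forward the Serve service created by KubeRay and request an image.
# Route, service name, and port are assumptions for illustration only.
kubectl -n stablediffusion port-forward svc/stablediffusion-service-serve-svc 8000:8000 &
curl -G "http://localhost:8000/imagine" \
  --data-urlencode "prompt=an astronaut riding a horse on mars" \
  -o output.png
```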
16 changes: 16 additions & 0 deletions website/docs/blueprints/troubleshooting/troubleshooting.md
@@ -220,3 +220,19 @@ Delete the existing log group by updating the log group name and region.
```sh
aws logs delete-log-group --log-group-name <LOG_GROUP_NAME> --region <ENTER_REGION>
```

## Karpenter Error - Missing Service-Linked Role

Karpenter throws the error below when trying to launch new instances:

```
"error":"launching nodeclaim, creating instance, with fleet error(s), AuthFailure.ServiceLinkedRoleCreationNotPermitted: The provided credentials do not have permission to create the service-linked role for EC2 Spot Instances."}
```

**Solution:**

You will need to create the service-linked role for EC2 Spot in the AWS account you're using to resolve the `ServiceLinkedRoleCreationNotPermitted` error:

```sh
aws iam create-service-linked-role --aws-service-name spot.amazonaws.com
```
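To confirm the role now exists (AWS names the role created for `spot.amazonaws.com` `AWSServiceRoleForEC2Spot`):

```sh
# Verify the EC2 Spot service-linked role was created.
aws iam get-role --role-name AWSServiceRoleForEC2Spot
```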
