diff --git a/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl.py b/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl.py
index fa1f41ac0..41997b4ee 100644
--- a/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl.py
+++ b/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl.py
@@ -60,7 +60,7 @@ def download_models():
 # ## Load model and run inference
 #
 # The container lifecycle [`__enter__` function](https://modal.com/docs/guide/lifecycle-functions#container-lifecycle-beta)
-# loads the model at startup. Then, we evaluate it in the `run_inference` function.
+# loads the model at startup. Then, we evaluate it in the `inference` function.
 #
 # To avoid excessive cold-starts, we set the idle timeout to 240 seconds, meaning once a GPU has loaded the model it will stay
 # online for 4 minutes before spinning down. This can be adjusted for cost/experience trade-offs.
diff --git a/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl_turbo.py b/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl_turbo.py
index 0982eb0fb..f642369be 100644
--- a/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl_turbo.py
+++ b/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl_turbo.py
@@ -1,12 +1,32 @@
+# ---
+# output-directory: "/tmp/stable-diffusion-xl-turbo"
+# args: []
+# runtimes: ["runc", "gvisor"]
+# ---
+# # Stable Diffusion XL Turbo
+#
+# This example is similar to the [Stable Diffusion XL](/docs/examples/stable_diffusion_xl)
+# example, but it uses a distilled model trained for real-time synthesis. Learn more about it [here](https://stability.ai/news/stability-ai-sdxl-turbo).
+
+# ## Basic setup
+
 from pathlib import Path
 
 from modal import Image, Stub, gpu, method
 
+# ## Define a container image
+
 
 def download_models():
     from huggingface_hub import snapshot_download
 
-    ignore = ["*.bin", "*.onnx_data", "*/diffusion_pytorch_model.safetensors"]
+    # Ignore files we don't need, to speed up download time.
+    ignore = [
+        "*.bin",
+        "*.onnx_data",
+        "*/diffusion_pytorch_model.safetensors",
+    ]
+
     snapshot_download("stabilityai/sdxl-turbo", ignore_patterns=ignore)
 
 
@@ -15,15 +35,23 @@ def download_models():
     .pip_install(
         "Pillow~=10.1.0",
         "diffusers~=0.24",
-        "transformers~=4.35",
-        "accelerate~=0.25",
-        "safetensors~=0.4",
+        "transformers~=4.35",  # This is needed for `import torch`
+        "accelerate~=0.25",  # Allows `device_map="auto"`, which computes an optimized device_map
+        "safetensors~=0.4",  # Enables the safetensors format, as opposed to the unsafe pickle format
     )
     .run_function(download_models)
 )
 
 stub = Stub("stable-diffusion-xl-turbo", image=image)
 
+# ## Load model and run inference
+#
+# The container lifecycle [`__enter__` function](https://modal.com/docs/guide/lifecycle-functions#container-lifecycle-beta)
+# loads the model at startup. Then, we evaluate it in the `inference` function.
+#
+# To avoid excessive cold-starts, we set the idle timeout to 240 seconds, meaning once a GPU has loaded the model it will stay
+# online for 4 minutes before spinning down. This can be adjusted for cost/experience trade-offs.
+
 @stub.cls(gpu=gpu.A10G(), container_idle_timeout=240)
 class Model:
@@ -48,11 +76,17 @@ def inference(self, image_bytes, prompt):
         init_image = load_image(Image.open(BytesIO(image_bytes))).resize(
             (512, 512)
         )
+        num_inference_steps = 4
+        strength = 0.9
+        # "When using SDXL-Turbo for image-to-image generation, make sure that num_inference_steps * strength is larger or equal to 1"
+        # See: https://huggingface.co/stabilityai/sdxl-turbo
+        assert num_inference_steps * strength >= 1
+
         image = self.pipe(
             prompt,
             image=init_image,
-            num_inference_steps=4,
-            strength=0.9,
+            num_inference_steps=num_inference_steps,
+            strength=strength,
             guidance_scale=0.0,
         ).images[0]
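For context, the `inference` method patched above is called from a local entrypoint elsewhere in the file, outside these hunks, and the defaults introduced here satisfy the SDXL-Turbo constraint quoted in the comment, since 4 × 0.9 = 3.6 ≥ 1. Below is a minimal sketch of such a driver, assuming it lives in the same module as `stub` and `Model` and that `inference` returns PNG bytes; the argument names and defaults are illustrative, not the file's actual entrypoint:

```python
from pathlib import Path

# Hypothetical driver, not part of the diff above: runs image-to-image
# generation remotely and saves the result locally.
@stub.local_entrypoint()
def main(image_path: str = "demo.png", prompt: str = "A painting of a dog"):
    image_bytes = Path(image_path).read_bytes()
    output_bytes = Model().inference.remote(image_bytes, prompt)

    # Write the result to the output directory declared in the frontmatter.
    out_dir = Path("/tmp/stable-diffusion-xl-turbo")
    out_dir.mkdir(exist_ok=True, parents=True)
    (out_dir / "output.png").write_bytes(output_bytes)
```

Invoked as `modal run stable_diffusion_xl_turbo.py --image-path dog.png --prompt "..."`, since `modal run` exposes the entrypoint's keyword arguments as CLI flags.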