diff --git a/06_gpu_and_ml/stable_diffusion/demo_images/dog.png b/06_gpu_and_ml/stable_diffusion/demo_images/dog.png deleted file mode 100644 index 022c2d13a..000000000 Binary files a/06_gpu_and_ml/stable_diffusion/demo_images/dog.png and /dev/null differ diff --git a/06_gpu_and_ml/stable_diffusion/foo.json b/06_gpu_and_ml/stable_diffusion/foo.json deleted file mode 100644 index cb61dad86..000000000 --- a/06_gpu_and_ml/stable_diffusion/foo.json +++ /dev/null @@ -1 +0,0 @@ -{"last_node_id": 9, "last_link_id": 9, "nodes": [{"id": 7, "type": "CLIPTextEncode", "pos": [413, 389], "size": {"0": 425.27801513671875, "1": 180.6060791015625}, "flags": {}, "order": 3, "mode": 0, "inputs": [{"name": "clip", "type": "CLIP", "link": 5}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [6], "slot_index": 0}], "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["text, watermark"]}, {"id": 6, "type": "CLIPTextEncode", "pos": [415, 186], "size": {"0": 422.84503173828125, "1": 164.31304931640625}, "flags": {}, "order": 2, "mode": 0, "inputs": [{"name": "clip", "type": "CLIP", "link": 3}], "outputs": [{"name": "CONDITIONING", "type": "CONDITIONING", "links": [4], "slot_index": 0}], "properties": {"Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["beautiful scenery nature glass bottle landscape, , purple galaxy bottle,"]}, {"id": 5, "type": "EmptyLatentImage", "pos": [473, 609], "size": {"0": 315, "1": 106}, "flags": {}, "order": 0, "mode": 0, "outputs": [{"name": "LATENT", "type": "LATENT", "links": [2], "slot_index": 0}], "properties": {"Node name for S&R": "EmptyLatentImage"}, "widgets_values": [512, 512, 1]}, {"id": 3, "type": "KSampler", "pos": [863, 186], "size": {"0": 315, "1": 262}, "flags": {}, "order": 4, "mode": 0, "inputs": [{"name": "model", "type": "MODEL", "link": 1}, {"name": "positive", "type": "CONDITIONING", "link": 4}, {"name": "negative", "type": "CONDITIONING", "link": 6}, {"name": "latent_image", "type": "LATENT", "link": 2}], "outputs": [{"name": "LATENT", "type": "LATENT", "links": [7], "slot_index": 0}], "properties": {"Node name for S&R": "KSampler"}, "widgets_values": [156680208700286, "randomize", 20, 8, "euler", "normal", 1]}, {"id": 8, "type": "VAEDecode", "pos": [1209, 188], "size": {"0": 210, "1": 46}, "flags": {}, "order": 5, "mode": 0, "inputs": [{"name": "samples", "type": "LATENT", "link": 7}, {"name": "vae", "type": "VAE", "link": 8}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": [9], "slot_index": 0}], "properties": {"Node name for S&R": "VAEDecode"}}, {"id": 9, "type": "SaveImage", "pos": [1451, 189], "size": {"0": 210, "1": 58}, "flags": {}, "order": 6, "mode": 0, "inputs": [{"name": "images", "type": "IMAGE", "link": 9}], "properties": {}, "widgets_values": ["ComfyUI"]}, {"id": 4, "type": "CheckpointLoaderSimple", "pos": [26, 474], "size": {"0": 315, "1": 98}, "flags": {}, "order": 1, "mode": 0, "outputs": [{"name": "MODEL", "type": "MODEL", "links": [1], "slot_index": 0}, {"name": "CLIP", "type": "CLIP", "links": [3, 5], "slot_index": 1}, {"name": "VAE", "type": "VAE", "links": [8], "slot_index": 2}], "properties": {"Node name for S&R": "CheckpointLoaderSimple"}, "widgets_values": ["dreamlike-photoreal-2.0.safetensors"]}], "links": [[1, 4, 0, 3, 0, "MODEL"], [2, 5, 0, 3, 3, "LATENT"], [3, 4, 1, 6, 0, "CLIP"], [4, 6, 0, 3, 1, "CONDITIONING"], [5, 4, 1, 7, 0, "CLIP"], [6, 7, 0, 3, 2, "CONDITIONING"], [7, 3, 0, 8, 0, "LATENT"], [8, 4, 2, 8, 1, "VAE"], [9, 8, 0, 9, 0, "IMAGE"]], "groups": [], "config": {}, 
"extra": {}, "version": 0.4} \ No newline at end of file diff --git a/06_gpu_and_ml/stable_diffusion/huggingface_token.png b/06_gpu_and_ml/stable_diffusion/huggingface_token.png deleted file mode 100644 index 618b48d24..000000000 Binary files a/06_gpu_and_ml/stable_diffusion/huggingface_token.png and /dev/null differ diff --git a/06_gpu_and_ml/stable_diffusion/stable_diffusion_cli.py b/06_gpu_and_ml/stable_diffusion/stable_diffusion_cli.py index 9bfc2c84d..2494abbd7 100644 --- a/06_gpu_and_ml/stable_diffusion/stable_diffusion_cli.py +++ b/06_gpu_and_ml/stable_diffusion/stable_diffusion_cli.py @@ -2,12 +2,13 @@ # output-directory: "/tmp/stable-diffusion" # args: ["--prompt", "A 1600s oil painting of the New York City skyline"] # tags: ["use-case-image-video-3d"] +# deploy: true # --- -# # Run Stable Diffusion 3.5 Large Turbo from the command line +# # Run Stable Diffusion 3.5 Large Turbo as a CLI, API, and web UI # This example shows how to run [Stable Diffusion 3.5 Large Turbo](https://huggingface.co/stabilityai/stable-diffusion-3.5-large-turbo) on Modal -# and generate images from your local command line. +# to generate images from your local command line, via an API, and as a web UI. # Inference takes about one minute to cold start, # at which point images are generated at a rate of one image every 1-2 seconds @@ -47,19 +48,20 @@ .pip_install( "accelerate==0.33.0", "diffusers==0.31.0", + "fastapi[standard]==0.115.4", "huggingface-hub[hf_transfer]==0.25.2", "sentencepiece==0.2.0", "torch==2.5.1", "torchvision==0.20.1", "transformers~=4.44.0", ) - .entrypoint([]) # deactivate default entrypoint to reduce log verbosity .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"}) # faster downloads ) with image.imports(): import diffusers import torch + from fastapi import Response # ## Implementing SD3.5 Large Turbo inference on Modal @@ -67,9 +69,14 @@ # that ensures models are downloaded when we `build` our container image (just like our dependencies) # and that models are loaded and then moved to the GPU when a new container starts. -# The `run_inference` function just wraps a `diffusers` pipeline. +# The `run` function just wraps a `diffusers` pipeline. # It sends the output image back to the client as bytes. +# We also include a `web` wrapper that makes it possible +# to trigger inference via an API call. +# See the `/docs` route of the URL ending in `inference-web.modal.run` +# that appears when you deploy the app for details. + model_id = "adamo1139/stable-diffusion-3.5-large-turbo-ungated" model_revision_id = "9ad870ac0b0e5e48ced156bb02f85d324b7275d2" @@ -79,7 +86,7 @@ gpu="H100", timeout=10 * MINUTES, ) -class StableDiffusion: +class Inference: @modal.build() @modal.enter() def initialize(self): @@ -94,7 +101,7 @@ def move_to_gpu(self): self.pipe.to("cuda") @modal.method() - def run_inference( + def run( self, prompt: str, batch_size: int = 4, seed: int = None ) -> list[bytes]: seed = seed if seed is not None else random.randint(0, 2**32 - 1) @@ -116,8 +123,17 @@ def run_inference( torch.cuda.empty_cache() # reduce fragmentation return image_output + @modal.web_endpoint(docs=True) + def web(self, prompt: str, seed: int = None): + return Response( + content=self.run.local( # run in the same container + prompt, batch_size=1, seed=seed + )[0], + media_type="image/png", + ) + -# ## Generating images from the command line +# ## Generating Stable Diffusion images from the command line # This is the command we'll use to generate images. 
It takes a text `prompt`, # a `batch_size` that determines the number of images to generate per prompt, @@ -125,6 +141,11 @@ def run_inference( # You can also provide a `seed` to make sampling more deterministic. +# Run it with +# ```bash +# modal run stable_diffusion_cli.py +# ``` + @app.local_entrypoint() def entrypoint( @@ -144,11 +165,11 @@ def entrypoint( output_dir = Path("/tmp/stable-diffusion") output_dir.mkdir(exist_ok=True, parents=True) - sd = StableDiffusion() + inference_service = Inference() for sample_idx in range(samples): start = time.time() - images = sd.run_inference.remote(prompt, batch_size, seed) + images = inference_service.run.remote(prompt, batch_size, seed) duration = time.time() - start print(f"Run {sample_idx+1} took {duration:.3f}s") if sample_idx: @@ -169,5 +190,56 @@ def entrypoint( output_path.write_bytes(image_bytes) +# ## Generating Stable Diffusion images in a web UI + +# Lastly, we add a simple web application that exposes a front-end (written in Alpine.js) for +# our image generation backend. + +# The `Inference` class will serve multiple users from its own shared pool of warm GPU containers automatically. + +# We can deploy this with `modal deploy stable_diffusion_cli.py`. + +frontend_path = Path(__file__).parent / "frontend" + +web_image = modal.Image.debian_slim(python_version="3.12").pip_install( + "jinja2", "fastapi[standard]==0.115.4" +) + + +@app.function( + image=web_image, + mounts=[modal.Mount.from_local_dir(frontend_path, remote_path="/assets")], + allow_concurrent_inputs=1000, +) +@modal.asgi_app() +def ui(): + import fastapi.staticfiles + from fastapi import FastAPI, Request + from fastapi.templating import Jinja2Templates + + web_app = FastAPI() + templates = Jinja2Templates(directory="/assets") + + @web_app.get("/") + async def read_root(request: Request): + return templates.TemplateResponse( + "index.html", + { + "request": request, + "inference_url": Inference.web.web_url, + "model_name": "Stable Diffusion 3.5 Large Turbo", + "default_prompt": "A cinematic shot of a baby raccoon wearing an intricate italian priest robe.", + }, + ) + + web_app.mount( + "/static", + fastapi.staticfiles.StaticFiles(directory="/assets"), + name="static", + ) + + return web_app + + def slugify(s: str) -> str: return "".join(c if c.isalnum() else "-" for c in s).strip("-") diff --git a/06_gpu_and_ml/stable_diffusion/stable_diffusion_latencies.png b/06_gpu_and_ml/stable_diffusion/stable_diffusion_latencies.png deleted file mode 100644 index 1a70edc1f..000000000 Binary files a/06_gpu_and_ml/stable_diffusion/stable_diffusion_latencies.png and /dev/null differ diff --git a/06_gpu_and_ml/stable_diffusion/stable_diffusion_montage.png b/06_gpu_and_ml/stable_diffusion/stable_diffusion_montage.png deleted file mode 100644 index 593919725..000000000 Binary files a/06_gpu_and_ml/stable_diffusion/stable_diffusion_montage.png and /dev/null differ diff --git a/06_gpu_and_ml/stable_diffusion/stable_diffusion_turbo_input.png b/06_gpu_and_ml/stable_diffusion/stable_diffusion_turbo_input.png deleted file mode 100644 index 022c2d13a..000000000 Binary files a/06_gpu_and_ml/stable_diffusion/stable_diffusion_turbo_input.png and /dev/null differ diff --git a/06_gpu_and_ml/stable_diffusion/stable_diffusion_turbo_output.png b/06_gpu_and_ml/stable_diffusion/stable_diffusion_turbo_output.png deleted file mode 100644 index 6123597cf..000000000 Binary files a/06_gpu_and_ml/stable_diffusion/stable_diffusion_turbo_output.png and /dev/null differ diff --git 
a/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl.py b/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl.py deleted file mode 100644 index 14802cef9..000000000 --- a/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl.py +++ /dev/null @@ -1,220 +0,0 @@ -# --- -# output-directory: "/tmp/stable-diffusion-xl" -# args: ["--prompt", "An astronaut riding a green horse"] -# runtimes: ["runc", "gvisor"] -# tags: ["use-case-image-video-3d"] -# --- -# # Stable Diffusion XL 1.0 -# -# This example is similar to the [Stable Diffusion CLI](/docs/examples/stable_diffusion_cli) -# example, but it generates images from the larger SDXL 1.0 model. Specifically, it runs the -# first set of steps with the base model, followed by the refiner model. -# -# [Try out the live demo here!](https://modal-labs--stable-diffusion-xl-ui.modal.run/) The first -# generation may include a cold-start, which takes around 20 seconds. The inference speed depends on the GPU -# and step count (for reference, an A100 runs 40 steps in 8 seconds). - -# ## Basic setup - -import io -from pathlib import Path - -import modal - -# ## Define a container image -# -# To take advantage of Modal's blazing fast cold-start times, we'll need to download our model weights -# inside our container image with a download function. We ignore binaries, ONNX weights and 32-bit weights. -# -# Tip: avoid using global variables in this function to ensure the download step detects model changes and -# triggers a rebuild. - - -sdxl_image = ( - modal.Image.debian_slim(python_version="3.10") - .apt_install( - "libglib2.0-0", "libsm6", "libxrender1", "libxext6", "ffmpeg", "libgl1" - ) - .pip_install( - "diffusers==0.26.3", - "huggingface-hub~=0.25.2", - "invisible_watermark==0.2.0", - "transformers~=4.38.2", - "accelerate==0.27.2", - "safetensors==0.4.2", - "fastapi[standard]==0.115.4", - "pydantic==2.9.2", - "starlette==0.41.2", - ) -) - -app = modal.App("stable-diffusion-xl") - -with sdxl_image.imports(): - import torch - from diffusers import DiffusionPipeline - from fastapi import Response - -# ## Load model and run inference -# -# The container lifecycle [`@enter` decorator](https://modal.com/docs/guide/lifecycle-functions#container-lifecycle-beta) -# loads the model at startup. Then, we evaluate it in the `run_inference` function. -# -# To avoid excessive cold-starts, we set the idle timeout to 240 seconds, meaning once a GPU has loaded the model it will stay -# online for 4 minutes before spinning down. This can be adjusted for cost/experience trade-offs. 
- - -@app.cls(gpu=modal.gpu.A10G(), container_idle_timeout=240, image=sdxl_image) -class Model: - @modal.build() - def build(self): - from huggingface_hub import snapshot_download - - ignore = [ - "*.bin", - "*.onnx_data", - "*/diffusion_pytorch_model.safetensors", - ] - snapshot_download( - "stabilityai/stable-diffusion-xl-base-1.0", ignore_patterns=ignore - ) - snapshot_download( - "stabilityai/stable-diffusion-xl-refiner-1.0", - ignore_patterns=ignore, - ) - - @modal.enter() - def enter(self): - load_options = dict( - torch_dtype=torch.float16, - use_safetensors=True, - variant="fp16", - device_map="auto", - ) - - # Load base model - self.base = DiffusionPipeline.from_pretrained( - "stabilityai/stable-diffusion-xl-base-1.0", **load_options - ) - - # Load refiner model - self.refiner = DiffusionPipeline.from_pretrained( - "stabilityai/stable-diffusion-xl-refiner-1.0", - text_encoder_2=self.base.text_encoder_2, - vae=self.base.vae, - **load_options, - ) - - # Compiling the model graph is JIT so this will increase inference time for the first run - # but speed up subsequent runs. Uncomment to enable. - # self.base.unet = torch.compile(self.base.unet, mode="reduce-overhead", fullgraph=True) - # self.refiner.unet = torch.compile(self.refiner.unet, mode="reduce-overhead", fullgraph=True) - - def _inference(self, prompt, n_steps=24, high_noise_frac=0.8): - negative_prompt = "disfigured, ugly, deformed" - image = self.base( - prompt=prompt, - negative_prompt=negative_prompt, - num_inference_steps=n_steps, - denoising_end=high_noise_frac, - output_type="latent", - ).images - image = self.refiner( - prompt=prompt, - negative_prompt=negative_prompt, - num_inference_steps=n_steps, - denoising_start=high_noise_frac, - image=image, - ).images[0] - - byte_stream = io.BytesIO() - image.save(byte_stream, format="JPEG") - - return byte_stream - - @modal.method() - def inference(self, prompt, n_steps=24, high_noise_frac=0.8): - return self._inference( - prompt, n_steps=n_steps, high_noise_frac=high_noise_frac - ).getvalue() - - @modal.web_endpoint(docs=True) - def web_inference( - self, prompt: str, n_steps: int = 24, high_noise_frac: float = 0.8 - ): - return Response( - content=self._inference( - prompt, n_steps=n_steps, high_noise_frac=high_noise_frac - ).getvalue(), - media_type="image/jpeg", - ) - - -# And this is our entrypoint; where the CLI is invoked. Explore CLI options -# with: `modal run stable_diffusion_xl.py --help - - -@app.local_entrypoint() -def main(prompt: str = "Unicorns and leprechauns sign a peace treaty"): - image_bytes = Model().inference.remote(prompt) - - dir = Path("/tmp/stable-diffusion-xl") - if not dir.exists(): - dir.mkdir(exist_ok=True, parents=True) - - output_path = dir / "output.jpg" - print(f"Saving it to {output_path}") - with open(output_path, "wb") as f: - f.write(image_bytes) - - -# ## A user interface -# -# Here we ship a simple web application that exposes a front-end (written in Alpine.js) for -# our backend deployment. -# -# The Model class will serve multiple users from its own shared pool of warm GPU containers automatically. -# -# We can deploy this with `modal deploy stable_diffusion_xl.py`. -# -# Because the `web_endpoint` decorator on our `web_inference` function has the `docs` flag set to `True`, -# we also get interactive documentation for our endpoint at `/docs`. 
- -frontend_path = Path(__file__).parent / "frontend" - -web_image = modal.Image.debian_slim().pip_install("jinja2") - - -@app.function( - image=web_image, - mounts=[modal.Mount.from_local_dir(frontend_path, remote_path="/assets")], - allow_concurrent_inputs=20, -) -@modal.asgi_app() -def ui(): - import fastapi.staticfiles - from fastapi import FastAPI, Request - from fastapi.templating import Jinja2Templates - - web_app = FastAPI() - templates = Jinja2Templates(directory="/assets") - - @web_app.get("/") - async def read_root(request: Request): - return templates.TemplateResponse( - "index.html", - { - "request": request, - "inference_url": Model.web_inference.web_url, - "model_name": "Stable Diffusion XL", - "default_prompt": "A cinematic shot of a baby raccoon wearing an intricate italian priest robe.", - }, - ) - - web_app.mount( - "/static", - fastapi.staticfiles.StaticFiles(directory="/assets"), - name="static", - ) - - return web_app diff --git a/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl_lightning.py b/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl_lightning.py deleted file mode 100644 index 1ba491114..000000000 --- a/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl_lightning.py +++ /dev/null @@ -1,165 +0,0 @@ -# --- -# output-directory: "/tmp/stable-diffusion-xl-lightning" -# tags: ["use-case-image-video-3d"] -# --- -# # Run SDXL Lightning on Modal -# -# This example runs [SDXL-Lightning](https://huggingface.co/ByteDance/SDXL-Lightning) by ByteDance, a fast text-to-image model that generates high quality images in just a few steps. -# - -from pathlib import Path - -import modal - -app = modal.App("stable-diffusion-xl-lightning") - -image = modal.Image.debian_slim(python_version="3.11").pip_install( - "diffusers==0.26.3", - "huggingface-hub==0.25.2", - "transformers~=4.37.2", - "accelerate==0.27.2", - "fastapi[standard]==0.115.4", - "pydantic==2.9.2", - "starlette==0.41.2", -) - -base = "stabilityai/stable-diffusion-xl-base-1.0" -repo = "ByteDance/SDXL-Lightning" -ckpt = "sdxl_lightning_4step_unet.safetensors" - - -with image.imports(): - import io - - import torch - from diffusers import ( - EulerDiscreteScheduler, - StableDiffusionXLPipeline, - UNet2DConditionModel, - ) - from fastapi import Response - from huggingface_hub import hf_hub_download - from safetensors.torch import load_file - - -@app.cls(image=image, gpu="a100") -class Model: - @modal.build() - @modal.enter() - def load_weights(self): - unet = UNet2DConditionModel.from_config(base, subfolder="unet").to( - "cuda", torch.float16 - ) - unet.load_state_dict( - load_file(hf_hub_download(repo, ckpt), device="cuda") - ) - self.pipe = StableDiffusionXLPipeline.from_pretrained( - base, unet=unet, torch_dtype=torch.float16, variant="fp16" - ).to("cuda") - - self.pipe.scheduler = EulerDiscreteScheduler.from_config( - self.pipe.scheduler.config, timestep_spacing="trailing" - ) - - def _inference(self, prompt, n_steps=4): - negative_prompt = "disfigured, ugly, deformed" - image = self.pipe( - prompt=prompt, - guidance_scale=0, - negative_prompt=negative_prompt, - num_inference_steps=n_steps, - ).images[0] - - byte_stream = io.BytesIO() - image.save(byte_stream, format="JPEG") - - return byte_stream - - @modal.method() - def inference(self, prompt, n_steps=4): - return self._inference( - prompt, - n_steps=n_steps, - ).getvalue() - - @modal.web_endpoint(docs=True) - def web_inference(self, prompt, n_steps=4): - return Response( - content=self._inference( - prompt, - n_steps=n_steps, - ).getvalue(), - 
media_type="image/jpeg", - ) - - -# And this is our entrypoint; where the CLI is invoked. Run this example -# with: `modal run stable_diffusion_xl_lightning.py --prompt "An astronaut riding a green horse"` - - -@app.local_entrypoint() -def main( - prompt: str = "in the style of Dali, a surrealist painting of a weasel in a tuxedo riding a bicycle in the rain", -): - image_bytes = Model().inference.remote(prompt) - - dir = Path("/tmp/stable-diffusion-xl-lightning") - if not dir.exists(): - dir.mkdir(exist_ok=True, parents=True) - - output_path = dir / "output.png" - print(f"Saving it to {output_path}") - with open(output_path, "wb") as f: - f.write(image_bytes) - - -# ## A user interface -# -# Here we ship a simple web application that exposes a front-end (written in Alpine.js) for -# our backend deployment. -# -# The Model class will serve multiple users from a its own shared pool of warm GPU containers automatically. -# -# We can deploy this with `modal deploy stable_diffusion_xl_lightning.py`. -# -# Because the `web_endpoint` decorator on our `web_inference` function has the `docs` flag set to `True`, -# we also get interactive documentation for our endpoint at `/docs`. - -frontend_path = Path(__file__).parent / "frontend" - -web_image = modal.Image.debian_slim().pip_install("jinja2") - - -@app.function( - image=web_image, - mounts=[modal.Mount.from_local_dir(frontend_path, remote_path="/assets")], - allow_concurrent_inputs=20, -) -@modal.asgi_app() -def ui(): - import fastapi.staticfiles - from fastapi import FastAPI, Request - from fastapi.templating import Jinja2Templates - - web_app = FastAPI() - templates = Jinja2Templates(directory="/assets") - - @web_app.get("/") - async def read_root(request: Request): - return templates.TemplateResponse( - "index.html", - { - "request": request, - "inference_url": Model.web_inference.web_url, - "model_name": "Stable Diffusion XL Lightning", - "default_prompt": "A cinematic shot of a baby raccoon wearing an intricate Italian priest robe.", - }, - ) - - web_app.mount( - "/static", - fastapi.staticfiles.StaticFiles(directory="/assets"), - name="static", - ) - - return web_app diff --git a/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl_turbo.py b/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl_turbo.py deleted file mode 100644 index 16e577ce1..000000000 --- a/06_gpu_and_ml/stable_diffusion/stable_diffusion_xl_turbo.py +++ /dev/null @@ -1,132 +0,0 @@ -# --- -# output-directory: "/tmp/stable-diffusion-xl-turbo" -# args: [] -# runtimes: ["runc", "gvisor"] -# tags: ["use-case-image-video-3d"] -# --- -# # Stable Diffusion XL Turbo Image-to-image -# -# This example is similar to the [Stable Diffusion XL](/docs/examples/stable_diffusion_xl) -# example, but it's a distilled model trained for real-time synthesis and is image-to-image. Learn more about it [here](https://stability.ai/news/stability-ai-sdxl-turbo). 
-# -# Input prompt: -# `dog wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k` -# -# Input | Output -# :-------------------------:|:-------------------------: -# ![](./stable_diffusion_turbo_input.png) | ![](./stable_diffusion_turbo_output.png) - -# ## Basic setup - -from io import BytesIO -from pathlib import Path - -import modal - -# ## Define a container image - - -image = modal.Image.debian_slim().pip_install( - "Pillow~=10.1.0", - "diffusers~=0.24.0", - "huggingface-hub~=0.25.2", - "transformers~=4.35.2", # This is needed for `import torch` - "accelerate~=0.25.0", # Allows `device_map="auto"``, which allows computation of optimized device_map - "safetensors~=0.4.1", # Enables safetensor format as opposed to using unsafe pickle format -) - -app = modal.App("stable-diffusion-xl-turbo", image=image) - -with image.imports(): - import torch - from diffusers import AutoPipelineForImage2Image - from diffusers.utils import load_image - from huggingface_hub import snapshot_download - from PIL import Image - - -# ## Load model and run inference -# -# The container lifecycle [`@enter` decorator](https://modal.com/docs/guide/lifecycle-functions#container-lifecycle-beta) -# loads the model at startup. Then, we evaluate it in the `inference` function. -# -# To avoid excessive cold-starts, we set the idle timeout to 240 seconds, meaning once a GPU has loaded the model it will stay -# online for 4 minutes before spinning down. This can be adjusted for cost/experience trade-offs. - - -@app.cls(gpu=modal.gpu.A10G(), container_idle_timeout=240) -class Model: - @modal.build() - def download_models(self): - # Ignore files that we don't need to speed up download time. - ignore = [ - "*.bin", - "*.onnx_data", - "*/diffusion_pytorch_model.safetensors", - ] - - snapshot_download("stabilityai/sdxl-turbo", ignore_patterns=ignore) - - @modal.enter() - def enter(self): - self.pipe = AutoPipelineForImage2Image.from_pretrained( - "stabilityai/sdxl-turbo", - torch_dtype=torch.float16, - variant="fp16", - device_map="auto", - ) - - @modal.method() - def inference(self, image_bytes, prompt): - init_image = load_image(Image.open(BytesIO(image_bytes))).resize( - (512, 512) - ) - num_inference_steps = 4 - strength = 0.9 - # "When using SDXL-Turbo for image-to-image generation, make sure that num_inference_steps * strength is larger or equal to 1" - # See: https://huggingface.co/stabilityai/sdxl-turbo - assert num_inference_steps * strength >= 1 - - image = self.pipe( - prompt, - image=init_image, - num_inference_steps=num_inference_steps, - strength=strength, - guidance_scale=0.0, - ).images[0] - - byte_stream = BytesIO() - image.save(byte_stream, format="PNG") - image_bytes = byte_stream.getvalue() - - return image_bytes - - -DEFAULT_IMAGE_PATH = Path(__file__).parent / "demo_images/dog.png" - - -@app.local_entrypoint() -def main( - image_path=DEFAULT_IMAGE_PATH, - prompt="dog wizard, gandalf, lord of the rings, detailed, fantasy, cute, adorable, Pixar, Disney, 8k", -): - with open(image_path, "rb") as image_file: - input_image_bytes = image_file.read() - output_image_bytes = Model().inference.remote(input_image_bytes, prompt) - - dir = Path("/tmp/stable-diffusion-xl-turbo") - if not dir.exists(): - dir.mkdir(exist_ok=True, parents=True) - - output_path = dir / "output.png" - print(f"Saving it to {output_path}") - with open(output_path, "wb") as f: - f.write(output_image_bytes) - - -# ## Running the model -# -# We can run the model with different parameters using the following 
command, -# ``` -# modal run stable_diffusion_xl_turbo.py --prompt="harry potter, glasses, wizard" --image-path="dog.png" -# ```
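
Since this diff adds a `@modal.web_endpoint(docs=True)` method (`Inference.web`) to `stable_diffusion_cli.py`, a quick way to exercise it after `modal deploy stable_diffusion_cli.py` is a plain HTTP request. This is a minimal client sketch, not part of the PR: the endpoint URL below is a placeholder (your deploy prints the real one, ending in `inference-web.modal.run`), and the prompt/seed values are arbitrary.

```python
# Minimal sketch of calling the newly added `web` endpoint over HTTP.
# Assumption: ENDPOINT_URL is a placeholder -- substitute the
# `...-inference-web.modal.run` URL printed by `modal deploy`.
import urllib.parse
import urllib.request
from pathlib import Path

ENDPOINT_URL = "https://your-workspace--your-app-inference-web.modal.run"  # placeholder

params = urllib.parse.urlencode(
    {"prompt": "A 1600s oil painting of the New York City skyline", "seed": 42}
)
# The endpoint returns the generated image as raw PNG bytes.
image_bytes = urllib.request.urlopen(f"{ENDPOINT_URL}?{params}").read()

output_path = Path("/tmp/stable-diffusion/web_output.png")
output_path.parent.mkdir(exist_ok=True, parents=True)
output_path.write_bytes(image_bytes)
print(f"Saved {len(image_bytes)} bytes to {output_path}")
```

The interactive `/docs` page on the same URL (enabled by `docs=True`) shows the same `prompt` and `seed` query parameters and can be used instead of a script for one-off tests.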