Merge branch 'main' into charlesfrye/faster-download-llamacpp
charlesfrye authored Nov 18, 2024
2 parents bbff2ad + 38eee3b commit 1c7971a
Showing 7 changed files with 118 additions and 88 deletions.
134 changes: 72 additions & 62 deletions 06_gpu_and_ml/comfyui/comfyapp.py
@@ -5,19 +5,15 @@
#
# # Run Flux on ComfyUI interactively and as an API
#
# [ComfyUI](https://github.com/comfyanonymous/ComfyUI) is an open-source Stable Diffusion GUI with a graph/nodes based interface that allows you to design and execute advanced image generation pipelines.
# [ComfyUI](https://github.com/comfyanonymous/ComfyUI) is an open-source diffusion model platform with a graph/nodes interface that allows you to design and execute advanced image generation pipelines.

# Flux is a family of cutting-edge text-to-image models created by [Black Forest Labs](https://huggingface.co/black-forest-labs), rapidly gaining popularity due to their exceptional image quality.
#
# In this example, we show you how to
#
# 1. run Flux on ComfyUI interactively to develop workflows
# 1. run the [Flux](https://huggingface.co/docs/diffusers/main/en/api/pipelines/flux) diffusion model on ComfyUI interactively to develop workflows
#
# 2. serve a Flux ComfyUI workflow as an API
#
# Combining the UI and the API in a single app makes it easy to iterate on your workflow even after deployment.
# Simply head to the interactive UI, make your changes, export the JSON, and redeploy the app.
#
# ## Quickstart
#
# This example runs `workflow_api.json` in this directory, which is an adaptation of [this simple FLUX.1-schnell workflow](https://openart.ai/workflows/reverentelusarca/flux-simple-workflow-schnell/40OkdaB23J2TMTXHmxxu) with an Image Resize custom node added at the end.
@@ -28,13 +24,17 @@
# ![example comfyui image](./flux_gen_image.jpeg)
#
# To serve the workflow in this example as an API:
# 1. Stand up the ComfyUI server in development mode:
# 1. Download the Flux models to a Modal [Volume](/docs/guide/volumes):
# ```bash
# modal run 06_gpu_and_ml/comfyui/comfyapp.py::download_models
# ```
#
# 2. Stand up the ComfyUI server in development mode:
# ```bash
# modal serve 06_gpu_and_ml/comfyui/comfyapp.py
# ```
# Note: if you're running this for the first time, it will take several minutes to build the image, since we have to download the Flux models (>20GB) to the container. Successive calls will reuse this prebuilt image.
#
# 2. In another terminal, run inference:
# 3. In another terminal, run inference:
# ```bash
# python 06_gpu_and_ml/comfyui/comfyclient.py --dev --modal-workspace $(modal profile current) --prompt "neon green sign that says Modal"
# ```
@@ -54,9 +54,6 @@

# ### Building up the environment
#
# ComfyUI setups can be complex, with a lot of custom nodes and models to manage.
# We'll use [`comfy-cli`](https://github.com/Comfy-Org/comfy-cli) to manage the installation of ComfyUI, its dependencies, models, and custom nodes.
#
# We start from a base image and specify all of our dependencies.
# We'll call out the interesting ones as they come up below.
#
@@ -76,70 +73,81 @@
"comfy --skip-prompt install --nvidia"
)
)

# #### Downloading models
#
# We'll download the Flux models using `comfy-cli`.
# ComfyUI looks for these models in specific subdirectories of the `models` directory
# (e.g. `vae`, `unet`, `clip`), so we need to download each one into the correct location.
#
# You can run multiple commands by passing them as comma-separated arguments to `.run_commands()`.
# But here we opt to split them up to allow for more granular layer caching in the Modal Image.
# By appending each model install as its own `.run_commands(...)` step at the end of this build, we ensure
# that the previous steps remain unchanged and stay cached, avoiding unnecessary re-runs.
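#
# A minimal sketch of the tradeoff, with placeholder shell commands (`demo_image` is illustrative):

demo_image = image.run_commands("echo model A", "echo model B")  # one layer: changing either command re-runs both
demo_image = image.run_commands("echo model A").run_commands("echo model B")  # two layers: the first stays cached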

# ### Downloading custom nodes
# We'll use `comfy-cli` to download custom nodes, in this case the popular WAS Node Suite pack.
image = (
image.run_commands(
"comfy --skip-prompt model download --url https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp8_e4m3fn.safetensors --relative-path models/clip"
)
.run_commands(
"comfy --skip-prompt model download --url https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors --relative-path models/clip"
)
.run_commands(
"comfy --skip-prompt model download --url https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/ae.safetensors --relative-path models/vae"
)
.run_commands(
"comfy --skip-prompt model download --url https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/flux1-schnell.safetensors --relative-path models/unet"
image.run_commands( # download a custom node
"comfy node install was-node-suite-comfyui"
)
# Add .run_commands(...) calls for any other models you want to download
# Add .run_commands(...) calls for any other custom nodes you want to download
)

# To download gated models that require a Hugging Face token (e.g. Flux Dev), add `--set-hf-api-token=<your_token>` to your `comfy model download` command.
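#
# As a sketch of one way to supply that token (not shown in this example): store it in a Modal
# [Secret](/docs/guide/secrets), here assumed to be named `huggingface-secret` and to expose `HF_TOKEN`,
# then reference it in the download command:

image = image.run_commands(
    "comfy --skip-prompt model download"
    " --url https://huggingface.co/black-forest-labs/FLUX.1-dev/resolve/main/flux1-dev.safetensors"
    " --relative-path models/unet"
    " --set-hf-api-token=$HF_TOKEN",
    secrets=[modal.Secret.from_name("huggingface-secret")],
)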
# See [this post](/blog/comfyui-custom-nodes) for more on how to install custom nodes on Modal.
# ### Downloading models

# #### Downloading custom nodes
#
# We'll download custom nodes using `comfy-cli` too.
# Alternatively, you can install them by cloning the git repositories to your `/root/comfy/ComfyUI/custom_nodes`
# directory and installing the required dependencies manually.
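#
# An illustrative sketch of that manual route (not part of this example), using the
# WAS Node Suite repository:

image = image.run_commands(
    "git clone https://github.com/WASasquatch/was-node-suite-comfyui.git"
    " /root/comfy/ComfyUI/custom_nodes/was-node-suite-comfyui",
    "pip install -r /root/comfy/ComfyUI/custom_nodes/was-node-suite-comfyui/requirements.txt",
)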
#
# As with the models, we opt to split the custom node installation into separate `.run_commands(...)` calls
# to allow for more granular layer caching.
# You can also use comfy-cli to download models, but for this example we'll download the Flux models directly from Hugging Face into a Modal Volume.
# Then on container start, we'll mount our models into the ComfyUI models directory.
# This allows us to avoid re-downloading the models every time you rebuild your image.

image = (
image.run_commands( # download a custom node
"comfy node install image-resize-comfyui"
# install huggingface_hub with hf_transfer support to speed up downloads
image.pip_install("huggingface_hub[hf_transfer]==0.26.2")
.env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
.run_commands( # needs to be empty for Volume mount to work
"rm -rf /root/comfy/ComfyUI/models"
)
# Add .run_commands(...) calls for any other custom nodes you want to download
)

# #### Adding more dependencies
# We create the app and specify the image we built above.

app = modal.App(name="example-comfyui", image=image)

#
# To add more dependencies, models, or custom nodes without rebuilding the entire image,
# it's recommended to append them at the end of your image build rather than modifying previous steps.
# This way, all previous steps stay cached and only the new steps are built when you change the image.
# First we need to run a function to download the Flux models to a Modal Volume.

image = (
image # Add any additional steps here
# .run_commands(...)
# .pip_install(...)
# .apt_install(...)
vol = modal.Volume.from_name("comfyui-models", create_if_missing=True)


@app.function(
volumes={"/root/models": vol},
)
def hf_download(repo_id: str, filename: str, model_type: str):
from huggingface_hub import hf_hub_download

# #### Create the app
#
# We create the app and specify the image we built above.
hf_hub_download(
repo_id=repo_id,
filename=filename,
local_dir=f"/root/models/{model_type}",
)

app = modal.App(name="example-comfyui", image=image)

# We can kick off the model downloads in parallel using [`starmap`](/docs/reference/modal.Function#starmap).
@app.local_entrypoint()
def download_models():
models_to_download = [
# format: (Hugging Face repo_id, model filename, ComfyUI models subdirectory to save it in)
(
"black-forest-labs/FLUX.1-schnell",
"ae.safetensors",
"vae",
),
(
"black-forest-labs/FLUX.1-schnell",
"flux1-schnell.safetensors",
"unet",
),
(
"comfyanonymous/flux_text_encoders",
"t5xxl_fp8_e4m3fn.safetensors",
"clip",
),
("comfyanonymous/flux_text_encoders", "clip_l.safetensors", "clip"),
]
list(hf_download.starmap(models_to_download))


# To run the download step, run `modal run 06_gpu_and_ml/comfyui/comfyapp.py::download_models`.
# By leveraging [hf_transfer](https://huggingface.co/docs/huggingface_hub/en/guides/download#faster-downloads), Modal starmap for parallelism, and Volumes, image build time drops from ~10 minutes to ~25 seconds.

# ## Running ComfyUI interactively and as an API on Modal
#
@@ -152,6 +160,7 @@
container_idle_timeout=30,
timeout=1800,
gpu="A10G",
volumes={"/root/comfy/ComfyUI/models": vol},
)
@modal.web_server(8000, startup_timeout=60)
def ui():
@@ -179,6 +188,7 @@ def ui():
"/root/workflow_api.json",
),
],
volumes={"/root/comfy/ComfyUI/models": vol},
)
class ComfyUI:
@modal.enter()
@@ -247,6 +257,6 @@ def api(self, item: Dict):
# Then, redeploy the app with this new workflow by running `modal deploy 06_gpu_and_ml/comfyui/comfyapp.py` again.
#
# ## Further optimizations
# - To decrease inference latency, you can process multiple inputs in parallel by setting `allow_concurrent_inputs=1`, which will run each input on its own container. This will reduce overall response time, but will cost you more money. See our [Scaling ComfyUI](https://modal.com/blog/scaling-comfyui) blog post for more details.
# - To decrease inference latency, you can process multiple inputs in parallel by setting `allow_concurrent_inputs=1`, which will run each input on its own container (a configuration sketch follows this list). See our [Scaling ComfyUI](https://modal.com/blog/scaling-comfyui) blog post for more details.
# - If you're noticing long startup times for the ComfyUI server (e.g. >30s), this is likely due to too many custom nodes being loaded in. Consider breaking out your deployments into one App per unique combination of models and custom nodes.
# - For those who prefer to run a ComfyUI workflow directly as a Python script, see [this blog post](https://modal.com/blog/comfyui-prototype-to-production).
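#
# A minimal configuration sketch for that first suggestion (assuming the `app` and `vol` objects
# defined above; the class body is elided and the class name is illustrative):

@app.cls(
    gpu="A10G",
    allow_concurrent_inputs=1,  # one input per container; Modal scales out across containers
    volumes={"/root/comfy/ComfyUI/models": vol},
)
class ComfyUIScaledOut:
    ...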
34 changes: 16 additions & 18 deletions 06_gpu_and_ml/comfyui/workflow_api.json
@@ -33,7 +33,7 @@
"9": {
"inputs": {
"filename_prefix": "ComfyUI",
"images": ["26", 0]
"images": ["28", 0]
},
"class_type": "SaveImage",
"_meta": {
@@ -116,37 +116,35 @@
},
"25": {
"inputs": {
"noise_seed": 857914953840122
"noise_seed": 229383932224230
},
"class_type": "RandomNoise",
"_meta": {
"title": "RandomNoise"
}
},
"26": {
"27": {
"inputs": {
"action": "crop to ratio",
"smaller_side": 0,
"larger_side": 0,
"scale_factor": 2,
"resize_mode": "any",
"side_ratio": "1:1",
"crop_pad_position": 0.5,
"pad_feathering": 20,
"pixels": ["8", 0]
"images": ["8", 0]
},
"class_type": "ImageResize",
"class_type": "PreviewImage",
"_meta": {
"title": "Image Resize"
"title": "Preview Image"
}
},
"27": {
"28": {
"inputs": {
"images": ["8", 0]
"mode": "rescale",
"supersample": "true",
"resampling": "lanczos",
"rescale_factor": 2,
"resize_width": 2048,
"resize_height": 2048,
"image": ["8", 0]
},
"class_type": "PreviewImage",
"class_type": "Image Resize",
"_meta": {
"title": "Preview Image"
"title": "Image Resize"
}
}
}
4 changes: 2 additions & 2 deletions 06_gpu_and_ml/tensorflow/tensorflow_tutorial.py
@@ -28,8 +28,8 @@
import modal

dockerhub_image = modal.Image.from_registry(
"tensorflow/tensorflow:2.12.0-gpu",
).pip_install("protobuf==3.20.*")
"tensorflow/tensorflow:2.15.0-gpu",
)

app = modal.App("example-tensorflow-tutorial", image=dockerhub_image)

23 changes: 17 additions & 6 deletions 06_gpu_and_ml/text-to-video/mochi.py
@@ -61,7 +61,7 @@
)
)

app = modal.App("example-mochi", image=image)
app = modal.App("example-mochi")

with image.imports():
import numpy as np
@@ -85,7 +85,7 @@
model = modal.Volume.from_name("mochi-model", create_if_missing=True)
outputs = modal.Volume.from_name("mochi-outputs", create_if_missing=True)

MODEL_CACHE = Path("/root/.cache") # remote path for saving the model
MODEL_CACHE = Path("/models") # remote path for saving the model
OUTPUTS_PATH = "/outputs" # remote path for saving video outputs

# We download the model using the `hf-transfer`
@@ -116,12 +116,23 @@
"transformers",
"sentencepiece",
)
.env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
.env(
{
"HF_HUB_ENABLE_HF_TRANSFER": "1",
"HF_HOME": str(MODEL_CACHE / "huggingface"),
}
)
)

image = image.env( # so we look for the model in the right place
{"HF_HOME": str(MODEL_CACHE / "huggingface")}
)


@app.function(
volumes={MODEL_CACHE: model}, timeout=2 * HOURS, image=download_image
volumes={MODEL_CACHE: model},
timeout=2 * HOURS,
image=download_image,
)
def download_model(
model_revision: str = "8e9673c5349979457e515fddd38911df6b4ca07f",
@@ -215,11 +226,11 @@ def main(
# boot takes a while, so we keep the container warm for 20 minutes after the last call finishes
timeout=1 * HOURS,
container_idle_timeout=20 * MINUTES,
image=image,
)
class Mochi:
@modal.enter()
def load_model(self):
model.reload()
ray.init()
model_path = MODEL_CACHE / "mochi-1-preview"
vae_stats_path = f"{model_path}/vae_stats.json"
@@ -232,7 +243,7 @@ def load_model(self):
f"🍡 WARNING: Mochi requires at least 4xH100 GPUs, but only {num_gpus} GPU(s) are available."
)
print(
f"🍡 loading model to {num_gpus} GPUs. This can take 5-15 minutes."
f"🍡 loading model to {num_gpus} GPUs. This can take a few minutes."
)
self.model = MochiWrapper(
num_workers=num_gpus,
5 changes: 5 additions & 0 deletions 06_gpu_and_ml/torch_profiling.py
@@ -1,5 +1,7 @@
# # Tracing and profiling GPU-accelerated PyTorch programs on Modal

# ![A PyTorch trace loaded into ui.perfetto.dev](https://modal-public-assets.s3.amazonaws.com/tmpx_2c9bl5_c5aa7ab0.webp)

# GPUs are high-performance computing devices. For high-performance computing,
# tools for measuring and investigating performance are as critical
# as tools for testing and confirming correctness in typical software.
@@ -9,6 +11,9 @@
# how to host TensorBoard, which includes useful visualizations and
# performance improvement suggestions.

# For a live walkthrough, check out
# [this video on our YouTube channel](https://www.youtube.com/watch?v=4cesQJLyHA8).

# ## Saving traces to a Modal Volume

# Most tracing tools, including PyTorch's profiler, produce results as files on disk.
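
# As a minimal sketch (assuming a Modal Volume mounted at `/traces`, with a stand-in
# matrix multiply as the GPU workload to profile):

import torch
from torch.profiler import ProfilerActivity, profile

with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
    x = torch.randn(1024, 1024, device="cuda")
    (x @ x).sum().item()  # .item() forces execution of the stand-in workload

prof.export_chrome_trace("/traces/trace.json")  # open this file in ui.perfetto.dev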
1 change: 1 addition & 0 deletions 13_sandboxes/codelangchain/agent.py
@@ -2,6 +2,7 @@
# cmd: ["modal", "run", "13_sandboxes.codelangchain.agent", "--question", "Use gpt2 and transformers to generate text"]
# tags: ["featured", "use-case-sandboxed-code-execution"]
# pytest: false
# env: {"MODAL_AUTOMOUNT": "True"}
# ---

# # Build a coding agent with Modal Sandboxes and LangGraph
5 changes: 5 additions & 0 deletions internal/utils.py
@@ -34,6 +34,9 @@ class Example(BaseModel):
cli_args: Optional[list] = None # Full command line args to run it
stem: Optional[str] = None # stem of path
tags: Optional[list[str]] = None # metadata tags for the example
env: Optional[
dict[str, str]
] = None # environment variables for the example


_RE_NEWLINE = re.compile(r"\r?\n")
@@ -119,6 +122,7 @@ def gather_example_files(
cmd = metadata.get("cmd", ["modal", "run", repo_filename])
args = metadata.get("args", [])
tags = metadata.get("tags", [])
env = metadata.get("env", dict())
yield Example(
type=ExampleType.MODULE,
filename=filename_abs,
Expand All @@ -128,6 +132,7 @@ def gather_example_files(
cli_args=(cmd + args),
stem=Path(filename_abs).stem,
tags=tags,
env=env,
)
elif ext in [".png", ".jpeg", ".jpg", ".gif", ".mp4"]:
yield Example(
