update comfyui example to use flux (#874)
kning authored Sep 13, 2024
1 parent ca5aa70 commit b830830
Showing 5 changed files with 127 additions and 92 deletions.
56 changes: 26 additions & 30 deletions 06_gpu_and_ml/comfyui/comfyapp.py
@@ -3,44 +3,44 @@
# deploy: true
# ---
#
-# # Run ComfyUI interactively and as an API
+# # Run Flux on ComfyUI interactively and as an API
#
-# [ComfyUI](https://github.com/comfyanonymous/ComfyUI) is a no-code Stable Diffusion GUI that allows you to design and execute advanced image generation pipelines.
-#
-# ![example comfyui image](./comfyui.png)
+# [ComfyUI](https://github.com/comfyanonymous/ComfyUI) is an open-source Stable Diffusion GUI with a graph/nodes-based interface that allows you to design and execute advanced image generation pipelines.
+
+# Flux is a family of cutting-edge text-to-image models created by [Black Forest Labs](https://huggingface.co/black-forest-labs), rapidly gaining popularity for their exceptional image quality.
#
# In this example, we show you how to
#
-# 1. run ComfyUI interactively to develop workflows
+# 1. run Flux on ComfyUI interactively to develop workflows
#
-# 2. serve a ComfyUI workflow as an API
+# 2. serve a Flux ComfyUI workflow as an API
#
# Combining the UI and the API in a single app makes it easy to iterate on your workflow even after deployment.
# Simply head to the interactive UI, make your changes, export the JSON, and redeploy the app.
#
# ## Quickstart
#
-# This example serves the [ComfyUI inpainting example workflow](https://comfyanonymous.github.io/ComfyUI_examples/inpaint/),
-# which "fills in" part of an input image based on a prompt.
-# For the prompt `"Spider-Man visits Yosemite, rendered by Blender, trending on artstation"`
-# on [this input image](https://raw.githubusercontent.com/comfyanonymous/ComfyUI_examples/master/inpaint/yosemite_inpaint_example.png),
+# This example runs a [simple FLUX.1-schnell workflow](https://openart.ai/workflows/reverentelusarca/flux-simple-workflow-schnell/40OkdaB23J2TMTXHmxxu) with an Image Resize custom node at the end.
+#
+# For the prompt `"Surreal dreamscape with floating islands, upside-down waterfalls, and impossible geometric structures, all bathed in a soft, ethereal light"`
# we got this output:
#
-# ![example comfyui image](./comfyui_gen_image.jpg)
+# ![example comfyui image](./flux_gen_image.jpeg)
#
# To serve the workflow in this example as an API:
# 1. Stand up the ComfyUI server in development mode:
# ```bash
# modal serve 06_gpu_and_ml/comfyui/comfyapp.py
# ```
+# Note: if you're running this for the first time, it will take several minutes to build the image, since we have to download the Flux models (>20GB) to the container. Subsequent runs will reuse the prebuilt image.
#
# 2. In another terminal, run inference:
# ```bash
# python 06_gpu_and_ml/comfyui/comfyclient.py --dev --modal-workspace your-modal-workspace --prompt "your prompt here"
# ```
+# You can find your Modal workspace name by running `modal profile current`.
#
-# The first inference will take a bit longer because the server will need to boot up (~20-30s).
-# Successive inference calls while the server is up should take a few seconds or less.
+# Images generate in 1-2 minutes (20-30s for the ComfyUI server to launch, ~1m for the workflow to complete).
#
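Under the hood, the client in step 2 just POSTs the prompt as JSON to the deployed endpoint. A minimal sketch of the same call with `requests` (the workspace name is a placeholder, and the assumption that the endpoint responds with raw image bytes follows from the client's behavior, not from anything shown in this diff):

```python
import requests

# the "-dev" suffix applies while the server runs under `modal serve`
url = "https://your-modal-workspace--example-comfyui-comfyui-api-dev.modal.run/"

response = requests.post(url, json={"prompt": "your prompt here"}, timeout=600)
response.raise_for_status()

# assumed: the endpoint returns the generated image bytes directly
with open("flux_output.jpeg", "wb") as f:
    f.write(response.content)
```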
# ## Setup
#
@@ -60,12 +60,15 @@
        python_version="3.11"
    )
    .apt_install("git")  # install git to clone ComfyUI
-    .pip_install("comfy-cli==1.0.33")  # install comfy-cli
+    .pip_install("comfy-cli==1.1.8")  # install comfy-cli
    .run_commands(  # use comfy-cli to install the ComfyUI repo and its dependencies
-        "comfy --skip-prompt install --nvidia",
+        "comfy --skip-prompt install --nvidia"
    )
-    .run_commands(  # download the inpainting model
-        "comfy --skip-prompt model download --url https://huggingface.co/stabilityai/stable-diffusion-2-inpainting/resolve/main/512-inpainting-ema.safetensors --relative-path models/checkpoints"
+    .run_commands(  # download the flux models
+        "comfy --skip-prompt model download --url https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/t5xxl_fp8_e4m3fn.safetensors --relative-path models/clip",
+        "comfy --skip-prompt model download --url https://huggingface.co/comfyanonymous/flux_text_encoders/resolve/main/clip_l.safetensors --relative-path models/clip",
+        "comfy --skip-prompt model download --url https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/ae.safetensors --relative-path models/vae",
+        "comfy --skip-prompt model download --url https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/flux1-schnell.safetensors --relative-path models/unet",
    )
    .run_commands(  # download a custom node
        "comfy node install image-resize-comfyui"
@@ -84,7 +87,7 @@
    concurrency_limit=1,
    container_idle_timeout=30,
    timeout=1800,
-    gpu="any",
+    gpu="A10G",
)
@modal.web_server(8000, startup_timeout=60)
def ui():
@@ -104,19 +107,13 @@ def ui():
# For more on how to run web services on Modal, check out [this guide](https://modal.com/docs/guide/webhooks).
@app.cls(
    allow_concurrent_inputs=10,
-    concurrency_limit=1,
    container_idle_timeout=300,
-    gpu="any",
+    gpu="A10G",
    mounts=[
        modal.Mount.from_local_file(
            Path(__file__).parent / "workflow_api.json",
            "/root/workflow_api.json",
        ),
-        # mount input images
-        modal.Mount.from_local_file(
-            Path(__file__).parent / "yosemite_inpaint_example.png",
-            "/root/comfy/ComfyUI/input/yosemite_inpaint_example.png",
-        ),
    ],
)
class ComfyUI:
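The next hunk joins the class mid-definition. For orientation: the ComfyUI server is started once per container in a `@modal.enter` hook named `launch_comfy_background` (visible in the hunk header below). A sketch of what that hook plausibly looks like; the exact launch command is an assumption, not shown in this diff:

```python
import subprocess
import modal

class ComfyUI:
    @modal.enter()
    def launch_comfy_background(self):
        # start the ComfyUI server in the background, once per container boot
        cmd = "comfy launch --background"
        subprocess.run(cmd, shell=True, check=True)
```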
@@ -128,7 +125,7 @@ def launch_comfy_background(self):
    @modal.method()
    def infer(self, workflow_path: str = "/root/workflow_api.json"):
        # runs the `comfy run --workflow` command as a subprocess
-        cmd = f"comfy run --workflow {workflow_path} --wait"
+        cmd = f"comfy run --workflow {workflow_path} --wait --timeout 1200"
        subprocess.run(cmd, shell=True, check=True)

        # completed workflows write output images to this directory
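The diff truncates `infer` right after the subprocess call. A plausible continuation for collecting the result, written as a standalone sketch (the output directory path and the prefix-matching logic are assumptions, not shown in this diff):

```python
import json
from pathlib import Path

OUTPUT_DIR = Path("/root/comfy/ComfyUI/output")  # assumed ComfyUI output location

def get_output_bytes(workflow_path: str) -> bytes:
    # find the filename prefix that the workflow's SaveImage node writes
    workflow = json.loads(Path(workflow_path).read_text())
    prefix = [
        node["inputs"]["filename_prefix"]
        for node in workflow.values()
        if node.get("class_type") == "SaveImage"
    ][0]

    # return the bytes of the first output image matching that prefix
    for f in OUTPUT_DIR.iterdir():
        if f.name.startswith(prefix):
            return f.read_bytes()
    raise FileNotFoundError(f"no output image with prefix {prefix!r}")
```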
@@ -155,11 +152,11 @@ def api(self, item: Dict):
        )

        # insert the prompt
-        workflow_data["3"]["inputs"]["text"] = item["prompt"]
+        workflow_data["6"]["inputs"]["text"] = item["prompt"]

        # give the output image a unique id per client request
        client_id = uuid.uuid4().hex
-        workflow_data["11"]["inputs"]["filename_prefix"] = client_id
+        workflow_data["9"]["inputs"]["filename_prefix"] = client_id

        # save this updated workflow to a new file
        new_workflow_file = f"{client_id}.json"
@@ -187,7 +184,6 @@ def api(self, item: Dict):
# Then, redeploy the app with this new workflow by running `modal deploy 06_gpu_and_ml/comfyui/comfyapp.py` again.
#
# ## Further optimizations
-#
# - To decrease inference latency, you can process multiple inputs in parallel by setting `allow_concurrent_inputs=1`, which will run each input on its own container. This will reduce overall response time but will cost you more money. See our [Scaling ComfyUI](https://modal.com/blog/scaling-comfyui) blog post for more details.
# - If you're noticing long startup times for the ComfyUI server (e.g. >30s), this is likely due to too many custom nodes being loaded in. Consider breaking out your deployments into one App per unique combination of models and custom nodes.
# - To reduce image build time, you can write custom code to cache previous model and custom node downloads in a Modal [Volume](https://modal.com/docs/guide/volumes), avoiding full downloads on image rebuilds (see [this gist](https://gist.github.com/kning/bb5f076e831266d00e134fcb3a13ed88) and the sketch below).
# - For those who prefer to run a ComfyUI workflow directly as a Python script, see [this blog post](https://modal.com/blog/comfyui-prototype-to-production).
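The caching gist referenced above is not reproduced in this diff; a rough sketch of the idea (the Volume name, cache path, and download helper are illustrative assumptions):

```python
import subprocess
import modal

# persistent Volume that survives image rebuilds (the name is hypothetical)
vol = modal.Volume.from_name("comfyui-models-cache", create_if_missing=True)

def download_flux_unet():
    # illustrative helper: skip the ~20GB download if a previous build cached it
    from pathlib import Path

    cached = Path("/cache/flux1-schnell.safetensors")
    if not cached.exists():
        subprocess.run(
            "comfy --skip-prompt model download "
            "--url https://huggingface.co/black-forest-labs/FLUX.1-schnell/resolve/main/flux1-schnell.safetensors "
            "--relative-path /cache",
            shell=True,
            check=True,
        )
        vol.commit()  # persist the download for future builds
    # the cached file still needs to be linked into ComfyUI's models/unet
    # directory (e.g. via a symlink) before the workflow runs

# run the helper at image build time with the Volume attached
image = image.run_function(download_flux_unet, volumes={"/cache": vol})
```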
8 changes: 1 addition & 7 deletions 06_gpu_and_ml/comfyui/comfyclient.py
@@ -18,7 +18,6 @@ def main(args: argparse.Namespace):
    url = f"https://{args.modal_workspace}--example-comfyui-comfyui-api{'-dev' if args.dev else ''}.modal.run/"
    data = {
        "prompt": args.prompt,
-        "input_image_url": "https://raw.githubusercontent.com/comfyanonymous/ComfyUI_examples/master/inpaint/yosemite_inpaint_example.png",
    }
    print(f"Sending request to {url} with prompt: {data['prompt']}")
    print("Waiting for response...")
@@ -58,12 +57,7 @@ def parse_args(arglist: list[str]) -> argparse.Namespace:
        action="store_true",
        help="use this flag when running the ComfyUI server in development mode with `modal serve`",
    )
-    parser.add_argument(
-        "--input_image-url",
-        default="https://github.com/comfyanonymous/ComfyUI_examples/blob/abcc12912ca11f2f7a36b3a36a4b7651db907459/inpaint/yosemite_inpaint_example.png",
-        type=str,
-        help="URL of the image to inpaint",
-    )

    return parser.parse_args(arglist[1:])
Binary file removed 06_gpu_and_ml/comfyui/comfyui_gen_image.jpg
Binary file added 06_gpu_and_ml/comfyui/flux_gen_image.jpeg
155 changes: 100 additions & 55 deletions 06_gpu_and_ml/comfyui/workflow_api.json
@@ -1,107 +1,152 @@
{
-  "1": {
+  "5": {
    "inputs": {
-      "image": "yosemite_inpaint_example.png",
-      "upload": "image"
+      "width": 1024,
+      "height": 1024,
+      "batch_size": 1
    },
-    "class_type": "LoadImage",
+    "class_type": "EmptyLatentImage",
    "_meta": {
-      "title": "Load Image"
+      "title": "Empty Latent Image"
    }
  },
-  "2": {
+  "6": {
    "inputs": {
-      "ckpt_name": "512-inpainting-ema.safetensors"
+      "text": "Surreal dreamscape with floating islands, upside-down waterfalls, and impossible geometric structures, all bathed in a soft, ethereal light",
+      "clip": ["11", 0]
    },
-    "class_type": "CheckpointLoaderSimple",
+    "class_type": "CLIPTextEncode",
    "_meta": {
-      "title": "Load Checkpoint"
+      "title": "CLIP Text Encode (Prompt)"
    }
  },
-  "3": {
+  "8": {
    "inputs": {
-      "text": "closeup photograph of a white heron in the yosemite national park mountains nature",
-      "clip": ["2", 1]
+      "samples": ["13", 0],
+      "vae": ["10", 0]
    },
-    "class_type": "CLIPTextEncode",
+    "class_type": "VAEDecode",
    "_meta": {
-      "title": "CLIP Text Encode (Prompt)"
+      "title": "VAE Decode"
    }
  },
-  "5": {
+  "9": {
    "inputs": {
-      "text": "watermark, text",
-      "clip": ["2", 1]
+      "filename_prefix": "ComfyUI",
+      "images": ["26", 0]
    },
-    "class_type": "CLIPTextEncode",
+    "class_type": "SaveImage",
    "_meta": {
-      "title": "CLIP Text Encode (Prompt)"
+      "title": "Save Image"
    }
  },
-  "6": {
+  "10": {
    "inputs": {
-      "seed": 655430128391471,
-      "steps": 20,
-      "cfg": 8,
-      "sampler_name": "uni_pc_bh2",
-      "scheduler": "normal",
-      "denoise": 1,
-      "model": ["2", 0],
-      "positive": ["3", 0],
-      "negative": ["5", 0],
-      "latent_image": ["9", 0]
+      "vae_name": "ae.safetensors"
    },
-    "class_type": "KSampler",
+    "class_type": "VAELoader",
    "_meta": {
-      "title": "KSampler"
+      "title": "Load VAE"
    }
  },
-  "7": {
+  "11": {
    "inputs": {
-      "samples": ["6", 0],
-      "vae": ["2", 2]
+      "clip_name1": "t5xxl_fp8_e4m3fn.safetensors",
+      "clip_name2": "clip_l.safetensors",
+      "type": "flux"
    },
-    "class_type": "VAEDecode",
+    "class_type": "DualCLIPLoader",
    "_meta": {
-      "title": "VAE Decode"
+      "title": "DualCLIPLoader"
    }
  },
-  "9": {
+  "12": {
    "inputs": {
-      "grow_mask_by": 6,
-      "pixels": ["1", 0],
-      "vae": ["2", 2],
-      "mask": ["1", 1]
+      "unet_name": "flux1-schnell.safetensors",
+      "weight_dtype": "fp8_e4m3fn"
    },
-    "class_type": "VAEEncodeForInpaint",
+    "class_type": "UNETLoader",
    "_meta": {
-      "title": "VAE Encode (for Inpainting)"
+      "title": "Load Diffusion Model"
    }
  },
-  "11": {
+  "13": {
    "inputs": {
-      "filename_prefix": "ComfyUI",
-      "images": ["7", 0]
+      "noise": ["25", 0],
+      "guider": ["22", 0],
+      "sampler": ["16", 0],
+      "sigmas": ["17", 0],
+      "latent_image": ["5", 0]
    },
-    "class_type": "SaveImage",
+    "class_type": "SamplerCustomAdvanced",
    "_meta": {
-      "title": "Save Image"
+      "title": "SamplerCustomAdvanced"
    }
  },
-  "12": {
+  "16": {
    "inputs": {
-      "action": "resize only",
+      "sampler_name": "euler"
+    },
+    "class_type": "KSamplerSelect",
+    "_meta": {
+      "title": "KSamplerSelect"
+    }
+  },
+  "17": {
+    "inputs": {
+      "scheduler": "simple",
+      "steps": 4,
+      "denoise": 1,
+      "model": ["12", 0]
+    },
+    "class_type": "BasicScheduler",
+    "_meta": {
+      "title": "BasicScheduler"
+    }
+  },
+  "22": {
+    "inputs": {
+      "model": ["12", 0],
+      "conditioning": ["6", 0]
+    },
+    "class_type": "BasicGuider",
+    "_meta": {
+      "title": "BasicGuider"
+    }
+  },
+  "25": {
+    "inputs": {
+      "noise_seed": 857914953840122
+    },
+    "class_type": "RandomNoise",
+    "_meta": {
+      "title": "RandomNoise"
+    }
+  },
+  "26": {
+    "inputs": {
+      "action": "crop to ratio",
      "smaller_side": 0,
      "larger_side": 0,
-      "scale_factor": 0,
-      "resize_mode": "reduce size only",
-      "side_ratio": "4:3",
+      "scale_factor": 2,
+      "resize_mode": "any",
+      "side_ratio": "1:1",
      "crop_pad_position": 0.5,
-      "pad_feathering": 20
+      "pad_feathering": 20,
+      "pixels": ["8", 0]
    },
    "class_type": "ImageResize",
    "_meta": {
      "title": "Image Resize"
    }
+  },
+  "27": {
+    "inputs": {
+      "images": ["8", 0]
+    },
+    "class_type": "PreviewImage",
+    "_meta": {
+      "title": "Preview Image"
+    }
  }
}
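Since this commit renumbers the nodes that `comfyapp.py` patches (the prompt moves from node "3" to "6", the output filename prefix from "11" to "9"), a quick way to find the right ids in any exported workflow is to list them. A small standalone helper, not part of the commit:

```python
import json
from pathlib import Path

# map node ids to class types and titles, to find which ids to patch
# (e.g. the CLIPTextEncode prompt node "6" or the SaveImage node "9")
workflow = json.loads(Path("workflow_api.json").read_text())
for node_id, node in workflow.items():
    title = node.get("_meta", {}).get("title", "")
    print(f'{node_id}: {node["class_type"]} ({title})')
```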
