From 2300cc1f9eee423c89f6263ac29b84ac90690464 Mon Sep 17 00:00:00 2001
From: Charles Frye <charles@modal.com>
Date: Thu, 24 Oct 2024 08:30:41 -0700
Subject: [PATCH] boost num_inference_steps for better outputs (#949)

* boost num_inference_steps for better outputs

* make inference steps configurable from CLI

* use kebab-case (--num-inference-steps), not snake_case, for the CLI flag
---
 06_gpu_and_ml/text-to-video/mochi.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/06_gpu_and_ml/text-to-video/mochi.py b/06_gpu_and_ml/text-to-video/mochi.py
index 46a297676..a143afff9 100644
--- a/06_gpu_and_ml/text-to-video/mochi.py
+++ b/06_gpu_and_ml/text-to-video/mochi.py
@@ -1,5 +1,5 @@
 # ---
-# cmd: ["modal", "run", "--detach", "06_gpu_and_ml/text-to-video/mochi.py"]
+# cmd: ["modal", "run", "--detach", "06_gpu_and_ml/text-to-video/mochi.py", "--num-inference-steps", "64"]
 # ---
 
 # # Generate videos from text prompts with Mochi
@@ -165,14 +165,21 @@ def download_model(
 
 
 @app.local_entrypoint()
-def main(prompt: str = "A cat playing drums in a jazz ensemble"):
+def main(
+    prompt: str = "A cat playing drums in a jazz ensemble",
+    num_inference_steps: int = 200,
+):
     from pathlib import Path
 
     mochi = Mochi()
     local_dir = Path("/tmp/moshi")
     local_dir.mkdir(exist_ok=True, parents=True)
     download_model.remote()
-    remote_path = Path(mochi.generate_video.remote(prompt=prompt))
+    remote_path = Path(
+        mochi.generate_video.remote(
+            prompt=prompt, num_inference_steps=num_inference_steps
+        )
+    )
     local_path = local_dir / remote_path.name
     local_path.write_bytes(b"".join(outputs.read_file(remote_path.name)))
     print("🍡 video saved locally at", local_path)
@@ -250,7 +257,7 @@ def generate_video(
         num_frames=163,
         seed=12345,
         cfg_scale=4.5,
-        num_inference_steps=64,
+        num_inference_steps=200,
     ):
         # credit: https://github.com/genmoai/models/blob/7c7d33c49d53bbf939fd6676610e949f3008b5a8/src/mochi_preview/infer.py#L63