Example simplification using @build (#507)

* Example simplification using __build__ * merge __build__ and __enter__ * add back type annotation that disappeared * rewrite SDXL * Rewrite it to use decorators * Update SDXL Turbo * Rewrite alpaca-lora * embeddings/instructor.py – use @build * Update webcam example
modal-labs · Jan 7, 2024 · 650c56f · 650c56f
1 parent 33591ea
commit 650c56f
Show file tree

Hide file tree

Showing 6 changed files with 110 additions and 154 deletions.
diff --git a/06_gpu_and_ml/alpaca/alpaca_lora.py b/06_gpu_and_ml/alpaca/alpaca_lora.py
@@ -1,25 +1,13 @@
 import sys
 
-from modal import Image, Stub, method
+from modal import Image, Stub, build, enter, method
 
 # Define a function for downloading the models, that will run once on image build.
 # This allows the weights to be present inside the image for faster startup.
 
 base_model = "luodian/llama-7b-hf"
 lora_weights = "tloen/alpaca-lora-7b"
 
-
-def download_models():
-    from peft import PeftModel
-    from transformers import LlamaForCausalLM, LlamaTokenizer
-
-    model = LlamaForCausalLM.from_pretrained(
-        base_model,
-    )
-    PeftModel.from_pretrained(model, lora_weights)
-    LlamaTokenizer.from_pretrained(base_model)
-
-
 # Alpaca-LoRA is distributed as a public Github repository and the repository is not
 # installable by `pip`, so instead we install the repository by cloning it into our Modal
 # image.
@@ -28,8 +16,7 @@ def download_models():
 repo_url = "https://github.com/tloen/alpaca-lora"
 commit_hash = "fcbc45e4c0db8948743bd1227b46a796c1effcd0"
 image = (
-    Image.debian_slim()
-    .apt_install("git")
+    Image.debian_slim().apt_install("git")
     # Here we place the latest repository code into /root.
     # Because /root is almost empty, but not entirely empty, `git clone` won't work,
     # so this `init` then `checkout` workaround is used.
@@ -58,8 +45,14 @@ def download_models():
         "torchvision~=0.16",
         "sentencepiece==0.1.99",
     )
-    .run_function(download_models)
 )
+
+with image.imports():
+    import torch
+    from generate import generate_prompt
+    from peft import PeftModel
+    from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer
+
 stub = Stub(name="example-alpaca-lora", image=image)
 
 # The Alpaca-LoRA model is integrated into Modal as a Python class with an __enter__
@@ -73,16 +66,21 @@ def download_models():
 
 @stub.cls(gpu="A10G")
 class AlpacaLoRAModel:
-    def __enter__(self):
+    @build()
+    def download_models(self):
+        model = LlamaForCausalLM.from_pretrained(
+            base_model,
+        )
+        PeftModel.from_pretrained(model, lora_weights)
+        LlamaTokenizer.from_pretrained(base_model)
+
+    @enter()
+    def enter(self):
         """
         Container-lifecycle method for model setup. Code is taken from
         https://github.com/tloen/alpaca-lora/blob/main/generate.py and minor
         modifications are made to support usage in a Python class.
         """
-        import torch
-        from peft import PeftModel
-        from transformers import LlamaForCausalLM, LlamaTokenizer
-
         load_8bit = False
         device = "cuda" if torch.cuda.is_available() else "cpu"
 
@@ -146,10 +144,6 @@ def evaluate(
         max_new_tokens=128,
         **kwargs,
     ):
-        import torch
-        from generate import generate_prompt
-        from transformers import GenerationConfig
-
         prompt = generate_prompt(instruction, input)
         inputs = self.tokenizer(prompt, return_tensors="pt")
         input_ids = inputs["input_ids"].to(self.device)

diff --git a/06_gpu_and_ml/embeddings/instructor.py b/06_gpu_and_ml/embeddings/instructor.py
@@ -1,15 +1,8 @@
-from modal import Image, Stub, method
+from modal import Image, Stub, build, enter, method
 
 MODEL_DIR = "/model"
 
 
-def download_model():
-    from InstructorEmbedding import INSTRUCTOR
-
-    model = INSTRUCTOR("hkunlp/instructor-large")
-    model.save(MODEL_DIR)
-
-
 image = (
     Image.debian_slim(python_version="3.10")
     .apt_install("git")
@@ -19,17 +12,23 @@ def download_model():
         "cd instructor-embedding && pip install -r requirements.txt",
     )
     .pip_install("InstructorEmbedding")
-    .run_function(download_model)
 )
 
 stub = Stub("instructor", image=image)
 
+with image.imports():
+    from InstructorEmbedding import INSTRUCTOR
+
 
 @stub.cls(gpu="any")
 class InstructorModel:
-    def __enter__(self):
-        from InstructorEmbedding import INSTRUCTOR
+    @build()
+    def download_model(self):
+        model = INSTRUCTOR("hkunlp/instructor-large")
+        model.save(MODEL_DIR)
 
+    @enter()
+    def enter(self):
         self.model = INSTRUCTOR(MODEL_DIR, device="cuda")
 
     @method()

diff --git a/06_gpu_and_ml/obj_detection_webcam/webcam.py b/06_gpu_and_ml/obj_detection_webcam/webcam.py
@@ -34,32 +34,20 @@
 
 from fastapi import FastAPI, Request, Response
 from fastapi.staticfiles import StaticFiles
-from modal import (
-    Image,
-    Mount,
-    Stub,
-    asgi_app,
-    method,
-)
+from modal import Image, Mount, Stub, asgi_app, build, method
 
 # We need to install [transformers](https://github.com/huggingface/transformers)
 # which is a package Huggingface uses for all their models, but also
 # [Pillow](https://python-pillow.org/) which lets us work with images from Python,
 # and a system font for drawing.
 #
 # This example uses the `facebook/detr-resnet-50` pre-trained model, which is downloaded
-# once at image build time using the `download_model` function and saved into the image.
-# 'Baking' models into the `modal.Image` at build time provided the fastest cold start.
+# once at image build time using the `@build` hook and saved into the image. 'Baking'
+# models into the `modal.Image` at build time provides the fastest cold start.
 
 model_repo_id = "facebook/detr-resnet-50"
 
 
-def download_model():
-    from huggingface_hub import snapshot_download
-
-    snapshot_download(repo_id=model_repo_id, cache_dir="/cache")
-
-
 stub = Stub("example-webcam-object-detection")
 image = (
     Image.debian_slim()
@@ -70,7 +58,6 @@ def download_model():
         "transformers",
     )
     .apt_install("fonts-freefont-ttf")
-    .run_function(download_model)
 )
 
 
@@ -92,14 +79,23 @@ def download_model():
 # web interface can render it on top of the webcam view.
 
 
+with image.imports():
+    import torch
+    from huggingface_hub import snapshot_download
+    from PIL import Image, ImageColor, ImageDraw, ImageFont
+    from transformers import DetrForObjectDetection, DetrImageProcessor
+
+
 @stub.cls(
     cpu=4,
     image=image,
 )
 class ObjectDetection:
-    def __enter__(self):
-        from transformers import DetrForObjectDetection, DetrImageProcessor
+    @build()
+    def download_model(self):
+        snapshot_download(repo_id=model_repo_id, cache_dir="/cache")
 
+    def __enter__(self):
         self.feature_extractor = DetrImageProcessor.from_pretrained(
             model_repo_id,
             cache_dir="/cache",
@@ -112,14 +108,10 @@ def __enter__(self):
     @method()
     def detect(self, img_data_in):
         # Based on https://huggingface.co/spaces/nateraw/detr-object-detection/blob/main/app.py
-        from PIL import Image, ImageColor, ImageDraw, ImageFont
-
         # Read png from input
         image = Image.open(io.BytesIO(img_data_in)).convert("RGB")
 
         # Make prediction
-        import torch
-
         inputs = self.feature_extractor(image, return_tensors="pt")
         outputs = self.model(**inputs)
         img_size = torch.tensor([tuple(reversed(image.size))])

diff --git a/06_gpu_and_ml/stable_diffusion/stable_diffusion_cli.py b/06_gpu_and_ml/stable_diffusion/stable_diffusion_cli.py
@@ -37,7 +37,7 @@
 import time
 from pathlib import Path
 
-from modal import Image, Stub, method
+from modal import Image, Stub, build, enter, method
 
 # All Modal programs need a [`Stub`](/docs/reference/modal.Stub) — an object that acts as a recipe for
 # the application. Let's give it a friendly name.
@@ -53,31 +53,6 @@
 # already inside the image.
 
 model_id = "runwayml/stable-diffusion-v1-5"
-cache_path = "/vol/cache"
-
-
-def download_models():
-    import diffusers
-    import torch
-
-    # Download scheduler configuration. Experiment with different schedulers
-    # to identify one that works best for your use-case.
-    scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
-        model_id,
-        subfolder="scheduler",
-        cache_dir=cache_path,
-    )
-    scheduler.save_pretrained(cache_path, safe_serialization=True)
-
-    # Downloads all other models.
-    pipe = diffusers.StableDiffusionPipeline.from_pretrained(
-        model_id,
-        revision="fp16",
-        torch_dtype=torch.float16,
-        cache_dir=cache_path,
-    )
-    pipe.save_pretrained(cache_path, safe_serialization=True)
-
 
 image = (
     Image.debian_slim(python_version="3.10")
@@ -95,9 +70,12 @@ def download_models():
         find_links="https://download.pytorch.org/whl/torch_stable.html",
     )
     .pip_install("xformers", pre=True)
-    .run_function(download_models)
 )
-stub.image = image
+
+with image.imports():
+    import diffusers
+    import torch
+
 
 # ## Using container lifecycle methods
 #
@@ -118,16 +96,13 @@ def download_models():
 # It sends the PIL image back to our CLI where we save the resulting image in a local file.
 
 
-@stub.cls(gpu="A10G")
+@stub.cls(image=image, gpu="A10G")
 class StableDiffusion:
-    def __enter__(self):
-        import diffusers
-        import torch
-
-        torch.backends.cuda.matmul.allow_tf32 = True
-
+    @build()
+    @enter()
+    def initialize(self):
         scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
-            cache_path,
+            model_id,
             subfolder="scheduler",
             solver_order=2,
             prediction_type="epsilon",
@@ -139,7 +114,7 @@ def __enter__(self):
             device_map="auto",
         )
         self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(
-            cache_path,
+            model_id,
             scheduler=scheduler,
             low_cpu_mem_usage=True,
             device_map="auto",
@@ -150,8 +125,6 @@ def __enter__(self):
     def run_inference(
         self, prompt: str, steps: int = 20, batch_size: int = 4
     ) -> list[bytes]:
-        import torch
-
         with torch.inference_mode():
             with torch.autocast("cuda"):
                 images = self.pipe(