Skip to content

Commit

Permalink
Example simplification using @build (#507)
Browse files Browse the repository at this point in the history
* Example simplification using __build__

* merge __build__ and __enter__

* add back type annotation that disappeared

* rewrite SDXL

* Rewrite it to use decorators

* Update SDXL Turbo

* Rewrite alpaca-lora

* embeddings/instructor.py – use @build

* Update webcam example
  • Loading branch information
erikbern authored Jan 7, 2024
1 parent 33591ea commit 650c56f
Show file tree
Hide file tree
Showing 6 changed files with 110 additions and 154 deletions.
44 changes: 19 additions & 25 deletions 06_gpu_and_ml/alpaca/alpaca_lora.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,13 @@
import sys

from modal import Image, Stub, method
from modal import Image, Stub, build, enter, method

# Define a function for downloading the models, that will run once on image build.
# This allows the weights to be present inside the image for faster startup.

base_model = "luodian/llama-7b-hf"
lora_weights = "tloen/alpaca-lora-7b"


def download_models():
    """Pull the base LLaMA model, the LoRA adapter and the tokenizer.

    Instantiating each object via ``from_pretrained`` fetches its files, so
    calling this during the image build (it is passed to ``run_function``)
    leaves the weights inside the image.
    """
    from peft import PeftModel
    from transformers import LlamaForCausalLM, LlamaTokenizer

    llama = LlamaForCausalLM.from_pretrained(base_model)
    # Wrapping the base model with the adapter triggers the LoRA weight download.
    PeftModel.from_pretrained(llama, lora_weights)
    LlamaTokenizer.from_pretrained(base_model)


# Alpaca-LoRA is distributed as a public Github repository and the repository is not
# installable by `pip`, so instead we install the repository by cloning it into our Modal
# image.
Expand All @@ -28,8 +16,7 @@ def download_models():
repo_url = "https://github.com/tloen/alpaca-lora"
commit_hash = "fcbc45e4c0db8948743bd1227b46a796c1effcd0"
image = (
Image.debian_slim()
.apt_install("git")
Image.debian_slim().apt_install("git")
# Here we place the latest repository code into /root.
# Because /root is almost empty, but not entirely empty, `git clone` won't work,
# so this `init` then `checkout` workaround is used.
Expand Down Expand Up @@ -58,8 +45,14 @@ def download_models():
"torchvision~=0.16",
"sentencepiece==0.1.99",
)
.run_function(download_models)
)

with image.imports():
import torch
from generate import generate_prompt
from peft import PeftModel
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer

stub = Stub(name="example-alpaca-lora", image=image)

# The Alpaca-LoRA model is integrated into Modal as a Python class with an __enter__
Expand All @@ -73,16 +66,21 @@ def download_models():

@stub.cls(gpu="A10G")
class AlpacaLoRAModel:
def __enter__(self):
@build()
def download_models(self):
model = LlamaForCausalLM.from_pretrained(
base_model,
)
PeftModel.from_pretrained(model, lora_weights)
LlamaTokenizer.from_pretrained(base_model)

@enter()
def enter(self):
"""
Container-lifecycle method for model setup. Code is taken from
https://github.com/tloen/alpaca-lora/blob/main/generate.py and minor
modifications are made to support usage in a Python class.
"""
import torch
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer

load_8bit = False
device = "cuda" if torch.cuda.is_available() else "cpu"

Expand Down Expand Up @@ -146,10 +144,6 @@ def evaluate(
max_new_tokens=128,
**kwargs,
):
import torch
from generate import generate_prompt
from transformers import GenerationConfig

prompt = generate_prompt(instruction, input)
inputs = self.tokenizer(prompt, return_tensors="pt")
input_ids = inputs["input_ids"].to(self.device)
Expand Down
21 changes: 10 additions & 11 deletions 06_gpu_and_ml/embeddings/instructor.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,8 @@
from modal import Image, Stub, method
from modal import Image, Stub, build, enter, method

MODEL_DIR = "/model"


def download_model():
    """Load the pretrained ``hkunlp/instructor-large`` model and save it to ``MODEL_DIR``."""
    from InstructorEmbedding import INSTRUCTOR

    INSTRUCTOR("hkunlp/instructor-large").save(MODEL_DIR)


image = (
Image.debian_slim(python_version="3.10")
.apt_install("git")
Expand All @@ -19,17 +12,23 @@ def download_model():
"cd instructor-embedding && pip install -r requirements.txt",
)
.pip_install("InstructorEmbedding")
.run_function(download_model)
)

stub = Stub("instructor", image=image)

with image.imports():
from InstructorEmbedding import INSTRUCTOR


@stub.cls(gpu="any")
class InstructorModel:
def __enter__(self):
from InstructorEmbedding import INSTRUCTOR
@build()
def download_model(self):
model = INSTRUCTOR("hkunlp/instructor-large")
model.save(MODEL_DIR)

@enter()
def enter(self):
self.model = INSTRUCTOR(MODEL_DIR, device="cuda")

@method()
Expand Down
36 changes: 14 additions & 22 deletions 06_gpu_and_ml/obj_detection_webcam/webcam.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,32 +34,20 @@

from fastapi import FastAPI, Request, Response
from fastapi.staticfiles import StaticFiles
from modal import (
Image,
Mount,
Stub,
asgi_app,
method,
)
from modal import Image, Mount, Stub, asgi_app, build, method

# We need to install [transformers](https://github.com/huggingface/transformers)
# which is a package Huggingface uses for all their models, but also
# [Pillow](https://python-pillow.org/) which lets us work with images from Python,
# and a system font for drawing.
#
# This example uses the `facebook/detr-resnet-50` pre-trained model, which is downloaded
# once at image build time using the `download_model` function and saved into the image.
# 'Baking' models into the `modal.Image` at build time provides the fastest cold start.
# once at image build time using the `@build` hook and saved into the image. 'Baking'
# models into the `modal.Image` at build time provides the fastest cold start.

model_repo_id = "facebook/detr-resnet-50"


def download_model():
    """Download a snapshot of the ``model_repo_id`` repository into ``/cache``."""
    from huggingface_hub import snapshot_download

    snapshot_download(
        cache_dir="/cache",
        repo_id=model_repo_id,
    )


stub = Stub("example-webcam-object-detection")
image = (
Image.debian_slim()
Expand All @@ -70,7 +58,6 @@ def download_model():
"transformers",
)
.apt_install("fonts-freefont-ttf")
.run_function(download_model)
)


Expand All @@ -92,14 +79,23 @@ def download_model():
# web interface can render it on top of the webcam view.


with image.imports():
import torch
from huggingface_hub import snapshot_download
from PIL import Image, ImageColor, ImageDraw, ImageFont
from transformers import DetrForObjectDetection, DetrImageProcessor


@stub.cls(
cpu=4,
image=image,
)
class ObjectDetection:
def __enter__(self):
from transformers import DetrForObjectDetection, DetrImageProcessor
@build()
def download_model(self):
snapshot_download(repo_id=model_repo_id, cache_dir="/cache")

def __enter__(self):
self.feature_extractor = DetrImageProcessor.from_pretrained(
model_repo_id,
cache_dir="/cache",
Expand All @@ -112,14 +108,10 @@ def __enter__(self):
@method()
def detect(self, img_data_in):
# Based on https://huggingface.co/spaces/nateraw/detr-object-detection/blob/main/app.py
from PIL import Image, ImageColor, ImageDraw, ImageFont

# Read png from input
image = Image.open(io.BytesIO(img_data_in)).convert("RGB")

# Make prediction
import torch

inputs = self.feature_extractor(image, return_tensors="pt")
outputs = self.model(**inputs)
img_size = torch.tensor([tuple(reversed(image.size))])
Expand Down
51 changes: 12 additions & 39 deletions 06_gpu_and_ml/stable_diffusion/stable_diffusion_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import time
from pathlib import Path

from modal import Image, Stub, method
from modal import Image, Stub, build, enter, method

# All Modal programs need a [`Stub`](/docs/reference/modal.Stub) — an object that acts as a recipe for
# the application. Let's give it a friendly name.
Expand All @@ -53,31 +53,6 @@
# already inside the image.

model_id = "runwayml/stable-diffusion-v1-5"
cache_path = "/vol/cache"


def download_models():
    """Fetch the scheduler config and the fp16 pipeline, then save both to ``cache_path``."""
    import diffusers
    import torch

    # Scheduler configuration first. Try other schedulers to find the one
    # that suits your use-case best.
    sched = diffusers.DPMSolverMultistepScheduler.from_pretrained(
        model_id, subfolder="scheduler", cache_dir=cache_path
    )
    sched.save_pretrained(cache_path, safe_serialization=True)

    # Then every remaining model component, as the half-precision revision.
    pipeline = diffusers.StableDiffusionPipeline.from_pretrained(
        model_id, revision="fp16", torch_dtype=torch.float16, cache_dir=cache_path
    )
    pipeline.save_pretrained(cache_path, safe_serialization=True)


image = (
Image.debian_slim(python_version="3.10")
Expand All @@ -95,9 +70,12 @@ def download_models():
find_links="https://download.pytorch.org/whl/torch_stable.html",
)
.pip_install("xformers", pre=True)
.run_function(download_models)
)
stub.image = image

with image.imports():
import diffusers
import torch


# ## Using container lifecycle methods
#
Expand All @@ -118,16 +96,13 @@ def download_models():
# It sends the PIL image back to our CLI where we save the resulting image in a local file.


@stub.cls(gpu="A10G")
@stub.cls(image=image, gpu="A10G")
class StableDiffusion:
def __enter__(self):
import diffusers
import torch

torch.backends.cuda.matmul.allow_tf32 = True

@build()
@enter()
def initialize(self):
scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
cache_path,
model_id,
subfolder="scheduler",
solver_order=2,
prediction_type="epsilon",
Expand All @@ -139,7 +114,7 @@ def __enter__(self):
device_map="auto",
)
self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(
cache_path,
model_id,
scheduler=scheduler,
low_cpu_mem_usage=True,
device_map="auto",
Expand All @@ -150,8 +125,6 @@ def __enter__(self):
def run_inference(
self, prompt: str, steps: int = 20, batch_size: int = 4
) -> list[bytes]:
import torch

with torch.inference_mode():
with torch.autocast("cuda"):
images = self.pipe(
Expand Down
Loading

0 comments on commit 650c56f

Please sign in to comment.