Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Example simplification using @build #507

Merged
merged 9 commits into from
Jan 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 19 additions & 25 deletions 06_gpu_and_ml/alpaca/alpaca_lora.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,13 @@
import sys

from modal import Image, Stub, method
from modal import Image, Stub, build, enter, method

# Define a function for downloading the models, that will run once on image build.
# This allows the weights to be present inside the image for faster startup.

base_model = "luodian/llama-7b-hf"
lora_weights = "tloen/alpaca-lora-7b"


def download_models():
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer

model = LlamaForCausalLM.from_pretrained(
base_model,
)
PeftModel.from_pretrained(model, lora_weights)
LlamaTokenizer.from_pretrained(base_model)


# Alpaca-LoRA is distributed as a public GitHub repository and the repository is not
# installable by `pip`, so instead we install the repository by cloning it into our Modal
# image.
Expand All @@ -28,8 +16,7 @@ def download_models():
repo_url = "https://github.com/tloen/alpaca-lora"
commit_hash = "fcbc45e4c0db8948743bd1227b46a796c1effcd0"
image = (
Image.debian_slim()
.apt_install("git")
Image.debian_slim().apt_install("git")
# Here we place the latest repository code into /root.
# Because /root is almost empty, but not entirely empty, `git clone` won't work,
# so this `init` then `checkout` workaround is used.
Expand Down Expand Up @@ -58,8 +45,14 @@ def download_models():
"torchvision~=0.16",
"sentencepiece==0.1.99",
)
.run_function(download_models)
)

with image.imports():
import torch
from generate import generate_prompt
from peft import PeftModel
from transformers import GenerationConfig, LlamaForCausalLM, LlamaTokenizer

stub = Stub(name="example-alpaca-lora", image=image)

# The Alpaca-LoRA model is integrated into Modal as a Python class with an __enter__
Expand All @@ -73,16 +66,21 @@ def download_models():

@stub.cls(gpu="A10G")
class AlpacaLoRAModel:
def __enter__(self):
@build()
def download_models(self):
model = LlamaForCausalLM.from_pretrained(
base_model,
)
PeftModel.from_pretrained(model, lora_weights)
LlamaTokenizer.from_pretrained(base_model)

@enter()
def enter(self):
"""
Container-lifecycle method for model setup. Code is taken from
https://github.com/tloen/alpaca-lora/blob/main/generate.py and minor
modifications are made to support usage in a Python class.
"""
import torch
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer

load_8bit = False
device = "cuda" if torch.cuda.is_available() else "cpu"

Expand Down Expand Up @@ -146,10 +144,6 @@ def evaluate(
max_new_tokens=128,
**kwargs,
):
import torch
from generate import generate_prompt
from transformers import GenerationConfig

prompt = generate_prompt(instruction, input)
inputs = self.tokenizer(prompt, return_tensors="pt")
input_ids = inputs["input_ids"].to(self.device)
Expand Down
21 changes: 10 additions & 11 deletions 06_gpu_and_ml/embeddings/instructor.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,8 @@
from modal import Image, Stub, method
from modal import Image, Stub, build, enter, method

MODEL_DIR = "/model"


def download_model():
from InstructorEmbedding import INSTRUCTOR

model = INSTRUCTOR("hkunlp/instructor-large")
model.save(MODEL_DIR)


image = (
Image.debian_slim(python_version="3.10")
.apt_install("git")
Expand All @@ -19,17 +12,23 @@ def download_model():
"cd instructor-embedding && pip install -r requirements.txt",
)
.pip_install("InstructorEmbedding")
.run_function(download_model)
)

stub = Stub("instructor", image=image)

with image.imports():
from InstructorEmbedding import INSTRUCTOR


@stub.cls(gpu="any")
class InstructorModel:
def __enter__(self):
from InstructorEmbedding import INSTRUCTOR
@build()
def download_model(self):
model = INSTRUCTOR("hkunlp/instructor-large")
model.save(MODEL_DIR)

@enter()
def enter(self):
self.model = INSTRUCTOR(MODEL_DIR, device="cuda")

@method()
Expand Down
36 changes: 14 additions & 22 deletions 06_gpu_and_ml/obj_detection_webcam/webcam.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,32 +34,20 @@

from fastapi import FastAPI, Request, Response
from fastapi.staticfiles import StaticFiles
from modal import (
Image,
Mount,
Stub,
asgi_app,
method,
)
from modal import Image, Mount, Stub, asgi_app, build, method

# We need to install [transformers](https://github.com/huggingface/transformers)
# which is a package Hugging Face uses for all their models, but also
# [Pillow](https://python-pillow.org/) which lets us work with images from Python,
# and a system font for drawing.
#
# This example uses the `facebook/detr-resnet-50` pre-trained model, which is downloaded
# once at image build time using the `download_model` function and saved into the image.
# 'Baking' models into the `modal.Image` at build time provides the fastest cold start.
# once at image build time using the `@build` hook and saved into the image. 'Baking'
# models into the `modal.Image` at build time provides the fastest cold start.

model_repo_id = "facebook/detr-resnet-50"


def download_model():
from huggingface_hub import snapshot_download

snapshot_download(repo_id=model_repo_id, cache_dir="/cache")


stub = Stub("example-webcam-object-detection")
image = (
Image.debian_slim()
Expand All @@ -70,7 +58,6 @@ def download_model():
"transformers",
)
.apt_install("fonts-freefont-ttf")
.run_function(download_model)
)


Expand All @@ -92,14 +79,23 @@ def download_model():
# web interface can render it on top of the webcam view.


with image.imports():
import torch
from huggingface_hub import snapshot_download
from PIL import Image, ImageColor, ImageDraw, ImageFont
from transformers import DetrForObjectDetection, DetrImageProcessor


@stub.cls(
cpu=4,
image=image,
)
class ObjectDetection:
def __enter__(self):
from transformers import DetrForObjectDetection, DetrImageProcessor
@build()
def download_model(self):
snapshot_download(repo_id=model_repo_id, cache_dir="/cache")

def __enter__(self):
self.feature_extractor = DetrImageProcessor.from_pretrained(
model_repo_id,
cache_dir="/cache",
Expand All @@ -112,14 +108,10 @@ def __enter__(self):
@method()
def detect(self, img_data_in):
# Based on https://huggingface.co/spaces/nateraw/detr-object-detection/blob/main/app.py
from PIL import Image, ImageColor, ImageDraw, ImageFont

# Read png from input
image = Image.open(io.BytesIO(img_data_in)).convert("RGB")

# Make prediction
import torch

inputs = self.feature_extractor(image, return_tensors="pt")
outputs = self.model(**inputs)
img_size = torch.tensor([tuple(reversed(image.size))])
Expand Down
51 changes: 12 additions & 39 deletions 06_gpu_and_ml/stable_diffusion/stable_diffusion_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import time
from pathlib import Path

from modal import Image, Stub, method
from modal import Image, Stub, build, enter, method

# All Modal programs need a [`Stub`](/docs/reference/modal.Stub) — an object that acts as a recipe for
# the application. Let's give it a friendly name.
Expand All @@ -53,31 +53,6 @@
# already inside the image.

model_id = "runwayml/stable-diffusion-v1-5"
cache_path = "/vol/cache"


def download_models():
import diffusers
import torch

# Download scheduler configuration. Experiment with different schedulers
# to identify one that works best for your use-case.
scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
model_id,
subfolder="scheduler",
cache_dir=cache_path,
)
scheduler.save_pretrained(cache_path, safe_serialization=True)

# Downloads all other models.
pipe = diffusers.StableDiffusionPipeline.from_pretrained(
model_id,
revision="fp16",
torch_dtype=torch.float16,
cache_dir=cache_path,
)
pipe.save_pretrained(cache_path, safe_serialization=True)


image = (
Image.debian_slim(python_version="3.10")
Expand All @@ -95,9 +70,12 @@ def download_models():
find_links="https://download.pytorch.org/whl/torch_stable.html",
)
.pip_install("xformers", pre=True)
.run_function(download_models)
)
stub.image = image

with image.imports():
import diffusers
import torch


# ## Using container lifecycle methods
#
Expand All @@ -118,16 +96,13 @@ def download_models():
# It sends the PIL image back to our CLI where we save the resulting image in a local file.


@stub.cls(gpu="A10G")
@stub.cls(image=image, gpu="A10G")
class StableDiffusion:
def __enter__(self):
import diffusers
import torch

torch.backends.cuda.matmul.allow_tf32 = True

@build()
@enter()
def initialize(self):
scheduler = diffusers.DPMSolverMultistepScheduler.from_pretrained(
cache_path,
model_id,
subfolder="scheduler",
solver_order=2,
prediction_type="epsilon",
Expand All @@ -139,7 +114,7 @@ def __enter__(self):
device_map="auto",
)
self.pipe = diffusers.StableDiffusionPipeline.from_pretrained(
cache_path,
model_id,
scheduler=scheduler,
low_cpu_mem_usage=True,
device_map="auto",
Expand All @@ -150,8 +125,6 @@ def __enter__(self):
def run_inference(
self, prompt: str, steps: int = 20, batch_size: int = 4
) -> list[bytes]:
import torch

with torch.inference_mode():
with torch.autocast("cuda"):
images = self.pipe(
Expand Down
Loading