Skip to content

Commit

Permalink
Fix failing synthetic monitors (#535)
Browse files Browse the repository at this point in the history
* Fix algolia_indexer synthetic monitor

* Fix db_to_sheet

* Fix dbt_duckdb

* Remove dbt_sqlite

This depends on meltano, an example that is currently not tested,
and it also looks up an NFS.

* Fix mini_dalle_slackbot

* Fix news_summarizer

* Fix dreambooth_app

* Fix instructor

* Fix webscraper

* Fix a bunch of "huggingface" secrets

* Fix db_to_sheet

* Revert changes to environment_name

* Remove unused import for lints

* Fix TGI synmon token

* Fix TEI and TGI-Mixtral

- TEI: Issue with HuggingFace secrets again
- TGI-Mixtral: huggingface/text-generation-inference#1342
  • Loading branch information
ekzhang authored Jan 3, 2024
1 parent 702cc03 commit 8db92de
Show file tree
Hide file tree
Showing 20 changed files with 51 additions and 228 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ class AppConfig:
gpu="A100", # finetuning is VRAM hungry, so this should be an A100
volumes=VOLUME_CONFIG,
timeout=3600 * 2, # multiple hours
secrets=[Secret.from_name("huggingface")],
secrets=[Secret.from_name("huggingface-secret")],
)
# ## Define the training function
# Now, finally, we define the training function itself. This training function does a bunch of preparatory things, but the core of it is the `_exec_subprocess` call to `accelerate launch` that launches the actual Diffusers training script. Depending on which Diffusers script you are using, you will want to modify the script name, and the arguments that are passed to it.
Expand Down
45 changes: 15 additions & 30 deletions 06_gpu_and_ml/dreambooth/dreambooth_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
from modal import (
Image,
Mount,
Secret,
Stub,
Volume,
asgi_app,
Expand Down Expand Up @@ -77,7 +76,7 @@
# This is crucial as finetuning runs are separate from the Gradio app we run as a webhook.

volume = Volume.persisted("dreambooth-finetuning-volume")
MODEL_DIR = Path("/model")
MODEL_DIR = "/model"

# ## Config
#
Expand Down Expand Up @@ -138,21 +137,21 @@ class AppConfig(SharedConfig):
# So we can fetch just a few images, stored on consumer platforms like Imgur or Google Drive
# -- no need for expensive data collection or data engineering.

IMG_PATH = Path("/img")


def load_images(image_urls):
def load_images(image_urls: list[str]) -> Path:
import PIL.Image
from smart_open import open

os.makedirs(IMG_PATH, exist_ok=True)
img_path = Path("/img")

img_path.mkdir(parents=True, exist_ok=True)
for ii, url in enumerate(image_urls):
with open(url, "rb") as f:
image = PIL.Image.open(f)
image.save(IMG_PATH / f"{ii}.png")
image.save(img_path / f"{ii}.png")
print("Images loaded.")

return IMG_PATH
return img_path


# ## Finetuning a text-to-image model
Expand All @@ -173,34 +172,24 @@ def load_images(image_urls):
#
# The model weights, libraries, and training script are all provided by [🤗 Hugging Face](https://huggingface.co).
#
# To access the model weights, you'll need a [Hugging Face account](https://huggingface.co/join)
# and from that account you'll need to accept the model license [here](https://huggingface.co/runwayml/stable-diffusion-v1-5).
#
# Lastly, you'll need to create a token from that account and share it with Modal
# under the name `"huggingface"`. Follow the instructions [here](https://modal.com/secrets).
#
# Then, you can kick off a training job with the command
# `modal run dreambooth_app.py::stub.train`.
# You can kick off a training job with the command `modal run dreambooth_app.py::stub.train`.
# It should take about ten minutes.
#
# Tip: if the results you're seeing don't match the prompt too well, and instead produce an image of your subject again, the model has likely overfit. In this case, repeat training with a lower # of max_train_steps. On the other hand, if the results don't look like your subject, you might need to increase # of max_train_steps.
# Tip: if the results you're seeing don't match the prompt too well, and instead produce an image
# of your subject again, the model has likely overfit. In this case, repeat training with a lower
# value of `max_train_steps`. On the other hand, if the results don't look like your subject, you
# might need to increase `max_train_steps`.


@stub.function(
image=image,
gpu="A100", # finetuning is VRAM hungry, so this should be an A100
volumes={
str(
MODEL_DIR
): volume, # fine-tuned model will be stored at `MODEL_DIR`
},
gpu="A100", # fine-tuning is VRAM-heavy and requires an A100 GPU
volumes={MODEL_DIR: volume}, # stores fine-tuned model
timeout=1800, # 30 minutes
secrets=[Secret.from_name("huggingface")],
)
def train(instance_example_urls):
import subprocess

import huggingface_hub
from accelerate.utils import write_basic_config
from transformers import CLIPTokenizer

Expand All @@ -214,10 +203,6 @@ def train(instance_example_urls):
# set up hugging face accelerate library for fast training
write_basic_config(mixed_precision="fp16")

# authenticate to hugging face so we can download the model weights
hf_key = os.environ["HUGGINGFACE_TOKEN"]
huggingface_hub.login(hf_key)

# check whether we can access the model repo
try:
CLIPTokenizer.from_pretrained(config.model_name, subfolder="tokenizer")
Expand Down Expand Up @@ -283,7 +268,7 @@ def _exec_subprocess(cmd: list[str]):
@stub.cls(
image=image,
gpu="A100",
volumes={str(MODEL_DIR): volume},
volumes={MODEL_DIR: volume},
)
class Model:
def __enter__(self):
Expand Down
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/embeddings/instructor.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def compare(self, sentences_a, sentences_b):
embeddings_a = self.model.encode(sentences_a)
embeddings_b = self.model.encode(sentences_b)
similarities = cosine_similarity(embeddings_a, embeddings_b)
return similarities
return similarities.tolist()


@stub.local_entrypoint()
Expand Down
10 changes: 8 additions & 2 deletions 06_gpu_and_ml/embeddings/text_embeddings_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,13 @@


def spawn_server() -> subprocess.Popen:
process = subprocess.Popen(["text-embeddings-router"] + LAUNCH_FLAGS)
process = subprocess.Popen(
["text-embeddings-router"] + LAUNCH_FLAGS,
env={
**os.environ,
"HUGGING_FACE_HUB_TOKEN": os.environ["HUGGINGFACE_TOKEN"],
},
)

# Poll until webserver at 127.0.0.1:8000 accepts connections before running inputs.
while True:
Expand Down Expand Up @@ -74,7 +80,7 @@ def download_model():


@stub.cls(
secret=Secret.from_name("huggingface"),
secret=Secret.from_name("huggingface-secret"),
gpu=GPU_CONFIG,
image=tei_image,
# Use up to 20 GPU containers at once.
Expand Down
24 changes: 15 additions & 9 deletions 06_gpu_and_ml/text_generation_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#
# First we import the components we need from `modal`.

import os
import subprocess
from pathlib import Path

from modal import Image, Mount, Secret, Stub, asgi_app, gpu, method
Expand Down Expand Up @@ -49,8 +51,6 @@


def download_model():
import subprocess

subprocess.run(
[
"text-generation-server",
Expand All @@ -59,6 +59,10 @@ def download_model():
"--revision",
REVISION,
],
env={
**os.environ,
"HUGGING_FACE_HUB_TOKEN": os.environ["HUGGINGFACE_TOKEN"],
},
check=True,
)

Expand All @@ -69,21 +73,20 @@ def download_model():
#
# Next we run the download step to pre-populate the image with our model weights.
#
# For this step to work on a gated model such as LLaMA 2, the HUGGING_FACE_HUB_TOKEN environment
# For this step to work on a gated model such as LLaMA 2, the HUGGINGFACE_TOKEN environment
# variable must be set ([reference](https://github.com/huggingface/text-generation-inference#using-a-private-or-gated-model)).
#
# After [creating a HuggingFace access token](https://huggingface.co/settings/tokens),
# head to the [secrets page](https://modal.com/secrets) to create a Modal secret.
#
# The key should be `HUGGING_FACE_HUB_TOKEN` and the value should be your access token.
#
# Finally, we install the `text-generation` client to interface with TGI's Rust webserver over `localhost`.

stub = Stub("example-tgi-" + MODEL_ID.split("/")[-1])

tgi_image = (
Image.from_registry("ghcr.io/huggingface/text-generation-inference:1.0.3")
.dockerfile_commands("ENTRYPOINT []")
.run_function(download_model, secret=Secret.from_name("huggingface"))
.run_function(download_model, secret=Secret.from_name("huggingface-secret"))
.pip_install("text-generation")
)

Expand All @@ -109,7 +112,7 @@ def download_model():


@stub.cls(
secret=Secret.from_name("huggingface"),
secret=Secret.from_name("huggingface-secret"),
gpu=GPU_CONFIG,
allow_concurrent_inputs=10,
container_idle_timeout=60 * 10,
Expand All @@ -119,13 +122,16 @@ def download_model():
class Model:
def __enter__(self):
import socket
import subprocess
import time

from text_generation import AsyncClient

self.launcher = subprocess.Popen(
["text-generation-launcher"] + LAUNCH_FLAGS
["text-generation-launcher"] + LAUNCH_FLAGS,
env={
**os.environ,
"HUGGING_FACE_HUB_TOKEN": os.environ["HUGGINGFACE_TOKEN"],
},
)
self.client = AsyncClient("http://127.0.0.1:8000", timeout=60)
self.template = """<s>[INST] <<SYS>>
Expand Down
2 changes: 1 addition & 1 deletion 06_gpu_and_ml/tgi_mixtral.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def download_model():
# Finally, we install the `text-generation` client to interface with TGI's Rust webserver over `localhost`.

tgi_image = (
Image.from_registry("ghcr.io/huggingface/text-generation-inference:1.3.1")
Image.from_registry("ghcr.io/huggingface/text-generation-inference:1.3.3")
.dockerfile_commands("ENTRYPOINT []")
.run_function(download_model, timeout=60 * 20)
.pip_install("text-generation")
Expand Down
4 changes: 2 additions & 2 deletions 06_gpu_and_ml/vllm_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def download_model_to_folder():
.env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
.run_function(
download_model_to_folder,
secret=Secret.from_name("huggingface"),
secret=Secret.from_name("huggingface-secret"),
timeout=60 * 20,
)
)
Expand All @@ -82,7 +82,7 @@ def download_model_to_folder():
# on the GPU for each subsequent invocation of the function.
#
# The `vLLM` library allows the code to remain quite clean.
@stub.cls(gpu="A100", secret=Secret.from_name("huggingface"))
@stub.cls(gpu="A100", secret=Secret.from_name("huggingface-secret"))
class Model:
def __enter__(self):
from vllm import LLM
Expand Down
3 changes: 2 additions & 1 deletion 10_integrations/algolia_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@


@stub.function(
image=algolia_image, secrets=[Secret.from_name("algolia-secret")]
image=algolia_image,
secrets=[Secret.from_name("algolia-secret")],
)
def crawl():
# Installed with a 3.6 venv; Python 3.6 is unsupported by Modal, so use a subprocess instead.
Expand Down
92 changes: 0 additions & 92 deletions 10_integrations/dbt/dbt_sqlite.py

This file was deleted.

4 changes: 0 additions & 4 deletions 10_integrations/dbt/sample_proj_sqlite/.gitignore

This file was deleted.

Empty file.
26 changes: 0 additions & 26 deletions 10_integrations/dbt/sample_proj_sqlite/dbt_project.yml

This file was deleted.

Empty file.
Loading

0 comments on commit 8db92de

Please sign in to comment.