Skip to content

Commit

Permalink
Merge branch 'main' into ekzhang/remove-mlc-inference
Browse files Browse the repository at this point in the history
  • Loading branch information
ekzhang authored Jan 23, 2024
2 parents e502ced + 7a6fa3e commit 5ae4c0c
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 4 deletions.
3 changes: 2 additions & 1 deletion 06_gpu_and_ml/embeddings/wikipedia/download.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from modal import Image, Volume, Stub
from modal import Image, Stub, Volume

# We first set our configuration variables for our script.
DATASET_DIR = "/data"
Expand All @@ -19,6 +19,7 @@
def download_dataset():
# Redownload the dataset
import time

from datasets import load_dataset

start = time.time()
Expand Down
15 changes: 12 additions & 3 deletions 06_gpu_and_ml/embeddings/wikipedia/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import asyncio
import json
import subprocess

from modal import Image, Secret, Stub, Volume, gpu, method

# We first set our configuration variables for our script.
Expand Down Expand Up @@ -60,7 +61,9 @@ def spawn_server() -> subprocess.Popen:
# If so, a connection can never be made.
retcode = process.poll()
if retcode is not None:
raise RuntimeError(f"launcher exited unexpectedly with code {retcode}")
raise RuntimeError(
f"launcher exited unexpectedly with code {retcode}"
)


def download_model():
Expand Down Expand Up @@ -164,6 +167,7 @@ def load_dataset_from_disk(down_scale: float = 0.01):
Dataset: A subset of the training data.
"""
import time

from datasets import load_from_disk

start = time.perf_counter()
Expand Down Expand Up @@ -220,6 +224,7 @@ def upload_result_to_hf(batch_size: int) -> None:
"""
import os
import time

from huggingface_hub import HfApi

path_parent_folder = f"{CHECKPOINT_DIR}/{MODEL_SLUG}-{batch_size}"
Expand Down Expand Up @@ -284,7 +289,9 @@ def embed_dataset(down_scale: float = 1, batch_size: int = 512 * 50):
start = time.perf_counter()
acc_chunks = []
embeddings = []
for resp in model.embed.map(batches, order_outputs=False, return_exceptions=True):
for resp in model.embed.map(
batches, order_outputs=False, return_exceptions=True
):
if isinstance(resp, Exception):
print(f"Exception: {resp}")
continue
Expand Down Expand Up @@ -312,7 +319,9 @@ def embed_dataset(down_scale: float = 1, batch_size: int = 512 * 50):
}

if SAVE_TO_DISK:
save_dataset_to_intermediate_checkpoint(acc_chunks, embeddings, batch_size)
save_dataset_to_intermediate_checkpoint(
acc_chunks, embeddings, batch_size
)

if UPLOAD_TO_HF:
upload_result_to_hf(batch_size)
Expand Down
1 change: 1 addition & 0 deletions 10_integrations/multion_news_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
# To get started, first create an account with [MultiOn](https://app.multion.ai/), install the [MultiOn chrome extension](https://chrome.google.com/webstore/detail/ddmjhdbknfidiopmbaceghhhbgbpenmm) and log in to your Twitter account in your browser.
# To use the API, create a [MultiOn API Key](https://app.multion.ai/api-keys) and store it as a Modal secret on [the dashboard](https://modal.com/secrets).


@stub.function(
image=multion_image, secret=modal.Secret.from_name("MULTION_API_KEY")
)
Expand Down

0 comments on commit 5ae4c0c

Please sign in to comment.