Merge branch 'main' into patch-1

modal-labs · Jan 26, 2024 · 775b3c8
2 parents 82873ad + e6204fb
commit 775b3c8
Show file tree

Hide file tree

Showing 18 changed files with 50 additions and 29 deletions.
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -12,6 +12,7 @@
 
 - [ ] Example is testable in synthetic monitoring system, or `lambda-test: false` is added to example frontmatter
 - [ ] Example does _not_ require third-party dependencies to be installed locally
+- [ ] Example pins all dependencies and specifies a `python_version` for the base image
 - [ ] Example is documented with comments throughout, in a [_Literate Programming_](https://en.wikipedia.org/wiki/Literate_programming) style.
 
 ## Outside contributors

diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
@@ -18,7 +18,7 @@ jobs:
           python-version: "3.11"
 
       - name: Install black
-        run: pip install black
+        run: pip install black==23.11.0
 
       - name: Black
         run: black --check .

diff --git a/06_gpu_and_ml/embeddings/wikipedia/download.py b/06_gpu_and_ml/embeddings/wikipedia/download.py
@@ -8,7 +8,9 @@
 
 # We define our Modal Resources that we'll need
 volume = Volume.persisted("embedding-wikipedia")
-image = Image.debian_slim().pip_install("datasets", "apache_beam")
+image = Image.debian_slim(python_version="3.9").pip_install(
+    "datasets==2.16.1", "apache_beam==2.53.0"
+)
 stub = Stub(image=image)
 
 

diff --git a/06_gpu_and_ml/openai_whisper/finetuning/train/__main__.py b/06_gpu_and_ml/openai_whisper/finetuning/train/__main__.py
@@ -206,9 +206,11 @@ def __call__(
     # Distributed training:
     # The .from_pretrained methods guarantee that only one local process can concurrently
     config = AutoConfig.from_pretrained(
-        model_args.config_name
-        if model_args.config_name
-        else model_args.model_name_or_path,
+        (
+            model_args.config_name
+            if model_args.config_name
+            else model_args.model_name_or_path
+        ),
         cache_dir=model_args.cache_dir,
         revision=model_args.model_revision,
         use_auth_token=os.environ["HF_TOKEN"],
@@ -224,17 +226,21 @@ def __call__(
     config.update({"apply_spec_augment": model_args.apply_spec_augment})
 
     feature_extractor = AutoFeatureExtractor.from_pretrained(
-        model_args.feature_extractor_name
-        if model_args.feature_extractor_name
-        else model_args.model_name_or_path,
+        (
+            model_args.feature_extractor_name
+            if model_args.feature_extractor_name
+            else model_args.model_name_or_path
+        ),
         cache_dir=model_args.cache_dir,
         revision=model_args.model_revision,
         use_auth_token=True if model_args.use_auth_token else None,
     )
     tokenizer = AutoTokenizer.from_pretrained(
-        model_args.tokenizer_name
-        if model_args.tokenizer_name
-        else model_args.model_name_or_path,
+        (
+            model_args.tokenizer_name
+            if model_args.tokenizer_name
+            else model_args.model_name_or_path
+        ),
         cache_dir=model_args.cache_dir,
         use_fast=model_args.use_fast_tokenizer,
         revision=model_args.model_revision,
@@ -404,17 +410,17 @@ def compute_metrics(pred):
     trainer = Seq2SeqTrainer(
         model=model,
         args=training_args,
-        train_dataset=vectorized_datasets["train"]
-        if training_args.do_train
-        else None,
-        eval_dataset=vectorized_datasets["eval"]
-        if training_args.do_eval
-        else None,
+        train_dataset=(
+            vectorized_datasets["train"] if training_args.do_train else None
+        ),
+        eval_dataset=(
+            vectorized_datasets["eval"] if training_args.do_eval else None
+        ),
         tokenizer=feature_extractor,
         data_collator=data_collator,
-        compute_metrics=compute_metrics
-        if training_args.predict_with_generate
-        else None,
+        compute_metrics=(
+            compute_metrics if training_args.predict_with_generate else None
+        ),
     )
 
     logger.info("12. Running training")

diff --git a/06_gpu_and_ml/openai_whisper/finetuning/train/end_to_end_check.py b/06_gpu_and_ml/openai_whisper/finetuning/train/end_to_end_check.py
@@ -6,6 +6,7 @@
 before testing that the partially trained model can be serialized, saved to
 persistent storage, and then downloaded locally for inference.
 """
+
 import pathlib
 
 import modal

diff --git a/06_gpu_and_ml/openai_whisper/pod_transcriber/app/main.py b/06_gpu_and_ml/openai_whisper/pod_transcriber/app/main.py
@@ -2,6 +2,7 @@
 whisper-pod-transcriber uses OpenAI's Whisper modal to do speech-to-text transcription
 of podcasts.
 """
+
 import dataclasses
 import datetime
 import json

diff --git a/06_gpu_and_ml/spam-detect/spam_detect/app.py b/06_gpu_and_ml/spam-detect/spam_detect/app.py
@@ -2,6 +2,7 @@
 Contains only definitions of Modal objects, to be imported
 from other modules.
 """
+
 import modal
 
 image = modal.Image.debian_slim(python_version="3.10").pip_install(

diff --git a/06_gpu_and_ml/spam-detect/spam_detect/dataset.py b/06_gpu_and_ml/spam-detect/spam_detect/dataset.py
@@ -2,6 +2,7 @@
 Module for the fetching, pre-processing, and loading of spam classification datasets.
 Currently only provides access to the ENRON email dataset.
 """
+
 import csv
 import json
 import pathlib

diff --git a/06_gpu_and_ml/spam-detect/spam_detect/model_registry.py b/06_gpu_and_ml/spam-detect/spam_detect/model_registry.py
@@ -3,6 +3,7 @@
 The CLI commands are operationally useful, used to inspect prior trained models and promote the
 most promising models to production serving.
 """
+
 import json
 from typing import Callable, NamedTuple, Optional
 

diff --git a/06_gpu_and_ml/spam-detect/spam_detect/model_storage.py b/06_gpu_and_ml/spam-detect/spam_detect/model_storage.py
@@ -2,6 +2,7 @@
 The model storage module contains functions for the serialization, and
 disk-based storage of the email spam models defined within models.py.
 """
+
 import datetime
 import hashlib
 import io

diff --git a/06_gpu_and_ml/spam-detect/spam_detect/models.py b/06_gpu_and_ml/spam-detect/spam_detect/models.py
@@ -9,6 +9,7 @@
 * LLM (a fine-tuned BERT language classifier)
 * NaiveBayes
 """
+
 import json
 import math
 import pathlib

diff --git a/06_gpu_and_ml/spam-detect/spam_detect/serving.py b/06_gpu_and_ml/spam-detect/spam_detect/serving.py
@@ -1,6 +1,7 @@
 """
 Defines a serverless web API to expose trained models
 """
+
 from typing import Optional
 
 import modal

diff --git a/06_gpu_and_ml/text-to-pokemon/text_to_pokemon/inpaint.py b/06_gpu_and_ml/text-to-pokemon/text_to_pokemon/inpaint.py
@@ -6,6 +6,7 @@
 
 This code is partly based on code from github.com/Sanster/lama-cleaner/.
 """
+
 import io
 
 import modal

diff --git a/06_gpu_and_ml/text-to-pokemon/text_to_pokemon/ops.py b/06_gpu_and_ml/text-to-pokemon/text_to_pokemon/ops.py
@@ -4,6 +4,7 @@
 
 eg. python -m text_to_pokemon.ops reset-diskcache
 """
+
 import argparse
 import io
 import json

diff --git a/06_gpu_and_ml/text-to-pokemon/text_to_pokemon/pokemon_naming.py b/06_gpu_and_ml/text-to-pokemon/text_to_pokemon/pokemon_naming.py
@@ -1,6 +1,7 @@
 """
 Our AI-generated Pokémon characters need their own names!
 """
+
 import dataclasses
 import json
 import time

diff --git a/10_integrations/covid_datasette.py b/10_integrations/covid_datasette.py
@@ -124,12 +124,12 @@ def load_report(filepath):
             )
             yield {
                 "day": f"{yyyy}-{mm}-{dd}",
-                "country_or_region": country_or_region.strip()
-                if country_or_region
-                else None,
-                "province_or_state": province_or_state.strip()
-                if province_or_state
-                else None,
+                "country_or_region": (
+                    country_or_region.strip() if country_or_region else None
+                ),
+                "province_or_state": (
+                    province_or_state.strip() if province_or_state else None
+                ),
                 "confirmed": int(float(row["Confirmed"] or 0)),
                 "deaths": int(float(row["Deaths"] or 0)),
                 "recovered": int(float(row["Recovered"] or 0)),

diff --git a/internal/typecheck.py b/internal/typecheck.py
@@ -2,6 +2,7 @@
 MyPy type-checking script.
 Unvalidated, incorrect type-hints are worse than no type-hints!
 """
+
 import pathlib
 import subprocess
 import sys

diff --git a/misc/news_summarizer.py b/misc/news_summarizer.py
@@ -101,9 +101,9 @@ def latest_science_stories(n_stories: int = 5) -> List[NYArticle]:
     articles = [
         NYArticle(
             title=u["title"],
-            image_url=u.get("multimedia")[0]["url"]
-            if u.get("multimedia")
-            else "",
+            image_url=(
+                u.get("multimedia")[0]["url"] if u.get("multimedia") else ""
+            ),
             url=u.get("url"),
         )
         for u in results["results"]
-Original file line number
+Diff line change
@@ Expand Up / @@ -6,6 +6,7 @@ @@
     This code is partly based on code from github.com/Sanster/lama-cleaner/.
     """
     import io
     import modal
@@ Expand Down @@