Skip to content

Commit

Permalink
Merge branch 'main' into patch-1
Browse files Browse the repository at this point in the history
  • Loading branch information
ekzhang authored Jan 26, 2024
2 parents 82873ad + e6204fb commit 775b3c8
Show file tree
Hide file tree
Showing 18 changed files with 50 additions and 29 deletions.
1 change: 1 addition & 0 deletions .github/pull_request_template.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

- [ ] Example is testable in synthetic monitoring system, or `lambda-test: false` is added to example frontmatter
- [ ] Example does _not_ require third-party dependencies to be installed locally
- [ ] Example pins all dependencies and specifies a `python_version` for the base image
- [ ] Example is documented with comments throughout, in a [_Literate Programming_](https://en.wikipedia.org/wiki/Literate_programming) style.

## Outside contributors
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
python-version: "3.11"

- name: Install black
run: pip install black
run: pip install black==23.11.0

- name: Black
run: black --check .
Expand Down
4 changes: 3 additions & 1 deletion 06_gpu_and_ml/embeddings/wikipedia/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@

# We define the Modal resources that we'll need
volume = Volume.persisted("embedding-wikipedia")
image = Image.debian_slim().pip_install("datasets", "apache_beam")
image = Image.debian_slim(python_version="3.9").pip_install(
"datasets==2.16.1", "apache_beam==2.53.0"
)
stub = Stub(image=image)


Expand Down
42 changes: 24 additions & 18 deletions 06_gpu_and_ml/openai_whisper/finetuning/train/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,11 @@ def __call__(
# Distributed training:
# The .from_pretrained methods guarantee that only one local process can concurrently
# download model & vocab.
config = AutoConfig.from_pretrained(
model_args.config_name
if model_args.config_name
else model_args.model_name_or_path,
(
model_args.config_name
if model_args.config_name
else model_args.model_name_or_path
),
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=os.environ["HF_TOKEN"],
Expand All @@ -224,17 +226,21 @@ def __call__(
config.update({"apply_spec_augment": model_args.apply_spec_augment})

feature_extractor = AutoFeatureExtractor.from_pretrained(
model_args.feature_extractor_name
if model_args.feature_extractor_name
else model_args.model_name_or_path,
(
model_args.feature_extractor_name
if model_args.feature_extractor_name
else model_args.model_name_or_path
),
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
)
tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name
if model_args.tokenizer_name
else model_args.model_name_or_path,
(
model_args.tokenizer_name
if model_args.tokenizer_name
else model_args.model_name_or_path
),
cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision,
Expand Down Expand Up @@ -404,17 +410,17 @@ def compute_metrics(pred):
trainer = Seq2SeqTrainer(
model=model,
args=training_args,
train_dataset=vectorized_datasets["train"]
if training_args.do_train
else None,
eval_dataset=vectorized_datasets["eval"]
if training_args.do_eval
else None,
train_dataset=(
vectorized_datasets["train"] if training_args.do_train else None
),
eval_dataset=(
vectorized_datasets["eval"] if training_args.do_eval else None
),
tokenizer=feature_extractor,
data_collator=data_collator,
compute_metrics=compute_metrics
if training_args.predict_with_generate
else None,
compute_metrics=(
compute_metrics if training_args.predict_with_generate else None
),
)

logger.info("12. Running training")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
before testing that the partially trained model can be serialized, saved to
persistent storage, and then downloaded locally for inference.
"""

import pathlib

import modal
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/openai_whisper/pod_transcriber/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
whisper-pod-transcriber uses OpenAI's Whisper model to do speech-to-text transcription
of podcasts.
"""

import dataclasses
import datetime
import json
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/spam-detect/spam_detect/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Contains only definitions of Modal objects, to be imported
from other modules.
"""

import modal

image = modal.Image.debian_slim(python_version="3.10").pip_install(
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/spam-detect/spam_detect/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Module for the fetching, pre-processing, and loading of spam classification datasets.
Currently only provides access to the ENRON email dataset.
"""

import csv
import json
import pathlib
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/spam-detect/spam_detect/model_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
The CLI commands are operationally useful, used to inspect prior trained models and promote the
most promising models to production serving.
"""

import json
from typing import Callable, NamedTuple, Optional

Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/spam-detect/spam_detect/model_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
The model storage module contains functions for the serialization and
disk-based storage of the email spam models defined within models.py.
"""

import datetime
import hashlib
import io
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/spam-detect/spam_detect/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* LLM (a fine-tuned BERT language classifier)
* NaiveBayes
"""

import json
import math
import pathlib
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/spam-detect/spam_detect/serving.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Defines a serverless web API to expose trained models
"""

from typing import Optional

import modal
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/text-to-pokemon/text_to_pokemon/inpaint.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
This code is partly based on code from github.com/Sanster/lama-cleaner/.
"""

import io

import modal
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/text-to-pokemon/text_to_pokemon/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
e.g. python -m text_to_pokemon.ops reset-diskcache
"""

import argparse
import io
import json
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Our AI-generated Pokémon characters need their own names!
"""

import dataclasses
import json
import time
Expand Down
12 changes: 6 additions & 6 deletions 10_integrations/covid_datasette.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,12 @@ def load_report(filepath):
)
yield {
"day": f"{yyyy}-{mm}-{dd}",
"country_or_region": country_or_region.strip()
if country_or_region
else None,
"province_or_state": province_or_state.strip()
if province_or_state
else None,
"country_or_region": (
country_or_region.strip() if country_or_region else None
),
"province_or_state": (
province_or_state.strip() if province_or_state else None
),
"confirmed": int(float(row["Confirmed"] or 0)),
"deaths": int(float(row["Deaths"] or 0)),
"recovered": int(float(row["Recovered"] or 0)),
Expand Down
1 change: 1 addition & 0 deletions internal/typecheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
MyPy type-checking script.
Unvalidated, incorrect type-hints are worse than no type-hints!
"""

import pathlib
import subprocess
import sys
Expand Down
6 changes: 3 additions & 3 deletions misc/news_summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@ def latest_science_stories(n_stories: int = 5) -> List[NYArticle]:
articles = [
NYArticle(
title=u["title"],
image_url=u.get("multimedia")[0]["url"]
if u.get("multimedia")
else "",
image_url=(
u.get("multimedia")[0]["url"] if u.get("multimedia") else ""
),
url=u.get("url"),
)
for u in results["results"]
Expand Down

0 comments on commit 775b3c8

Please sign in to comment.