Skip to content

Commit

Permalink
Fix CI and pin black version
Browse files: browse the repository at this point in the history
  • Loading branch information
ekzhang committed Jan 26, 2024
1 parent 4e5d48a commit 3bf8d63
Show file tree
Hide file tree
Showing 16 changed files with 46 additions and 28 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
python-version: "3.11"

- name: Install black
run: pip install black
run: pip install black==23.11.0

- name: Black
run: black --check .
Expand Down
42 changes: 24 additions & 18 deletions 06_gpu_and_ml/openai_whisper/finetuning/train/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,9 +206,11 @@ def __call__(
# Distributed training:
# The .from_pretrained methods guarantee that only one local process can concurrently
# download model & vocab.
config = AutoConfig.from_pretrained(
model_args.config_name
if model_args.config_name
else model_args.model_name_or_path,
(
model_args.config_name
if model_args.config_name
else model_args.model_name_or_path
),
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=os.environ["HF_TOKEN"],
Expand All @@ -224,17 +226,21 @@ def __call__(
config.update({"apply_spec_augment": model_args.apply_spec_augment})

feature_extractor = AutoFeatureExtractor.from_pretrained(
model_args.feature_extractor_name
if model_args.feature_extractor_name
else model_args.model_name_or_path,
(
model_args.feature_extractor_name
if model_args.feature_extractor_name
else model_args.model_name_or_path
),
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
use_auth_token=True if model_args.use_auth_token else None,
)
tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name
if model_args.tokenizer_name
else model_args.model_name_or_path,
(
model_args.tokenizer_name
if model_args.tokenizer_name
else model_args.model_name_or_path
),
cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision,
Expand Down Expand Up @@ -404,17 +410,17 @@ def compute_metrics(pred):
trainer = Seq2SeqTrainer(
model=model,
args=training_args,
train_dataset=vectorized_datasets["train"]
if training_args.do_train
else None,
eval_dataset=vectorized_datasets["eval"]
if training_args.do_eval
else None,
train_dataset=(
vectorized_datasets["train"] if training_args.do_train else None
),
eval_dataset=(
vectorized_datasets["eval"] if training_args.do_eval else None
),
tokenizer=feature_extractor,
data_collator=data_collator,
compute_metrics=compute_metrics
if training_args.predict_with_generate
else None,
compute_metrics=(
compute_metrics if training_args.predict_with_generate else None
),
)

logger.info("12. Running training")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
before testing that the partially trained model can be serialized, saved to
persistent storage, and then downloaded locally for inference.
"""

import pathlib

import modal
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/openai_whisper/pod_transcriber/app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
whisper-pod-transcriber uses OpenAI's Whisper model to do speech-to-text transcription
of podcasts.
"""

import dataclasses
import datetime
import json
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/spam-detect/spam_detect/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Contains only definitions of Modal objects, to be imported
from other modules.
"""

import modal

image = modal.Image.debian_slim(python_version="3.10").pip_install(
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/spam-detect/spam_detect/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Module for the fetching, pre-processing, and loading of spam classification datasets.
Currently only provides access to the ENRON email dataset.
"""

import csv
import json
import pathlib
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/spam-detect/spam_detect/model_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
The CLI commands are operationally useful, used to inspect previously trained models and promote the
most promising models to production serving.
"""

import json
from typing import Callable, NamedTuple, Optional

Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/spam-detect/spam_detect/model_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
The model storage module contains functions for the serialization and
disk-based storage of the email spam models defined within models.py.
"""

import datetime
import hashlib
import io
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/spam-detect/spam_detect/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* LLM (a fine-tuned BERT language classifier)
* NaiveBayes
"""

import json
import math
import pathlib
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/spam-detect/spam_detect/serving.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Defines a serverless web API to expose trained models
"""

from typing import Optional

import modal
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/text-to-pokemon/text_to_pokemon/inpaint.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
This code is partly based on code from github.com/Sanster/lama-cleaner/.
"""

import io

import modal
Expand Down
1 change: 1 addition & 0 deletions 06_gpu_and_ml/text-to-pokemon/text_to_pokemon/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
e.g. python -m text_to_pokemon.ops reset-diskcache
"""

import argparse
import io
import json
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Our AI-generated Pokémon characters need their own names!
"""

import dataclasses
import json
import time
Expand Down
12 changes: 6 additions & 6 deletions 10_integrations/covid_datasette.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,12 @@ def load_report(filepath):
)
yield {
"day": f"{yyyy}-{mm}-{dd}",
"country_or_region": country_or_region.strip()
if country_or_region
else None,
"province_or_state": province_or_state.strip()
if province_or_state
else None,
"country_or_region": (
country_or_region.strip() if country_or_region else None
),
"province_or_state": (
province_or_state.strip() if province_or_state else None
),
"confirmed": int(float(row["Confirmed"] or 0)),
"deaths": int(float(row["Deaths"] or 0)),
"recovered": int(float(row["Recovered"] or 0)),
Expand Down
1 change: 1 addition & 0 deletions internal/typecheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
MyPy type-checking script.
Unvalidated, incorrect type-hints are worse than no type-hints!
"""

import pathlib
import subprocess
import sys
Expand Down
6 changes: 3 additions & 3 deletions misc/news_summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,9 @@ def latest_science_stories(n_stories: int = 5) -> List[NYArticle]:
articles = [
NYArticle(
title=u["title"],
image_url=u.get("multimedia")[0]["url"]
if u.get("multimedia")
else "",
image_url=(
u.get("multimedia")[0]["url"] if u.get("multimedia") else ""
),
url=u.get("url"),
)
for u in results["results"]
Expand Down

0 comments on commit 3bf8d63

Please sign in to comment.