Skip to content

Commit

Permalink
draft CI
Browse files Browse the repository at this point in the history
  • Loading branch information
ydshieh committed Feb 7, 2024
1 parent 1e6ea0f commit a9f6aa8
Show file tree
Hide file tree
Showing 8 changed files with 273 additions and 0 deletions.
28 changes: 28 additions & 0 deletions .github/workflows/test_model.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Workflow that runs the model tests under `tests/models` on a GPU runner.
name: Model Test

# Only pushes to the `draft_ci` branch trigger this workflow.
on:
  push:
    branches:
      - draft_ci

env:
  # Hugging Face cache location inside the container (see the volume mount in `container.options` below).
  HF_HOME: /mnt/cache
  # For gated repositories, we still need to agree to share information on the Hub repo page in order to get access.
  # This token is created under the bot `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}

jobs:
  github_repo:
    name: github repo
    runs-on: [single-gpu, nvidia-gpu, a10, ci]
    container:
      # TODO: make this $ {{ inputs.image }} to use GCP's DLC images
      image: huggingface/transformers-all-latest-gpu
      # The host directory /mnt/cache/.cache/huggingface is mounted at /mnt/cache/, i.e. at `HF_HOME`.
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      # Runs every test collected under `tests/models` of the checked-out repository.
      - name: run tests
        run: |
          python3 -m pytest -v tests/models
Empty file added tests/models/__init__.py
Empty file.
Empty file added tests/models/llama/__init__.py
Empty file.
39 changes: 39 additions & 0 deletions tests/models/llama/test_inference_llama.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import os
import unittest
import torch

from transformers import AutoModelForCausalLM, AutoTokenizer
from ..test_model import ModelInferenceTestMixin

# Global PyTorch backend switches applied at import time: deterministic cuDNN is requested and TF32 is
# disabled for both CUDA matmuls and cuDNN.
# NOTE(review): presumably this is to keep the generated text stable enough to compare against a fixed
# expected string in the test below — confirm.
torch.backends.cudnn.deterministic = True
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.allow_tf32 = False

# Device that the tokenized inputs and the model are moved to in the test below.
device = "cuda"


class LLaMaInferenceTest(ModelInferenceTestMixin, unittest.TestCase):
    """Inference regression test for the `meta-llama/Llama-2-7b-hf` checkpoint in float16."""

    def test_inference(self):
        """Generate a continuation for a fixed prompt and compare it with the recorded text.

        The Hub token is read from the `HF_HUB_READ_TOKEN` environment variable and passed to both
        `from_pretrained` calls; it is `None` when the variable is not set.
        """
        ckpt = "meta-llama/Llama-2-7b-hf"
        # Plain assignment: the previous `hf_token = token=os.getenv(...)` was a chained assignment that
        # also bound an unused `token` local.
        hf_token = os.getenv("HF_HUB_READ_TOKEN")

        tokenizer = AutoTokenizer.from_pretrained(ckpt, token=hf_token)

        prompt = "Hey, are you conscious? Can you talk to me?"
        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16, token=hf_token)
        model.to(device)

        # To make generation's sample deterministic
        torch.manual_seed(1)

        # Generate (no gradients are needed for inference)
        with torch.no_grad():
            generate_ids = model.generate(inputs.input_ids, max_length=30)
        output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

        expected_output = "Hey, are you conscious? Can you talk to me?\nI'm not sure if you can hear me, but I'm talking"

        # `assertEqual` is not stripped under `python -O` and reports a diff of the two strings on failure.
        self.assertEqual(output, expected_output)
18 changes: 18 additions & 0 deletions tests/models/llama/test_train_llama.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import unittest

from ..test_model import ModelTrainingTestMixin, TestCasePlus


# The skip is declared on the test case itself rather than on the `prepare_training_command` helper:
# the inherited `test_training` is then reported as skipped up front, instead of being skipped as a side
# effect of calling a decorated helper after a temporary directory has already been created.
@unittest.skip("WIP")
class LLamaModelTrainingTest(ModelTrainingTestMixin, TestCasePlus):
    """Placeholder training test for LLaMA; skipped until the training script and command exist."""

    # WIP
    def get_training_script(self):
        """Return the training script module (not available yet)."""
        raise NotImplementedError

    def prepare_training_command(self, **kwargs):
        """Return the command line for the training script.

        Args:
            **kwargs: Accepts `output_dir`, which is not used yet because the command is still empty.

        Returns:
            `str`: currently an empty string (WIP).
        """
        return ""
138 changes: 138 additions & 0 deletions tests/models/test_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import json
import os
import shutil
import sys
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from transformers.utils import is_accelerate_available
from transformers.testing_utils import TestCasePlus


if is_accelerate_available():
from accelerate.state import AcceleratorState, PartialState


class ModelTrainingTestMixin:
    """Mixin providing a generic `test_training` built on two hooks that subclasses implement.

    The host class must also provide `get_auto_remove_tmp_dir` and the `unittest` assertion methods.
    """

    def get_training_script(self):
        """Return an object exposing a `main()` callable; must be overridden."""
        raise NotImplementedError

    def prepare_training_command(self, **kwargs):
        """Return the whitespace-separated command line as a string; must be overridden."""
        raise NotImplementedError

    def get_results(self, output_dir):
        """Load and return the parsed content of `all_results.json` located in `output_dir`.

        Raises:
            ValueError: if the file does not exist.
        """
        results_file = os.path.join(output_dir, "all_results.json")
        if not os.path.exists(results_file):
            raise ValueError(f"can't find {results_file}")

        with open(results_file, "r") as handle:
            return json.load(handle)

    def test_training(self):
        """Run the training script with a patched `sys.argv` and check the reported `eval_accuracy`."""
        output_dir = self.get_auto_remove_tmp_dir()
        command = self.prepare_training_command(output_dir=output_dir)
        argv = command.split()

        # `sys.argv` is replaced only for the duration of the `with` block.
        with patch.object(sys, "argv", argv):
            script = self.get_training_script()
            script.main()
            metrics = self.get_results(output_dir)
            self.assertGreaterEqual(metrics["eval_accuracy"], 0.8)


class ModelInferenceTestMixin:
    """Mixin declaring the `test_inference` hook that model inference test cases implement."""

    def test_inference(self):
        """Run the model-specific inference check; must be overridden."""
        raise NotImplementedError


# Copied from https://github.com/huggingface/transformers/blob/308d2b90049b4979a949a069aa4f43b2788254d6/src/transformers/testing_utils.py#L1335 # noqa
# (with minimal set of methods and their contents)
class TestCasePlus(unittest.TestCase):
    """`unittest.TestCase` that hands out temporary directories and removes the registered ones on teardown."""

    def setUp(self):
        # Directories returned by `get_auto_remove_tmp_dir` that `tearDown` has to delete.
        self.teardown_tmp_dirs = []

    def get_auto_remove_tmp_dir(self, tmp_dir=None, before=None, after=None):
        """
        Create a temporary directory for the current test and optionally register it for removal.

        Args:
            tmp_dir (`string`, *optional*):
                Path to use. It must start with `./`. When omitted, a unique directory is created with
                `tempfile.mkdtemp()`.
            before (`bool`, *optional*):
                Defaults to `True`. When `True` and a custom `tmp_dir` already exists, it is removed and
                recreated empty; otherwise its existing content is kept. It has no effect on an
                auto-generated directory, which is always new.
            after (`bool`, *optional*):
                When `True`, the directory is deleted in `tearDown`. Defaults to `False` for a custom
                `tmp_dir` and to `True` for an auto-generated one.

        Returns:
            tmp_dir(`string`): the value passed via *tmp_dir*, or the path of the auto-generated directory

        Raises:
            ValueError: if a custom `tmp_dir` does not start with `./`.
        """
        use_custom_dir = tmp_dir is not None

        # A custom path most likely means debugging: start from a clean directory and keep it afterwards.
        # An auto-generated path is unique, so it only needs to be removed once the test is over.
        if before is None:
            before = True
        if after is None:
            after = not use_custom_dir

        if use_custom_dir:
            target = Path(tmp_dir).resolve()

            # to avoid nuking parts of the filesystem, only relative paths are allowed
            if not tmp_dir.startswith("./"):
                raise ValueError(
                    f"`tmp_dir` can only be a relative path, i.e. `./some/path`, but received `{tmp_dir}`"
                )

            # wipe leftovers from a previous run when requested
            if before is True and target.exists():
                shutil.rmtree(tmp_dir, ignore_errors=True)

            target.mkdir(parents=True, exist_ok=True)
        else:
            # unique directory, always empty regardless of `before`
            tmp_dir = tempfile.mkdtemp()

        if after is True:
            # remember it so that `tearDown` deletes it
            self.teardown_tmp_dirs.append(tmp_dir)

        return tmp_dir

    def tearDown(self):
        # delete every directory registered by `get_auto_remove_tmp_dir`
        for registered_dir in self.teardown_tmp_dirs:
            shutil.rmtree(registered_dir, ignore_errors=True)
        self.teardown_tmp_dirs = []

        if not is_accelerate_available():
            return

        AcceleratorState._reset_state()
        PartialState._reset_state()

        # drop all the env variables having `ACCELERATE` in their name; the names are collected first
        # because the mapping cannot be modified while it is being iterated
        accelerate_vars = [name for name in os.environ if "ACCELERATE" in name]
        for name in accelerate_vars:
            del os.environ[name]
Empty file added tests/models/vit/__init__.py
Empty file.
50 changes: 50 additions & 0 deletions tests/models/vit/test_training_vit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import os
import sys

from ..test_model import ModelTrainingTestMixin, TestCasePlus


# So far, the image we use has a `/transformers` directory.
# TODO: Find a way to get the installed `transformers` directory.
# Example-script directories under `/transformers/examples/pytorch` that are appended to `sys.path`.
SRC_DIRS = [
    os.path.join("/transformers/examples/pytorch", dirname)
    for dirname in [
        "image-classification",
    ]
]
# Must happen before the import below; presumably `run_image_classification` lives in one of these
# directories and is not importable otherwise.
sys.path.extend(SRC_DIRS)


# NOTE(review): `SRC_DIRS` is always a list at this point, so this guard is always true.
if SRC_DIRS is not None:
    import run_image_classification


class ViTTrainingTest(ModelTrainingTestMixin, TestCasePlus):
    """Training test that drives the `run_image_classification` example script with a ViT checkpoint."""

    def get_training_script(self):
        """Return the imported `run_image_classification` module."""
        return run_image_classification

    def prepare_training_command(self, **kwargs):
        """Build the command line passed to the training script.

        Args:
            **kwargs: `output_dir` selects the `--output_dir` value and defaults to `"my_dir"`.

        Returns:
            `str`: program name and arguments separated by whitespace.
        """
        output_dir = kwargs.get("output_dir", "my_dir")

        return f"""
            run_image_classification.py
            --output_dir {output_dir}
            --model_name_or_path google/vit-base-patch16-224-in21k
            --dataset_name hf-internal-testing/cats_vs_dogs_sample
            --do_train
            --do_eval
            --learning_rate 1e-4
            --per_device_train_batch_size 2
            --per_device_eval_batch_size 1
            --remove_unused_columns False
            --overwrite_output_dir True
            --dataloader_num_workers 16
            --metric_for_best_model accuracy
            --max_steps 10
            --train_val_split 0.1
            --seed 42
            --label_column_name labels
            """

0 comments on commit a9f6aa8

Please sign in to comment.