-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
273 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
# Runs the model tests under `tests/models` inside a GPU container on every push to `draft_ci`.
name: Model Test

on:
  push:
    branches:
      - draft_ci

env:
  # Hugging Face cache root; the container `options` below mount a host directory at this path.
  HF_HOME: /mnt/cache
  # For gated repositories, we still need to agree to share information on the Hub repo page in order to get access.
  # This token is created under the bot `hf-transformers-bot`.
  HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }}

jobs:
  github_repo:
    name: github repo
    # The runner must carry all of these labels.
    runs-on:
      - single-gpu
      - nvidia-gpu
      - a10
      - ci
    container:
      # TODO: make this $ {{ inputs.image }} to use GCP's DLC images
      image: huggingface/transformers-all-latest-gpu
      options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: run tests
        run: |
          python3 -m pytest -v tests/models
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
import os | ||
import unittest | ||
import torch | ||
|
||
from transformers import AutoModelForCausalLM, AutoTokenizer | ||
from ..test_model import ModelInferenceTestMixin | ||
|
||
# Device the tokenized inputs and the model are moved to in the test below.
device = "cuda"

# Turn TF32 off for both cuDNN and CUDA matmuls and ask cuDNN for deterministic
# kernels -- presumably so the exact-match comparison on the generated text
# does not drift between runs (TODO confirm).
torch.backends.cudnn.allow_tf32 = False
torch.backends.cuda.matmul.allow_tf32 = False
torch.backends.cudnn.deterministic = True
|
||
|
||
class LLaMaInferenceTest(ModelInferenceTestMixin, unittest.TestCase):
    """Checks that `meta-llama/Llama-2-7b-hf` reproduces a fixed reference continuation."""

    def test_inference(self):
        """Generate from a fixed prompt in float16 and compare with the expected text.

        The Hub token is read from the `HF_HUB_READ_TOKEN` environment variable
        (`None` when unset) and passed to both `from_pretrained` calls.
        """
        ckpt = "meta-llama/Llama-2-7b-hf"
        hf_token = os.getenv("HF_HUB_READ_TOKEN")

        tokenizer = AutoTokenizer.from_pretrained(ckpt, token=hf_token)

        prompt = "Hey, are you conscious? Can you talk to me?"
        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.float16, token=hf_token)
        model.to(device)

        # To make generation's sample deterministic
        torch.manual_seed(1)

        # Generate
        with torch.no_grad():
            generate_ids = model.generate(inputs.input_ids, max_length=30)
        output = tokenizer.batch_decode(
            generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
        )[0]

        expected_output = "Hey, are you conscious? Can you talk to me?\nI'm not sure if you can hear me, but I'm talking"

        # `assertEqual` is not stripped under `python -O` and reports a diff on failure.
        self.assertEqual(output, expected_output)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import unittest | ||
|
||
from ..test_model import ModelTrainingTestMixin, TestCasePlus | ||
|
||
|
||
class LLamaModelTrainingTest(ModelTrainingTestMixin, TestCasePlus):
    """Placeholder for the LLaMA training test (work in progress)."""

    # WIP
    def get_training_script(self):
        """Return the training script module; not implemented yet."""
        raise NotImplementedError

    @unittest.skip("WIP")
    def prepare_training_command(self, **kwargs):
        """Return the training command line, which is still empty.

        NOTE: `unittest.skip` replaces this function with a wrapper that raises
        `unittest.SkipTest`, so any test calling it is reported as skipped and
        the body below is not reached.
        """
        # Read for parity with the other training tests; not used in the command yet.
        target_dir = kwargs.get("output_dir", "my_dir")

        command = ""

        return command
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
import json | ||
import os | ||
import shutil | ||
import sys | ||
import tempfile | ||
import unittest | ||
from pathlib import Path | ||
from unittest.mock import patch | ||
from transformers.utils import is_accelerate_available | ||
from transformers.testing_utils import TestCasePlus | ||
|
||
|
||
if is_accelerate_available(): | ||
from accelerate.state import AcceleratorState, PartialState | ||
|
||
|
||
class ModelTrainingTestMixin:
    """Mixin that runs a training script and checks the accuracy it reports.

    Subclasses supply the script module and its command line. The class this is
    mixed into must also provide `get_auto_remove_tmp_dir` and
    `assertGreaterEqual`, both of which `test_training` calls.
    """

    def get_training_script(self):
        """Return the module whose `main()` runs the training."""
        raise NotImplementedError

    def prepare_training_command(self, **kwargs):
        """Return the command line as a single whitespace-separated string.

        `test_training` calls this with `output_dir=<temporary directory>` and
        splits the result to build `sys.argv`.
        """
        raise NotImplementedError

    def get_results(self, output_dir):
        """Load and return the parsed `all_results.json` found in `output_dir`.

        Raises:
            ValueError: if `all_results.json` does not exist in `output_dir`.
        """
        path = os.path.join(output_dir, "all_results.json")
        try:
            # Open directly instead of checking for existence first; decode as
            # UTF-8 rather than relying on the locale's default encoding.
            with open(path, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError as err:
            raise ValueError(f"can't find {path}") from err

    def test_training(self):
        """Run the training script in a temporary directory and require `eval_accuracy >= 0.8`."""
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = self.prepare_training_command(output_dir=tmp_dir).split()

        # The script reads its arguments from `sys.argv`, so swap it for the run.
        with patch.object(sys, "argv", testargs):
            self.get_training_script().main()
            result = self.get_results(tmp_dir)
            self.assertGreaterEqual(result["eval_accuracy"], 0.8)
|
||
|
||
class ModelInferenceTestMixin:
    """Mixin declaring the inference test that concrete model test classes override."""

    def test_inference(self):
        """Run the model's inference check; raises until a subclass overrides it."""
        raise NotImplementedError
|
||
|
||
# Copied from https://github.com/huggingface/transformers/blob/308d2b90049b4979a949a069aa4f43b2788254d6/src/transformers/testing_utils.py#L1335 # noqa | ||
# (with minimal set of methods and their contents) | ||
class TestCasePlus(unittest.TestCase):
    """`unittest.TestCase` with temporary directories that can clean themselves up.

    Directories returned by `get_auto_remove_tmp_dir` may be registered for
    deletion in `tearDown`. When accelerate is available, `tearDown` also resets
    its shared state and drops `ACCELERATE*` environment variables.
    """

    def setUp(self):
        # get_auto_remove_tmp_dir feature: directories to delete in tearDown
        self.teardown_tmp_dirs = []

    def get_auto_remove_tmp_dir(self, tmp_dir=None, before=None, after=None):
        """
        Create a temporary directory and optionally register it for removal in `tearDown`.

        Args:
            tmp_dir (`string`, *optional*):
                if `None`:
                   - a unique temporary path will be created
                   - sets `before=True` if `before` is `None`
                   - sets `after=True` if `after` is `None`
                else:
                   - `tmp_dir` will be created
                   - sets `before=True` if `before` is `None`
                   - sets `after=False` if `after` is `None`
            before (`bool`, *optional*):
                If `True` and the `tmp_dir` already exists, it is emptied right away; if `False` and the
                `tmp_dir` already exists, any existing files remain there.
            after (`bool`, *optional*):
                If `True`, the `tmp_dir` is deleted at the end of the test; if `False`, the `tmp_dir` and its
                contents are left intact at the end of the test.

        Returns:
            tmp_dir(`string`): either the same value as passed via *tmp_dir* or the path to the auto-selected tmp dir

        Raises:
            ValueError: if a custom `tmp_dir` does not start with `./`.
        """
        # Both modes clear the directory up front unless told otherwise.
        if before is None:
            before = True

        if tmp_dir is None:
            # Auto-generated path (not a debug mode): the directory is unique, hence
            # empty regardless of `before`, and is removed after the test by default.
            if after is None:
                after = True
            tmp_dir = tempfile.mkdtemp()
        else:
            # A custom path most likely means debug mode: an easy-to-find directory
            # that is cleared before the test and left intact afterwards.
            if after is None:
                after = False

            resolved = Path(tmp_dir).resolve()

            # to avoid nuking parts of the filesystem, only relative paths are allowed
            if not tmp_dir.startswith("./"):
                raise ValueError(
                    f"`tmp_dir` can only be a relative path, i.e. `./some/path`, but received `{tmp_dir}`"
                )

            # ensure the dir is empty to start with
            if before is True and resolved.exists():
                shutil.rmtree(tmp_dir, ignore_errors=True)

            resolved.mkdir(parents=True, exist_ok=True)

        if after is True:
            # register for deletion
            self.teardown_tmp_dirs.append(tmp_dir)

        return tmp_dir

    def tearDown(self):
        # get_auto_remove_tmp_dir feature: remove registered temp dirs
        for registered in self.teardown_tmp_dirs:
            shutil.rmtree(registered, ignore_errors=True)
        self.teardown_tmp_dirs = []

        if not is_accelerate_available():
            return

        AcceleratorState._reset_state()
        PartialState._reset_state()

        # delete all the env variables having `ACCELERATE` in them
        # (names are collected first so the mapping is not mutated while iterating)
        for name in [key for key in os.environ if "ACCELERATE" in key]:
            del os.environ[name]
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import os | ||
import sys | ||
|
||
from ..test_model import ModelTrainingTestMixin, TestCasePlus | ||
|
||
|
||
# So far, the image we use has a `/transformers` directory.
# TODO: Find a way to get the installed `transformers` directory.
SRC_DIRS = [
    os.path.join("/transformers/examples/pytorch", dirname)
    for dirname in [
        "image-classification",
    ]
]
sys.path.extend(SRC_DIRS)

# Must come after the `sys.path` change above: the example script is looked up
# in `SRC_DIRS`. `SRC_DIRS` is a list literal and can never be `None`, so the
# import is unconditional.
import run_image_classification  # noqa: E402
|
||
|
||
class ViTTrainingTest(ModelTrainingTestMixin, TestCasePlus):
    """Training test that drives the `run_image_classification` example script with a ViT checkpoint."""

    def get_training_script(self):
        """Return the example script module whose `main()` is run."""
        return run_image_classification

    def prepare_training_command(self, **kwargs):
        """Build the command line passed to `run_image_classification.py`.

        Args:
            **kwargs: only `output_dir` is read; it defaults to `"my_dir"`.

        Returns:
            A multi-line string holding the script name followed by its flags.
        """
        output_dir = kwargs.get("output_dir", "my_dir")

        return f"""
            run_image_classification.py
            --output_dir {output_dir}
            --model_name_or_path google/vit-base-patch16-224-in21k
            --dataset_name hf-internal-testing/cats_vs_dogs_sample
            --do_train
            --do_eval
            --learning_rate 1e-4
            --per_device_train_batch_size 2
            --per_device_eval_batch_size 1
            --remove_unused_columns False
            --overwrite_output_dir True
            --dataloader_num_workers 16
            --metric_for_best_model accuracy
            --max_steps 10
            --train_val_split 0.1
            --seed 42
            --label_column_name labels
            """