From 6e0fb4a6b2d57d873e92847abb1b43e84a890aef Mon Sep 17 00:00:00 2001
From: Wing Lian
Date: Fri, 29 Nov 2024 20:37:32 -0500
Subject: [PATCH] add finetome dataset to fixtures, check eval_loss in test
 (#2106) [skip ci]

* add finetome dataset to fixtures, check eval_loss in test

* add qwen 0.5b to pytest session fixture
---
 tests/conftest.py               | 11 +++++++++++
 tests/e2e/multigpu/test_eval.py | 29 +++++++++++++++++++++++------
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index c316f6c83e..a8bf03ac01 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -14,6 +14,12 @@ def download_smollm2_135m_model():
     snapshot_download("HuggingFaceTB/SmolLM2-135M")
 
 
+@pytest.fixture(scope="session", autouse=True)
+def download_qwen_2_5_half_billion_model():
+    # download the model
+    snapshot_download("Qwen/Qwen2.5-0.5B")
+
+
 @pytest.fixture(scope="session", autouse=True)
 def download_tatsu_lab_alpaca_dataset():
     # download the model
@@ -26,6 +32,11 @@ def download_mhenrichsen_alpaca_2k_dataset():
     snapshot_download("mhenrichsen/alpaca_2k_test", repo_type="dataset")
 
 
+def download_mlabonne_finetome_100k_dataset():
+    # download the model
+    snapshot_download("mlabonne/FineTome-100k", repo_type="dataset")
+
+
 @pytest.fixture
 def temp_dir():
     # Create a temporary directory
diff --git a/tests/e2e/multigpu/test_eval.py b/tests/e2e/multigpu/test_eval.py
index 068a9220ca..c40a9edcce 100644
--- a/tests/e2e/multigpu/test_eval.py
+++ b/tests/e2e/multigpu/test_eval.py
@@ -7,10 +7,13 @@
 
 import yaml
 from accelerate.test_utils import execute_subprocess_async
+from tbparse import SummaryReader
 from transformers.testing_utils import get_torch_dist_unique_port
 
 from axolotl.utils.dict import DictDefault
 
+from ..utils import most_recent_subdir
+
 LOG = logging.getLogger("axolotl.tests.e2e.multigpu")
 os.environ["WANDB_DISABLED"] = "true"
 
@@ -26,7 +29,7 @@ def test_eval_sample_packing(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "load_in_8bit": False,
                 "load_in_4bit": True,
                 "strict": False,
@@ -40,8 +43,8 @@
                 "lora_dropout": 0.05,
                 "lora_target_linear": True,
                 "lora_modules_to_save": ["embed_tokens", "lm_head"],
-                "val_set_size": 0.1,
-                "special_tokens": {"pad_token": "<|end_of_text|>"},
+                "val_set_size": 0.004,
+                "special_tokens": {"pad_token": "<|endoftext|>"},
                 "datasets": [
                     {
                         "path": "teknium/GPT4-LLM-Cleaned",
@@ -66,6 +69,7 @@
                 "saves_per_epoch": 1,
                 "logging_steps": 1,
                 "weight_decay": 0.0,
+                "use_tensorboard": True,
             }
         )
 
@@ -87,12 +91,18 @@
                     str(Path(temp_dir) / "config.yaml"),
                 ]
             )
+        tb_log_path = most_recent_subdir(temp_dir + "/runs")
+        event_file = os.path.join(tb_log_path, sorted(os.listdir(tb_log_path))[0])
+        reader = SummaryReader(event_file)
+        df = reader.scalars  # pylint: disable=invalid-name
+        df = df[(df.tag == "eval/loss")]  # pylint: disable=invalid-name
+        assert df.value.values[-1] < 2.5, "Loss is too high"
 
     def test_eval(self, temp_dir):
         # pylint: disable=duplicate-code
         cfg = DictDefault(
             {
-                "base_model": "JackFram/llama-68m",
+                "base_model": "HuggingFaceTB/SmolLM2-135M",
                 "load_in_8bit": False,
                 "load_in_4bit": True,
                 "strict": False,
@@ -106,8 +116,8 @@
                 "lora_dropout": 0.05,
                 "lora_target_linear": True,
                 "lora_modules_to_save": ["embed_tokens", "lm_head"],
-                "val_set_size": 0.1,
-                "special_tokens": {"pad_token": "<|end_of_text|>"},
+                "val_set_size": 0.0004,
+                "special_tokens": {"pad_token": "<|endoftext|>"},
                 "datasets": [
                     {
                         "path": "teknium/GPT4-LLM-Cleaned",
@@ -132,6 +142,7 @@
                 "saves_per_epoch": 1,
                 "logging_steps": 1,
                 "weight_decay": 0.0,
+                "use_tensorboard": True,
             }
         )
 
@@ -153,3 +164,9 @@
                     str(Path(temp_dir) / "config.yaml"),
                 ]
             )
+        tb_log_path = most_recent_subdir(temp_dir + "/runs")
+        event_file = os.path.join(tb_log_path, sorted(os.listdir(tb_log_path))[0])
+        reader = SummaryReader(event_file)
+        df = reader.scalars  # pylint: disable=invalid-name
+        df = df[(df.tag == "eval/loss")]  # pylint: disable=invalid-name
+        assert df.value.values[-1] < 2.9, "Loss is too high"