Skip to content

Commit

Permalink
add missing fixture decorator for predownload dataset (#2117) [skip ci]
Browse files Browse the repository at this point in the history
* add missing fixture decorator for predownload dataset

* also pre-download the tokenizer files
  • Loading branch information
winglian authored Dec 3, 2024
1 parent d87df2c commit 418ad2b
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 0 deletions.
1 change: 1 addition & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def download_mhenrichsen_alpaca_2k_w_revision_dataset():
)


@pytest.fixture(scope="session", autouse=True)
def download_mlabonne_finetome_100k_dataset():
    """Fetch a snapshot of the ``mlabonne/FineTome-100k`` dataset repo.

    Declared session-scoped and autouse, so pytest runs it once per test
    session without any test having to request it by name.
    """
    dataset_repo = "mlabonne/FineTome-100k"
    # repo_type="dataset" selects the dataset repo rather than a model repo.
    snapshot_download(dataset_repo, repo_type="dataset")
Expand Down
12 changes: 12 additions & 0 deletions tests/prompt_strategies/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import pytest
from datasets import Dataset
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer


Expand Down Expand Up @@ -60,6 +61,17 @@ def fixture_basic_dataset():

@pytest.fixture(name="llama3_tokenizer")
def fixture_llama3_tokenizer():
    """Return the tokenizer for ``NousResearch/Meta-Llama-3-8B-Instruct``.

    Each tokenizer file is requested individually via ``hf_hub_download``
    before ``AutoTokenizer.from_pretrained`` is called on the same repo.
    Tests receive the result under the fixture name ``llama3_tokenizer``.
    """
    model_repo = "NousResearch/Meta-Llama-3-8B-Instruct"

    # Request order is kept: special tokens map, tokenizer config, tokenizer.
    for tokenizer_file in (
        "special_tokens_map.json",
        "tokenizer_config.json",
        "tokenizer.json",
    ):
        hf_hub_download(repo_id=model_repo, filename=tokenizer_file)

    return AutoTokenizer.from_pretrained(model_repo)
Expand Down

0 comments on commit 418ad2b

Please sign in to comment.