From b96798fe942338f3cbd5691dcccb2a93b6819d91 Mon Sep 17 00:00:00 2001
From: Wovchena
Date: Thu, 30 May 2024 10:24:30 +0400
Subject: [PATCH] cache

---
 .github/workflows/genai_python_lib.yml  |   4 +-
 tests/python_tests/test_generate_api.py | 177 ++++++++++++------------
 2 files changed, 88 insertions(+), 93 deletions(-)

diff --git a/.github/workflows/genai_python_lib.yml b/.github/workflows/genai_python_lib.yml
index 9e1b9727f5..1be6705e30 100644
--- a/.github/workflows/genai_python_lib.yml
+++ b/.github/workflows/genai_python_lib.yml
@@ -22,7 +22,7 @@ jobs:
       - run: source ./ov/setupvars.sh && cmake --build ./build/ --config Release -j
       # GitHub Actions already provides what is listed in ./requirements-build.txt but the internal
       # build system doesn't. Install ./requirements-build.txt to detect possible conflicts.
-      - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./requirements-build.txt -r ./tests/python_tests/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
+      - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./requirements-build.txt -r ./tests/python_tests/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release --verbose --verbose --verbose
       - run: source ./ov/setupvars.sh && PYTHONPATH=./build/:$PYTHONPATH python -m pytest ./tests/python_tests/test_generate_api.py
       - run: source ./ov/setupvars.sh && python -m pip install . --config-settings=build-dir="build" --verbose --verbose --verbose
       - run: python -m pytest ./tests/python_tests/test_generate_api.py
@@ -47,7 +47,7 @@ jobs:
       # build system doesn't. Install ./requirements-build.txt to detect possible conflicts.
       - run: call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
       - run: call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && cmake --build ./build/ --config Release -j
-      - run: call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./requirements-build.txt -r ./tests/python_tests/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
+      - run: call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./requirements-build.txt -r ./tests/python_tests/requirements.txt --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release --verbose --verbose --verbose
       - run: set "PYTHONPATH=./build/" && call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && python -m pytest ./tests/python_tests/test_generate_api.py  # cmd evaluates variables in a different way. Setting PYTHONPATH before setupvars.bat instead of doing that after solves that.
       - run: call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat && python -m pip install . --config-settings=build-dir="build" --verbose --verbose --verbose
       - run: python -m pytest ./tests/python_tests/test_generate_api.py
diff --git a/tests/python_tests/test_generate_api.py b/tests/python_tests/test_generate_api.py
index ab6c26528c..0eb2cf48a0 100644
--- a/tests/python_tests/test_generate_api.py
+++ b/tests/python_tests/test_generate_api.py
@@ -11,26 +11,18 @@
 from list_test_models import models_list
 from typing import Union, List, Dict
 
-@pytest.fixture(scope="module", params=models_list())
-# @functools.lru_cache(1)
-def model_fixture(request):
-    model_id, path = request.param
-    # tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
-    # ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(tokenizer, with_detokenizer=True)
-    # openvino.save_model(ov_tokenizer, path / "openvino_tokenizer.xml")
-    # openvino.save_model(ov_detokenizer, path / "openvino_detokenizer.xml")
-    # model = optimum.intel.openvino.OVModelForCausalLM.from_pretrained(model_id, export=True, device='CPU', load_in_8bit=False)
-    # model.save_pretrained(path)
-    # return model_id, path, tokenizer, model
-    from transformers import AutoTokenizer, AutoModelForCausalLM
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    model = AutoModelForCausalLM.from_pretrained(model_id)
-    yield model_id, path, tokenizer, model
-
-    import gc
-    del tokenizer
-    del model
-    gc.collect()
+
+@functools.lru_cache(1)
+def read_model(params):
+    model_id, path = params
+    tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
+    ov_tokenizer, ov_detokenizer = openvino_tokenizers.convert_tokenizer(tokenizer, with_detokenizer=True)
+    openvino.save_model(ov_tokenizer, path / "openvino_tokenizer.xml")
+    openvino.save_model(ov_detokenizer, path / "openvino_detokenizer.xml")
+    model = optimum.intel.openvino.OVModelForCausalLM.from_pretrained(model_id, export=True, device='CPU', load_in_8bit=False)
+    model.save_pretrained(path)
+    return model_id, path, tokenizer, model
+
 
 def run_hf_ov_genai_comparison_batched(model_fixture, generation_config: Dict, prompts: Union[str, List[str]]):
     model_id, path, tokenizer, model = model_fixture
@@ -67,7 +59,7 @@ def run_hf_ov_genai_comparison_batched(model_fixture, generation_config: Dict, p
         hf_outputs.append(tokenizer.decode(hf_encoded_out[prompt_ids[prompt_count].shape[0]:], skip_special_tokens=True))
 
     import openvino_genai as ov_genai
-    pipe = ov_genai.LLMPipeline(path, device)
+    pipe = ov_genai.LLMPipeline(str(path), device)
 
     config['num_return_sequences'] = num_beams * len(prompts)
     ov_outputs = pipe.generate(prompts, **config)
@@ -104,9 +96,8 @@ def run_hf_ov_genai_comparison(model_fixture, generation_config: Dict, prompt):
     hf_encoded_output = model.generate(encoded_prompt, **generation_config_hf)
     hf_output = tokenizer.decode(hf_encoded_output[0, encoded_prompt.shape[1]:])
 
-
     import openvino_genai as ov_genai
-    pipe = ov_genai.LLMPipeline(path, device)
+    pipe = ov_genai.LLMPipeline(str(path), device)
     ov_output = pipe.generate(prompt, **config)
 
     if config.get('num_return_sequences', 1) > 1:
@@ -132,8 +123,9 @@ def stop_criteria_map():
     (dict(num_beam_groups=2, num_beams=8, num_return_sequences=8, max_new_tokens=20, diversity_penalty=1.5), 'The Sun is yellow because'),
 ]
 @pytest.mark.parametrize("generation_config,prompt", test_cases)
-def test_decoding(model_fixture, generation_config, prompt):
-    run_hf_ov_genai_comparison(model_fixture, generation_config, prompt)
+@pytest.mark.parametrize("model_id", models_list())
+def test_decoding(model_id, generation_config, prompt):
+    run_hf_ov_genai_comparison(read_model(model_id), generation_config, prompt)
 
 test_configs = [
     dict(max_new_tokens=20, do_sample=False),
@@ -147,40 +139,42 @@ def test_multibatch(model_fixture, generation_config, prompts):
     run_hf_ov_genai_comparison_batched(model_fixture, generation_config, prompts)
 
 
-# prompts = ['The Sun is yellow because', 'Alan Turing was a', 'table is made of']
-# @pytest.mark.parametrize("num_beam_groups", [2, 3, 8])
-# @pytest.mark.parametrize("group_size", [5, 3, 10])
-# @pytest.mark.parametrize("max_new_tokens", [20, 15])
-# @pytest.mark.parametrize("diversity_penalty", [1.0, 1.5])
-# @pytest.mark.parametrize("prompt", prompts)
-# def test_beam_search_decoding(model_fixture, num_beam_groups, group_size,
-#                               max_new_tokens, diversity_penalty, prompt):
-#     generation_config = dict(
-#         num_beam_groups=num_beam_groups,
-#         num_beams=num_beam_groups * group_size,
-#         diversity_penalty=diversity_penalty,
-#         num_return_sequences=num_beam_groups * group_size,
-#         max_new_tokens=max_new_tokens,
-#     )
-#     run_hf_ov_genai_comparison(model_fixture, generation_config, prompt)
-
-
-# @pytest.mark.parametrize("stop_criteria", ["never", "early", "heuristic"])
-# @pytest.mark.parametrize("prompt", prompts)
-# @pytest.mark.parametrize("max_new_tokens", [20, 40, 300])
-# def test_stop_criteria(model_fixture, stop_criteria, prompt, max_new_tokens):
-#     # todo: for long sentences early stop_criteria fails
-#     if (stop_criteria == 'early' and max_new_tokens >= 300):
-#         pytest.skip()
-#     generation_config = dict(
-#         num_beam_groups=2,
-#         num_beams=2 * 3,
-#         diversity_penalty=1.0,
-#         num_return_sequences=2 * 3,
-#         max_new_tokens=max_new_tokens,
-#         stop_criteria=stop_criteria,
-#     )
-#     run_hf_ov_genai_comparison(model_fixture, generation_config, prompt)
+prompts = ['The Sun is yellow because', 'Alan Turing was a', 'table is made of']
+@pytest.mark.parametrize("num_beam_groups", [2, 3, 8])
+@pytest.mark.parametrize("group_size", [5, 3, 10])
+@pytest.mark.parametrize("max_new_tokens", [20, 15])
+@pytest.mark.parametrize("diversity_penalty", [1.0, 1.5])
+@pytest.mark.parametrize("prompt", prompts)
+@pytest.mark.parametrize("model_id", models_list())
+def test_beam_search_decoding(model_id, num_beam_groups, group_size,
+                              max_new_tokens, diversity_penalty, prompt):
+    generation_config = dict(
+        num_beam_groups=num_beam_groups,
+        num_beams=num_beam_groups * group_size,
+        diversity_penalty=diversity_penalty,
+        num_return_sequences=num_beam_groups * group_size,
+        max_new_tokens=max_new_tokens,
+    )
+    run_hf_ov_genai_comparison(read_model(model_id), generation_config, prompt)
+
+
+@pytest.mark.parametrize("stop_criteria", ["never", "early", "heuristic"])
+@pytest.mark.parametrize("prompt", prompts)
+@pytest.mark.parametrize("max_new_tokens", [20, 40, 300])
+@pytest.mark.parametrize("model_id", models_list())
+def test_stop_criteria(model_id, stop_criteria, prompt, max_new_tokens):
+    # todo: for long sentences early stop_criteria fails
+    if (stop_criteria == 'early' and max_new_tokens >= 300):
+        pytest.skip()
+    generation_config = dict(
+        num_beam_groups=2,
+        num_beams=2 * 3,
+        diversity_penalty=1.0,
+        num_return_sequences=2 * 3,
+        max_new_tokens=max_new_tokens,
+        stop_criteria=stop_criteria,
+    )
+    run_hf_ov_genai_comparison(read_model(model_id), generation_config, prompt)
 
 
 # test long sequences
@@ -188,8 +182,9 @@
 @pytest.mark.parametrize("group_size", [5])
 @pytest.mark.parametrize("max_new_tokens", [800, 2000])
 @pytest.mark.parametrize("prompt", prompts)
+@pytest.mark.parametrize("model_id", models_list())
 @pytest.mark.skip # will be enabled in nightly since are computationally expensive
-def test_beam_search_long_sentences(model_fixture, num_beam_groups, group_size,
+def test_beam_search_long_sentences(model_id, num_beam_groups, group_size,
                                     max_new_tokens, prompt):
     generation_config = dict(
         num_beam_groups=num_beam_groups,
@@ -198,7 +193,7 @@ def test_beam_search_long_sentences(model_fixture, num_beam_groups, group_size,
         num_return_sequences=num_beam_groups * group_size,
         max_new_tokens=max_new_tokens,
     )
-    run_hf_ov_genai_comparison(model_fixture, generation_config, prompt)
+    run_hf_ov_genai_comparison(read_model(model_id), generation_config, prompt)
 
 
 def user_defined_callback(subword):
@@ -206,29 +201,29 @@ def user_defined_callback(subword):
     print(subword)
 
 @pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
-def test_callback_one_string(model_fixture, callback):
-    pipe = openvino_genai.LLMPipeline(str(model_fixture[1]))
+def test_callback_one_string(callback):
+    pipe = openvino_genai.LLMPipeline(str(read_model(models_list()[0])[1]))
     pipe.generate('', openvino_genai.GenerationConfig(), callback)
 
 
-# @pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
-# def test_callback_batch_fail(model_fixture, callback):
-#     pipe = openvino_genai.LLMPipeline(str(model_fixture[1]))
-#     with pytest.raises(RuntimeError):
-#         pipe.generate(['1', '2'], openvino_genai.GenerationConfig(), callback)
+@pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
+def test_callback_batch_fail(callback):
+    pipe = openvino_genai.LLMPipeline(str(read_model(models_list()[0])[1]))
+    with pytest.raises(RuntimeError):
+        pipe.generate(['1', '2'], openvino_genai.GenerationConfig(), callback)
 
 
-# @pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
-# def test_callback_kwargs_one_string(model_fixture, callback):
-#     pipe = openvino_genai.LLMPipeline(str(model_fixture[1]))
-#     pipe.generate('', max_new_tokens=10, streamer=callback)
+@pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
+def test_callback_kwargs_one_string(callback):
+    pipe = openvino_genai.LLMPipeline(str(read_model(models_list()[0])[1]))
+    pipe.generate('', max_new_tokens=10, streamer=callback)
 
 
-# @pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
-# def test_callback_kwargs_batch_fail(model_fixture, callback):
-#     pipe = openvino_genai.LLMPipeline(str(model_fixture[1]))
-#     with pytest.raises(RuntimeError):
-#         pipe.generate(['1', '2'], max_new_tokens=10, streamer=callback)
+@pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
+def test_callback_kwargs_batch_fail(callback):
+    pipe = openvino_genai.LLMPipeline(str(read_model(models_list()[0])[1]))
+    with pytest.raises(RuntimeError):
+        pipe.generate(['1', '2'], max_new_tokens=10, streamer=callback)
 
 
 class Printer(openvino_genai.StreamerBase):
@@ -241,46 +236,46 @@ def end(self):
         print('end')
 
 
-def test_streamer_one_string(model_fixture):
-    pipe = openvino_genai.LLMPipeline(str(model_fixture[1]))
+def test_streamer_one_string():
+    pipe = openvino_genai.LLMPipeline(str(read_model(models_list()[0])[1]))
     printer = Printer(pipe.get_tokenizer())
     pipe.generate('', openvino_genai.GenerationConfig(), printer)
 
 
-def test_streamer_batch_fail(model_fixture):
-    pipe = openvino_genai.LLMPipeline(str(model_fixture[1]))
+def test_streamer_batch_fail():
+    pipe = openvino_genai.LLMPipeline(str(read_model(models_list()[0])[1]))
     printer = Printer(pipe.get_tokenizer())
     with pytest.raises(RuntimeError):
         pipe.generate(['1', '2'], openvino_genai.GenerationConfig(), printer)
 
 
-def test_streamer_kwargs_one_string(model_fixture):
-    pipe = openvino_genai.LLMPipeline(str(model_fixture[1]))
+def test_streamer_kwargs_one_string():
+    pipe = openvino_genai.LLMPipeline(str(read_model(models_list()[0])[1]))
     printer = Printer(pipe.get_tokenizer())
     pipe.generate('', do_sample=True, streamer=printer)
 
 
-def test_streamer_kwargs_batch_fail(model_fixture):
-    pipe = openvino_genai.LLMPipeline(str(model_fixture[1]))
+def test_streamer_kwargs_batch_fail():
+    pipe = openvino_genai.LLMPipeline(str(read_model(models_list()[0])[1]))
     printer = Printer(pipe.get_tokenizer())
     with pytest.raises(RuntimeError):
        pipe.generate('', num_beams=2, streamer=printer)
 
 
 @pytest.mark.parametrize("callback", [print, user_defined_callback, lambda subword: print(subword)])
-def test_operator_wit_callback_one_string(model_fixture, callback):
-    pipe = openvino_genai.LLMPipeline(str(model_fixture[1]))
+def test_operator_wit_callback_one_string(callback):
+    pipe = openvino_genai.LLMPipeline(str(read_model(models_list()[0])[1]))
    pipe('', openvino_genai.GenerationConfig(), callback)
 
 
-def test_operator_wit_streamer_kwargs_one_string(model_fixture):
-    pipe = openvino_genai.LLMPipeline(str(model_fixture[1]))
+def test_operator_wit_streamer_kwargs_one_string():
+    pipe = openvino_genai.LLMPipeline(str(read_model(models_list()[0])[1]))
     printer = Printer(pipe.get_tokenizer())
     pipe('', do_sample=True, streamer=printer)
 
 
-def test_operator_wit_streamer_kwargs_batch_fail(model_fixture):
-    pipe = openvino_genai.LLMPipeline(str(model_fixture[1]))
+def test_operator_wit_streamer_kwargs_batch_fail():
+    pipe = openvino_genai.LLMPipeline(str(read_model(models_list()[0])[1]))
     printer = Printer(pipe.get_tokenizer())
     with pytest.raises(RuntimeError):
         pipe('', num_beams=2, streamer=printer)
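
Note on the caching change: the patch drops the module-scoped model_fixture and memoizes model preparation behind read_model() with functools.lru_cache(1), so parametrized tests that pass the same (model_id, path) tuple reuse one converted model instead of re-exporting it for every test. What follows is a minimal, self-contained sketch of that caching pattern only; load_model, the fake "conversion" step, and the TinyLlama identifiers are illustrative assumptions, not part of this patch or of the openvino_genai API.

import functools
from pathlib import Path


@functools.lru_cache(1)  # keep at most one prepared model around, mirroring read_model() in the patch
def load_model(params):
    """Hypothetical stand-in for read_model(); params must be a hashable (model_id, path) tuple."""
    model_id, path = params
    print(f"converting {model_id} -> {path}")  # the expensive export would run here, once per cached entry
    model = f"<converted {model_id}>"          # placeholder for the exported model object
    return model_id, Path(path), model


if __name__ == "__main__":
    first = load_model(("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "TinyLlama"))
    second = load_model(("TinyLlama/TinyLlama-1.1B-Chat-v1.0", "TinyLlama"))
    assert first is second  # the second call is a cache hit; "converting" is printed only once

Because the cache holds a single entry, calls that alternate between different models still trigger a fresh conversion each time; the one-slot cache only pays off when tests that use the same model run back to back.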