From fb6e6ec431bd55292b6e8f3b59b8822f4dd2dc05 Mon Sep 17 00:00:00 2001
From: Ilya Lavrenov
Date: Tue, 9 Apr 2024 16:49:26 +0400
Subject: [PATCH] Added more tests in .github folder

---
 .github/workflows/causal_lm_cpp.yml | 86 ++++++++++++++++++++++++-----
 1 file changed, 71 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index c853e194cc..4dd014ea82 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -38,20 +38,20 @@ jobs:
         source ./ov/setupvars.sh
         convert_tokenizer ./Mistral-7B-v0.1/pytorch/dldt/FP16/ --output ./Mistral-7B-v0.1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
         timeout 50s ./build/beam_search_causal_lm ./Mistral-7B-v0.1/pytorch/dldt/FP16/ 69 > ./pred.txt
-        python -c "
-        import transformers
-        with open('pred.txt', 'r') as file:
-            predictions = file.read()
-        tokenizer = transformers.LlamaTokenizer.from_pretrained('mistralai/Mistral-7B-v0.1')
-        tokenized = tokenizer('69', return_tensors='pt')
-        for beam in transformers.LlamaForCausalLM.from_pretrained('mistralai/Mistral-7B-v0.1').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
-            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
-            idx = predictions.find(ref)
-            if -1 == idx:
-                raise RuntimeError(f'Missing "{ref=}" from predictions')
-            predictions = predictions[:idx] + predictions[idx + len(ref):]
-        "
-        echo "69" passed
+        python -c "
+        import transformers
+        with open('pred.txt', 'r') as file:
+            predictions = file.read()
+        tokenizer = transformers.LlamaTokenizer.from_pretrained('mistralai/Mistral-7B-v0.1')
+        tokenized = tokenizer('69', return_tensors='pt')
+        for beam in transformers.LlamaForCausalLM.from_pretrained('mistralai/Mistral-7B-v0.1').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
+            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
+            idx = predictions.find(ref)
+            if -1 == idx:
+                raise RuntimeError(f'Missing "{ref=}" from predictions')
+            predictions = predictions[:idx] + predictions[idx + len(ref):]
+        "
+        echo "69" passed

   cpp-greedy_causal_lm-ubuntu:
     runs-on: ubuntu-20.04-8-cores
@@ -237,6 +237,20 @@ jobs:
         source ./ov/setupvars.sh
         convert_tokenizer ./Qwen-7B-Chat/pytorch/dldt/FP16/ --output ./Qwen-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
         timeout 50s ./build/beam_search_causal_lm ./Qwen-7B-Chat/pytorch/dldt/FP16/ 69 > ./pred.txt
+        python -c "
+        import transformers
+        with open('pred.txt', 'r') as file:
+            predictions = file.read()
+        tokenizer = transformers.LlamaTokenizer.from_pretrained('Qwen/Qwen-7B-Chat')
+        tokenized = tokenizer('69', return_tensors='pt')
+        for beam in transformers.LlamaForCausalLM.from_pretrained('Qwen/Qwen-7B-Chat').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
+            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
+            idx = predictions.find(ref)
+            if -1 == idx:
+                raise RuntimeError(f'Missing "{ref=}" from predictions')
+            predictions = predictions[:idx] + predictions[idx + len(ref):]
+        "
+        echo "69" passed

   cpp-beam_search_causal_lm-Qwen1_5-7B-Chat:
     runs-on: ubuntu-20.04-16-cores
@@ -264,6 +278,20 @@ jobs:
         source ./ov/setupvars.sh
         convert_tokenizer ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --output ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
         timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ "你好!" > ./pred_qwen15.txt
+        python -c "
+        import transformers
+        with open('pred_qwen15.txt', 'r') as file:
+            predictions = file.read()
+        tokenizer = transformers.LlamaTokenizer.from_pretrained('Qwen/Qwen1.5-7B-Chat')
+        tokenized = tokenizer('69', return_tensors='pt')
+        for beam in transformers.LlamaForCausalLM.from_pretrained('Qwen/Qwen1.5-7B-Chat').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
+            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
+            idx = predictions.find(ref)
+            if -1 == idx:
+                raise RuntimeError(f'Missing "{ref=}" from predictions')
+            predictions = predictions[:idx] + predictions[idx + len(ref):]
+        "
+        echo "69" passed

   cpp-beam_search_causal_lm-Phi-2:
     runs-on: ubuntu-20.04-16-cores
@@ -291,6 +319,20 @@ jobs:
         source ./ov/setupvars.sh
         convert_tokenizer ./Phi-2/pytorch/dldt/FP16/ --output ./Phi-2/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
         timeout 50s ./build/beam_search_causal_lm ./Phi-2/pytorch/dldt/FP16/ 69 > ./pred.txt
+        python -c "
+        import transformers
+        with open('pred.txt', 'r') as file:
+            predictions = file.read()
+        tokenizer = transformers.LlamaTokenizer.from_pretrained('microsoft/phi-2')
+        tokenized = tokenizer('69', return_tensors='pt')
+        for beam in transformers.LlamaForCausalLM.from_pretrained('microsoft/phi-2').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
+            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
+            idx = predictions.find(ref)
+            if -1 == idx:
+                raise RuntimeError(f'Missing "{ref=}" from predictions')
+            predictions = predictions[:idx] + predictions[idx + len(ref):]
+        "
+        echo "69" passed

   cpp-beam_search_causal_lm-notus-7b-v1:
     runs-on: ubuntu-20.04-16-cores
@@ -318,6 +360,20 @@ jobs:
         source ./ov/setupvars.sh
         convert_tokenizer ./notus-7b-v1/pytorch/dldt/FP16/ --output ./notus-7b-v1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
         timeout 50s ./build/beam_search_causal_lm ./notus-7b-v1/pytorch/dldt/FP16/ 69 > ./pred.txt
+        python -c "
+        import transformers
+        with open('pred.txt', 'r') as file:
+            predictions = file.read()
+        tokenizer = transformers.LlamaTokenizer.from_pretrained('argilla/notus-7b-v1')
+        tokenized = tokenizer('69', return_tensors='pt')
+        for beam in transformers.LlamaForCausalLM.from_pretrained('argilla/notus-7b-v1').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
+            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
+            idx = predictions.find(ref)
+            if -1 == idx:
+                raise RuntimeError(f'Missing "{ref=}" from predictions')
+            predictions = predictions[:idx] + predictions[idx + len(ref):]
+        "
+        echo "69" passed

   cpp-speculative_decoding_lm-ubuntu:
     runs-on: ubuntu-20.04-16-cores
@@ -331,7 +387,7 @@ jobs:
     - name: Install OpenVINO
       run: |
         mkdir ./ov/
-        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/2023.3/linux/l_openvino_toolkit_ubuntu20_2023.3.0.13775.ceeafaf64f3_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
+        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.1.0-14645-e6dc0865128/l_openvino_toolkit_ubuntu20_2024.1.0.dev20240304_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
         sudo ./ov/install_dependencies/install_openvino_dependencies.sh
     - name: Download, convert and build
       run: |
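
Editor's note, outside the patch itself: every job added above embeds the same verification snippet through `python -c`, differing only in the model id, the prompt, and the prediction file. The standalone sketch below restates that check for readability; the model_id, prompt, and prediction_file values are placeholders for whichever job is being reproduced, not part of the patch.

# Standalone sketch of the reference check embedded via `python -c` in the jobs above.
# Placeholder values: each workflow job substitutes its own model, prompt and file.
import transformers

model_id = 'mistralai/Mistral-7B-v0.1'   # placeholder; e.g. 'Qwen/Qwen-7B-Chat', 'microsoft/phi-2'
prompt = '69'                            # placeholder; the prompt given to the C++ sample
prediction_file = 'pred.txt'             # placeholder; e.g. 'pred_qwen15.txt' in the Qwen1.5 job

with open(prediction_file, 'r') as file:
    predictions = file.read()            # raw text printed by ./build/beam_search_causal_lm

tokenizer = transformers.LlamaTokenizer.from_pretrained(model_id)
tokenized = tokenizer(prompt, return_tensors='pt')
model = transformers.LlamaForCausalLM.from_pretrained(model_id)

# Same group beam search settings as the C++ sample: 3 diverse groups, 15 beams in total,
# all 15 sequences returned, and an n-gram size large enough to disable the repetition filter.
beams = model.generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15,
                       diversity_penalty=1.0, max_new_tokens=20, early_stopping=False,
                       length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False)

for beam in beams:
    # Decode only the newly generated tokens and require the C++ output to contain the same
    # continuation; each match is cut out of the text, so a beam that appears twice in the
    # reference must also appear twice in the prediction file.
    ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
    idx = predictions.find(ref)
    if -1 == idx:
        raise RuntimeError(f'Missing "{ref=}" from predictions')
    predictions = predictions[:idx] + predictions[idx + len(ref):]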