From 053dd8876df7d5d3c878005967ebf61a268a065a Mon Sep 17 00:00:00 2001
From: Sadhvi <41192585+akiseakusa@users.noreply.github.com>
Date: Tue, 9 Apr 2024 01:54:12 +0530
Subject: [PATCH] Hugging face comparison Mistral 7b (#347)

Co-authored-by: Ilya Lavrenov
---
 .github/workflows/causal_lm_cpp.yml     | 38 ++++++++++++++++++++-----
 text_generation/causal_lm/cpp/README.md |  1 -
 2 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
index 5cfd1cc7b7..c853e194cc 100644
--- a/.github/workflows/causal_lm_cpp.yml
+++ b/.github/workflows/causal_lm_cpp.yml
@@ -10,6 +10,7 @@ on:
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}
   cancel-in-progress: true
+
 jobs:
   cpp-beam_search_causal_lm-Mistral-7B:
     runs-on: ubuntu-20.04-16-cores
@@ -37,6 +38,21 @@ jobs:
         source ./ov/setupvars.sh
         convert_tokenizer ./Mistral-7B-v0.1/pytorch/dldt/FP16/ --output ./Mistral-7B-v0.1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
         timeout 50s ./build/beam_search_causal_lm ./Mistral-7B-v0.1/pytorch/dldt/FP16/ 69 > ./pred.txt
+        python -c "
+        import transformers
+        with open('pred.txt', 'r') as file:
+            predictions = file.read()
+        tokenizer = transformers.LlamaTokenizer.from_pretrained('mistralai/Mistral-7B-v0.1')
+        tokenized = tokenizer('69', return_tensors='pt')
+        for beam in transformers.LlamaForCausalLM.from_pretrained('mistralai/Mistral-7B-v0.1').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
+            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
+            idx = predictions.find(ref)
+            if -1 == idx:
+                raise RuntimeError(f'Missing "{ref=}" from predictions')
+            predictions = predictions[:idx] + predictions[idx + len(ref):]
+        "
+        echo "69" passed
+
   cpp-greedy_causal_lm-ubuntu:
     runs-on: ubuntu-20.04-8-cores
     steps:
@@ -63,6 +79,7 @@ jobs:
         source ./ov/setupvars.sh
         convert_tokenizer ./open_llama_3b_v2/pytorch/dldt/FP16/ --output ./open_llama_3b_v2/pytorch/dldt/FP16/ --with-detokenizer
         ./build/greedy_causal_lm ./open_llama_3b_v2/pytorch/dldt/FP16/ "return 0"
+
   cpp-beam_search_causal_lm-ubuntu:
     runs-on: ubuntu-20.04
     steps:
@@ -103,7 +120,7 @@ jobs:
                 raise RuntimeError(f'Missing "{ref=}" from predictions')
             predictions = predictions[:idx] + predictions[idx + len(ref):]
         "
-        echo 69 passed
+        echo "69" passed
 
         timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ Hi > ./pred.txt
         python -c "
@@ -119,7 +136,7 @@ jobs:
                 raise RuntimeError(f'Missing "{ref=}" from predictions')
             predictions = predictions[:idx] + predictions[idx + len(ref):]
         "
-        echo Hi passed
+        echo "Hi" passed
 
         timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "return 0" > ./pred.txt
         python -c "
@@ -135,7 +152,7 @@ jobs:
                 raise RuntimeError(f'Missing "{ref=}" from predictions')
             predictions = predictions[:idx] + predictions[idx + len(ref):]
         "
-        echo return 0 passed
+        echo "return 0" passed
 
         ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "你好! 你好嗎?" > ./pred.txt
         python -c "
@@ -151,7 +168,8 @@ jobs:
                 raise RuntimeError(f'Missing "{ref=}" from predictions')
             predictions = predictions[:idx] + predictions[idx + len(ref):]
         "
-        echo 你好! 你好嗎? passed
+        echo "你好! 你好嗎?" passed
+
   cpp-beam_search_causal_lm-windows:
     runs-on: windows-latest
     steps:
@@ -192,6 +210,7 @@ jobs:
         echo         raise RuntimeError(f'Missing "{ref=}" from predictions') >> ref.py
         echo     predictions = predictions[:idx] + predictions[idx + len(ref):] >> ref.py
         python ref.py
+
   cpp-beam_search_causal_lm-Qwen-7B-Chat:
     runs-on: ubuntu-20.04-16-cores
     steps:
@@ -218,6 +237,7 @@ jobs:
         source ./ov/setupvars.sh
         convert_tokenizer ./Qwen-7B-Chat/pytorch/dldt/FP16/ --output ./Qwen-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
         timeout 50s ./build/beam_search_causal_lm ./Qwen-7B-Chat/pytorch/dldt/FP16/ 69 > ./pred.txt
+
   cpp-beam_search_causal_lm-Qwen1_5-7B-Chat:
     runs-on: ubuntu-20.04-16-cores
     steps:
@@ -244,6 +264,7 @@ jobs:
         source ./ov/setupvars.sh
         convert_tokenizer ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --output ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
         timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ "你好!" > ./pred_qwen15.txt
+
   cpp-beam_search_causal_lm-Phi-2:
     runs-on: ubuntu-20.04-16-cores
     steps:
@@ -269,7 +290,8 @@ jobs:
       run: |
         source ./ov/setupvars.sh
         convert_tokenizer ./Phi-2/pytorch/dldt/FP16/ --output ./Phi-2/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
-        timeout 50s ./build/beam_search_causal_lm ./Phi-2/pytorch/dldt/FP16/ 69 > ./pred.txt
+        timeout 50s ./build/beam_search_causal_lm ./Phi-2/pytorch/dldt/FP16/ 69 > ./pred.txt
+
   cpp-beam_search_causal_lm-notus-7b-v1:
     runs-on: ubuntu-20.04-16-cores
     steps:
@@ -296,6 +318,7 @@ jobs:
         source ./ov/setupvars.sh
         convert_tokenizer ./notus-7b-v1/pytorch/dldt/FP16/ --output ./notus-7b-v1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
         timeout 50s ./build/beam_search_causal_lm ./notus-7b-v1/pytorch/dldt/FP16/ 69 > ./pred.txt
+
   cpp-speculative_decoding_lm-ubuntu:
     runs-on: ubuntu-20.04-16-cores
     steps:
@@ -333,7 +356,8 @@ jobs:
             predicted_speculative = f.readline()
         assert predicted_greedy == predicted_speculative
         "
-        echo speculative_decoding_lm passed
+        echo "Alan Turing was a" passed
+
   cpp-Phi-1_5:
     runs-on: ubuntu-20.04-16-cores
     steps:
@@ -376,4 +400,4 @@ jobs:
                 raise RuntimeError(f'Missing "{ref=}" from predictions')
             predictions = predictions[:idx] + predictions[idx + len(ref):]
         "
-        echo Phi-1_5 passed
\ No newline at end of file
+        echo "Alan Turing was a" passed
diff --git a/text_generation/causal_lm/cpp/README.md b/text_generation/causal_lm/cpp/README.md
index a2a8c300c4..20d7056907 100644
--- a/text_generation/causal_lm/cpp/README.md
+++ b/text_generation/causal_lm/cpp/README.md
@@ -149,5 +149,4 @@ To enable Unicode characters for Windows cmd open `Region` settings from `Contro
 10. [zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
 11. [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
-
 
 This pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature.
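
Note: the CI jobs above embed the Hugging Face reference comparison inline via `python -c`, which is awkward to iterate on. The same check can be exercised locally as a standalone script. The following is a minimal sketch, not part of the patch: it assumes `transformers` (with a PyTorch backend) is installed, that `pred.txt` was produced by the `beam_search_causal_lm` command from the Mistral-7B job, and the file name `ref_check.py` is illustrative.

    # ref_check.py -- standalone version of the workflow's inline comparison.
    # Assumes: transformers + torch installed, pred.txt in the working directory,
    # produced by: ./build/beam_search_causal_lm ./Mistral-7B-v0.1/pytorch/dldt/FP16/ 69 > ./pred.txt
    import transformers

    with open('pred.txt', 'r') as file:
        predictions = file.read()

    tokenizer = transformers.LlamaTokenizer.from_pretrained('mistralai/Mistral-7B-v0.1')
    tokenized = tokenizer('69', return_tensors='pt')
    model = transformers.LlamaForCausalLM.from_pretrained('mistralai/Mistral-7B-v0.1')

    # The generation parameters mirror the C++ sample's group beam search so the
    # reference beams are directly comparable with the OpenVINO predictions.
    for beam in model.generate(**tokenized, num_beam_groups=3, num_beams=15,
                               num_return_sequences=15, diversity_penalty=1.0,
                               max_new_tokens=20, early_stopping=False, length_penalty=1.0,
                               no_repeat_ngram_size=9**9, do_sample=False):
        # Decode only the continuation, dropping the prompt tokens.
        ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
        idx = predictions.find(ref)
        if -1 == idx:
            raise RuntimeError(f'Missing "{ref=}" from predictions')
        # Consume the match so identical beams must each find their own copy.
        predictions = predictions[:idx] + predictions[idx + len(ref):]
    print('69 passed')

Consuming each matched continuation from `predictions` means duplicated reference beams must correspond to duplicated lines in the C++ output, which makes the check stricter than a plain substring test.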