Hugging Face comparison for Mistral-7B (#347)
Co-authored-by: Ilya Lavrenov <[email protected]>
akiseakusa and ilya-lavrenov authored Apr 8, 2024
1 parent 570101a commit 053dd88
Showing 2 changed files with 31 additions and 8 deletions.
38 changes: 31 additions & 7 deletions .github/workflows/causal_lm_cpp.yml
@@ -10,6 +10,7 @@ on:
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true

jobs:
cpp-beam_search_causal_lm-Mistral-7B:
runs-on: ubuntu-20.04-16-cores
@@ -37,6 +38,21 @@ jobs:
source ./ov/setupvars.sh
convert_tokenizer ./Mistral-7B-v0.1/pytorch/dldt/FP16/ --output ./Mistral-7B-v0.1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
timeout 50s ./build/beam_search_causal_lm ./Mistral-7B-v0.1/pytorch/dldt/FP16/ 69 > ./pred.txt
+ python -c "
+ import transformers
+ # Reference check: regenerate the beams with HuggingFace transformers and
+ # require every reference continuation to appear in the sample's output.
+ with open('pred.txt', 'r') as file:
+     predictions = file.read()
+ tokenizer = transformers.LlamaTokenizer.from_pretrained('mistralai/Mistral-7B-v0.1')
+ tokenized = tokenizer('69', return_tensors='pt')
+ for beam in transformers.LlamaForCausalLM.from_pretrained('mistralai/Mistral-7B-v0.1').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
+     ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
+     idx = predictions.find(ref)
+     if -1 == idx:
+         raise RuntimeError(f'Missing "{ref=}" from predictions')
+     # Consume the match so duplicated beams must occur the right number of times.
+     predictions = predictions[:idx] + predictions[idx + len(ref):]
+ "
+ echo "69" passed
cpp-greedy_causal_lm-ubuntu:
runs-on: ubuntu-20.04-8-cores
steps:
@@ -63,6 +79,7 @@ jobs:
source ./ov/setupvars.sh
convert_tokenizer ./open_llama_3b_v2/pytorch/dldt/FP16/ --output ./open_llama_3b_v2/pytorch/dldt/FP16/ --with-detokenizer
./build/greedy_causal_lm ./open_llama_3b_v2/pytorch/dldt/FP16/ "return 0"
cpp-beam_search_causal_lm-ubuntu:
runs-on: ubuntu-20.04
steps:
@@ -103,7 +120,7 @@ jobs:
raise RuntimeError(f'Missing "{ref=}" from predictions')
predictions = predictions[:idx] + predictions[idx + len(ref):]
"
- echo 69 passed
+ echo "69" passed
timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ Hi > ./pred.txt
python -c "
@@ -119,7 +136,7 @@ jobs:
raise RuntimeError(f'Missing "{ref=}" from predictions')
predictions = predictions[:idx] + predictions[idx + len(ref):]
"
- echo Hi passed
+ echo "Hi" passed
timeout 25s ./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "return 0" > ./pred.txt
python -c "
@@ -135,7 +152,7 @@ jobs:
raise RuntimeError(f'Missing "{ref=}" from predictions')
predictions = predictions[:idx] + predictions[idx + len(ref):]
"
- echo return 0 passed
+ echo "return 0" passed
./build/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/pytorch/dldt/FP16/ "你好! 你好嗎?" > ./pred.txt
python -c "
@@ -151,7 +168,8 @@ jobs:
raise RuntimeError(f'Missing "{ref=}" from predictions')
predictions = predictions[:idx] + predictions[idx + len(ref):]
"
- echo 你好! 你好嗎? passed
+ echo "你好! 你好嗎?" passed
cpp-beam_search_causal_lm-windows:
runs-on: windows-latest
steps:
@@ -192,6 +210,7 @@ jobs:
echo raise RuntimeError(f'Missing "{ref=}" from predictions') >> ref.py
echo predictions = predictions[:idx] + predictions[idx + len(ref):] >> ref.py
python ref.py
cpp-beam_search_causal_lm-Qwen-7B-Chat:
runs-on: ubuntu-20.04-16-cores
steps:
@@ -218,6 +237,7 @@ jobs:
source ./ov/setupvars.sh
convert_tokenizer ./Qwen-7B-Chat/pytorch/dldt/FP16/ --output ./Qwen-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
timeout 50s ./build/beam_search_causal_lm ./Qwen-7B-Chat/pytorch/dldt/FP16/ 69 > ./pred.txt
cpp-beam_search_causal_lm-Qwen1_5-7B-Chat:
runs-on: ubuntu-20.04-16-cores
steps:
@@ -244,6 +264,7 @@ jobs:
source ./ov/setupvars.sh
convert_tokenizer ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --output ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
timeout 50s ./build/beam_search_causal_lm ./Qwen1.5-7B-Chat/pytorch/dldt/FP16/ "你好!" > ./pred_qwen15.txt
cpp-beam_search_causal_lm-Phi-2:
runs-on: ubuntu-20.04-16-cores
steps:
@@ -269,7 +290,8 @@ jobs:
run: |
source ./ov/setupvars.sh
convert_tokenizer ./Phi-2/pytorch/dldt/FP16/ --output ./Phi-2/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
- timeout 50s ./build/beam_search_causal_lm ./Phi-2/pytorch/dldt/FP16/ 69 > ./pred.txt
+ timeout 50s ./build/beam_search_causal_lm ./Phi-2/pytorch/dldt/FP16/ 69 > ./pred.txt
cpp-beam_search_causal_lm-notus-7b-v1:
runs-on: ubuntu-20.04-16-cores
steps:
@@ -296,6 +318,7 @@ jobs:
source ./ov/setupvars.sh
convert_tokenizer ./notus-7b-v1/pytorch/dldt/FP16/ --output ./notus-7b-v1/pytorch/dldt/FP16/ --with-detokenizer --trust-remote-code
timeout 50s ./build/beam_search_causal_lm ./notus-7b-v1/pytorch/dldt/FP16/ 69 > ./pred.txt
cpp-speculative_decoding_lm-ubuntu:
runs-on: ubuntu-20.04-16-cores
steps:
@@ -333,7 +356,8 @@ jobs:
predicted_speculative = f.readline()
assert predicted_greedy == predicted_speculative
"
- echo speculative_decoding_lm passed
+ echo "Alan Turing was a" passed
cpp-Phi-1_5:
runs-on: ubuntu-20.04-16-cores
steps:
@@ -376,4 +400,4 @@ jobs:
raise RuntimeError(f'Missing "{ref=}" from predictions')
predictions = predictions[:idx] + predictions[idx + len(ref):]
"
- echo Phi-1_5 passed
+ echo "Alan Turing was a" passed
1 change: 0 additions & 1 deletion text_generation/causal_lm/cpp/README.md
@@ -149,5 +149,4 @@ To enable Unicode characters for Windows cmd open `Region` settings from `Control
10. [zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)
11. [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)


This pipeline can work with other similar topologies produced by `optimum-intel` with the same model signature.
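
As a minimal sketch of producing such a folder with optimum-intel's Python API (the model id is only an example from the list above; any causal LM with a supported topology should work):

import transformers
from optimum.intel import OVModelForCausalLM

model_id = 'HuggingFaceH4/zephyr-7b-beta'        # example; see the list above
save_dir = './zephyr-7b-beta/pytorch/dldt/FP16/'

# export=True converts the PyTorch checkpoint to OpenVINO IR on the fly.
OVModelForCausalLM.from_pretrained(model_id, export=True).save_pretrained(save_dir)
transformers.AutoTokenizer.from_pretrained(model_id).save_pretrained(save_dir)

The samples additionally expect the OpenVINO tokenizer and detokenizer models, produced as in the workflows above: convert_tokenizer <save_dir> --output <save_dir> --with-detokenizer.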
