Fix abi #837
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow that builds and exercises the C++ causal LM samples.
name: causal_lm_cpp
on:
  pull_request:
    # Run only for changes to this workflow, the C++ samples or the
    # tokenizers submodule; Markdown-only changes are excluded.
    paths:
      - .github/workflows/causal_lm_cpp.yml
      - text_generation/causal_lm/cpp/*
      - thirdparty/openvino_tokenizers
      - "!**.md"
# A newer run for the same workflow and ref cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
# Exports open_llama_3b_v2, builds the samples and runs greedy_causal_lm once.
# The sample's output is not compared against a reference in this job.
cpp-greedy_causal_lm-ubuntu:
  runs-on: ubuntu-20.04-8-cores
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        # Quoted so the version is a string, not a YAML float.
        python-version: "3.8"
    - name: Install OpenVINO
      run: |
        mkdir ./ov/
        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
    # apt-get runs with -y so it never waits for a confirmation prompt.
    - name: Download, convert and build
      run: |
        source ./ov/setupvars.sh
        python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt
        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
        sudo apt-get install -y libtbb-dev
        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model openlm-research/open_llama_3b_v2 open_llama_3b_v2
        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
        cmake --build ./build/ --config Release -j
    - name: greedy_causal_lm
      run: |
        source ./ov/setupvars.sh
        ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./open_llama_3b_v2/ "return 0"
# Runs beam_search_causal_lm on TinyLlama for several prompts and checks that
# every beam returned by the transformers reference generation is present in
# the sample's output.
cpp-beam_search_causal_lm-ubuntu:
  runs-on: ubuntu-20.04
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        # Quoted so the version is a string, not a YAML float.
        python-version: "3.8"
    - name: Install OpenVINO
      run: |
        mkdir ./ov/
        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
    # apt-get runs with -y so it never waits for a confirmation prompt.
    - name: Download, convert and build
      run: |
        source ./ov/setupvars.sh
        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
        sudo apt-get install -y libtbb-dev
        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
        cmake --build ./build/ --config Release -j
    # Each check removes a matched reference from the predictions text, so a
    # reference that occurs several times must be matched that many times.
    # The double quotes around {ref=} close and reopen the shell string, so
    # Python receives the f-string without them.
    # NOTE(review): the single-prompt "你好! 你好嗎?" run has no timeout, unlike
    # the other invocations - confirm this is intentional.
    - name: Compare
      run: |
        source ./ov/setupvars.sh
        timeout 25s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?" > ./pred.txt
        python -c "
        import transformers
        with open('pred.txt', 'r') as file:
            predictions = file.read()
        tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
        tokenized = tokenizer('Why is the Sun yellow?', return_tensors='pt')
        for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
            idx = predictions.find(ref)
            if -1 == idx:
                raise RuntimeError(f'Missing "{ref=}" from predictions')
            predictions = predictions[:idx] + predictions[idx + len(ref):]
        "
        echo "Why is the Sun yellow?" passed
        timeout 25s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt
        python -c "
        import transformers
        with open('pred.txt', 'r') as file:
            predictions = file.read()
        tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
        tokenized = tokenizer('69', return_tensors='pt')
        for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
            idx = predictions.find(ref)
            if -1 == idx:
                raise RuntimeError(f'Missing "{ref=}" from predictions')
            predictions = predictions[:idx] + predictions[idx + len(ref):]
        "
        echo "69" passed
        timeout 25s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ Hi > ./pred.txt
        python -c "
        import transformers
        with open('pred.txt', 'r') as file:
            predictions = file.read()
        tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
        tokenized = tokenizer('Hi', return_tensors='pt')
        for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
            idx = predictions.find(ref)
            if -1 == idx:
                raise RuntimeError(f'Missing "{ref=}" from predictions')
            predictions = predictions[:idx] + predictions[idx + len(ref):]
        "
        echo "Hi" passed
        timeout 25s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "return 0" > ./pred.txt
        python -c "
        import transformers
        with open('pred.txt', 'r') as file:
            predictions = file.read()
        tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
        tokenized = tokenizer('return 0', return_tensors='pt')
        for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
            idx = predictions.find(ref)
            if -1 == idx:
                raise RuntimeError(f'Missing "{ref=}" from predictions')
            predictions = predictions[:idx] + predictions[idx + len(ref):]
        "
        echo "return 0" passed
        ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "你好! 你好嗎?" > ./pred.txt
        python -c "
        import transformers
        with open('pred.txt', 'r') as file:
            predictions = file.read()
        tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
        tokenized = tokenizer('你好! 你好嗎?', return_tensors='pt')
        for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
            ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
            idx = predictions.find(ref)
            if -1 == idx:
                raise RuntimeError(f'Missing "{ref=}" from predictions')
            predictions = predictions[:idx] + predictions[idx + len(ref):]
        "
        echo "你好! 你好嗎?" passed
        timeout 1m ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "Alan Turing was a" "return 0" "你好! 你好嗎?" > ./pred.txt
        python -c "
        import transformers
        with open('pred.txt', 'r') as file:
            predictions = file.read()
        tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0')
        prompts = [
            'Alan Turing was a',
            'return 0',
            '你好! 你好嗎?'
        ]
        for prompt in prompts:
            tokenized = tokenizer(prompt, return_tensors='pt')
            for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
                ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True)
                idx = predictions.find(ref)
                if -1 == idx:
                    raise RuntimeError(f'Missing "{ref=}" from predictions')
                predictions = predictions[:idx] + predictions[idx + len(ref):]
        "
        echo "Multi prompt" passed
# Windows variant of the beam search check: builds the samples, runs
# beam_search_causal_lm on the prompt "69" and verifies the output against the
# transformers reference generation.
cpp-beam_search_causal_lm-windows:
  runs-on: windows-latest
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        # Quoted so the version is a string, not a YAML float.
        python-version: "3.8"
    - name: Install OpenVINO
      shell: bash
      run: |
        curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/windows/w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64.zip
        unzip ov.zip
    - name: Download, convert and build
      shell: cmd
      run: |
        call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat
        python -m pip install --upgrade-strategy eager -r text_generation/causal_lm/cpp/requirements.txt
        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
        cmake --build ./build/ --config Release -j
    # The reference script is written to ref.py line by line with echo; the
    # spaces after "echo" provide the indentation Python needs for the
    # for/if bodies.
    - name: Compare
      shell: cmd
      run: |
        call w_openvino_toolkit_windows_2024.2.0.dev20240524_x86_64\setupvars.bat
        set PATH=.\build\src\cpp\Release;%PATH%
        .\build\text_generation\causal_lm\cpp\Release\beam_search_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ "69" > .\pred.txt
        echo import transformers > ref.py
        echo predictions = open('pred.txt', 'r').read() >> ref.py
        echo tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') >> ref.py
        echo tokenized = tokenizer('69', return_tensors='pt') >> ref.py
        echo for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): >> ref.py
        echo     ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) >> ref.py
        echo     idx = predictions.find(ref) >> ref.py
        echo     if -1 == idx: >> ref.py
        echo         raise RuntimeError(f'Missing "{ref=}" from predictions') >> ref.py
        echo     predictions = predictions[:idx] + predictions[idx + len(ref):] >> ref.py
        python ref.py
# Exports Qwen-7B-Chat, builds the samples and runs beam_search_causal_lm on
# the prompt 69 under a 50 s timeout. The output is written to pred.txt and is
# not compared with a reference.
cpp-beam_search_causal_lm-Qwen-7B-Chat:
  runs-on: ubuntu-20.04-16-cores
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        # Quoted so the version is a string, not a YAML float.
        python-version: "3.8"
    - name: Install OpenVINO
      run: |
        mkdir ./ov/
        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
    # apt-get runs with -y so it never waits for a confirmation prompt.
    - name: Download, convert and build
      run: |
        source ./ov/setupvars.sh
        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
        sudo apt-get install -y libtbb-dev
        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen-7B-Chat Qwen-7B-Chat
        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
        cmake --build ./build/ --config Release -j
    # Named "Run" because the step performs no comparison.
    - name: Run
      run: |
        source ./ov/setupvars.sh
        timeout 50s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./Qwen-7B-Chat/ 69 > ./pred.txt
# Exports Qwen1.5-7B-Chat, builds the samples and runs beam_search_causal_lm
# on a Chinese prompt under a 50 s timeout. The output goes to pred_qwen15.txt
# and is not compared with a reference.
cpp-beam_search_causal_lm-Qwen1_5-7B-Chat:
  runs-on: ubuntu-20.04-16-cores
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        # Quoted so the version is a string, not a YAML float.
        python-version: "3.8"
    - name: Install OpenVINO
      run: |
        mkdir ./ov/
        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
    # apt-get runs with -y so it never waits for a confirmation prompt.
    - name: Download, convert and build
      run: |
        source ./ov/setupvars.sh
        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
        sudo apt-get install -y libtbb-dev
        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen1.5-7B-Chat Qwen1.5-7B-Chat
        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
        cmake --build ./build/ --config Release -j
    - name: Run
      run: |
        source ./ov/setupvars.sh
        timeout 50s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./Qwen1.5-7B-Chat/ "你好!" > ./pred_qwen15.txt
# Exports phi-2, builds the samples with 15 parallel build jobs and runs
# beam_search_causal_lm on the prompt 69 under a 50 s timeout. The output is
# written to pred.txt and is not compared with a reference.
cpp-beam_search_causal_lm-Phi-2:
  runs-on: ubuntu-20.04-16-cores
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        # Quoted so the version is a string, not a YAML float.
        python-version: "3.8"
    - name: Install OpenVINO
      run: |
        mkdir ./ov/
        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
    # apt-get runs with -y so it never waits for a confirmation prompt.
    - name: Download, convert and build
      run: |
        source ./ov/setupvars.sh
        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
        sudo apt-get install -y libtbb-dev
        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-2 phi-2
        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
        cmake --build ./build/ --config Release -j 15
    # Named "Run" because the step performs no comparison.
    - name: Run
      run: |
        source ./ov/setupvars.sh
        timeout 50s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./phi-2/ 69 > ./pred.txt
# Exports notus-7b-v1, builds the samples and runs beam_search_causal_lm on
# the prompt 69 under a 50 s timeout. The output is written to pred.txt and is
# not compared with a reference.
cpp-beam_search_causal_lm-notus-7b-v1:
  runs-on: ubuntu-20.04-16-cores
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        # Quoted so the version is a string, not a YAML float.
        python-version: "3.8"
    - name: Install OpenVINO
      run: |
        mkdir ./ov/
        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
    # apt-get runs with -y so it never waits for a confirmation prompt.
    - name: Download, convert and build
      run: |
        source ./ov/setupvars.sh
        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
        sudo apt-get install -y libtbb-dev
        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model argilla/notus-7b-v1 notus-7b-v1
        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
        cmake --build ./build/ --config Release -j
    # Named "Run" because the step performs no comparison.
    - name: Run
      run: |
        source ./ov/setupvars.sh
        timeout 50s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./notus-7b-v1/ 69 > ./pred.txt
# Runs speculative_decoding_lm (dolly-v2-3b together with dolly-v2-7b) and
# greedy_causal_lm (dolly-v2-7b) on the same prompt and asserts that the first
# line of both outputs is identical.
cpp-speculative_decoding_lm-ubuntu:
  runs-on: ubuntu-20.04-16-cores
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        # Quoted so the version is a string, not a YAML float.
        python-version: "3.8"
    - name: Install OpenVINO
      run: |
        mkdir ./ov/
        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
    # apt-get runs with -y so it never waits for a confirmation prompt.
    - name: Download, convert and build
      run: |
        source ./ov/setupvars.sh
        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
        sudo apt-get install -y libtbb-dev
        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b
        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b
        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
        cmake --build ./build/ --config Release -j
    # Only the first line of each output file is read and compared.
    - name: run and compare
      run: |
        source ./ov/setupvars.sh
        ./build/text_generation/causal_lm/cpp/speculative_decoding_lm ./dolly-v2-3b/ ./dolly-v2-7b/ "Alan Turing was a" > predictions_speculative.txt
        ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./dolly-v2-7b/ "Alan Turing was a" > predictions_greedy.txt
        python -c "
        with open('predictions_greedy.txt', 'r') as f:
            predicted_greedy = f.readline()
        with open('predictions_speculative.txt', 'r') as f:
            predicted_speculative = f.readline()
        assert predicted_greedy == predicted_speculative
        "
        echo "Alan Turing was a" passed
# Runs prompt_lookup_decoding_lm and greedy_causal_lm on TinyLlama with the
# same multi-line prompt and asserts that the first line of both outputs is
# identical.
cpp-prompt_lookup_decoding_lm-ubuntu:
  runs-on: ubuntu-20.04-16-cores
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        # Quoted so the version is a string, not a YAML float.
        python-version: "3.8"
    - name: Install OpenVINO
      run: |
        mkdir ./ov/
        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
    # apt-get runs with -y so it never waits for a confirmation prompt.
    - name: Download, convert and build
      run: |
        source ./ov/setupvars.sh
        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
        sudo apt-get install -y libtbb-dev
        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
        cmake --build ./build/ --config Release -j
    # The prompt is a single-quoted multi-line shell string saved to
    # prompt.txt and passed to both samples via "$(<prompt.txt)".
    # Only the first line of each output file is read and compared.
    - name: run and compare
      run: |
        source ./ov/setupvars.sh
        echo 'Code:```python
        def add(a, b):
            return a + b
        ```
        Question: Can you please add 2 and 3
        A:' > ./prompt.txt
        ./build/text_generation/causal_lm/cpp/prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_prompt_lookup.txt
        ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_greedy.txt
        python -c "
        with open('predictions_greedy.txt', 'r') as f:
            predicted_greedy = f.readline()
        with open('predictions_prompt_lookup.txt', 'r') as f:
            predicted_prompt_lookup = f.readline()
        assert predicted_greedy == predicted_prompt_lookup
        "
        echo "Prompt lookup" passed
# Exports phi-1_5, runs greedy_causal_lm and beam_search_causal_lm on one
# prompt, then checks the greedy output against the transformers reference
# generation.
cpp-Phi-1_5:
  runs-on: ubuntu-20.04-16-cores
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        # Quoted so the version is a string, not a YAML float.
        python-version: "3.8"
    - name: Install OpenVINO
      run: |
        mkdir ./ov/
        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
    # apt-get runs with -y so it never waits for a confirmation prompt.
    - name: Download, convert and build
      run: |
        source ./ov/setupvars.sh
        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
        sudo apt-get install -y libtbb-dev
        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-1_5 phi-1_5
        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
        cmake --build ./build/ --config Release -j 15
    # NOTE(review): pred_beam.txt is produced here but the Compare step only
    # reads pred_greedy.txt - confirm the beam output is meant to go unchecked.
    - name: Run Generation
      run: |
        source ./ov/setupvars.sh
        timeout 50s ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_greedy.txt
        timeout 50s ./build/text_generation/causal_lm/cpp/beam_search_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_beam.txt
    # Fails if the reference continuation is not found in pred_greedy.txt.
    - name: Compare
      run: |
        python -c "
        import transformers
        with open('pred_greedy.txt', 'r') as file:
            predictions = file.read()
        tokenizer = transformers.AutoTokenizer.from_pretrained('microsoft/phi-1_5')
        tokenized = tokenizer('Alan Turing was a', return_tensors='pt')
        for output in transformers.AutoModelForCausalLM.from_pretrained('microsoft/phi-1_5').generate(**tokenized, max_length=100, do_sample=False):
            ref = tokenizer.decode(output[tokenized['input_ids'].numel():], skip_special_tokens=True)
            idx = predictions.find(ref)
            if -1 == idx:
                raise RuntimeError(f'Missing "{ref=}" from predictions')
            predictions = predictions[:idx] + predictions[idx + len(ref):]
        "
        echo Phi-1_5 passed
# Exports redpajama-3b-chat, converts its tokenizer, runs greedy_causal_lm on
# one prompt and checks the output against the transformers reference
# generation.
cpp-greedy_causal_lm-redpajama-3b-chat:
  runs-on: ubuntu-20.04-4-cores
  steps:
    - uses: actions/checkout@v4
      with:
        submodules: recursive
    - uses: actions/setup-python@v4
      with:
        # Quoted so the version is a string, not a YAML float.
        python-version: "3.8"
    - name: Install OpenVINO
      run: |
        mkdir ./ov/
        curl https://storage.openvinotoolkit.org/repositories/openvino/packages/pre-release/2024.2.0rc1/linux/l_openvino_toolkit_ubuntu20_2024.2.0.dev20240524_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
        sudo ./ov/install_dependencies/install_openvino_dependencies.sh
    # apt-get runs with -y so it never waits for a confirmation prompt.
    - name: Download, convert and build
      run: |
        source ./ov/setupvars.sh
        python -m pip install --upgrade-strategy eager -r ./text_generation/causal_lm/cpp/requirements.txt
        python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/pre-release
        sudo apt-get install -y libtbb-dev
        optimum-cli export openvino --trust-remote-code --weight-format fp16 --model ikala/redpajama-3b-chat redpajama-3b-chat
        cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
        cmake --build ./build/ --config Release -j
    # Writes the converted tokenizer and detokenizer into the model directory.
    - run: source ./ov/setupvars.sh && convert_tokenizer ./redpajama-3b-chat/ --output ./redpajama-3b-chat/ --with-detokenizer --trust-remote-code
    - name: Run Generation
      run: |
        source ./ov/setupvars.sh
        timeout 50s ./build/text_generation/causal_lm/cpp/greedy_causal_lm ./redpajama-3b-chat/ "Alan Turing was a" > ./pred_greedy.txt
    # Fails if the reference continuation is not found in pred_greedy.txt.
    - name: Compare
      run: |
        python -c "
        import transformers
        with open('pred_greedy.txt', 'r') as file:
            predictions = file.read()
        tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat')
        tokenized = tokenizer('Alan Turing was a', return_tensors='pt')
        for output in transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat').generate(**tokenized, max_length=100, do_sample=False):
            ref = tokenizer.decode(output[tokenized['input_ids'].numel():], skip_special_tokens=True)
            idx = predictions.find(ref)
            if -1 == idx:
                raise RuntimeError(f'Missing "{ref}" from predictions')
            predictions = predictions[:idx] + predictions[idx + len(ref):]
        "
        echo "Alan Turing was a" passed