
Commit

Merge branch 'master' into use-continuos-batching-by-default
ilya-lavrenov authored Oct 12, 2024
2 parents 1806fa0 + 67bcef1 commit c9dc107
Showing 52 changed files with 1,112 additions and 272 deletions.
38 changes: 24 additions & 14 deletions .github/workflows/causal_lm_cpp.yml
@@ -203,7 +203,7 @@ jobs:
echo "Multi prompt" passed
cpp-greedy_causal_lm-windows:
runs-on: windows-2019-16-core
runs-on: windows-latest
env:
PYTHONIOENCODING: "utf8"
defaults:
@@ -216,10 +216,6 @@ jobs:
- uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Configure Developer Command Prompt for Microsoft Visual C++
uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0
with:
toolset: 14.29
- run: curl --output ov.zip ${{ env.w_ov_link }}
- run: unzip -d ov ov.zip
- run: dirs=(ov/*) && mv ov/*/* ov && rmdir "${dirs[@]}"
@@ -701,20 +697,38 @@ jobs:
run: |
source ./ov/setupvars.sh
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
cmake --build ./build/ --config Release --target visual_language_chat -j
- name: Download and convert a model and an image
cmake --build ./build/ --config Release --target visual_language_chat py_generate_pipeline -j
- name: Download and convert MiniCPM-V-2_6 model and an image
run: |
source ./ov/setupvars.sh
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
python ./samples/cpp/visual_language_chat/export_MiniCPM-V-2_6.py ./miniCPM-V-2_6/
wget https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --output-document cat.jpg
- name: Run chat chat sample
- name: Run visual_language_chat sample - MiniCPM-V-2_6
run: >
source ./ov/setupvars.sh
&& timeout 120s ./build/samples/cpp/visual_language_chat/visual_language_chat ./miniCPM-V-2_6/ cat.jpg
<<< $'What is on the image?\nWhat is special on the image?'
- name: Download and convert LLaVa 1.5 model and an image
run: |
source ./ov/setupvars.sh
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
optimum-cli export openvino --model llava-hf/llava-1.5-7b-hf ./llava_1_5_7b_ov/
wget https://llava-vl.github.io/static/images/monalisa.jpg
- name: Run visual_language_chat sample - LLaVa 1.5
run: >
source ./ov/setupvars.sh
&& timeout 120s ./build/samples/cpp/visual_language_chat/visual_language_chat ./llava_1_5_7b_ov/ monalisa.jpg
<<< $'Who drew this painting?\nWhen did the painter live?'
- name: Run python chat sample
run: |
source ./ov/setupvars.sh
export PYTHONPATH=./build/:$PYTHONPATH
printf 'What is on the image?\nWhat is special on the image?\n' > ./input.txt
timeout 120s python ./samples/python/visual_language_chat/visual_language_chat.py ./miniCPM-V-2_6/ cat.jpg < input.txt > ./pred.txt
cpp-continuous-batching-ubuntu:
runs-on: ubuntu-20.04-8-cores
@@ -760,7 +774,7 @@ jobs:
timeout 200s ./build/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark -n 10 --dynamic_split_fuse --max_batch_size 256 --max_input_len 256 -m ./TinyLlama-1.1B-Chat-v1.0/ --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1
cpp-continuous-batching-windows:
runs-on: windows-2019-16-core
runs-on: windows-latest
env:
PYTHONIOENCODING: "utf8"
defaults:
@@ -773,10 +787,6 @@ jobs:
- uses: actions/setup-python@v4
with:
python-version: 3.9
- name: Configure Developer Command Prompt for Microsoft Visual C++
uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0
with:
toolset: 14.29
- name: Install OpenVINO
run: |
curl --output ov.zip ${{ env.w_ov_link }}
7 changes: 1 addition & 6 deletions .github/workflows/lcm_dreamshaper_cpp.yml
@@ -70,7 +70,7 @@ jobs:
./build/samples/cpp/text2image/stable_diffusion ./models/lcm_dreamshaper_v7/FP16 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"
lcm_dreamshaper_v7_cpp-windows:
runs-on: windows-2019-16-core
runs-on: windows-latest
defaults:
run:
shell: pwsh
@@ -88,11 +88,6 @@ jobs:
mv ./tmp/*/* .
popd
- name: Configure Developer Command Prompt for Microsoft Visual C++
uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0
with:
toolset: 14.29

- name: Build app
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
4 changes: 2 additions & 2 deletions .github/workflows/llm_bench-python.yml
@@ -62,11 +62,11 @@ jobs:
python ./llm_bench/python/benchmark.py -m tiny-random-qwen -d cpu -n 1 -f pt
- name: Test tiny-random-baichuan2 on Linux
run: |
python ./llm_bench/python/convert.py --model_id katuni4ka/tiny-random-baichuan2 --output_dir ./ov_models/tiny-random-baichuan2 --precision FP16
optimum-cli export openvino --model katuni4ka/tiny-random-baichuan2 --trust-remote-code --weight-format fp16 ./ov_models/tiny-random-baichuan2/pytorch/dldt/FP16
python ./llm_bench/python/benchmark.py -m ./ov_models/tiny-random-baichuan2/pytorch/dldt/FP16/ -d cpu -n 1
- name: Test tiny-stable-diffusion on Linux
run: |
python ./llm_bench/python/convert.py --model_id segmind/tiny-sd --output_dir ./ov_models/tiny-sd --precision FP16
optimum-cli export openvino --model segmind/tiny-sd --trust-remote-code --weight-format fp16 ./ov_models/tiny-sd/pytorch/dldt/FP16/
python ./llm_bench/python/benchmark.py -m ./ov_models/tiny-sd/pytorch/dldt/FP16/ -pf ./llm_bench/python/prompts/stable-diffusion.jsonl -d cpu -n 1
- name: WWB Tests
run: |
7 changes: 1 addition & 6 deletions .github/workflows/stable_diffusion_1_5_cpp.yml
@@ -76,7 +76,7 @@ jobs:
./build/samples/cpp/text2image/lora_stable_diffusion ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "curly-haired unicorn in the forest, anime, line" ./models/soulcard.safetensors 0.7
stable_diffusion_1_5_cpp-windows:
runs-on: windows-2019-16-core
runs-on: windows-latest
defaults:
run:
shell: pwsh
@@ -94,11 +94,6 @@ jobs:
mv ./tmp/*/* .
popd
- name: Configure Developer Command Prompt for Microsoft Visual C++
uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0
with:
toolset: 14.29

- name: Build app
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
3 changes: 1 addition & 2 deletions README.md
@@ -40,10 +40,9 @@ Continuous batching functionality is used within OpenVINO Model Server (OVMS) to

# Install optimum-intel to be able to download, convert and optimize LLMs from Hugging Face
# Optimum is not required to run models, only to convert and compress
pip install optimum[openvino]
pip install optimum-intel@git+https://github.com/huggingface/optimum-intel.git

# (Optional) Install (TBD) to be able to download models from Model Scope
#pip install optimum[openvino]
```

## Performing text generation
26 changes: 14 additions & 12 deletions llm_bench/python/benchmark.py
@@ -202,13 +202,14 @@ def run_text_generation(input_text, num, model, tokenizer, args, iter_data_list,
log.warning(f"[{num}] Prompt[{prompt_index}]'s md5 {result_md5_list} "
f"is different from md5 of the {num - 1} iteration {prev_md5}")
llm_bench_utils.metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0])
if num == 1:
# if the device is CPU, throw exception
if args['devices'].lower().startswith('cpu') is True:
if not args.get("use_cb", False):
if num == 1:
# if the device is CPU, throw exception
if args['devices'].lower().startswith('cpu') is True:
assert (result_md5_list == prev_md5)
else:
# throw exception
assert (result_md5_list == prev_md5)
else:
# throw exception
assert (result_md5_list == prev_md5)
else:
llm_bench_utils.metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0])
if bench_hook is not None:
@@ -412,13 +413,14 @@ def run_text_generation_genai_with_stream(input_text, num, model, tokenizer, arg
log.warning(f"[{num}] Prompt[{prompt_index}]'s md5 {result_md5_list} "
f"is different from md5 of the {num - 1} iteration {prev_md5}")
llm_bench_utils.metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0])
if num == 1:
# if the device is CPU, throw exception
if args['devices'].lower().startswith('cpu') is True:
if not args.get("use_cb", False):
if num == 1:
# if the device is CPU, throw exception
if args['devices'].lower().startswith('cpu') is True:
assert (result_md5_list == prev_md5)
else:
# throw exception
assert (result_md5_list == prev_md5)
else:
# throw exception
assert (result_md5_list == prev_md5)
else:
llm_bench_utils.metrics_print.print_generated(num, warm_up=(num == 0), generated=generated_text[0])
streamer.reset()
2 changes: 1 addition & 1 deletion llm_bench/python/requirements.txt
@@ -10,7 +10,7 @@ torch
transformers>=4.40.0
diffusers>=0.22.0
#optimum is in dependency list of optimum-intel
git+https://github.com/huggingface/optimum-intel.git@f34bd61df89f57f61c282c02297980299981ee78#egg=optimum-intel
git+https://github.com/huggingface/optimum-intel.git@main#egg=optimum-intel
git+https://github.com/openvinotoolkit/nncf.git@develop#egg=nncf
packaging
psutil
2 changes: 1 addition & 1 deletion llm_bench/python/who_what_benchmark/requirements.txt
@@ -2,7 +2,7 @@ transformers>=4.35.2
sentence-transformers>=2.2.2
openvino>=2024.3.0
openvino-telemetry
optimum-intel>=1.14
optimum-intel @ git+https://github.com/huggingface/optimum-intel.git
openvino-tokenizers
pandas>=2.0.3
numpy>=1.23.5
Binary file modified samples/cpp/text2image/512x512.bmp
Binary file modified samples/cpp/text2image/baseline.bmp
Binary file modified samples/cpp/text2image/lora.bmp
@@ -21,15 +21,20 @@ int main(int argc, char* argv[]) try {
// 'task' and 'language' parameters are supported for multilingual models only
config.language = "<|en|>";
config.task = "transcribe";
config.return_timestamps = true;

auto streamer = [](std::string word) {
std::cout << word;
return false;
};

pipeline.generate(raw_speech, config, streamer);
auto result = pipeline.generate(raw_speech, config, streamer);

std::cout << std::endl;
std::cout << "\n";

for (auto& chunk : *result.chunks) {
std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts << "] text: " << chunk.text << "\n";
}
} catch (const std::exception& error) {
try {
std::cerr << error.what() << '\n';
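The loop added above dereferences result.chunks unconditionally. As a minimal sketch (not part of this commit), the same printout can be guarded, assuming chunks is optional-like (it is dereferenced with '*' above) and is only populated when config.return_timestamps is set:

// Guarded variant of the chunk printout shown in the diff above (sketch only).
if (result.chunks) {
    for (const auto& chunk : *result.chunks) {
        std::cout << "timestamps: [" << chunk.start_ts << ", " << chunk.end_ts
                  << "] text: " << chunk.text << "\n";
    }
}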
Binary file modified samples/generation.gif
@@ -16,7 +16,7 @@ pip install --upgrade-strategy eager -r ../../requirements.txt
## Run:
[This image](https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11) can be used as a sample image.

`vlm_chat_sample.py ./miniCPM-V-2_6/ 319483352-d5fbbd1a-d484-415c-88cb-9986625b7b11.jpg`
`visual_language_chat.py ./miniCPM-V-2_6/ 319483352-d5fbbd1a-d484-415c-88cb-9986625b7b11.jpg`


Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. # TODO: examples of larger models
@@ -54,12 +54,16 @@ def main():
config.max_new_tokens = 100

pipe.start_chat()
prompt = input('question:\n')
pipe(prompt, image=image, generation_config=config, streamer=streamer)
print('\n----------')

while True:
try:
prompt = input('question:\n')
except EOFError:
break
pipe(prompt, image=image, generation_config=config, streamer=streamer)
pipe(prompt, generation_config=config, streamer=streamer)
print('\n----------')
pipe.finish_chat()

@@ -26,15 +26,19 @@ def streamer(word: str) -> bool:
print(word, end="")
return False

pipe.generate(
result = pipe.generate(
raw_speech,
max_new_tokens=100,
# 'task' and 'language' parameters are supported for multilingual models only
language="<|en|>",
task="transcribe",
return_timestamps=True,
streamer=streamer,
)

for chunk in result.chunks:
print(f"timestamps: [{chunk.start_ts}, {chunk.end_ts}] text: {chunk.text}")

print()


3 changes: 2 additions & 1 deletion samples/requirements.txt
@@ -1,5 +1,6 @@
--extra-index-url https://download.pytorch.org/whl/cpu
optimum[openvino]==1.22.0
optimum-intel @ git+https://github.com/huggingface/optimum-intel.git
numpy<2.0.0; sys_platform == 'darwin'
einops==0.8.0 # For Qwen
transformers_stream_generator==0.0.5 # For Qwen
diffusers==0.30.3
4 changes: 2 additions & 2 deletions src/cpp/CMakeLists.txt
@@ -15,8 +15,8 @@ endif()

function(ov_genai_build_jinja2cpp)
FetchContent_Declare(jinja2cpp
URL https://github.com/jinja2cpp/Jinja2Cpp/archive/b32fbde7d98d13c34784c332c4a24a6f92c76e38.tar.gz
URL_HASH SHA256=7cc25ddbc438a5c874d404e100b4eccd8a331c195417f5487c48aebcf4b9e7fb)
URL https://github.com/ilya-lavrenov/Jinja2Cpp/archive/04073b62ec950eab6cdcb2c563c1c9bb7698f1ea.tar.gz
URL_HASH SHA256=9f2a346eec91a6a381fe8fd631e9c952fe7087882bbca7f0e4e42e75e680fc1b)

FetchContent_GetProperties(jinja2cpp)
if(NOT jinja2cpp_POPULATED)
8 changes: 8 additions & 0 deletions src/cpp/include/openvino/genai/processor_config.hpp
@@ -34,6 +34,14 @@ class OPENVINO_GENAI_EXPORTS ProcessorConfig {
/// Applied after norm_mean.
/// llava calls it image_std.
std::array<float, 3> norm_std{1.0f, 1.0f, 1.0f};

// llava specific config params
std::array<float, 3> image_mean{0.0f, 0.0f, 0.0f};
std::array<float, 3> image_std{1.0f, 1.0f, 1.0f};
size_t crop_size_height = 336;
size_t crop_size_width = 336;
size_t size_shortest_edge = 336;

/// @brief Default constructor
ProcessorConfig() = default;
/// @brief Construct ProcessorConfig from values in json_path.
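The new llava-specific members appear to mirror the keys of a Hugging Face llava preprocessor_config.json (image_mean, image_std, crop_size, size.shortest_edge). Below is a minimal sketch of setting them from code, assuming the members are publicly assignable like the defaults declared above; the CLIP-style normalization numbers are illustrative assumptions, not values taken from this commit.

#include "openvino/genai/processor_config.hpp"

int main() {
    ov::genai::ProcessorConfig config;                            // defaults as declared above
    config.image_mean = {0.48145466f, 0.4578275f, 0.40821073f};  // assumed CLIP-style means
    config.image_std  = {0.26862954f, 0.26130258f, 0.27577711f}; // assumed CLIP-style stds
    config.crop_size_height = 336;
    config.crop_size_width  = 336;
    config.size_shortest_edge = 336;
    return 0;
}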
17 changes: 15 additions & 2 deletions src/cpp/include/openvino/genai/vision_encoder.hpp
@@ -5,6 +5,7 @@

#include "openvino/genai/processor_config.hpp"
#include <openvino/openvino.hpp>
#include "vlm_model_type.hpp"

namespace ov::genai {
/// @brief A pair describing image size.
@@ -41,8 +42,10 @@ struct EncodedImage {
/// ov::InferRequest and configured by ProcessorConfig.
class OPENVINO_GENAI_EXPORTS VisionEncoder {
public:
/// @brief A enum denoting model type.
VLMModelType model_type;
/// @brief A model for image encoding.
ov::InferRequest m_encoder;
ov::InferRequest m_vision_encoder;
/// @brief A config to follow.
ProcessorConfig m_processor_config;

@@ -52,7 +55,7 @@ class OPENVINO_GENAI_EXPORTS VisionEncoder {
explicit VisionEncoder(
const ov::InferRequest& encoder,
const ProcessorConfig& processor_config=ProcessorConfig{}
) : m_encoder{encoder}, m_processor_config{processor_config} {}
) : m_vision_encoder{encoder}, m_processor_config{processor_config} {}

/// @brief Construct the encoder from model_dir.
/// @param model_dir A folder containing openvino_embedding.xml and
@@ -63,6 +66,7 @@ class OPENVINO_GENAI_EXPORTS VisionEncoder {
/// @param core ov::Core to be used to compile the model.
explicit VisionEncoder(
const std::filesystem::path& model_dir,
const VLMModelType model_type,
const std::string& device="CPU",
const ov::AnyMap device_config={},
ov::Core core=ov::Core{}
@@ -117,5 +121,14 @@ class OPENVINO_GENAI_EXPORTS VisionEncoder {
image, AnyMap{std::forward<Properties>(properties)...}
);
}

private:
EncodedImage encode_minicpm(
const ov::Tensor& image, const ProcessorConfig& config
);

EncodedImage encode_llava(
const ov::Tensor& image, const ProcessorConfig& config
);
};
}
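For context, a rough usage sketch of the updated constructor, based only on the declarations above. The VLMModelType enumerator name, the plain encode(image) overload, and the input tensor layout are assumptions rather than code from this commit.

#include <filesystem>
#include "openvino/genai/vision_encoder.hpp"

int main() {
    // model_dir, model_type, device: the signature declared above.
    ov::genai::VisionEncoder encoder{
        std::filesystem::path{"./miniCPM-V-2_6/"},
        ov::genai::VLMModelType::MINICPM,  // assumed enumerator from vlm_model_type.hpp
        "CPU"
    };
    // A dummy u8 image tensor; the expected layout is an assumption here.
    ov::Tensor image(ov::element::u8, ov::Shape{1, 448, 448, 3});
    ov::genai::EncodedImage encoded = encoder.encode(image);
    (void)encoded;  // image features consumed downstream by the VLM pipeline
    return 0;
}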
3 changes: 3 additions & 0 deletions src/cpp/include/openvino/genai/vlm_config.hpp
@@ -6,12 +6,15 @@
#include "openvino/genai/visibility.hpp"
#include <openvino/runtime/properties.hpp>
#include <filesystem>
#include "vlm_model_type.hpp"

namespace ov::genai {
/// @brief A Configuration class passed to VLMPipeline and used to
/// change VLMPipeline's behavior. Corresponds to config.json.
class OPENVINO_GENAI_EXPORTS VLMConfig {
public:
/// @brief A enum denoting model type.
VLMModelType model_type;
/// @brief A size of a single embedding returned by a resampler.
/// Used to initialize positional embeddings for resampler input.
size_t hidden_size = 2304;
Expand Down
