openvinotoolkit · Wovchena · Jul 10, 2024 · Jul 2, 2024 · Jul 2, 2024 · Jul 2, 2024
diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml
@@ -14,6 +14,7 @@ concurrency:
 
 env:
   l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15945-a349dc82f9a/l_openvino_toolkit_ubuntu20_2024.3.0.dev20240708_x86_64.tgz
+  m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15945-a349dc82f9a/m_openvino_toolkit_macos_12_6_2024.3.0.dev20240708_x86_64.tgz
   w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.3.0-15945-a349dc82f9a/w_openvino_toolkit_windows_2024.3.0.dev20240708_x86_64.zip
 jobs:
   cpp-multinomial-greedy_causal_lm-ubuntu:
@@ -584,3 +585,119 @@ jobs:
           timeout 30s ./samples/python/chat_sample/chat_sample.py ./TinyLlama-1.1B-Chat-v1.0/ < input.txt > ./pred2.txt
           diff pred2.txt ref.txt
           echo "Chat sample python" passed
+
+  cpp-continuous-batching-ubuntu:  # builds the C++ code, then runs gtests, the accuracy sample and the throughput benchmark
+    runs-on: ubuntu-20.04-8-cores
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - uses: actions/setup-python@v4
+        with:
+          python-version: 3.8
+      - name: Install OpenVINO  # unpacks the archive from env.l_ov_link into ./ov/ and installs its system dependencies
+        run: |
+          mkdir ./ov/
+          curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
+          sudo ./ov/install_dependencies/install_openvino_dependencies.sh
+      - name: Download, convert and build  # installs Python deps, exports TinyLlama-1.1B-Chat-v1.0 with optimum-cli, builds with CMake
+        run: |
+          source ./ov/setupvars.sh
+          python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
+          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
+          optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
+          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
+          cmake --build ./build/ --config Release -j
+      - name: Run gtests
+        run: |
+          source ./ov/setupvars.sh
+          ./build/tests/cpp/tests_continuous_batching
+      - name: Run accuracy_sample  # the run is capped at 50s by timeout
+        run: |
+          source ./ov/setupvars.sh
+          timeout 50s ./build/samples/cpp/continuous_batching_accuracy/continuous_batching_accuracy -m ./TinyLlama-1.1B-Chat-v1.0/ -n 5
+      - name: Run throughput_benchmark  # downloads the ShareGPT dataset first; the benchmark is capped at 200s by timeout
+        run: |
+          wget -q https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
+          source ./ov/setupvars.sh
+          timeout 200s ./build/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark -n 10 --dynamic_split_fuse -m ./TinyLlama-1.1B-Chat-v1.0/ --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1
+
+
+  cpp-continuous-batching-windows:  # builds the C++ code, then runs gtests, the accuracy sample and the throughput benchmark
+    runs-on: windows-latest
+    defaults:
+      run:
+        shell: cmd  # steps run under cmd unless they set their own shell
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - uses: actions/setup-python@v4
+        with:
+          python-version: 3.8
+      - name: Install OpenVINO  # downloads the archive from env.w_ov_link and flattens its top-level directory into ./ov
+        run: |
+          curl --output ov.zip ${{ env.w_ov_link }}
+          unzip -d ov ov.zip
+          dirs=(ov/*) && mv ov/*/* ov && rmdir "${dirs[@]}"
+        shell: bash  # the script above uses bash array syntax, so this step overrides the cmd default
+      - name: Install dependencies and build  # installs Python deps, exports TinyLlama-1.1B-Chat-v1.0 with optimum-cli, builds with CMake
+        run: |
+          call .\ov\setupvars.bat
+          python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
+          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
+          optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
+          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
+          cmake --build ./build/ --config Release -j
+      - name: Run gtests  # .\build\openvino_genai\ is prepended to PATH first (presumably so the built DLLs are found - confirm)
+        run: |
+          set PATH=.\build\openvino_genai\;%PATH%
+          call .\ov\setupvars.bat
+          .\build\tests\cpp\Release\tests_continuous_batching.exe
+      - name: Run accuracy_sample
+        run: |
+          set PATH=.\build\openvino_genai\;%PATH%
+          call .\ov\setupvars.bat
+          .\build\samples\cpp\continuous_batching_accuracy\Release\continuous_batching_accuracy.exe -m .\TinyLlama-1.1B-Chat-v1.0\ -n 5
+      - name: Run throughput_benchmark  # downloads the ShareGPT dataset with curl before running the benchmark
+        run: |
+          curl -o .\ShareGPT_V3_unfiltered_cleaned_split.json -s -L "https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"
+          set PATH=.\build\openvino_genai\;%PATH%
+          call .\ov\setupvars.bat
+          .\build\samples\cpp\continuous_batching_benchmark\Release\continuous_batching_benchmark.exe -n 2 --dynamic_split_fuse -m .\TinyLlama-1.1B-Chat-v1.0\ --dataset .\ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1
+
+  cpp-continuous-batching-macos:  # builds the C++ code, then runs gtests, the accuracy sample and the throughput benchmark
+    runs-on: macos-12
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - uses: actions/setup-python@v4
+        with:
+          python-version: 3.8
+      - name: Install OpenVINO  # unpacks the archive from env.m_ov_link into ./ov/ and installs coreutils and scons with Homebrew
+        run: |
+          mkdir ./ov/
+          curl ${{ env.m_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
+          brew install coreutils scons
+      - name: Download, convert and build  # installs Python deps, exports TinyLlama-1.1B-Chat-v1.0 with optimum-cli, builds with CMake
+        run: |
+          source ./ov/setupvars.sh
+          python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
+          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
+          optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0
+          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
+          cmake --build ./build/ --config Release -j
+      - name: Run gtests
+        run: |
+          source ./ov/setupvars.sh
+          ./build/tests/cpp/tests_continuous_batching
+      - name: Run accuracy_sample  # the run is capped at 120s by timeout
+        run: |
+          source ./ov/setupvars.sh
+          timeout 120s ./build/samples/cpp/continuous_batching_accuracy/continuous_batching_accuracy -m ./TinyLlama-1.1B-Chat-v1.0/ -n 5
+      - name: Run throughput_benchmark  # downloads the ShareGPT dataset first; this run has no timeout
+        run: |
+          wget -q https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
+          source ./ov/setupvars.sh
+          ./build/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark -n 5 --dynamic_split_fuse -m ./TinyLlama-1.1B-Chat-v1.0/ --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1
diff --git a/.github/workflows/genai_python_lib.yml b/.github/workflows/genai_python_lib.yml
@@ -84,3 +84,90 @@ jobs:
       - run: set "PYTHONPATH=./build/" && call ./ov/setupvars.bat && python -m pytest ./tests/python_tests/test_generate_api.py -m precommit
       - run: call ./ov/setupvars.bat && python -m pip install . --verbose
       - run: python -m pytest ./tests/python_tests/test_generate_api.py -m precommit
+
+  continuous_batching_python_lib_ubuntu:  # tests against ./build/ via PYTHONPATH first, then pip-installs the package and re-runs test_preemption
+    # A tokenizers' dependency fails to compile on ubuntu-20 in CentOS7 env.
+    runs-on: ubuntu-22.04
+    env:
+      # A tokenizers' dependency fails to compile with Ninja in CentOS7 env.
+      CMAKE_GENERATOR: Unix Makefiles
+      CMAKE_BUILD_PARALLEL_LEVEL: null
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - uses: actions/setup-python@v4
+        with:
+          python-version: 3.8
+      # Install the CentOS7 OpenVINO archive instead of the Ubuntu one to match PyPI distribution ABI.
+      - name: Install OpenVINO
+        run: |
+          mkdir ./ov/
+          curl ${{ env.l_ov_centos_link }} | tar --directory ./ov/ --strip-components 1 -xz
+          sudo ./ov/install_dependencies/install_openvino_dependencies.sh
+      - name: Install dependencies and build  # installs the tokenizers and test requirements, then builds with CMake
+        run: |
+          source ./ov/setupvars.sh
+          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --upgrade-strategy eager
+          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
+          cmake --build ./build/ --config Release -j
+      - run: source ./ov/setupvars.sh && PYTHONPATH=./build/:$PYTHONPATH python -m pytest ./tests/python_tests/test_sampling.py -m precommit
+      - run: source ./ov/setupvars.sh && PYTHONPATH=./build/:$PYTHONPATH python -m pytest ./tests/python_tests/test_preemption.py -m precommit
+      - run: source ./ov/setupvars.sh && python -m pip install .
+      - run: python -m pytest ./tests/python_tests/test_preemption.py -m precommit
+
+  continuous_batching_python_lib_windows:  # tests against ./build/ via PYTHONPATH first, then pip-installs the package and re-runs test_preemption
+    runs-on: windows-latest
+    defaults:
+      run:
+        shell: cmd  # steps run under cmd unless they set their own shell
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - uses: actions/setup-python@v4
+        with:
+          python-version: 3.8
+
+      - name: Install OpenVINO  # downloads the archive from env.w_ov_link and flattens its top-level directory into ./ov
+        run: |
+          curl --output ov.zip ${{ env.w_ov_link }}
+          unzip -d ov ov.zip
+          dirs=(ov/*) && mv ov/*/* ov && rmdir "${dirs[@]}"
+        shell: bash  # the script above uses bash array syntax, so this step overrides the cmd default
+      - name: Install dependencies and build  # installs the tokenizers and test requirements, then builds with CMake
+        run: |
+          call .\ov\setupvars.bat
+          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --upgrade-strategy eager
+          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
+          cmake --build ./build/ --config Release -j
+      - run: set "PYTHONPATH=./build/" && call ./ov/setupvars.bat && python -m pytest ./tests/python_tests/test_sampling.py -m precommit
+      - run: set "PYTHONPATH=./build/" && call ./ov/setupvars.bat && python -m pytest ./tests/python_tests/test_preemption.py -m precommit
+      - run: call ./ov/setupvars.bat && python -m pip install . --verbose
+      - run: python -m pytest ./tests/python_tests/test_preemption.py -m precommit
+
+
+  continuous_batching_python_lib_macos:  # tests against ./build/ via PYTHONPATH first, then pip-installs the package and re-runs test_preemption
+    runs-on: macos-12
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+      - uses: actions/setup-python@v4
+        with:
+          python-version: 3.8
+      - name: Install OpenVINO  # unpacks the archive from env.m_ov_link into ./ov/ and installs coreutils and scons with Homebrew
+        run: |
+          mkdir ./ov/
+          curl ${{ env.m_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz
+          brew install coreutils scons
+      - name: Download, convert and build  # installs the tokenizers and test requirements, then builds with CMake
+        run: |
+          source ./ov/setupvars.sh
+          python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --upgrade-strategy eager
+          cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/
+          cmake --build ./build/ --config Release -j
+      - run: source ./ov/setupvars.sh && PYTHONPATH=./build/:$PYTHONPATH python -m pytest ./tests/python_tests/test_sampling.py -m precommit
+      - run: source ./ov/setupvars.sh && PYTHONPATH=./build/:$PYTHONPATH python -m pytest ./tests/python_tests/test_preemption.py -m precommit
+      - run: source ./ov/setupvars.sh && python -m pip install .
+      - run: python -m pytest ./tests/python_tests/test_preemption.py -m precommit
diff --git a/.gitignore b/.gitignore
@@ -34,3 +34,4 @@ CMakeUserPresets.json
 *.?env*
 *.pyc
 __pycache__
+.py-build-cmake_cache
diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
@@ -4,6 +4,8 @@
 
 add_subdirectory(cpp/beam_search_causal_lm)
 add_subdirectory(cpp/chat_sample)
+add_subdirectory(cpp/continuous_batching_accuracy)
+add_subdirectory(cpp/continuous_batching_benchmark)
 add_subdirectory(cpp/greedy_causal_lm)
 add_subdirectory(cpp/multinomial_causal_lm)
 add_subdirectory(cpp/prompt_lookup_decoding_lm)

diff --git a/samples/cpp/continuous_batching_accuracy/continuous_batching_accuracy.cpp b/samples/cpp/continuous_batching_accuracy/continuous_batching_accuracy.cpp
@@ -67,17 +67,16 @@ int main(int argc, char* argv[]) try {
 
     // Perform the inference
 
-    ov::genai::SchedulerConfig scheduler_config {
-        // batch size
-        .max_num_batched_tokens = 32,
-        // cache params
-        .num_kv_blocks = 364,
-        .block_size = 32,
-        // mode - vLLM or dynamic_split_fuse
-        .dynamic_split_fuse = dynamic_split_fuse,
-        // vLLM specific params
-        .max_num_seqs = 2,
-    };
+    ov::genai::SchedulerConfig scheduler_config;
+    // batch size
+    scheduler_config.max_num_batched_tokens = 32;
+    // cache params
+    scheduler_config.num_kv_blocks = 364;
+    scheduler_config.block_size = 32;
+    // mode - vLLM or dynamic_split_fuse
+    scheduler_config.dynamic_split_fuse = dynamic_split_fuse;
+    // vLLM specific params
+    scheduler_config.max_num_seqs = 2;
 
     ov::genai::ContinuousBatchingPipeline pipe(models_path, scheduler_config);
     std::vector<ov::genai::GenerationResult> generation_results = pipe.generate(prompts, sampling_params);

diff --git a/samples/cpp/continuous_batching_benchmark/CMakeLists.txt b/samples/cpp/continuous_batching_benchmark/CMakeLists.txt
@@ -24,4 +24,3 @@ find_package(Threads REQUIRED)
 set(TARGET_NAME continuous_batching_benchmark)
 add_executable(${TARGET_NAME} ${TARGET_NAME}.cpp)
 target_link_libraries(${TARGET_NAME} PRIVATE openvino::genai nlohmann_json::nlohmann_json cxxopts::cxxopts Threads::Threads)
-target_compile_features(${TARGET_NAME} PRIVATE cxx_std_20)
diff --git a/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp b/samples/cpp/continuous_batching_benchmark/continuous_batching_benchmark.cpp
@@ -466,13 +466,12 @@ int main(int argc, char* argv[]) try {
     Dataset dataset = filtered_dataset(models_path, dataset_path, num_prompts, max_input_len, max_output_len);
 
     // Perform the first inference
-    ov::genai::SchedulerConfig scheduler_config {
-        .max_num_batched_tokens = max_batch_size,
-        .cache_size = cache_size,
-        .block_size = 32,
-        .dynamic_split_fuse = dynamic_split_fuse,
-        .max_num_seqs = 256, // not used if dynamic_split_fuse=True
-    };
+    ov::genai::SchedulerConfig scheduler_config;  // default-constructed, then filled in field by field
+    scheduler_config.max_num_batched_tokens = max_batch_size;
+    scheduler_config.cache_size = cache_size;
+    scheduler_config.block_size = 32;
+    scheduler_config.dynamic_split_fuse = dynamic_split_fuse;
+    scheduler_config.max_num_seqs = 256; // not used if dynamic_split_fuse=True
 
     std::cout << "Benchmarking parameters: " << std::endl;
     std::cout << "\tMax number of batched tokens: " << scheduler_config.max_num_batched_tokens << std::endl;

diff --git a/tests/cpp/generate_config.cpp b/tests/cpp/generate_config.cpp
@@ -7,20 +7,23 @@
 
 TEST(GenerationConfigTest, invalid_temperature) {
     ov::genai::GenerationConfig config;
+    config.max_new_tokens = 20;
     config.temperature = -0.1;
     config.do_sample = true;
     EXPECT_THROW(config.validate(), ov::Exception);
 }
 
 TEST(GenerationConfigTest, valid_temperature) {
     ov::genai::GenerationConfig config;
+    config.max_new_tokens = 20;
     config.do_sample = true;
     config.temperature = 0.1;
     EXPECT_NO_THROW(config.validate());
 }
 
 TEST(GenerationConfigTest, invalid_top_p) {
     ov::genai::GenerationConfig config;
+    config.max_new_tokens = 20;
     config.do_sample = true;
     config.top_p = -0.5;
     EXPECT_THROW(config.validate(), ov::Exception);
@@ -30,13 +33,15 @@ TEST(GenerationConfigTest, invalid_top_p) {
 
 TEST(GenerationConfigTest, valid_top_p) {
     ov::genai::GenerationConfig config;
+    config.max_new_tokens = 20;
     config.do_sample = true;
     config.top_p = 0.1;
     EXPECT_NO_THROW(config.validate());
 }
 
 TEST(GenerationConfigTest, invalid_repeatition_penalty) {
     ov::genai::GenerationConfig config;
+    config.max_new_tokens = 20;
     config.do_sample = true;
     config.repetition_penalty = -3.0;
     EXPECT_THROW(config.validate(), ov::Exception);
@@ -46,15 +51,17 @@ TEST(GenerationConfigTest, invalid_repeatition_penalty) {
 
 TEST(GenerationConfigTest, valid_repeatition_penalty) {
     ov::genai::GenerationConfig config;
+    config.max_new_tokens = 20;
     config.do_sample = true;
     config.repetition_penalty = 1.8;
     EXPECT_NO_THROW(config.validate());
-    config.repetition_penalty = 0.0;
+    config.repetition_penalty = 0.1;
     EXPECT_NO_THROW(config.validate());
 }
 
 TEST(GenerationConfigTest, invalid_presence_penalty) {
     ov::genai::GenerationConfig config;
+    config.max_new_tokens = 20;
     config.do_sample = true;
     config.presence_penalty = 3.0;
     EXPECT_THROW(config.validate(), ov::Exception);
@@ -64,6 +71,7 @@ TEST(GenerationConfigTest, invalid_presence_penalty) {
 
 TEST(GenerationConfigTest, valid_presence_penalty) {
     ov::genai::GenerationConfig config;
+    config.max_new_tokens = 20;
     config.do_sample = true;
     config.presence_penalty = 1.8;
     EXPECT_NO_THROW(config.validate());
@@ -73,6 +81,7 @@ TEST(GenerationConfigTest, valid_presence_penalty) {
 
 TEST(GenerationConfigTest, invalid_frequency_penalty) {
     ov::genai::GenerationConfig config;
+    config.max_new_tokens = 20;
     config.do_sample = true;
     config.frequency_penalty = 3.0;
     EXPECT_THROW(config.validate(), ov::Exception);
@@ -82,6 +91,7 @@ TEST(GenerationConfigTest, invalid_frequency_penalty) {
 
 TEST(GenerationConfigTest, valid_frequency_penalty) {
     ov::genai::GenerationConfig config;
+    config.max_new_tokens = 20;
     config.do_sample = true;
     config.frequency_penalty = 1.8;
     EXPECT_NO_THROW(config.validate());
-Original file line number
+Diff line change
@@ Expand Up / @@ -34,3 +34,4 @@ CMakeUserPresets.json @@
     *.?env*
     *.pyc
     __pycache__
+    .py-build-cmake_cache