Align pipelines (#1031)
- Added template constructors for pipelines to pass an arbitrary number of properties
- Dropped the `_draft_model` workaround and kept only `draft_model`
- Aligned all pipelines to accept `std::filesystem::path` as the models path
  - Added `Path` support to Python
- Dropped the Whisper constructor that accepts a `Tokenizer`
- Dropped `CPU` as the default device; it conflicts with OpenVINO, where the default device is `AUTO`
- Aligned argument names: `models_path` and `properties` (see the usage sketch below)
ilya-lavrenov authored Oct 22, 2024
1 parent eb80e10 commit 7cfedba
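The bullets above can be summarized in code. The following is an illustrative sketch only, not part of the commit: the constructor calls mirror the updated samples in the diff below, while the header paths, the prompt, and the `SchedulerConfig` type name are assumptions.

```cpp
// Illustrative sketch of the aligned pipeline API after this change.
// Header paths are assumed; constructor signatures follow the updated samples.
#include "openvino/genai/llm_pipeline.hpp"

#include <filesystem>
#include <iostream>

int main(int argc, char* argv[]) {
    // All pipelines now take std::filesystem::path as the models path.
    std::filesystem::path models_path = argv[1];

    // The device is passed explicitly; "CPU" is no longer an implicit default.
    ov::genai::LLMPipeline pipe(models_path, "CPU");
    std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(100)) << '\n';

    // An arbitrary number of properties can be forwarded through the constructor,
    // e.g. a scheduler config, as in the continuous-batching example in src/README.md.
    ov::genai::SchedulerConfig scheduler_config;  // type name assumed; see the src/README.md diff below
    scheduler_config.cache_size = 1;              // minimal possible KV cache size in GB
    ov::genai::LLMPipeline cb_pipe(models_path, "CPU", ov::genai::scheduler_config(scheduler_config));
}
```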
Showing 80 changed files with 722 additions and 727 deletions.
22 changes: 12 additions & 10 deletions README.md
@@ -86,8 +86,8 @@ Code below requires installation of C++ compatible package (see [here](https://d
#include <iostream>

int main(int argc, char* argv[]) {
- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");
std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(100)) << '\n';
}
```
@@ -129,8 +129,8 @@ Code below requires installation of C++ compatible package (see [here](https://d
#include <iostream>

int main(int argc, char* argv[]) {
- std::string model_path = argv[1];
- ov::genai::VLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::VLMPipeline pipe(models_path, "CPU");
ov::Tensor rgb = utils::load_image(argv[2]);
std::cout << pipe.generate(
prompt,
@@ -244,9 +244,10 @@ def main():
parser.add_argument("wav_file_path")
args = parser.parse_args()

- raw_speech = read_wav(args.wav_file_path)
+ device = 'CPU' # GPU can be used as well
+ pipe = openvino_genai.WhisperPipeline(args.model_dir, device)

- pipe = openvino_genai.WhisperPipeline(args.model_dir)
+ raw_speech = read_wav(args.wav_file_path)

def streamer(word: str) -> bool:
print(word, end="")
@@ -275,14 +276,15 @@ NOTE: This sample is a simplified version of the full sample that is available [

int main(int argc, char* argv[]) try {

- std::string model_path = argv[1];
+ std::filesystem::path models_path = argv[1];
std::string wav_file_path = argv[2];
+ std::string device = "CPU"; // GPU can be used as well

- ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path);
+ ov::genai::WhisperPipeline pipeline(models_path, device);

- ov::genai::WhisperPipeline pipeline{model_path};
+ ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path);

- ov::genai::WhisperGenerationConfig config{model_path + "/generation_config.json"};
+ ov::genai::WhisperGenerationConfig config(models_path / "generation_config.json");
config.max_new_tokens = 100;
// 'task' and 'language' parameters are supported for multilingual models only
config.language = "<|en|>";
4 changes: 2 additions & 2 deletions samples/cpp/beam_search_causal_lm/beam_search_causal_lm.cpp
@@ -8,10 +8,10 @@ int main(int argc, char* argv[]) try {
throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> '<PROMPT 1>' ['<PROMPT 2>' ...]");
}
auto prompts = std::vector<std::string>(argv + 2, argv + argc);
- std::string model_path = argv[1];
+ std::string models_path = argv[1];

std::string device = "CPU"; // GPU can be used as well
- ov::genai::LLMPipeline pipe(model_path, device);
+ ov::genai::LLMPipeline pipe(models_path, device);

ov::genai::GenerationConfig config;
config.max_new_tokens = 20;
4 changes: 2 additions & 2 deletions samples/cpp/benchmark_genai/benchmark_genai.cpp
@@ -31,15 +31,15 @@ int main(int argc, char* argv[]) try {
}

std::string prompt = result["prompt"].as<std::string>();
- const std::string model_path = result["model"].as<std::string>();
+ const std::string models_path = result["model"].as<std::string>();
std::string device = result["device"].as<std::string>();
size_t num_warmup = result["num_warmup"].as<size_t>();
size_t num_iter = result["num_iter"].as<size_t>();

ov::genai::GenerationConfig config;
config.max_new_tokens = result["max_new_tokens"].as<size_t>();

- ov::genai::LLMPipeline pipe(model_path, device);
+ ov::genai::LLMPipeline pipe(models_path, device);

for (size_t i = 0; i < num_warmup; i++)
pipe.generate(prompt, config);
4 changes: 2 additions & 2 deletions samples/cpp/chat_sample/chat_sample.cpp
@@ -8,10 +8,10 @@ int main(int argc, char* argv[]) try {
throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR>");
}
std::string prompt;
- std::string model_path = argv[1];
+ std::string models_path = argv[1];

std::string device = "CPU"; // GPU, NPU can be used as well
- ov::genai::LLMPipeline pipe(model_path, device);
+ ov::genai::LLMPipeline pipe(models_path, device);

ov::genai::GenerationConfig config;
config.max_new_tokens = 100;
4 changes: 2 additions & 2 deletions samples/cpp/greedy_causal_lm/greedy_causal_lm.cpp
@@ -7,11 +7,11 @@ int main(int argc, char* argv[]) try {
if (3 > argc)
throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> \"<PROMPT>\"");

- std::string model_path = argv[1];
+ std::string models_path = argv[1];
std::string prompt = argv[2];
std::string device = "CPU"; // GPU can be used as well

- ov::genai::LLMPipeline pipe(model_path, device);
+ ov::genai::LLMPipeline pipe(models_path, device);
ov::genai::GenerationConfig config;
config.max_new_tokens = 100;
std::string result = pipe.generate(prompt, config);
4 changes: 2 additions & 2 deletions samples/cpp/lora_greedy_causal_lm/lora_greedy_causal_lm.cpp
@@ -7,15 +7,15 @@ int main(int argc, char* argv[]) try {
if (4 > argc)
throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> <ADAPTER_SAFETENSORS_FILE> \"<PROMPT>\"");

- std::string model_path = argv[1];
+ std::string models_path = argv[1];
std::string adapter_path = argv[2];
std::string prompt = argv[3];
std::string device = "CPU"; // GPU can be used as well

using namespace ov::genai;

Adapter adapter(adapter_path);
- LLMPipeline pipe(model_path, device, adapters(adapter)); // register all required adapters here
+ LLMPipeline pipe(models_path, device, adapters(adapter)); // register all required adapters here

std::cout << "Generate with LoRA adapter and alpha set to 0.75:" << std::endl;
std::cout << pipe.generate(prompt, max_new_tokens(100), adapters(adapter, 0.75)) << std::endl;
4 changes: 2 additions & 2 deletions samples/cpp/multinomial_causal_lm/multinomial_causal_lm.cpp
@@ -8,11 +8,11 @@ int main(int argc, char* argv[]) try {
throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> '<PROMPT>'");
}

- std::string model_path = argv[1];
+ std::string models_path = argv[1];
std::string prompt = argv[2];

std::string device = "CPU"; // GPU can be used as well
- ov::genai::LLMPipeline pipe(model_path, device);
+ ov::genai::LLMPipeline pipe(models_path, device);

ov::genai::GenerationConfig config;
config.max_new_tokens = 100;
@@ -9,14 +9,15 @@ int main(int argc, char* argv[]) try {
throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> \"<WAV_FILE_PATH>\"");
}

- std::string model_path = argv[1];
+ std::filesystem::path models_path = argv[1];
std::string wav_file_path = argv[2];
+ std::string device = "CPU"; // GPU can be used as well

- ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path);
+ ov::genai::WhisperPipeline pipeline(models_path, device);

- ov::genai::WhisperPipeline pipeline{model_path};
+ ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path);

- ov::genai::WhisperGenerationConfig config{model_path + "/generation_config.json"};
+ ov::genai::WhisperGenerationConfig config(models_path / "generation_config.json");
config.max_new_tokens = 100;
// 'task' and 'language' parameters are supported for multilingual models only
config.language = "<|en|>";
4 changes: 2 additions & 2 deletions samples/python/benchmark_genai/benchmark_genai.py
@@ -18,15 +18,15 @@ def main():
# Perf metrics is stored in DecodedResults.
# In order to get DecodedResults instead of a string input should be a list.
prompt = [args.prompt]
- model_path = args.model
+ models_path = args.model
device = args.device
num_warmup = args.num_warmup
num_iter = args.num_iter

config = ov_genai.GenerationConfig()
config.max_new_tokens = args.max_new_tokens

- pipe = ov_genai.LLMPipeline(model_path, device)
+ pipe = ov_genai.LLMPipeline(models_path, device)

for _ in range(num_warmup):
pipe.generate(prompt, config)
@@ -18,8 +18,6 @@ def main():
parser.add_argument("wav_file_path")
args = parser.parse_args()

- raw_speech = read_wav(args.wav_file_path)

config = openvino_genai.WhisperGenerationConfig(
args.model_dir + "/generation_config.json"
)
@@ -29,12 +27,14 @@
config.task = "transcribe"
config.return_timestamps = True

- pipe = openvino_genai.WhisperPipeline(args.model_dir)
+ device = 'CPU' # GPU can be used as well
+ pipe = openvino_genai.WhisperPipeline(args.model_dir, device)

def streamer(word: str) -> bool:
print(word, end="")
return False

+ raw_speech = read_wav(args.wav_file_path)
result = pipe.generate(raw_speech, config, streamer)

print()
42 changes: 21 additions & 21 deletions src/README.md
@@ -55,14 +55,14 @@ If you want to try OpenVINO GenAI with different dependencies versions (**not**
A simple example:
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe = ov_genai.LLMPipeline(models_path, "CPU")
print(pipe.generate("The Sun is yellow because", max_new_tokens=100))
```

Calling generate with custom generation config parameters, e.g. config for grouped beam search:
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe = ov_genai.LLMPipeline(models_path, "CPU")
result = pipe.generate("The Sun is yellow because", max_new_tokens=100, num_beam_groups=3, num_beams=15, diversity_penalty=1.5)
print(result)
@@ -76,7 +76,7 @@ output:
A simple chat in Python:
```python
import openvino_genai as ov_genai
- pipe = ov_genai.LLMPipeline(model_path)
+ pipe = ov_genai.LLMPipeline(models_path)
config = {'max_new_tokens': 100, 'num_beam_groups': 3, 'num_beams': 15, 'diversity_penalty': 1.5}
pipe.set_generation_config(config)
@@ -101,8 +101,8 @@ A simple example:
#include <iostream>

int main(int argc, char* argv[]) {
- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");
std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(256));
}
```
@@ -113,8 +113,8 @@ Using group beam search decoding:
#include <iostream>
int main(int argc, char* argv[]) {
- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");
ov::genai::GenerationConfig config;
config.max_new_tokens = 256;
@@ -134,8 +134,8 @@ A simple chat in C++ using grouped beam search decoding:
int main(int argc, char* argv[]) {
std::string prompt;

- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");

ov::genai::GenerationConfig config;
config.max_new_tokens = 100;
@@ -164,8 +164,8 @@ Streaming example with lambda function:
#include <iostream>
int main(int argc, char* argv[]) {
- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");
auto streamer = [](std::string word) {
std::cout << word << std::flush;
@@ -202,8 +202,8 @@ public:
int main(int argc, char* argv[]) {
CustomStreamer custom_streamer;

- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");
std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(15), ov::genai::streamer(custom_streamer));
}
```
@@ -226,7 +226,7 @@ class CustomStreamer(ov_genai.StreamerBase):
def end(self):
# Custom finalization logic.
pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe = ov_genai.LLMPipeline(models_path, "CPU")
custom_streamer = CustomStreamer()
pipe.generate("The Sun is yellow because", max_new_tokens=15, streamer=custom_streamer)
@@ -245,7 +245,7 @@ int main(int argc, char* argv[]) {
// fill other fields in scheduler_config with custom data if required
scheduler_config.cache_size = 1; // minimal possible KV cache size in GB, adjust as required

ov::genai::LLMPipeline pipe(model_path, "CPU", ov::genai::scheduler_config(scheduler_config));
ov::genai::LLMPipeline pipe(models_path, "CPU", ov::genai::scheduler_config(scheduler_config));
}
```
Expand All @@ -268,7 +268,7 @@ Performance metrics are stored either in the `DecodedResults` or `EncodedResults
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe = ov_genai.LLMPipeline(models_path, "CPU")
result = pipe.generate(["The Sun is yellow because"], max_new_tokens=20)
perf_metrics = result.perf_metrics
@@ -283,8 +283,8 @@ print(f'Throughput: {perf_metrics.get_throughput().mean:.2f} tokens/s')
#include <iostream>

int main(int argc, char* argv[]) {
- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");
auto result = pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(20));
auto perf_metrics = result.perf_metrics;

@@ -311,8 +311,8 @@ Several `perf_metrics` can be added to each other. In that case `raw_metrics` ar
#include <iostream>

int main(int argc, char* argv[]) {
- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");
auto result_1 = pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(20));
auto result_2 = pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(20));
auto perf_metrics = result_1.perf_metrics + result_2.perf_metrics
@@ -327,7 +327,7 @@
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe = ov_genai.LLMPipeline(models_path, "CPU")
res_1 = pipe.generate(["The Sun is yellow because"], max_new_tokens=20)
res_2 = pipe.generate(["Why Sky is blue because"], max_new_tokens=20)
perf_metrics = res_1.perf_metrics + res_2.perf_metrics