Align pipelines (#1031)
- Added template constructors for pipelines to pass an arbitrary number of properties
- Dropped the `_draft_model` workaround and kept only `draft_model`
- Aligned all pipelines to accept `std::filesystem::path` as the models path
  - Added `Path` support to Python
- Dropped the Whisper constructor that accepts a `Tokenizer`
- Dropped `CPU` as the default device; it conflicts with OpenVINO, where the default device is `AUTO`
- Aligned argument names: `models_path` and `properties` (see the usage sketch below)
ilya-lavrenov authored Oct 22, 2024
1 parent eb80e10 commit 7cfedba
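The bullets above can be summarized in code. The following is an illustrative sketch only, not part of the commit: the constructor calls mirror the updated samples in the diff below, while the header paths, the prompt, and the `SchedulerConfig` type name are assumptions.

```cpp
// Illustrative sketch of the aligned pipeline API after this change.
// Header paths are assumed; constructor signatures follow the updated samples.
#include "openvino/genai/llm_pipeline.hpp"

#include <filesystem>
#include <iostream>

int main(int argc, char* argv[]) {
    // All pipelines now take std::filesystem::path as the models path.
    std::filesystem::path models_path = argv[1];

    // The device is passed explicitly; "CPU" is no longer an implicit default.
    ov::genai::LLMPipeline pipe(models_path, "CPU");
    std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(100)) << '\n';

    // An arbitrary number of properties can be forwarded through the constructor,
    // e.g. a scheduler config, as in the continuous-batching example in src/README.md.
    ov::genai::SchedulerConfig scheduler_config;  // type name assumed; see the src/README.md diff below
    scheduler_config.cache_size = 1;              // minimal possible KV cache size in GB
    ov::genai::LLMPipeline cb_pipe(models_path, "CPU", ov::genai::scheduler_config(scheduler_config));
}
```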
Showing 80 changed files with 722 additions and 727 deletions.
22 changes: 12 additions & 10 deletions README.md
@@ -86,8 +86,8 @@ Code below requires installation of C++ compatible package (see [here](https://d
#include <iostream>

int main(int argc, char* argv[]) {
- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");
std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(100)) << '\n';
}
```
@@ -129,8 +129,8 @@ Code below requires installation of C++ compatible package (see [here](https://d
#include <iostream>

int main(int argc, char* argv[]) {
- std::string model_path = argv[1];
- ov::genai::VLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::VLMPipeline pipe(models_path, "CPU");
ov::Tensor rgb = utils::load_image(argv[2]);
std::cout << pipe.generate(
prompt,
@@ -244,9 +244,10 @@ def main():
parser.add_argument("wav_file_path")
args = parser.parse_args()

- raw_speech = read_wav(args.wav_file_path)
+ device = 'CPU' # GPU can be used as well
+ pipe = openvino_genai.WhisperPipeline(args.model_dir, device)

- pipe = openvino_genai.WhisperPipeline(args.model_dir)
+ raw_speech = read_wav(args.wav_file_path)

def streamer(word: str) -> bool:
print(word, end="")
@@ -275,14 +276,15 @@ NOTE: This sample is a simplified version of the full sample that is available [

int main(int argc, char* argv[]) try {

- std::string model_path = argv[1];
+ std::filesystem::path models_path = argv[1];
std::string wav_file_path = argv[2];
+ std::string device = "CPU"; // GPU can be used as well

- ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path);
+ ov::genai::WhisperPipeline pipeline(models_path, device);

- ov::genai::WhisperPipeline pipeline{model_path};
+ ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path);

- ov::genai::WhisperGenerationConfig config{model_path + "/generation_config.json"};
+ ov::genai::WhisperGenerationConfig config(models_path / "generation_config.json");
config.max_new_tokens = 100;
// 'task' and 'language' parameters are supported for multilingual models only
config.language = "<|en|>";
4 changes: 2 additions & 2 deletions samples/cpp/beam_search_causal_lm/beam_search_causal_lm.cpp
@@ -8,10 +8,10 @@ int main(int argc, char* argv[]) try {
throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> '<PROMPT 1>' ['<PROMPT 2>' ...]");
}
auto prompts = std::vector<std::string>(argv + 2, argv + argc);
- std::string model_path = argv[1];
+ std::string models_path = argv[1];

std::string device = "CPU"; // GPU can be used as well
- ov::genai::LLMPipeline pipe(model_path, device);
+ ov::genai::LLMPipeline pipe(models_path, device);

ov::genai::GenerationConfig config;
config.max_new_tokens = 20;
4 changes: 2 additions & 2 deletions samples/cpp/benchmark_genai/benchmark_genai.cpp
@@ -31,15 +31,15 @@ int main(int argc, char* argv[]) try {
}

std::string prompt = result["prompt"].as<std::string>();
- const std::string model_path = result["model"].as<std::string>();
+ const std::string models_path = result["model"].as<std::string>();
std::string device = result["device"].as<std::string>();
size_t num_warmup = result["num_warmup"].as<size_t>();
size_t num_iter = result["num_iter"].as<size_t>();

ov::genai::GenerationConfig config;
config.max_new_tokens = result["max_new_tokens"].as<size_t>();

- ov::genai::LLMPipeline pipe(model_path, device);
+ ov::genai::LLMPipeline pipe(models_path, device);

for (size_t i = 0; i < num_warmup; i++)
pipe.generate(prompt, config);
4 changes: 2 additions & 2 deletions samples/cpp/chat_sample/chat_sample.cpp
@@ -8,10 +8,10 @@ int main(int argc, char* argv[]) try {
throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR>");
}
std::string prompt;
- std::string model_path = argv[1];
+ std::string models_path = argv[1];

std::string device = "CPU"; // GPU, NPU can be used as well
- ov::genai::LLMPipeline pipe(model_path, device);
+ ov::genai::LLMPipeline pipe(models_path, device);

ov::genai::GenerationConfig config;
config.max_new_tokens = 100;
4 changes: 2 additions & 2 deletions samples/cpp/greedy_causal_lm/greedy_causal_lm.cpp
@@ -7,11 +7,11 @@ int main(int argc, char* argv[]) try {
if (3 > argc)
throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> \"<PROMPT>\"");

- std::string model_path = argv[1];
+ std::string models_path = argv[1];
std::string prompt = argv[2];
std::string device = "CPU"; // GPU can be used as well

- ov::genai::LLMPipeline pipe(model_path, device);
+ ov::genai::LLMPipeline pipe(models_path, device);
ov::genai::GenerationConfig config;
config.max_new_tokens = 100;
std::string result = pipe.generate(prompt, config);
4 changes: 2 additions & 2 deletions samples/cpp/lora_greedy_causal_lm/lora_greedy_causal_lm.cpp
@@ -7,15 +7,15 @@ int main(int argc, char* argv[]) try {
if (4 > argc)
throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> <ADAPTER_SAFETENSORS_FILE> \"<PROMPT>\"");

- std::string model_path = argv[1];
+ std::string models_path = argv[1];
std::string adapter_path = argv[2];
std::string prompt = argv[3];
std::string device = "CPU"; // GPU can be used as well

using namespace ov::genai;

Adapter adapter(adapter_path);
- LLMPipeline pipe(model_path, device, adapters(adapter)); // register all required adapters here
+ LLMPipeline pipe(models_path, device, adapters(adapter)); // register all required adapters here

std::cout << "Generate with LoRA adapter and alpha set to 0.75:" << std::endl;
std::cout << pipe.generate(prompt, max_new_tokens(100), adapters(adapter, 0.75)) << std::endl;
4 changes: 2 additions & 2 deletions samples/cpp/multinomial_causal_lm/multinomial_causal_lm.cpp
@@ -8,11 +8,11 @@ int main(int argc, char* argv[]) try {
throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> '<PROMPT>'");
}

- std::string model_path = argv[1];
+ std::string models_path = argv[1];
std::string prompt = argv[2];

std::string device = "CPU"; // GPU can be used as well
- ov::genai::LLMPipeline pipe(model_path, device);
+ ov::genai::LLMPipeline pipe(models_path, device);

ov::genai::GenerationConfig config;
config.max_new_tokens = 100;
@@ -9,14 +9,15 @@ int main(int argc, char* argv[]) try {
throw std::runtime_error(std::string{"Usage: "} + argv[0] + " <MODEL_DIR> \"<WAV_FILE_PATH>\"");
}

- std::string model_path = argv[1];
+ std::filesystem::path models_path = argv[1];
std::string wav_file_path = argv[2];
+ std::string device = "CPU"; // GPU can be used as well

- ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path);
+ ov::genai::WhisperPipeline pipeline(models_path, device);

- ov::genai::WhisperPipeline pipeline{model_path};
+ ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path);

- ov::genai::WhisperGenerationConfig config{model_path + "/generation_config.json"};
+ ov::genai::WhisperGenerationConfig config(models_path / "generation_config.json");
config.max_new_tokens = 100;
// 'task' and 'language' parameters are supported for multilingual models only
config.language = "<|en|>";
4 changes: 2 additions & 2 deletions samples/python/benchmark_genai/benchmark_genai.py
@@ -18,15 +18,15 @@ def main():
# Perf metrics is stored in DecodedResults.
# In order to get DecodedResults instead of a string input should be a list.
prompt = [args.prompt]
- model_path = args.model
+ models_path = args.model
device = args.device
num_warmup = args.num_warmup
num_iter = args.num_iter

config = ov_genai.GenerationConfig()
config.max_new_tokens = args.max_new_tokens

- pipe = ov_genai.LLMPipeline(model_path, device)
+ pipe = ov_genai.LLMPipeline(models_path, device)

for _ in range(num_warmup):
pipe.generate(prompt, config)
@@ -18,8 +18,6 @@ def main():
parser.add_argument("wav_file_path")
args = parser.parse_args()

- raw_speech = read_wav(args.wav_file_path)

config = openvino_genai.WhisperGenerationConfig(
args.model_dir + "/generation_config.json"
)
@@ -29,12 +27,14 @@
config.task = "transcribe"
config.return_timestamps = True

- pipe = openvino_genai.WhisperPipeline(args.model_dir)
+ device = 'CPU' # GPU can be used as well
+ pipe = openvino_genai.WhisperPipeline(args.model_dir, device)

def streamer(word: str) -> bool:
print(word, end="")
return False

+ raw_speech = read_wav(args.wav_file_path)
result = pipe.generate(raw_speech, config, streamer)

print()
42 changes: 21 additions & 21 deletions src/README.md
@@ -55,14 +55,14 @@ If you want to try OpenVINO GenAI with different dependencies versions (**not**
A simple example:
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe = ov_genai.LLMPipeline(models_path, "CPU")
print(pipe.generate("The Sun is yellow because", max_new_tokens=100))
```

Calling generate with custom generation config parameters, e.g. config for grouped beam search:
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe = ov_genai.LLMPipeline(models_path, "CPU")
result = pipe.generate("The Sun is yellow because", max_new_tokens=100, num_beam_groups=3, num_beams=15, diversity_penalty=1.5)
print(result)
@@ -76,7 +76,7 @@ output:
A simple chat in Python:
```python
import openvino_genai as ov_genai
- pipe = ov_genai.LLMPipeline(model_path)
+ pipe = ov_genai.LLMPipeline(models_path)
config = {'max_new_tokens': 100, 'num_beam_groups': 3, 'num_beams': 15, 'diversity_penalty': 1.5}
pipe.set_generation_config(config)
@@ -101,8 +101,8 @@ A simple example:
#include <iostream>

int main(int argc, char* argv[]) {
- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");
std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(256));
}
```
@@ -113,8 +113,8 @@ Using group beam search decoding:
#include <iostream>
int main(int argc, char* argv[]) {
- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");
ov::genai::GenerationConfig config;
config.max_new_tokens = 256;
@@ -134,8 +134,8 @@ A simple chat in C++ using grouped beam search decoding:
int main(int argc, char* argv[]) {
std::string prompt;

- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");

ov::genai::GenerationConfig config;
config.max_new_tokens = 100;
@@ -164,8 +164,8 @@ Streaming example with lambda function:
#include <iostream>
int main(int argc, char* argv[]) {
- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");
auto streamer = [](std::string word) {
std::cout << word << std::flush;
@@ -202,8 +202,8 @@ public:
int main(int argc, char* argv[]) {
CustomStreamer custom_streamer;

- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");
std::cout << pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(15), ov::genai::streamer(custom_streamer));
}
```
@@ -226,7 +226,7 @@ class CustomStreamer(ov_genai.StreamerBase):
def end(self):
# Custom finalization logic.
pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe = ov_genai.LLMPipeline(models_path, "CPU")
custom_streamer = CustomStreamer()
pipe.generate("The Sun is yellow because", max_new_tokens=15, streamer=custom_streamer)
@@ -245,7 +245,7 @@ int main(int argc, char* argv[]) {
// fill other fields in scheduler_config with custom data if required
scheduler_config.cache_size = 1; // minimal possible KV cache size in GB, adjust as required

ov::genai::LLMPipeline pipe(model_path, "CPU", ov::genai::scheduler_config(scheduler_config));
ov::genai::LLMPipeline pipe(models_path, "CPU", ov::genai::scheduler_config(scheduler_config));
}
```
Expand All @@ -268,7 +268,7 @@ Performance metrics are stored either in the `DecodedResults` or `EncodedResults
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe = ov_genai.LLMPipeline(models_path, "CPU")
result = pipe.generate(["The Sun is yellow because"], max_new_tokens=20)
perf_metrics = result.perf_metrics
@@ -283,8 +283,8 @@ print(f'Throughput: {perf_metrics.get_throughput().mean:.2f} tokens/s')
#include <iostream>

int main(int argc, char* argv[]) {
- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");
auto result = pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(20));
auto perf_metrics = result.perf_metrics;

@@ -311,8 +311,8 @@ Several `perf_metrics` can be added to each other. In that case `raw_metrics` ar
#include <iostream>

int main(int argc, char* argv[]) {
- std::string model_path = argv[1];
- ov::genai::LLMPipeline pipe(model_path, "CPU");
+ std::string models_path = argv[1];
+ ov::genai::LLMPipeline pipe(models_path, "CPU");
auto result_1 = pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(20));
auto result_2 = pipe.generate("The Sun is yellow because", ov::genai::max_new_tokens(20));
auto perf_metrics = result_1.perf_metrics + result_2.perf_metrics
@@ -327,7 +327,7 @@
```python
import openvino_genai as ov_genai
pipe = ov_genai.LLMPipeline(model_path, "CPU")
pipe = ov_genai.LLMPipeline(models_path, "CPU")
res_1 = pipe.generate(["The Sun is yellow because"], max_new_tokens=20)
res_2 = pipe.generate(["Why Sky is blue because"], max_new_tokens=20)
perf_metrics = res_1.perf_metrics + res_2.perf_metrics