Commit 12fda35: parametrizable tokenizer plugin config
dkalinowski committed Jul 22, 2024 (1 parent: 97ea1aa)

Showing 4 changed files with 13 additions and 11 deletions.
3 changes: 2 additions & 1 deletion src/cpp/include/openvino/genai/continuous_batching_pipeline.hpp
@@ -30,7 +30,8 @@ class OPENVINO_GENAI_EXPORTS ContinuousBatchingPipeline {
     ContinuousBatchingPipeline(const std::string& models_path,
                                const SchedulerConfig& scheduler_config,
                                const std::string& device = "CPU",
-                               const ov::AnyMap& plugin_config = {});
+                               const ov::AnyMap& llm_plugin_config = {},
+                               const ov::AnyMap& tokenizer_plugin_config = {});
 
     std::shared_ptr<ov::genai::Tokenizer> get_tokenizer();
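
With the split parameters, the LLM and the tokenizer plugins can be tuned independently. A minimal usage sketch, assuming a models directory containing openvino_model.xml plus the tokenizer XMLs; the path and property values below are illustrative, not part of this commit:

    #include "openvino/genai/continuous_batching_pipeline.hpp"

    int main() {
        ov::genai::SchedulerConfig scheduler_config;  // library defaults
        ov::genai::ContinuousBatchingPipeline pipeline(
            "/path/to/models",                                // models_path (placeholder)
            scheduler_config,
            "CPU",                                            // device for the LLM
            ov::AnyMap{{"PERFORMANCE_HINT", "THROUGHPUT"}},   // llm_plugin_config
            ov::AnyMap{{"PERFORMANCE_HINT", "LATENCY"}});     // tokenizer_plugin_config
        auto tokenizer = pipeline.get_tokenizer();
        return 0;
    }

Because both new parameters default to {}, existing four-argument call sites keep compiling; the old plugin_config position now feeds llm_plugin_config.
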
2 changes: 1 addition & 1 deletion src/cpp/include/openvino/genai/tokenizer.hpp
@@ -29,7 +29,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
      * @brief ov::Tokenizer constructor.
      * @param tokenizer_path openvino_tokenizer.xml and openvino_detokenizer.xml should be located in the tokenizer_path
      */
-    Tokenizer(const std::string& tokenizer_path);
+    Tokenizer(const std::string& tokenizer_path, const ov::AnyMap& plugin_config = {});
 
     /**
      * @brief encode a single prompt
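
The added plugin_config defaults to an empty map, so the single-argument constructor keeps working. A short sketch of both forms, with a placeholder path and an illustrative property value:

    #include "openvino/genai/tokenizer.hpp"

    // As before: no extra plugin properties.
    ov::genai::Tokenizer basic("/path/to/tokenizer_dir");

    // New: properties are forwarded to the compile_model calls inside the impl.
    ov::genai::Tokenizer tuned("/path/to/tokenizer_dir",
                               ov::AnyMap{{"PERFORMANCE_HINT", "LATENCY"}});

Per the comment in tokenizer.cpp, the tokenizer model is currently compiled for CPU only, so the properties should be ones the CPU plugin accepts.
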
11 changes: 6 additions & 5 deletions src/cpp/src/continuous_batching_pipeline.cpp
@@ -70,9 +70,9 @@ class ContinuousBatchingPipeline::Impl {
     }
 
 public:
-    Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string device, const ov::AnyMap& plugin_config) {
+    Impl(const std::string& models_path, const SchedulerConfig& scheduler_config, const std::string device, const ov::AnyMap& llm_plugin_config, const ov::AnyMap& tokenizer_plugin_config) {
        ov::Core core;
-       m_tokenizer = std::make_shared<ov::genai::Tokenizer>(models_path);
+       m_tokenizer = std::make_shared<ov::genai::Tokenizer>(models_path, tokenizer_plugin_config);
 
        // The model can be compiled for GPU as well
        std::shared_ptr<ov::Model> model = core.read_model(models_path + "/openvino_model.xml");
@@ -81,7 +81,7 @@
 
        apply_paged_attention_transformations(model, device_config);
 
-       ov::InferRequest infer_request = core.compile_model(model, device_config.get_device(), plugin_config).create_infer_request();
+       ov::InferRequest infer_request = core.compile_model(model, device_config.get_device(), llm_plugin_config).create_infer_request();
 
        // setup KV caches
        m_cache_manager = std::make_shared<CacheManager>(device_config);
@@ -279,8 +279,9 @@
 ContinuousBatchingPipeline::ContinuousBatchingPipeline( const std::string& models_path,
                                                         const SchedulerConfig& scheduler_config,
                                                         const std::string& device,
-                                                        const ov::AnyMap& plugin_config ) {
-    m_impl = std::make_shared<Impl>(models_path, scheduler_config, device, plugin_config);
+                                                        const ov::AnyMap& llm_plugin_config,
+                                                        const ov::AnyMap& tokenizer_plugin_config) {
+    m_impl = std::make_shared<Impl>(models_path, scheduler_config, device, llm_plugin_config, tokenizer_plugin_config);
 }
 
 std::shared_ptr<ov::genai::Tokenizer> ContinuousBatchingPipeline::get_tokenizer() {
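
Inside Impl, each map is forwarded as-is: tokenizer_plugin_config goes to the Tokenizer constructor, and llm_plugin_config goes to core.compile_model. A standalone sketch of that compile path using stock OpenVINO APIs; the path is a placeholder and NUM_STREAMS is an assumed illustrative property, not something this commit sets:

    #include <openvino/openvino.hpp>

    ov::Core core;
    std::shared_ptr<ov::Model> model =
        core.read_model("/path/to/models/openvino_model.xml");  // placeholder path
    ov::AnyMap llm_plugin_config{
        {"PERFORMANCE_HINT", "THROUGHPUT"},
        {"NUM_STREAMS", "4"}  // assumed CPU plugin property, for illustration only
    };
    // Properties travel untouched from the pipeline constructor to the plugin.
    ov::InferRequest infer_request =
        core.compile_model(model, "CPU", llm_plugin_config).create_infer_request();
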
8 changes: 4 additions & 4 deletions src/cpp/src/tokenizer.cpp
@@ -76,7 +76,7 @@ class Tokenizer::TokenizerImpl {
 
     TokenizerImpl() = default;
 
-    TokenizerImpl(std::filesystem::path tokenizer_path)
+    TokenizerImpl(std::filesystem::path tokenizer_path, const ov::AnyMap& plugin_config)
        : m_chat_template{chat_template_from_tokenizer_json_if_exists(tokenizer_path)} {
        ov::Core core;
 
@@ -96,7 +96,7 @@
        // Try to read tokenizer_config if some token ids or token str are not defined.
        read_tokenizer_config_if_necessary(tokenizer_path);
 
-       std::map<std::string, ov::Any> plugin_config{{"PERFORMANCE_HINT", "THROUGHPUT"}};
+       //std::map<std::string, ov::Any> plugin_config{{"PERFORMANCE_HINT", "THROUGHPUT"}};
        auto device = "CPU"; // currently openvino_tokenizer supports only CPU
 
        m_tokenizer = core.compile_model(tokenizer_path / "openvino_tokenizer.xml",
@@ -431,9 +431,9 @@ class Tokenizer::TokenizerImpl {
 
 };
 
-Tokenizer::Tokenizer(const std::string& tokenizer_path) {
+Tokenizer::Tokenizer(const std::string& tokenizer_path, const ov::AnyMap& plugin_config) {
     ScopedVar env_manager(tokenizers_relative_to_genai().string());
-    m_pimpl = std::make_shared<TokenizerImpl>(tokenizer_path);
+    m_pimpl = std::make_shared<TokenizerImpl>(tokenizer_path, plugin_config);
 }
 
 TokenizedInputs Tokenizer::encode(const std::string prompt) {
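
Note the behavior change in the second hunk: the hardcoded {"PERFORMANCE_HINT", "THROUGHPUT"} map is commented out rather than kept as a default, so a Tokenizer built without a plugin_config now compiles with empty properties. A caller that relied on the old throughput hint can restore it explicitly; a sketch with a placeholder path:

    // Restores the pre-commit behavior of always hinting for throughput.
    ov::genai::Tokenizer tokenizer("/path/to/tokenizer_dir",
                                   ov::AnyMap{{"PERFORMANCE_HINT", "THROUGHPUT"}});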
