Use split_core_complile_config for CB
ilya-lavrenov authored Oct 12, 2024
1 parent 743e018 commit cfccefa
Showing 1 changed file with 6 additions and 2 deletions: src/cpp/src/continuous_batching_impl.cpp
@@ -4,6 +4,7 @@
 #include "text_callback_streamer.hpp"
 #include "continuous_batching_impl.hpp"
 #include "paged_attention_transformations.hpp"
+#include "utils.hpp"
 
 namespace ov::genai {
 template<class... Ts> struct overloaded : Ts... {using Ts::operator()...;};
@@ -18,15 +19,18 @@ ContinuousBatchingPipeline::ContinuousBatchingImpl::ContinuousBatchingImpl(
     m_tokenizer = tokenizer;
     ov::Core core;
 
+    auto [core_plugin_config, compile_plugin_config] = ov::genai::utils::split_core_complile_config(plugin_config);
+    core.set_property(core_plugin_config);
+
     // The model can be compiled for GPU as well
     std::shared_ptr<ov::Model> model = core.read_model(models_path + "/openvino_model.xml");
 
-    DeviceConfig device_config(core, scheduler_config, device, plugin_config);
+    DeviceConfig device_config(core, scheduler_config, device, compile_plugin_config);
 
     bool is_need_per_layer_cache_control = scheduler_config.use_cache_eviction;
     apply_paged_attention_transformations(model, device_config, is_need_per_layer_cache_control);
 
-    ov::InferRequest infer_request = core.compile_model(model, device_config.get_device(), plugin_config).create_infer_request();
+    ov::InferRequest infer_request = core.compile_model(model, device_config.get_device(), compile_plugin_config).create_infer_request();
 
     // setup KV caches
     m_cache_manager = std::make_shared<CacheManager>(device_config, core);
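The point of the change: some properties in plugin_config are meant for ov::Core itself rather than for a specific compiled model, and forwarding them to compile_model() can fail on device plugins that reject unknown keys. The commit therefore partitions plugin_config into a core-level map (applied once via core.set_property) and a compile-level map (passed to DeviceConfig and compile_model). The helper itself is not part of this diff; a minimal sketch of the splitting logic, assuming "CACHE_DIR" (ov::cache_dir) is the only core-level key routed this way, a hypothetical simplification of whatever the real utils.cpp does, might look like:

// Hedged sketch of the config-splitting helper used above; the actual
// implementation of ov::genai::utils::split_core_complile_config may differ.
#include <string>
#include <utility>
#include <vector>
#include "openvino/core/any.hpp"

std::pair<ov::AnyMap, ov::AnyMap> split_core_complile_config(const ov::AnyMap& plugin_config) {
    // Assumption: CACHE_DIR must be set on ov::Core and must not be
    // forwarded to compile_model().
    const std::vector<std::string> core_level_properties = { "CACHE_DIR" };

    ov::AnyMap core_config;
    ov::AnyMap compile_config = plugin_config;
    for (const auto& key : core_level_properties) {
        auto it = compile_config.find(key);
        if (it != compile_config.end()) {
            core_config[key] = it->second;  // route to core.set_property()
            compile_config.erase(it);       // keep it out of compile_model()
        }
    }
    return { core_config, compile_config };
}

With a split along these lines, core.set_property(core_plugin_config) applies the core-level settings once, while compile_plugin_config carries only properties the device plugin is expected to accept, which is exactly how the two maps are used in the constructor above.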
