openvinotoolkit · sammysun0711 · Jan 20, 2025 · Jan 21, 2025
diff --git a/src/cpp/include/openvino/genai/llm_pipeline.hpp b/src/cpp/include/openvino/genai/llm_pipeline.hpp
@@ -288,6 +288,15 @@ OPENVINO_GENAI_EXPORTS std::pair<std::string, Any> draft_model(
     const std::string& device = {},
     const ov::AnyMap& properties = {});
 
+/**
+ * @brief Unloads the plugin registered for a device from the core object used by GenAI.
+ *
+ * Forwards to `unload_plugin(device_name)` on the library's singleton core object.
+ *
+ * @param device_name Name of the device whose plugin should be unloaded.
+ */
+OPENVINO_GENAI_EXPORTS void clear_core_device(const std::string& device_name);
+
 template <typename... Properties,
           typename std::enable_if<ov::util::StringAny<Properties...>::value, bool>::type = true>
 inline std::pair<std::string, Any> draft_model(

diff --git a/src/cpp/src/llm_pipeline.cpp b/src/cpp/src/llm_pipeline.cpp
@@ -83,6 +83,12 @@ std::pair<std::string, Any> draft_model(
     return { utils::DRAFT_MODEL_ARG_NAME, Any::make<ModelDesc>(model, tokenizer, device, plugin_config, scheduler_config, generation_config) };
 }
 
+// Unloads the plugin for `device_name` from the core object returned by
+// utils::singleton_core(); any error raised by unload_plugin() propagates to the caller.
+void clear_core_device(const std::string& device_name) {
+    utils::singleton_core().unload_plugin(device_name);
+}
+
 // Public LLMPipeline
 
 ov::genai::LLMPipeline::LLMPipeline(

diff --git a/src/cpp/src/llm_pipeline_stateful.cpp b/src/cpp/src/llm_pipeline_stateful.cpp
@@ -41,7 +41,6 @@ StatefulLLMPipeline::StatefulLLMPipeline(
     utils::apply_slice_before_matmul_transformation(model);
     m_kv_cache_seq_length_axis = ov::genai::utils::get_seq_len_axis(model);
 
-    ov::CompiledModel compiled_model;
     if (auto filtered_properties = extract_adapters_from_properties(properties, &m_generation_config.adapters)) {
         m_generation_config.adapters->set_tensor_name_prefix("base_model.model.model.");
         m_adapter_controller = AdapterController(model, *m_generation_config.adapters, device);   // TODO: Make the prefix name configurable
@@ -399,4 +398,18 @@ void StatefulLLMPipeline::finish_chat() {
     }
 }
 
+// Best-effort release of the compiled model's memory when the pipeline is destroyed.
+StatefulLLMPipeline::~StatefulLLMPipeline() {
+    // release_memory() on an uninitialized ov::CompiledModel throws; skip the call when
+    // the member was never assigned (e.g. a construction path that does not compile a model).
+    if (!compiled_model) {
+        return;
+    }
+    try {
+        compiled_model.release_memory();
+    } catch (...) {
+        // Destructors are implicitly noexcept: an escaping exception would call std::terminate.
+    }
+}
+
 } // namespace ov::genai
diff --git a/src/cpp/src/llm_pipeline_stateful.hpp b/src/cpp/src/llm_pipeline_stateful.hpp
@@ -10,6 +10,7 @@ namespace ov::genai {
 
 class StatefulLLMPipeline final : public LLMPipelineImplBase {
     ov::InferRequest m_model_runner;
+    ov::CompiledModel compiled_model;  // held so the destructor can call release_memory(); NOTE(review): lacks the m_ prefix used by sibling members
     Sampler m_sampler;
 
     // Chat scenario specific parameters
@@ -72,6 +73,8 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase {
     void start_chat(const std::string& system_message) override;
 
     void finish_chat() override;
+
+    ~StatefulLLMPipeline() override;  // defined in llm_pipeline_stateful.cpp; calls release_memory() on compiled_model
 };
 
 } // namespace ov::genai