Hide VLM files and API #951

Merged · 14 commits · Oct 15, 2024
samples/cpp/visual_language_chat/visual_language_chat.cpp (1 addition, 1 deletion)

```diff
@@ -2,7 +2,7 @@
 // SPDX-License-Identifier: Apache-2.0

 #include "load_image.hpp"
-#include <openvino/genai/vlm_pipeline.hpp>
+#include <openvino/genai/visual_language/vlm_pipeline.hpp>
 #include <openvino/runtime/intel_gpu/properties.hpp>

 bool print_subword(std::string&& subword) {
```
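For callers the only visible change is the include path. Below is a minimal usage sketch in the style of this sample; the constructor arguments, `utils::load_image`, and the `ov::genai::image` / `ov::genai::streamer` properties are assumptions carried over from the surrounding sample code, not something this diff introduces:

```cpp
// Hedged sketch of a consumer after this PR; only the header location changed.
#include "load_image.hpp"  // sample-local helper providing utils::load_image
#include <openvino/genai/visual_language/vlm_pipeline.hpp>  // new public path

#include <iostream>

bool print_subword(std::string&& subword) {
    std::cout << subword << std::flush;
    return false;  // false tells the pipeline to keep generating
}

int main(int argc, char* argv[]) try {
    ov::genai::VLMPipeline pipe(argv[1], "CPU");    // model dir + device
    ov::Tensor image = utils::load_image(argv[2]);  // image as ov::Tensor
    pipe.start_chat();
    pipe.generate("Describe this image.",
                  ov::genai::image(image),
                  ov::genai::streamer(print_subword));
    pipe.finish_chat();
} catch (const std::exception& e) {
    std::cerr << e.what() << '\n';
    return 1;
}
```

Returning `false` from the streamer callback continues generation; returning `true` would stop it early.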
```diff
@@ -6,54 +6,13 @@
 #include "openvino/genai/llm_pipeline.hpp"
 #include "openvino/genai/streamer_base.hpp"
 #include "openvino/genai/tokenizer.hpp"
-#include "openvino/genai/vision_encoder.hpp"
-#include "openvino/genai/vlm_config.hpp"
 #include <filesystem>

 namespace ov::genai {
-/// @brief A string prompt and source image.
-struct PromptImages {
-    /// @brief A prompt represented as std::string.
-    std::string prompt;
-    /// @brief An image represented as ov::Tensor.
-    std::vector<ov::Tensor> images;
-};
-
 /// @brief A Visual language modeling pipeline class used to generate a
 /// response or run a chat given a prompt and an image.
 class OPENVINO_GENAI_EXPORTS VLMPipeline {
 public:
-    // A config to follow for LLM input construction.
-    VLMConfig m_vlm_config;
-    // A config to follow for text generation.
-    GenerationConfig m_generation_config;
-    // A tokenizer encoding a prompt.
-    Tokenizer m_tokenizer;
-    // An encoder to infer embeddings of an image.
-    VisionEncoder m_vision_encoder;
-    // A resampler model to resample image embeddings.
-    // [N, H*W, old_hidden_size] is the input shape.
-    // [N, query_num, hidden_size] is the output shape.
-    ov::InferRequest m_resampler;
-    // A model to compute token embeddings.
-    // Input shape: [N, conversation length].
-    // Output shape: [1, conversation length, hidden_size].
-    ov::InferRequest m_embedding;
-    // A language model used to generate a response.
-    // Input shapes: inputs_embeds[N, conversation length, hidden_size],
-    // position_ids[N, conversation length], beam_idx[N].
-    // Output shape: logits[N, conversation length, vocab_size].
-    ov::InferRequest m_language;
-    // Precomputed positional embeddings for the resampler.
-    // [70, 70, hidden_size]. 70 is the initial guess of the image
-    // height and width after dividing by patch_size.
-    ov::Tensor m_pos_embed_cache;
-    // True if chat mode is activated to save conversation
-    // history between generate() calls.
-    bool m_is_chat_conversation;
-    ChatHistory m_history;
-    std::string m_templated_chat_history;
-    size_t image_id = 0; // Used to insert <image_id>i</image_id> per image (not a slice).
     /// @brief Construct a pipeline form a folder containing tokenizer
     /// and model IRs.
     /// @param model_dir A folder to read tokenizer and model IRs.
@@ -122,7 +81,7 @@ class OPENVINO_GENAI_EXPORTS VLMPipeline {
     /// role.
     void start_chat(const std::string& system_message="");
     /// @brief Deactivate chat mode.
-    void finish_chat() {m_is_chat_conversation = false;}
+    void finish_chat();
     /// @brief Set a custom chat template. Can be used to deactivate
     /// chat_template application for chat mode if called with
     /// "{% for message in messages %}{{ message['content'] }}{% endfor %}"
@@ -139,9 +98,6 @@ class OPENVINO_GENAI_EXPORTS VLMPipeline {
 private:
     class VLMPipelineImpl;
     std::unique_ptr<VLMPipelineImpl> m_pimpl;
-
-    ov::Tensor get_inputs_embeds_minicpm(const std::string& prompt, const std::vector<ov::Tensor>& images);
-    ov::Tensor get_inputs_embeds_llava(const std::string& prompt, const std::vector<ov::Tensor>& images);
 };

 /*
```
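`finish_chat()` loses its inline body because `m_is_chat_conversation` is no longer a member of the public class; all of that state now lives behind `m_pimpl`. A generic sketch of the pimpl idiom at work here, using hypothetical names rather than the repository's actual implementation:

```cpp
// pipeline.hpp -- the public header exposes only an opaque handle.
#include <memory>
#include <string>

class Pipeline {
public:
    Pipeline();
    ~Pipeline();         // defined in the .cpp, where Impl is complete
    void finish_chat();  // declaration only; the body needs Impl
private:
    class Impl;          // forward declaration keeps the layout private
    std::unique_ptr<Impl> m_pimpl;
};

// pipeline.cpp -- private state and method bodies live here and can
// change without recompiling users of the header.
class Pipeline::Impl {
public:
    bool m_is_chat_conversation = false;
    std::string m_templated_chat_history;
};

Pipeline::Pipeline() : m_pimpl(std::make_unique<Impl>()) {}
Pipeline::~Pipeline() = default;
void Pipeline::finish_chat() { m_pimpl->m_is_chat_conversation = false; }
```

The same move explains why `get_inputs_embeds_minicpm` and `get_inputs_embeds_llava` drop out of the header: model-specific helpers can now change freely without touching the public ABI or forcing client recompiles.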
src/cpp/src/utils.hpp (1 addition, 1 deletion)

```diff
@@ -6,7 +6,7 @@
 #include <nlohmann/json.hpp>

 #include "openvino/genai/llm_pipeline.hpp"
-#include "openvino/genai/processor_config.hpp"
+#include "visual_language/processor_config.hpp"

 namespace ov {
 namespace genai {
```
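The remaining hunks repeat one pattern: headers leave the installed openvino/genai/ include tree for a source-private visual_language/ directory. A sketch of the two include forms, with the directory layout inferred from the new paths rather than stated in the diff:

```cpp
// Public API header: part of the installed include tree, so it keeps
// the library prefix and stays visible to downstream users.
#include "openvino/genai/llm_pipeline.hpp"

// Internal header after this PR: lives beside the sources (presumably
// src/cpp/src/visual_language/) and resolves through the target's
// private include path, so it never ships in the install tree.
#include "visual_language/processor_config.hpp"
```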
src/cpp/src/vision_encoder.cpp (2 additions, 2 deletions)

```diff
@@ -1,8 +1,8 @@
 // Copyright (C) 2023-2024 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0

-#include <openvino/genai/vision_encoder.hpp>
-#include "clip.hpp"
+#include "vision_encoder.hpp"
+#include "visual_language/clip.hpp"
 #include "utils.hpp"

 using namespace ov::genai;
```
```diff
@@ -3,9 +3,9 @@

 #pragma once

-#include "openvino/genai/processor_config.hpp"
 #include <openvino/openvino.hpp>
-#include "vlm_model_type.hpp"
+#include "visual_language/processor_config.hpp"
+#include "visual_language/vlm_model_type.hpp"

 namespace ov::genai {
 /// @brief A pair describing image size.
```
File renamed without changes.
File renamed without changes.
```diff
@@ -1,7 +1,7 @@
 // Copyright (C) 2023-2024 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0

-#include "openvino/genai/processor_config.hpp"
+#include "processor_config.hpp"
 #include "utils.hpp"
 #include <fstream>
```
```diff
@@ -1,7 +1,7 @@
 // Copyright (C) 2023-2024 Intel Corporation
 // SPDX-License-Identifier: Apache-2.0

-#include "openvino/genai/vlm_config.hpp"
+#include "vlm_config.hpp"
 #include "utils.hpp"
 #include <fstream>
```
```diff
@@ -4,9 +4,9 @@
 #pragma once

 #include "openvino/genai/visibility.hpp"
+#include "visual_language/vlm_model_type.hpp"
 #include <openvino/runtime/properties.hpp>
 #include <filesystem>
-#include "vlm_model_type.hpp"

 namespace ov::genai {
 /// @brief A Configuration class passed to VLMPipeline and used to
```