Skip to content

Commit

Permalink
Add sampling to vlm pipeline by Sampler
Browse files Browse the repository at this point in the history
  • Loading branch information
sbalandi committed Oct 11, 2024
1 parent 82d6697 commit b28f62a
Show file tree
Hide file tree
Showing 6 changed files with 237 additions and 229 deletions.
54 changes: 47 additions & 7 deletions samples/cpp/visual_language_chat/visual_language_chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@
// SPDX-License-Identifier: Apache-2.0

#include "load_image.hpp"
#include <filesystem>
#include <openvino/genai/vlm_pipeline.hpp>
#include <openvino/runtime/intel_gpu/properties.hpp>

namespace fs = std::filesystem;

// Streamer callback used by the sample: writes the given subword to stdout
// and flushes immediately so partial output is visible as it is produced.
// Returns true only when std::cout is in a failed state after the write,
// false otherwise.
// NOTE(review): a true result presumably tells the pipeline to stop
// generation - confirm against the streamer contract.
bool print_subword(std::string&& subword) {
    std::cout << subword << std::flush;
    return std::cout.fail();
}
Expand All @@ -13,7 +16,40 @@ int main(int argc, char* argv[]) try {
if (3 != argc) {
throw std::runtime_error(std::string{"Usage "} + argv[0] + " <MODEL_DIR> <IMAGE_FILE>");
}
ov::Tensor image = utils::load_image(argv[2]);

// multinomial or beam_search can be used as well
ov::genai::GenerationConfig generation_config = ov::genai::greedy();
// ov::genai::GenerationConfig generation_config = ov::genai::multinomial();
// ov::genai::GenerationConfig generation_config = ov::genai::beam_search();

ov::AnyMap properies;
properies.insert(ov::genai::generation_config(generation_config));

// streamer could be used with greedy and multinomial
// if num_return_sequences > 1 in case of multinomial, the streamer will use the output from the first sequence
if (generation_config.is_greedy_decoding() or generation_config.is_multinomial()) {
properies.insert(ov::genai::streamer(print_subword));
}

std::vector<ov::Tensor> images;
std::string input_path = argv[2];
if (!input_path.empty() && fs::exists(input_path)) {
if (fs::is_directory(input_path)) {
for (const auto& dir_entry : fs::directory_iterator(input_path)) {
ov::Tensor image = utils::load_image(dir_entry.path());
images.push_back(std::move(image));
}
} else if (fs::is_regular_file(input_path)) {
ov::Tensor image = utils::load_image(input_path);
images.push_back(std::move(image));
}
}

if (images.empty())
throw std::runtime_error("No one image found by path " + input_path);
else
properies.insert(images.size() == 1 ? ov::genai::image(images.at(0)) : ov::genai::images(images));

std::string device = "CPU"; // GPU can be used as well
ov::AnyMap enable_compile_cache;
if ("GPU" == device) {
Expand All @@ -26,16 +62,20 @@ int main(int argc, char* argv[]) try {

pipe.start_chat();
std::cout << "question:\n";

std::getline(std::cin, prompt);
pipe.generate(
prompt,
ov::genai::image(image),
ov::genai::streamer(print_subword)
);
auto resuls = pipe.generate(prompt, properies);
if (generation_config.is_beam_search()) {
std::cout << resuls.texts.at(0) << std::endl;
}

std::cout << "\n----------\n"
"question:\n";
while (std::getline(std::cin, prompt)) {
pipe.generate(prompt, ov::genai::streamer(print_subword));
resuls = pipe.generate(prompt, properies);
if (generation_config.is_beam_search()) {
std::cout << resuls.texts.at(0) << std::endl;
}
std::cout << "\n----------\n"
"question:\n";
}
Expand Down
4 changes: 2 additions & 2 deletions src/cpp/include/openvino/genai/vlm_pipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,8 +140,8 @@ class OPENVINO_GENAI_EXPORTS VLMPipeline {
class VLMPipelineImpl;
std::unique_ptr<VLMPipelineImpl> m_pimpl;

ov::Tensor get_inputs_embeds_minicpm(const std::string& prompt, const std::vector<ov::Tensor>& images);
ov::Tensor get_inputs_embeds_llava(const std::string& prompt, const std::vector<ov::Tensor>& images);
std::pair<ov::Tensor, ov::Tensor> get_inputs_embeds_minicpm(const std::string& prompt, const std::vector<ov::Tensor>& images);
std::pair<ov::Tensor, ov::Tensor> get_inputs_embeds_llava(const std::string& prompt, const std::vector<ov::Tensor>& images);
};

/*
Expand Down
24 changes: 24 additions & 0 deletions src/cpp/src/sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,22 @@ Sampler::GroupBeamSearcher::GroupBeamSearcher(SequenceGroup::Ptr sequence_group,
}
}


std::vector<int32_t> Sampler::GroupBeamSearcher::get_beam_idxs() {
std::vector<int32_t> next_beams;

for (Group& group : m_groups) {
if (!group.done) {
for (Beam& beam : group.ongoing) {
next_beams.push_back(beam.m_global_beam_idx);
}
}
}

return next_beams;
}


void Sampler::GroupBeamSearcher::select_next_tokens(const ov::Tensor& logits, SamplerOutput& sampler_output) {
assert(m_parameters.num_beams % m_parameters.num_beam_groups == 0 &&
"number of beams should be divisible by number of groups");
Expand Down Expand Up @@ -581,6 +597,14 @@ void register_new_token(const Token& sampled_token_id,
}
};

// Returns the beam indices reported by the beam searcher stored for
// `request_id` in m_beam_search_info.
// When no entry exists for the request, a single-element vector { 0 } is
// returned instead.
// NOTE(review): the { 0 } fallback presumably covers requests that do not use
// beam search (a single sequence) - confirm with callers.
std::vector<int32_t> Sampler::get_beam_idxs(uint64_t request_id) {
    // Perform the lookup once and reuse the iterator for both the presence
    // check and the access, instead of searching the container twice.
    const auto beam_searcher = m_beam_search_info.find(request_id);
    if (beam_searcher == m_beam_search_info.end()) {
        return { 0 };
    }
    return beam_searcher->second.get_beam_idxs();
}

std::list<uint64_t>
create_n_forked_sequences(SequenceGroup::Ptr sequence_group,
LogitProcessor& logit_processor,
Expand Down
2 changes: 2 additions & 0 deletions src/cpp/src/sampler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class Sampler {
SamplerOutput sample(std::vector<SequenceGroup::Ptr> & sequence_groups, ov::Tensor logits, bool is_validation_mode_enabled = false);
void set_seed(size_t seed) { rng_engine.seed(seed); }
void clear_beam_search_info(uint64_t request_id);
std::vector<int32_t> get_beam_idxs(uint64_t request_id);
};

class Sampler::GroupBeamSearcher {
Expand Down Expand Up @@ -109,5 +110,6 @@ class Sampler::GroupBeamSearcher {

void select_next_tokens(const ov::Tensor& logits, SamplerOutput& sampler_output);
void finalize(SamplerOutput& sampler_output);
std::vector<int32_t> get_beam_idxs();
};
}
Loading

0 comments on commit b28f62a

Please sign in to comment.