Skip to content

Commit

Permalink
Add sampling to vlm pipeline by Sampler
Browse files Browse the repository at this point in the history
  • Loading branch information
sbalandi committed Oct 15, 2024
1 parent 1c52294 commit b3db775
Show file tree
Hide file tree
Showing 6 changed files with 979 additions and 165 deletions.
55 changes: 48 additions & 7 deletions samples/cpp/visual_language_chat/visual_language_chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,11 @@

#include "load_image.hpp"
#include <openvino/genai/visual_language/pipeline.hpp>
#include <filesystem>
#include <openvino/runtime/intel_gpu/properties.hpp>

namespace fs = std::filesystem;

// Streamer callback: prints each generated subword to stdout immediately.
// Returns true only when std::cout has entered a failed state, which signals
// the pipeline to stop streaming.
bool print_subword(std::string&& subword) {
    std::cout << subword << std::flush;
    return std::cout.fail();
}
Expand All @@ -13,7 +16,40 @@ int main(int argc, char* argv[]) try {
if (3 != argc) {
throw std::runtime_error(std::string{"Usage "} + argv[0] + " <MODEL_DIR> <IMAGE_FILE>");
}
ov::Tensor image = utils::load_image(argv[2]);

// multinomial or beam_search can be used as well
ov::genai::GenerationConfig generation_config = ov::genai::greedy();
// ov::genai::GenerationConfig generation_config = ov::genai::multinomial();
// ov::genai::GenerationConfig generation_config = ov::genai::beam_search();

ov::AnyMap properies;
properies.insert(ov::genai::generation_config(generation_config));

// streamer could be used with greedy and multinomial
// if num_return_sequences > 1 in case of multinomial, the streamer will use the output from the first sequence
if (generation_config.is_greedy_decoding() or generation_config.is_multinomial()) {
properies.insert(ov::genai::streamer(print_subword));
}

std::vector<ov::Tensor> images;
std::string input_path = argv[2];
if (!input_path.empty() && fs::exists(input_path)) {
if (fs::is_directory(input_path)) {
for (const auto& dir_entry : fs::directory_iterator(input_path)) {
ov::Tensor image = utils::load_image(dir_entry.path());
images.push_back(std::move(image));
}
} else if (fs::is_regular_file(input_path)) {
ov::Tensor image = utils::load_image(input_path);
images.push_back(std::move(image));
}
}

if (images.empty())
throw std::runtime_error("No one image found by path " + input_path);
else
properies.insert(images.size() == 1 ? ov::genai::image(images.at(0)) : ov::genai::images(images));

std::string device = "CPU"; // GPU can be used as well
ov::AnyMap enable_compile_cache;
if ("GPU" == device) {
Expand All @@ -26,16 +62,21 @@ int main(int argc, char* argv[]) try {

pipe.start_chat();
std::cout << "question:\n";

std::getline(std::cin, prompt);
pipe.generate(
prompt,
ov::genai::image(image),
ov::genai::streamer(print_subword)
);
auto resuls = pipe.generate(prompt, properies);
if (generation_config.is_beam_search()) {
std::cout << resuls.texts.at(0) << std::endl;
}
properies.erase(images.size() == 1 ? "image" : "images");

std::cout << "\n----------\n"
"question:\n";
while (std::getline(std::cin, prompt)) {
pipe.generate(prompt, ov::genai::streamer(print_subword));
resuls = pipe.generate(prompt, properies);
if (generation_config.is_beam_search()) {
std::cout << resuls.texts.at(0) << std::endl;
}
std::cout << "\n----------\n"
"question:\n";
}
Expand Down
24 changes: 24 additions & 0 deletions src/cpp/src/sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,22 @@ Sampler::GroupBeamSearcher::GroupBeamSearcher(SequenceGroup::Ptr sequence_group,
}
}


std::vector<int32_t> Sampler::GroupBeamSearcher::get_beam_idxs() {
std::vector<int32_t> next_beams;

for (Group& group : m_groups) {
if (!group.done) {
for (Beam& beam : group.ongoing) {
next_beams.push_back(beam.m_global_beam_idx);
}
}
}

return next_beams;
}


void Sampler::GroupBeamSearcher::select_next_tokens(const ov::Tensor& logits, SamplerOutput& sampler_output) {
assert(m_parameters.num_beams % m_parameters.num_beam_groups == 0 &&
"number of beams should be divisible by number of groups");
Expand Down Expand Up @@ -581,6 +597,14 @@ void register_new_token(const Token& sampled_token_id,
}
};

// Returns the current global beam indices for the given request.
// If no beam-search state is tracked for this request (e.g. greedy or
// multinomial decoding), a single default beam index {0} is returned.
std::vector<int32_t> Sampler::get_beam_idxs(uint64_t request_id) {
    // Look up once and reuse the iterator; the original code performed a
    // second redundant map lookup in the condition.
    auto beam_searcher = m_beam_search_info.find(request_id);
    if (beam_searcher == m_beam_search_info.end()) {
        return { 0 };
    }
    return beam_searcher->second.get_beam_idxs();
}

std::list<uint64_t>
create_n_forked_sequences(SequenceGroup::Ptr sequence_group,
LogitProcessor& logit_processor,
Expand Down
2 changes: 2 additions & 0 deletions src/cpp/src/sampler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class Sampler {
SamplerOutput sample(std::vector<SequenceGroup::Ptr> & sequence_groups, ov::Tensor logits, bool is_validation_mode_enabled = false);
void set_seed(size_t seed) { rng_engine.seed(seed); }
void clear_beam_search_info(uint64_t request_id);
std::vector<int32_t> get_beam_idxs(uint64_t request_id);
};

class Sampler::GroupBeamSearcher {
Expand Down Expand Up @@ -109,5 +110,6 @@ class Sampler::GroupBeamSearcher {

void select_next_tokens(const ov::Tensor& logits, SamplerOutput& sampler_output);
void finalize(SamplerOutput& sampler_output);
std::vector<int32_t> get_beam_idxs();
};
}
Loading

0 comments on commit b3db775

Please sign in to comment.