Skip to content

Commit

Permalink
Add sampling to vlm pipeline by Sampler
Browse files Browse the repository at this point in the history
  • Loading branch information
sbalandi committed Oct 17, 2024
1 parent 330f122 commit 752358c
Show file tree
Hide file tree
Showing 8 changed files with 223 additions and 220 deletions.
10 changes: 8 additions & 2 deletions .github/workflows/causal_lm_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -709,7 +709,8 @@ jobs:
python -m pip install -U "optimum<1.23" --no-dependencies
source ./ov/setupvars.sh
optimum-cli export openvino -m openbmb/MiniCPM-V-2_6 MiniCPM-V-2_6 --trust-remote-code
wget https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --output-document cat.jpg
mkdir cat_img
wget https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --output-document cat_img/cat.jpg
- name: Generate reference
shell: python
run: |
Expand Down Expand Up @@ -739,6 +740,11 @@ jobs:
source ./ov/setupvars.sh
&& timeout 120s ./build/samples/cpp/visual_language_chat/visual_language_chat ./MiniCPM-V-2_6/ lines.png
<<< $'What is unusual on this image?' | tee cpp.txt
- name: Run visual_language_chat C++ sample with dir - MiniCPM-V-2_6
run: >
source ./ov/setupvars.sh
&& timeout 120s ./build/samples/cpp/visual_language_chat/visual_language_chat ./MiniCPM-V-2_6/ cat_img
<<< $'What is unusual on this image?' | tee cpp.txt
- run: diff cpp.txt ref.txt
- name: Download and convert LLaVa 1.5 model and an image
run: |
Expand Down Expand Up @@ -767,7 +773,7 @@ jobs:
source ./ov/setupvars.sh
export PYTHONPATH=./build/:$PYTHONPATH
printf 'What is on the image?\nWhat is special on the image?\n' > ./input.txt
timeout 120s python ./samples/python/visual_language_chat/visual_language_chat.py ./MiniCPM-V-2_6/ cat.jpg < input.txt > ./pred.txt
timeout 120s python ./samples/python/visual_language_chat/visual_language_chat.py ./MiniCPM-V-2_6/ cat_img/cat.jpg < input.txt > ./pred.txt
cpp-continuous-batching-ubuntu:
runs-on: ubuntu-20.04-8-cores
Expand Down
22 changes: 22 additions & 0 deletions samples/cpp/visual_language_chat/load_image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,28 @@
#include "stb_image.h"
#include "load_image.hpp"

namespace fs = std::filesystem;

/// Loads one or more images from the given path.
///
/// If @p input_path is a regular file, it is decoded as a single image.
/// If it is a directory, every regular file directly inside it is decoded
/// (subdirectories and other non-file entries are skipped).
///
/// @param input_path  Path to an image file or to a directory with images.
/// @return Non-empty vector of decoded image tensors.
/// @throws std::runtime_error if no images could be collected from the path.
std::vector<ov::Tensor> utils::load_images(const std::filesystem::path& input_path) {
    std::vector<ov::Tensor> images;

    if (!input_path.empty() && fs::exists(input_path)) {
        if (fs::is_directory(input_path)) {
            for (const auto& dir_entry : fs::directory_iterator(input_path)) {
                // Only regular files can be decoded; skip nested directories,
                // symlinked dirs, sockets, etc. so the decoder is not handed
                // a path it cannot read.
                if (!dir_entry.is_regular_file())
                    continue;
                images.push_back(utils::load_image(dir_entry.path()));
            }
        } else if (fs::is_regular_file(input_path)) {
            images.push_back(utils::load_image(input_path));
        }
    }

    if (images.empty())
        throw std::runtime_error(std::string{"No images were found in path "} + input_path.string());

    return images;
}

ov::Tensor utils::load_image(const std::filesystem::path& image_path) {
int x = 0, y = 0, channels_in_file = 0;
constexpr int desired_channels = 3;
Expand Down
1 change: 1 addition & 0 deletions samples/cpp/visual_language_chat/load_image.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@

namespace utils {
// Decodes a single image file into an ov::Tensor.
ov::Tensor load_image(const std::filesystem::path& image_path);
// Loads images from a single file or from every file in a directory;
// throws if nothing could be loaded.
std::vector<ov::Tensor> load_images(const std::filesystem::path& image_path);
}
24 changes: 16 additions & 8 deletions samples/cpp/visual_language_chat/visual_language_chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "load_image.hpp"
#include <openvino/genai/visual_language/pipeline.hpp>
#include <filesystem>
#include <openvino/runtime/intel_gpu/properties.hpp>

bool print_subword(std::string&& subword) {
Expand All @@ -11,9 +12,14 @@ bool print_subword(std::string&& subword) {

int main(int argc, char* argv[]) try {
if (3 != argc) {
throw std::runtime_error(std::string{"Usage "} + argv[0] + " <MODEL_DIR> <IMAGE_FILE>");
throw std::runtime_error(std::string{"Usage "} + argv[0] + " <MODEL_DIR> <IMAGE_FILE OR DIR_WITH_IMAGES>");
}
ov::Tensor image = utils::load_image(argv[2]);

std::vector<ov::Tensor> images = utils::load_images(argv[2]);

ov::genai::GenerationConfig generation_config;
generation_config.max_new_tokens = 30;

std::string device = "CPU"; // GPU can be used as well
ov::AnyMap enable_compile_cache;
if ("GPU" == device) {
Expand All @@ -26,16 +32,18 @@ int main(int argc, char* argv[]) try {

pipe.start_chat();
std::cout << "question:\n";

std::getline(std::cin, prompt);
pipe.generate(
prompt,
ov::genai::image(image),
ov::genai::streamer(print_subword)
);
pipe.generate(prompt,
ov::genai::images(images),
ov::genai::generation_config(generation_config),
ov::genai::streamer(print_subword));
std::cout << "\n----------\n"
"question:\n";
while (std::getline(std::cin, prompt)) {
pipe.generate(prompt, ov::genai::streamer(print_subword));
pipe.generate(prompt,
ov::genai::generation_config(generation_config),
ov::genai::streamer(print_subword));
std::cout << "\n----------\n"
"question:\n";
}
Expand Down
25 changes: 25 additions & 0 deletions src/cpp/src/sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,22 @@ Sampler::GroupBeamSearcher::GroupBeamSearcher(SequenceGroup::Ptr sequence_group,
}
}


std::vector<int32_t> Sampler::GroupBeamSearcher::get_beam_idxs() {
std::vector<int32_t> next_beams;

for (Group& group : m_groups) {
if (!group.done) {
for (Beam& beam : group.ongoing) {
next_beams.push_back(beam.m_global_beam_idx);
}
}
}

return next_beams;
}


void Sampler::GroupBeamSearcher::select_next_tokens(const ov::Tensor& logits, SamplerOutput& sampler_output) {
assert(m_parameters.num_beams % m_parameters.num_beam_groups == 0 &&
"number of beams should be divisible by number of groups");
Expand Down Expand Up @@ -581,6 +597,15 @@ void register_new_token(const Token& sampled_token_id,
}
};

// Returns the beam indices for the given sequence group.
// When no beam search state exists for the request (e.g. greedy / multinomial
// sampling), every running sequence maps to beam index 0.
std::vector<int32_t> Sampler::get_beam_idxs(SequenceGroup::CPtr sequence_group) {
    size_t request_id = sequence_group->get_request_id();
    // Single map lookup: reuse the iterator for both the existence check and
    // the dereference (the original performed the find() twice).
    auto beam_searcher = m_beam_search_info.find(request_id);
    if (beam_searcher == m_beam_search_info.end()) {
        return std::vector<int32_t>(sequence_group->num_running_seqs(), 0);
    }
    return beam_searcher->second.get_beam_idxs();
}

std::list<uint64_t>
create_n_forked_sequences(SequenceGroup::Ptr sequence_group,
LogitProcessor& logit_processor,
Expand Down
2 changes: 2 additions & 0 deletions src/cpp/src/sampler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class Sampler {
SamplerOutput sample(std::vector<SequenceGroup::Ptr> & sequence_groups, ov::Tensor logits, bool is_validation_mode_enabled = false);
void set_seed(size_t seed) { rng_engine.seed(seed); }
void clear_beam_search_info(uint64_t request_id);
std::vector<int32_t> get_beam_idxs(SequenceGroup::CPtr sequence_group);
};

class Sampler::GroupBeamSearcher {
Expand Down Expand Up @@ -109,5 +110,6 @@ class Sampler::GroupBeamSearcher {

void select_next_tokens(const ov::Tensor& logits, SamplerOutput& sampler_output);
void finalize(SamplerOutput& sampler_output);
std::vector<int32_t> get_beam_idxs();
};
}
Loading

0 comments on commit 752358c

Please sign in to comment.