Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sampling to vlm pipeline by Sampler #950

Merged
merged 2 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions .github/workflows/causal_lm_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -709,7 +709,8 @@ jobs:
python -m pip install -U "optimum<1.23" --no-dependencies
source ./ov/setupvars.sh
optimum-cli export openvino -m openbmb/MiniCPM-V-2_6 MiniCPM-V-2_6 --trust-remote-code
wget https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --output-document cat.jpg
mkdir cat_img
wget https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --output-document cat_img/cat.jpg
- name: Generate reference
shell: python
run: |
Expand Down Expand Up @@ -740,6 +741,11 @@ jobs:
&& timeout 120s ./build/samples/cpp/visual_language_chat/visual_language_chat ./MiniCPM-V-2_6/ lines.png
<<< $'What is unusual on this image?' | tee cpp.txt
- run: diff cpp.txt ref.txt
- name: Run visual_language_chat C++ sample with dir - MiniCPM-V-2_6
run: >
source ./ov/setupvars.sh
&& timeout 120s ./build/samples/cpp/visual_language_chat/visual_language_chat ./MiniCPM-V-2_6/ cat_img
<<< $'What is unusual on this image?'
- name: Download and convert LLaVa 1.5 model and an image
run: |
source ./ov/setupvars.sh
Expand Down Expand Up @@ -767,7 +773,7 @@ jobs:
source ./ov/setupvars.sh
export PYTHONPATH=./build/:$PYTHONPATH
printf 'What is on the image?\nWhat is special on the image?\n' > ./input.txt
timeout 120s python ./samples/python/visual_language_chat/visual_language_chat.py ./MiniCPM-V-2_6/ cat.jpg < input.txt > ./pred.txt
timeout 120s python ./samples/python/visual_language_chat/visual_language_chat.py ./MiniCPM-V-2_6/ cat_img/cat.jpg < input.txt > ./pred.txt

cpp-continuous-batching-ubuntu:
runs-on: ubuntu-20.04-8-cores
Expand Down
22 changes: 22 additions & 0 deletions samples/cpp/visual_language_chat/load_image.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,28 @@
#include "stb_image.h"
#include "load_image.hpp"

namespace fs = std::filesystem;

/// Loads one image or every image found in a directory into ov::Tensor objects.
///
/// @param input_path Path to a single image file, or to a directory containing
///                   image files (non-recursive).
/// @return Non-empty vector of decoded images.
/// @throws std::runtime_error if the path is empty, does not exist, or yields
///         no loadable images.
std::vector<ov::Tensor> utils::load_images(const std::filesystem::path& input_path) {
    std::vector<ov::Tensor> images;
    if (!input_path.empty() && fs::exists(input_path)) {
        if (fs::is_directory(input_path)) {
            for (const auto& dir_entry : fs::directory_iterator(input_path)) {
                // Skip subdirectories and other non-file entries: feeding them
                // to load_image() would fail the decode instead of being ignored.
                if (!dir_entry.is_regular_file()) {
                    continue;
                }
                // NOTE(review): directory_iterator order is unspecified; sort the
                // paths first if callers ever depend on a deterministic order.
                ov::Tensor image = utils::load_image(dir_entry.path());
                images.push_back(std::move(image));
            }
        } else if (fs::is_regular_file(input_path)) {
            ov::Tensor image = utils::load_image(input_path);
            images.push_back(std::move(image));
        }
    }

    if (images.empty())
        throw std::runtime_error(std::string{"No images were found in path "} + input_path.string());

    return images;
}

ov::Tensor utils::load_image(const std::filesystem::path& image_path) {
int x = 0, y = 0, channels_in_file = 0;
constexpr int desired_channels = 3;
Expand Down
1 change: 1 addition & 0 deletions samples/cpp/visual_language_chat/load_image.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@

namespace utils {
// Decodes a single image file into an ov::Tensor.
// NOTE(review): presumably throws on a missing/undecodable file — confirm in load_image.cpp.
ov::Tensor load_image(const std::filesystem::path& image_path);
// Loads either one image file or all images in a directory; the parameter
// accepts a file path or a directory path despite its name.
std::vector<ov::Tensor> load_images(const std::filesystem::path& image_path);
}
24 changes: 16 additions & 8 deletions samples/cpp/visual_language_chat/visual_language_chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include "load_image.hpp"
#include <openvino/genai/visual_language/pipeline.hpp>
#include <filesystem>
#include <openvino/runtime/intel_gpu/properties.hpp>

bool print_subword(std::string&& subword) {
Expand All @@ -11,9 +12,14 @@ bool print_subword(std::string&& subword) {

int main(int argc, char* argv[]) try {
if (3 != argc) {
throw std::runtime_error(std::string{"Usage "} + argv[0] + " <MODEL_DIR> <IMAGE_FILE>");
throw std::runtime_error(std::string{"Usage "} + argv[0] + " <MODEL_DIR> <IMAGE_FILE OR DIR_WITH_IMAGES>");
}
ov::Tensor image = utils::load_image(argv[2]);

std::vector<ov::Tensor> images = utils::load_images(argv[2]);

ov::genai::GenerationConfig generation_config;
generation_config.max_new_tokens = 200;

std::string device = "CPU"; // GPU can be used as well
ov::AnyMap enable_compile_cache;
if ("GPU" == device) {
Expand All @@ -26,16 +32,18 @@ int main(int argc, char* argv[]) try {

pipe.start_chat();
std::cout << "question:\n";

std::getline(std::cin, prompt);
pipe.generate(
prompt,
ov::genai::image(image),
ov::genai::streamer(print_subword)
);
pipe.generate(prompt,
ov::genai::images(images),
ov::genai::generation_config(generation_config),
ov::genai::streamer(print_subword));
std::cout << "\n----------\n"
"question:\n";
while (std::getline(std::cin, prompt)) {
pipe.generate(prompt, ov::genai::streamer(print_subword));
pipe.generate(prompt,
ov::genai::generation_config(generation_config),
ov::genai::streamer(print_subword));
std::cout << "\n----------\n"
"question:\n";
}
Expand Down
25 changes: 25 additions & 0 deletions src/cpp/src/sampler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,22 @@ Sampler::GroupBeamSearcher::GroupBeamSearcher(SequenceGroup::Ptr sequence_group,
}
}


std::vector<int32_t> Sampler::GroupBeamSearcher::get_beam_idxs() {
std::vector<int32_t> next_beams;

for (Group& group : m_groups) {
if (!group.done) {
for (Beam& beam : group.ongoing) {
next_beams.push_back(beam.m_global_beam_idx);
}
}
}

return next_beams;
}


void Sampler::GroupBeamSearcher::select_next_tokens(const ov::Tensor& logits, SamplerOutput& sampler_output) {
assert(m_parameters.num_beams % m_parameters.num_beam_groups == 0 &&
"number of beams should be divisible by number of groups");
Expand Down Expand Up @@ -581,6 +597,15 @@ void register_new_token(const Token& sampled_token_id,
}
};

// Returns the beam indices for the given sequence group.
// If no beam search is registered for this request (e.g. greedy/multinomial
// sampling), returns zeros — one per running sequence — so callers can index
// uniformly regardless of the sampling mode.
std::vector<int32_t> Sampler::get_beam_idxs(SequenceGroup::CPtr sequence_group) {
    size_t request_id = sequence_group->get_request_id();
    auto beam_searcher = m_beam_search_info.find(request_id);
    // Reuse the iterator instead of performing a second map lookup.
    if (beam_searcher == m_beam_search_info.end()) {
        return std::vector<int32_t>(sequence_group->num_running_seqs(), 0);
    }
    return beam_searcher->second.get_beam_idxs();
}

std::list<uint64_t>
create_n_forked_sequences(SequenceGroup::Ptr sequence_group,
LogitProcessor& logit_processor,
Expand Down
2 changes: 2 additions & 0 deletions src/cpp/src/sampler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class Sampler {
SamplerOutput sample(std::vector<SequenceGroup::Ptr> & sequence_groups, ov::Tensor logits, bool is_validation_mode_enabled = false);
void set_seed(size_t seed) { rng_engine.seed(seed); }
void clear_beam_search_info(uint64_t request_id);
std::vector<int32_t> get_beam_idxs(SequenceGroup::CPtr sequence_group);
};

class Sampler::GroupBeamSearcher {
Expand Down Expand Up @@ -109,5 +110,6 @@ class Sampler::GroupBeamSearcher {

void select_next_tokens(const ov::Tensor& logits, SamplerOutput& sampler_output);
void finalize(SamplerOutput& sampler_output);
std::vector<int32_t> get_beam_idxs();
};
}
Loading
Loading