Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some updates to Text 2 image pipeline #944

Merged
merged 14 commits into from
Oct 13, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/causal_lm_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ jobs:
&& python samples\python\greedy_causal_lm\greedy_causal_lm.py .\TinyLlama-1.1B-Chat-v1.0\ 69 > .\py.txt
- run: fc .\cpp.txt .\py.txt

cpp-beam_search_causal_lm-Qwen-7B-Chat:
cpp-greedy_causal_lm-Qwen-7B-Chat:
ilya-lavrenov marked this conversation as resolved.
Show resolved Hide resolved
runs-on: ubuntu-20.04-16-cores
defaults:
run:
Expand Down Expand Up @@ -866,7 +866,7 @@ jobs:
Overall_Status:
name: ci/gha_overall_status_causal_lm
needs: [cpp-multinomial-greedy_causal_lm-ubuntu, cpp-beam_search_causal_lm-ubuntu, cpp-greedy_causal_lm-windows,
cpp-beam_search_causal_lm-Qwen-7B-Chat, cpp-beam_search_causal_lm-Qwen1_5-7B-Chat, cpp-beam_search_causal_lm-Phi-2,
cpp-greedy_causal_lm-Qwen-7B-Chat, cpp-beam_search_causal_lm-Qwen1_5-7B-Chat, cpp-beam_search_causal_lm-Phi-2,
cpp-beam_search_causal_lm-notus-7b-v1, cpp-speculative_decoding_lm-ubuntu, cpp-prompt_lookup_decoding_lm-ubuntu,
cpp-Phi-1_5, cpp-greedy_causal_lm-redpajama-3b-chat, cpp-chat_sample-ubuntu, cpp-continuous-batching-ubuntu,
visual_language_chat_sample-ubuntu,
Expand Down
11 changes: 0 additions & 11 deletions samples/cpp/text2image/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,6 @@ Prompt: `cyberpunk cityscape like Tokyo New York with tall buildings at dusk gol

![](./512x512.bmp)

## Supported models

Models can be downloaded from [HuggingFace](https://huggingface.co/models). This sample can run models including, but not limited to, the following:

- [botp/stable-diffusion-v1-5](https://huggingface.co/botp/stable-diffusion-v1-5)
- [stabilityai/stable-diffusion-2](https://huggingface.co/stabilityai/stable-diffusion-2)
- [stabilityai/stable-diffusion-2-1](https://huggingface.co/stabilityai/stable-diffusion-2-1)
- [dreamlike-art/dreamlike-anime-1.0](https://huggingface.co/dreamlike-art/dreamlike-anime-1.0)
- [SimianLuo/LCM_Dreamshaper_v7](https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7)
- [stabilityai/stable-diffusion-xl-base-1.0](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
- [stabilityai/stable-diffusion-xl-base-0.9](https://huggingface.co/stabilityai/stable-diffusion-xl-base-0.9)

## Run with optional LoRA adapters

Expand Down
113 changes: 64 additions & 49 deletions samples/cpp/text2image/imwrite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,60 +30,59 @@ unsigned char file[14] = {
};

unsigned char info[40] = {
40,
0,
0,
0, // info hd size
0,
0,
0,
0, // width
0,
0,
0,
0, // height
1,
0, // number color planes
24,
0, // bits per pixel
0,
0,
0,
0, // compression is none
0,
0,
0,
0, // image bits size
0x13,
0x0B,
0,
0, // horz resolution in pixel / m
0x13,
0x0B,
0,
0, // vert resolution (0x03C3 = 96 dpi, 0x0B13 = 72
// dpi)
0,
0,
0,
0, // #colors in palette
0,
0,
0,
0, // #important colors
};

}

void imwrite(const std::string& name, ov::Tensor image, bool convert_bgr2rgb) {
std::ofstream output_file(name, std::ofstream::binary);
OPENVINO_ASSERT(output_file.is_open(), "Failed to open the output BMP image path");
40,
0,
0,
0, // info hd size
0,
0,
0,
0, // width
0,
0,
0,
0, // height
1,
0, // number color planes
24,
0, // bits per pixel
0,
0,
0,
0, // compression is none
0,
0,
0,
0, // image bits size
0x13,
0x0B,
0,
0, // horz resolution in pixel / m
0x13,
0x0B,
0,
0, // vert resolution (0x03C3 = 96 dpi, 0x0B13 = 72
// dpi)
0,
0,
0,
0, // #colors in palette
0,
0,
0,
0, // #important colors
};

void imwrite_single_image(const std::string& name, ov::Tensor image, bool convert_bgr2rgb) {
const ov::Shape shape = image.get_shape();
const size_t width = shape[2], height = shape[1], channels = shape[3];
OPENVINO_ASSERT(image.get_element_type() == ov::element::u8 &&
shape.size() == 4 && shape[0] == 1 && channels == 3,
"Image of u8 type and [1, H, W, 3] shape is expected");
"Image of u8 type and [1, H, W, 3] shape is expected.",
"Given image has shape ", shape, " and element type ", image.get_element_type());

std::ofstream output_file(name, std::ofstream::binary);
OPENVINO_ASSERT(output_file.is_open(), "Failed to open the output BMP image path");

int padSize = static_cast<int>(4 - (width * channels) % 4) % 4;
int sizeData = static_cast<int>(width * height * channels + height * padSize);
Expand Down Expand Up @@ -131,3 +130,19 @@ void imwrite(const std::string& name, ov::Tensor image, bool convert_bgr2rgb) {
output_file.write(reinterpret_cast<const char*>(pad), padSize);
}
}

} // namespace


void imwrite(const std::string& name, ov::Tensor images, bool convert_bgr2rgb) {
const ov::Shape shape = images.get_shape(); img_shape = {1, shape[1], shape[2], shape[3]};
ilya-lavrenov marked this conversation as resolved.
Show resolved Hide resolved
uint8_t* img_data = images.data<uint8_t>();

for (int img_num = 0, num_images = shape[0], img_size = ov::shape_size(img_shape); img_num < num_images; ++img_num, img_data += img_size) {
char img_name[25];
sprintf(img_name, name.c_str(), img_num);

ov::Tensor image(images.get_element_type(), img_shape, img_data);
imwrite_single_image(img_name, image, true);
}
}
8 changes: 4 additions & 4 deletions samples/cpp/text2image/imwrite.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
#include "openvino/runtime/tensor.hpp"

/**
* @brief Writes image to file
* @param name File name
* @param image Image tensor
* @brief Writes multiple images (depending on `images` tensor batch size) to BMP file(s)
* @param name File name or pattern to use to write images
* @param images Image(s) tensor
* @param convert_bgr2rgb Convert BGR to RGB
*/
void imwrite(const std::string& name, ov::Tensor image, bool convert_bgr2rgb);
void imwrite(const std::string& name, ov::Tensor images, bool convert_bgr2rgb);
32 changes: 2 additions & 30 deletions samples/cpp/text2image/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,35 +5,6 @@

#include "imwrite.hpp"

namespace {

// Saves the images held in `output` as BMP files through imwrite().
// A batch of one is written to "image.bmp"; a larger batch is written one
// image at a time to "image_<index>.bmp".
// NOTE(review): the shape is indexed at [0]..[3] and the data is read as
// uint8_t, so a rank-4 u8 tensor is presumably expected - confirm with caller.
void imwrite_output_imgs(const ov::Tensor& output) {
    const ov::Shape batch_shape = output.get_shape();
    const size_t batch_size = batch_shape[0];

    if (batch_size == 1) {
        imwrite("image.bmp", output, true);
        return;
    }

    const ov::Shape single_shape = {1, batch_shape[1], batch_shape[2], batch_shape[3]};
    const size_t elems_per_image = output.get_size() / batch_size;

    // One staging tensor is reused for every image of the batch.
    ov::Tensor staging(output.get_element_type(), single_shape);
    uint8_t* src = output.data<uint8_t>();
    uint8_t* const dst = staging.data<uint8_t>();

    for (int index = 0; static_cast<size_t>(index) < batch_size; ++index, src += elems_per_image) {
        std::memcpy(dst, src, elems_per_image * sizeof(uint8_t));

        char file_name[25];
        sprintf(file_name, "image_%d.bmp", index);

        imwrite(file_name, staging, true);
    }
}

} // namespace

int32_t main(int32_t argc, char* argv[]) try {
OPENVINO_ASSERT(argc == 3, "Usage: ", argv[0], " <MODEL_DIR> '<PROMPT>'");

Expand All @@ -47,7 +18,8 @@ int32_t main(int32_t argc, char* argv[]) try {
ov::genai::num_inference_steps(20),
ov::genai::num_images_per_prompt(1));

imwrite_output_imgs(image);
// writes `num_images_per_prompt` images by pattern name
imwrite("image_%d.bmp", image, true);

return EXIT_SUCCESS;
} catch (const std::exception& error) {
Expand Down
7 changes: 7 additions & 0 deletions src/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,13 @@ function(ov_genai_build_jinja2cpp)
option(RAPIDJSON_BUILD_DOC "Build rapidjson documentation." OFF)

add_subdirectory("${jinja2cpp_SOURCE_DIR}" "${jinja2cpp_BINARY_DIR}" EXCLUDE_FROM_ALL)

if(CMAKE_COMPILER_IS_GNUCXX OR OV_COMPILER_IS_CLANG OR (OV_COMPILER_IS_INTEL_LLVM AND UNIX))
target_compile_options(jinja2cpp PRIVATE -Wno-undef)
endif()
if(SUGGEST_OVERRIDE_SUPPORTED)
target_compile_options(jinja2cpp PRIVATE -Wno-suggest-override)
endif()
endif()
endfunction()

Expand Down
12 changes: 6 additions & 6 deletions src/cpp/include/openvino/genai/text2image/pipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline {
// SD XL: prompt2 and negative_prompt2
// FLUX: prompt2 (prompt if prompt2 is not defined explicitly)
// SD 3: prompt2, prompt3 (with fallback to prompt) and negative_prompt2, negative_prompt3
std::string prompt2, prompt3;
std::string negative_prompt, negative_prompt2, negative_prompt3;
std::optional<std::string> prompt_2 = std::nullopt, prompt_3 = std::nullopt;
std::string negative_prompt, negative_prompt_2, negative_prompt_3;

size_t num_images_per_prompt = 1;

Expand Down Expand Up @@ -165,12 +165,12 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline {
// Generation config properties
//

static constexpr ov::Property<std::string> prompt2{"prompt2"};
static constexpr ov::Property<std::string> prompt3{"prompt3"};
static constexpr ov::Property<std::string> prompt_2{"prompt_2"};
static constexpr ov::Property<std::string> prompt_3{"prompt_3"};

static constexpr ov::Property<std::string> negative_prompt{"negative_prompt"};
static constexpr ov::Property<std::string> negative_prompt2{"negative_prompt2"};
static constexpr ov::Property<std::string> negative_prompt3{"negative_prompt3"};
static constexpr ov::Property<std::string> negative_prompt_2{"negative_prompt_2"};
static constexpr ov::Property<std::string> negative_prompt_3{"negative_prompt_3"};

static constexpr ov::Property<size_t> num_images_per_prompt{"num_images_per_prompt"};
static constexpr ov::Property<float> guidance_scale{"guidance_scale"};
Expand Down
4 changes: 3 additions & 1 deletion src/cpp/src/text2image/diffusion_pipeline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ class Text2ImagePipeline::DiffusionPipeline {
protected:
virtual void initialize_generation_config(const std::string& class_name) = 0;

virtual void check_inputs(const int height, const int width) const = 0;
virtual void check_image_size(const int height, const int width) const = 0;

virtual void check_inputs(const GenerationConfig& generation_config) const = 0;

std::shared_ptr<IScheduler> m_scheduler;
GenerationConfig m_generation_config;
Expand Down
8 changes: 4 additions & 4 deletions src/cpp/src/text2image/models/clip_text_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,16 +94,16 @@ ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, const std::string

if (do_classifier_free_guidance) {
perform_tokenization(neg_prompt,
ov::Tensor(input_ids, {current_batch_idx , 0},
{current_batch_idx + 1, m_config.max_position_embeddings}));
ov::Tensor(input_ids, {current_batch_idx , 0},
{current_batch_idx + 1, m_config.max_position_embeddings}));
++current_batch_idx;
} else {
// Negative prompt is ignored when --guidanceScale < 1.0
}

perform_tokenization(pos_prompt,
ov::Tensor(input_ids, {current_batch_idx , 0},
{current_batch_idx + 1, m_config.max_position_embeddings}));
ov::Tensor(input_ids, {current_batch_idx , 0},
{current_batch_idx + 1, m_config.max_position_embeddings}));

// text embeddings
m_request.set_tensor("input_ids", input_ids);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,16 +83,16 @@ ov::Tensor CLIPTextModelWithProjection::infer(const std::string& pos_prompt, con

if (do_classifier_free_guidance) {
perform_tokenization(neg_prompt,
ov::Tensor(input_ids, {current_batch_idx , 0},
{current_batch_idx + 1, m_config.max_position_embeddings}));
ov::Tensor(input_ids, {current_batch_idx , 0},
{current_batch_idx + 1, m_config.max_position_embeddings}));
++current_batch_idx;
} else {
// Negative prompt is ignored when --guidanceScale < 1.0
}

perform_tokenization(pos_prompt,
ov::Tensor(input_ids, {current_batch_idx , 0},
{current_batch_idx + 1, m_config.max_position_embeddings}));
ov::Tensor(input_ids, {current_batch_idx , 0},
{current_batch_idx + 1, m_config.max_position_embeddings}));

// text embeddings
m_request.set_tensor("input_ids", input_ids);
Expand Down
4 changes: 2 additions & 2 deletions src/cpp/src/text2image/schedulers/lcm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ LCMScheduler::LCMScheduler(const std::string scheduler_config_path) :
LCMScheduler::LCMScheduler(const Config& scheduler_config)
: m_config(scheduler_config),
m_seed(42),
m_gen(100, std::mt19937(m_seed)),
m_gen(m_seed),
m_normal(0.0f, 1.0f) {

m_sigma_data = 0.5f; // Default: 0.5
Expand Down Expand Up @@ -191,7 +191,7 @@ std::map<std::string, ov::Tensor> LCMScheduler::step(ov::Tensor noise_pred, ov::

if (inference_step != m_num_inference_steps - 1) {
for (std::size_t i = 0; i < batch_size * latent_size; ++i) {
float gen_noise = m_normal(m_gen[i / latent_size]);
float gen_noise = m_normal(m_gen);
prev_sample_data[i] = alpha_prod_t_prev_sqrt * denoised_data[i] + beta_prod_t_prev_sqrt * gen_noise;
}
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/cpp/src/text2image/schedulers/lcm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class LCMScheduler : public IScheduler {
std::vector<int64_t> m_timesteps;

uint32_t m_seed;
std::vector<std::mt19937> m_gen;
std::mt19937 m_gen;
std::normal_distribution<float> m_normal;

std::vector<float> threshold_sample(const std::vector<float>& flat_sample);
Expand Down
Loading
Loading