diff --git a/.github/workflows/causal_lm_cpp.yml b/.github/workflows/causal_lm_cpp.yml index 13ea4d754c..9856294340 100644 --- a/.github/workflows/causal_lm_cpp.yml +++ b/.github/workflows/causal_lm_cpp.yml @@ -16,10 +16,10 @@ concurrency: cancel-in-progress: true env: - l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17130-65a8f393ba0/l_openvino_toolkit_ubuntu20_2024.5.0.dev20241024_x86_64.tgz - l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17130-65a8f393ba0/l_openvino_toolkit_ubuntu22_2024.5.0.dev20241024_x86_64.tgz - m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-16993-9c432a3641a/m_openvino_toolkit_macos_12_6_2024.5.0.dev20241014_x86_64.tgz - w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17130-65a8f393ba0/w_openvino_toolkit_windows_2024.5.0.dev20241024_x86_64.zip + l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17180-08365f6fddc/l_openvino_toolkit_ubuntu20_2024.5.0.dev20241028_x86_64.tgz + l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17180-08365f6fddc/l_openvino_toolkit_ubuntu22_2024.5.0.dev20241028_x86_64.tgz + m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17180-08365f6fddc/m_openvino_toolkit_macos_12_6_2024.5.0.dev20241028_x86_64.tgz + w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17180-08365f6fddc/w_openvino_toolkit_windows_2024.5.0.dev20241028_x86_64.zip jobs: cpp-multinomial-greedy_causal_lm-ubuntu: runs-on: ubuntu-20.04-8-cores @@ -46,7 +46,7 @@ jobs: - name: Download and convert and model run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly optimum-cli export openvino --trust-remote-code --weight-format fp16 --model openlm-research/open_llama_3b_v2 open_llama_3b_v2 optimum-cli export openvino -m TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T @@ -105,7 +105,7 @@ jobs: - name: Download and convert and model run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 - name: Compare @@ -241,7 +241,7 @@ jobs: - name: Download and convert model run: | call .\ov\setupvars.bat - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] 
--pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 optimum-cli export openvino -m TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T @@ -299,7 +299,7 @@ jobs: - name: Download and convert and model run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen-7B-Chat Qwen-7B-Chat - run: > @@ -333,7 +333,7 @@ jobs: - name: Download and convert and model run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen1.5-7B-Chat Qwen1.5-7B-Chat - run: > @@ -368,7 +368,7 @@ jobs: - name: Download and convert and model run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-2 phi-2 - run: > @@ -403,7 +403,7 @@ jobs: - name: Download and convert and model run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly optimum-cli export openvino --trust-remote-code --weight-format fp16 --model argilla/notus-7b-v1 notus-7b-v1 - run: > @@ -438,7 +438,7 @@ jobs: - name: Download and convert and model run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b @@ -488,7 +488,7 @@ jobs: - name: Download and convert and model 
run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen-7B-Chat Qwen-7B-Chat --task text-generation-with-past @@ -560,7 +560,7 @@ jobs: - name: Download and convert and model run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-1_5 phi-1_5 - name: Run Generation @@ -615,7 +615,7 @@ jobs: - name: Download and convert and model run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly optimum-cli export openvino --trust-remote-code --weight-format fp16 --model ikala/redpajama-3b-chat redpajama-3b-chat - name: Run Generation @@ -670,7 +670,7 @@ jobs: - name: Download and convert and model run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 - name: Compare @@ -736,7 +736,7 @@ jobs: run: | source ./ov/setupvars.sh python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt opencv-python --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt opencv-python - name: Download and convert MiniCPM-V-2_6 model and an image run: | python -m pip install git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv @@ -860,7 +860,7 @@ jobs: - name: Download and convert and model run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r 
./samples/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 - name: Run gtests @@ -906,7 +906,7 @@ jobs: - name: Download and convert and model run: | call .\ov\setupvars.bat - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 - name: Run gtests @@ -951,7 +951,7 @@ jobs: - name: Download and convert and model run: | source ./ov/setupvars.sh - python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 - name: Run gtests diff --git a/.github/workflows/lcm_dreamshaper_cpp.yml b/.github/workflows/lcm_dreamshaper_cpp.yml index 1598cf1597..d806bf4d79 100644 --- a/.github/workflows/lcm_dreamshaper_cpp.yml +++ b/.github/workflows/lcm_dreamshaper_cpp.yml @@ -18,8 +18,8 @@ concurrency: env: PYTHON_VERSION: '3.9' - LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17130-65a8f393ba0/l_openvino_toolkit_ubuntu22_2024.5.0.dev20241024_x86_64.tgz - WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17130-65a8f393ba0/w_openvino_toolkit_windows_2024.5.0.dev20241024_x86_64.zip + LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17180-08365f6fddc/l_openvino_toolkit_ubuntu22_2024.5.0.dev20241028_x86_64.tgz + WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17180-08365f6fddc/w_openvino_toolkit_windows_2024.5.0.dev20241028_x86_64.zip OV_INSTALL_DIR: ${{ github.workspace }}/ov jobs: @@ -60,7 +60,7 @@ jobs: run: | source openvino_lcm_cpp/bin/activate python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install -r ./samples/requirements.txt - name: Download and convert models and tokenizer run: | @@ -120,7 +120,7 @@ jobs: run: | . 
"./openvino_lcm_cpp/Scripts/Activate.ps1" python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install -r ./samples/requirements.txt - name: Download and convert models and tokenizer run: | diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml index 2a52a0e839..db7dd91a16 100644 --- a/.github/workflows/stable_diffusion_1_5_cpp.yml +++ b/.github/workflows/stable_diffusion_1_5_cpp.yml @@ -18,8 +18,8 @@ concurrency: env: PYTHON_VERSION: '3.10' - LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17130-65a8f393ba0/l_openvino_toolkit_ubuntu20_2024.5.0.dev20241024_x86_64.tgz - WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17130-65a8f393ba0/w_openvino_toolkit_windows_2024.5.0.dev20241024_x86_64.zip + LINUX_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17180-08365f6fddc/l_openvino_toolkit_ubuntu20_2024.5.0.dev20241028_x86_64.tgz + WINDOWS_OV_ARCHIVE_URL: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17180-08365f6fddc/w_openvino_toolkit_windows_2024.5.0.dev20241028_x86_64.zip OV_INSTALL_DIR: ${{ github.workspace }}/ov jobs: @@ -60,7 +60,7 @@ jobs: run: | source openvino_sd_cpp/bin/activate python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install -r ./samples/requirements.txt - name: Download and convert models and tokenizer run: | @@ -134,7 +134,7 @@ jobs: run: | . "./openvino_sd_cpp/Scripts/Activate.ps1" python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly - python -m pip install -r ./samples/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly + python -m pip install -r ./samples/requirements.txt - name: Download and convert models and tokenizer run: | diff --git a/samples/cpp/beam_search_causal_lm/README.md b/samples/cpp/beam_search_causal_lm/README.md index 41bfee3942..947160e092 100644 --- a/samples/cpp/beam_search_causal_lm/README.md +++ b/samples/cpp/beam_search_causal_lm/README.md @@ -6,15 +6,17 @@ This example showcases inference of text-generation Large Language Models (LLMs) The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. 
```sh -pip install --upgrade-strategy eager -r ../../requirements.txt +pip install --upgrade-strategy eager -r ../../export-requirements.txt optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 ``` ## Run + Follow [Get Started with Samples](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/get-started-demos.html) to run the sample. + `beam_search_causal_lm TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"` diff --git a/samples/cpp/benchmark_genai/README.md b/samples/cpp/benchmark_genai/README.md index 1a46db05d9..d7b3f6ac21 100644 --- a/samples/cpp/benchmark_genai/README.md +++ b/samples/cpp/benchmark_genai/README.md @@ -6,7 +6,7 @@ This sample script demonstrates how to benchmark an LLMs in OpenVINO GenAI. The The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +It's not required to install [../../export-requirements.txt](../../export-requirements.txt) for deployment if the model has already been exported. ```sh pip install --upgrade-strategy eager -r ../../requirements.txt @@ -15,6 +15,8 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B ## Usage + Follow [Get Started with Samples](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/get-started-demos.html) to run the sample. + ```sh benchmark_genai [OPTIONS] ``` diff --git a/samples/cpp/chat_sample/README.md b/samples/cpp/chat_sample/README.md index 3f736985c2..bdc1d294ee 100644 --- a/samples/cpp/chat_sample/README.md +++ b/samples/cpp/chat_sample/README.md @@ -6,7 +6,7 @@ This example showcases inference of text-generation Large Language Models (LLMs) The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +It's not required to install [../../export-requirements.txt](../../export-requirements.txt) for deployment if the model has already been exported. ```sh pip install --upgrade-strategy eager -r ../../requirements.txt @@ -15,6 +15,8 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B ## Run: + Follow [Get Started with Samples](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/get-started-demos.html) to run the sample. + `chat_sample TinyLlama-1.1B-Chat-v1.0` diff --git a/samples/cpp/greedy_causal_lm/README.md b/samples/cpp/greedy_causal_lm/README.md index 79852e0d10..2f3a7751bf 100644 --- a/samples/cpp/greedy_causal_lm/README.md +++ b/samples/cpp/greedy_causal_lm/README.md @@ -6,7 +6,7 @@ This example showcases inference of text-generation Large Language Models (LLMs) The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +It's not required to install [../../export-requirements.txt](../../export-requirements.txt) for deployment if the model has already been exported.
```sh pip install --upgrade-strategy eager -r ../../requirements.txt @@ -15,6 +15,8 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B ## Run + Follow [Get Started with Samples](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/get-started-demos.html) to run the sample. + `greedy_causal_lm TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"` diff --git a/samples/cpp/multinomial_causal_lm/README.md b/samples/cpp/multinomial_causal_lm/README.md index 21c9a07e77..35ca054fdd 100644 --- a/samples/cpp/multinomial_causal_lm/README.md +++ b/samples/cpp/multinomial_causal_lm/README.md @@ -6,7 +6,7 @@ This example showcases inference of text-generation Large Language Models (LLMs) The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +It's not required to install [../../export-requirements.txt](../../export-requirements.txt) for deployment if the model has already been exported. ```sh pip install --upgrade-strategy eager -r ../../requirements.txt @@ -15,6 +15,8 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B ## Run + Follow [Get Started with Samples](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/get-started-demos.html) to run the sample. + `multinomial_causal_lm TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"` diff --git a/samples/cpp/prompt_lookup_decoding_lm/README.md b/samples/cpp/prompt_lookup_decoding_lm/README.md index c5517c5bf6..2057ff2c6f 100644 --- a/samples/cpp/prompt_lookup_decoding_lm/README.md +++ b/samples/cpp/prompt_lookup_decoding_lm/README.md @@ -8,7 +8,7 @@ This example showcases inference of text-generation Large Language Models (LLMs) The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +It's not required to install [../../export-requirements.txt](../../export-requirements.txt) for deployment if the model has already been exported. ```sh source /setupvars.sh @@ -18,6 +18,8 @@ optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B ## Run + Follow [Get Started with Samples](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/get-started-demos.html) to run the sample. + `prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ "return 0;"` diff --git a/samples/cpp/speculative_decoding_lm/README.md b/samples/cpp/speculative_decoding_lm/README.md index 48d8e50c92..c8e52c75ab 100644 --- a/samples/cpp/speculative_decoding_lm/README.md +++ b/samples/cpp/speculative_decoding_lm/README.md @@ -12,7 +12,7 @@ This example showcases inference of text-generation Large Language Models (LLMs) The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +It's not required to install [../../export-requirements.txt](../../export-requirements.txt) for deployment if the model has already been exported.
```sh pip install --upgrade-strategy eager -r ../../requirements.txt @@ -22,6 +22,8 @@ optimum-cli export openvino --trust-remote-code --model meta-llama/Llama-2-7b-ch ## Run + Follow [Get Started with Samples](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/get-started-demos.html) to run the sample. + `speculative_decoding_lm TinyLlama-1.1B-Chat-v1.0 Llama-2-7b-chat-hf "Why is the Sun yellow?"` diff --git a/samples/cpp/speculative_decoding_lm/speculative_decoding_lm.cpp b/samples/cpp/speculative_decoding_lm/speculative_decoding_lm.cpp index 12f51df0eb..66758531da 100644 --- a/samples/cpp/speculative_decoding_lm/speculative_decoding_lm.cpp +++ b/samples/cpp/speculative_decoding_lm/speculative_decoding_lm.cpp @@ -12,7 +12,7 @@ int main(int argc, char* argv[]) try { ov::genai::GenerationConfig config; config.max_new_tokens = 100; - // Speculative decoding generation parameters are mutually excluded + // Speculative decoding generation parameters like `num_assistant_tokens` and `assistant_confidence_threshold` are mutually exclusive // add parameter to enable speculative decoding to generate `num_assistant_tokens` candidates by draft_model per iteration config.num_assistant_tokens = 5; // add parameter to enable speculative decoding to generate candidates by draft_model while candidate probability is higher than `assistant_confidence_threshold` diff --git a/samples/cpp/text2image/README.md b/samples/cpp/text2image/README.md index fa58838dad..5f1388683f 100644 --- a/samples/cpp/text2image/README.md +++ b/samples/cpp/text2image/README.md @@ -19,7 +19,7 @@ Users can change the sample code and play with the following generation paramete The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +It's not required to install [../../export-requirements.txt](../../export-requirements.txt) for deployment if the model has already been exported. ```sh pip install --upgrade-strategy eager -r ../../requirements.txt @@ -28,6 +28,8 @@ optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task sta ## Run + Follow [Get Started with Samples](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/get-started-demos.html) to run the sample. + `stable_diffusion ./dreamlike_anime_1_0_ov/FP16 'cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting'` ### Examples diff --git a/samples/cpp/visual_language_chat/README.md b/samples/cpp/visual_language_chat/README.md index 99ba417baf..96b1c78ec0 100644 --- a/samples/cpp/visual_language_chat/README.md +++ b/samples/cpp/visual_language_chat/README.md @@ -6,7 +6,7 @@ This example showcases inference of Visual language models (VLMs): [`openbmb/Min The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +It's not required to install [../../export-requirements.txt](../../export-requirements.txt) for deployment if the model has already been exported.
```sh pip install --upgrade-strategy eager -r ../../requirements.txt @@ -15,6 +15,8 @@ optimum-cli export openvino --model openbmb/MiniCPM-V-2_6 --trust-remote-code Mi ## Run + Follow [Get Started with Samples](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/get-started-demos.html) to run the sample. + [This image](https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11) can be used as a sample image. `visual_language_chat miniCPM-V-2_6 319483352-d5fbbd1a-d484-415c-88cb-9986625b7b11.jpg` diff --git a/samples/cpp/whisper_speech_recognition/README.md b/samples/cpp/whisper_speech_recognition/README.md index af779aab9e..773135b648 100644 --- a/samples/cpp/whisper_speech_recognition/README.md +++ b/samples/cpp/whisper_speech_recognition/README.md @@ -6,7 +6,7 @@ This example showcases inference of speech recognition Whisper Models. The appli The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +It's not required to install [../../export-requirements.txt](../../export-requirements.txt) for deployment if the model has already been exported. ```sh pip install --upgrade-strategy eager -r ../../requirements.txt @@ -21,6 +21,8 @@ You can download example audio file: https://storage.openvinotoolkit.org/models_ ## Run + Follow [Get Started with Samples](https://docs.openvino.ai/2024/learn-openvino/openvino-samples/get-started-demos.html) to run the sample. + `whisper_speech_recognition whisper-base how_are_you_doing_today.wav` Output: diff --git a/samples/deployment-requirements.txt b/samples/deployment-requirements.txt new file mode 100644 index 0000000000..c94f8d6a13 --- /dev/null +++ b/samples/deployment-requirements.txt @@ -0,0 +1,5 @@ +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly +--pre +openvino_genai~=2024.5.0.0.dev +librosa==0.10.2 # For Whisper +pillow==11.0.0 # Image processing diff --git a/samples/export-requirements.txt b/samples/export-requirements.txt new file mode 100644 index 0000000000..9850dfd6e0 --- /dev/null +++ b/samples/export-requirements.txt @@ -0,0 +1,11 @@ +--extra-index-url https://download.pytorch.org/whl/cpu +--extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly +--pre +openvino-tokenizers~=2024.5.0.0.dev +optimum-intel @ git+https://github.com/huggingface/optimum-intel.git +numpy<2.0.0; sys_platform == 'darwin' +einops==0.8.0 # For Qwen +transformers_stream_generator==0.0.5 # For Qwen +diffusers==0.31.0 +timm==1.0.11 # For exporting InternVL2 +torchvision # For visual language models diff --git a/samples/python/beam_search_causal_lm/README.md b/samples/python/beam_search_causal_lm/README.md index 8dd06f264c..fac6a26e8e 100644 --- a/samples/python/beam_search_causal_lm/README.md +++ b/samples/python/beam_search_causal_lm/README.md @@ -6,16 +6,18 @@ This example showcases inference of text-generation Large Language Models (LLMs) The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model.
```sh -pip install --upgrade-strategy eager -r ../../requirements.txt +pip install --upgrade-strategy eager -r ../../export-requirements.txt optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 ``` ## Run -`beam_search_causal_lm.py TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"` +Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pip install -r ../../deployment-requirements.txt` and then, run a sample: + +`python beam_search_causal_lm.py TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"` Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU. diff --git a/samples/python/benchmark_genai/README.md b/samples/python/benchmark_genai/README.md index 9baf17c4d7..95f24b6eca 100644 --- a/samples/python/benchmark_genai/README.md +++ b/samples/python/benchmark_genai/README.md @@ -6,15 +6,18 @@ This sample script demonstrates how to benchmark an LLMs in OpenVINO GenAI. The The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. ```sh -pip install --upgrade-strategy eager -r ../../requirements.txt +pip install --upgrade-strategy eager -r ../../export-requirements.txt optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 ``` + ## Usage +Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pip install -r ../../deployment-requirements.txt` and then, run a sample: + ```sh python benchmark_genai.py [OPTIONS] ``` diff --git a/samples/python/chat_sample/README.md b/samples/python/chat_sample/README.md index dc2c39b3a5..7e3c206431 100644 --- a/samples/python/chat_sample/README.md +++ b/samples/python/chat_sample/README.md @@ -6,16 +6,18 @@ This example showcases inference of text-generation Large Language Models (LLMs) The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. ```sh -pip install --upgrade-strategy eager -r ../../requirements.txt +pip install --upgrade-strategy eager -r ../../export-requirements.txt optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 ``` ## Run: -`chat_sample.py TinyLlama-1.1B-Chat-v1.0` +Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pip install -r ../../deployment-requirements.txt` and then, run a sample: + +`python chat_sample.py TinyLlama-1.1B-Chat-v1.0` Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU. 
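As a point of reference for the Python sample READMEs above: once `deployment-requirements.txt` is installed and a model directory has been produced by `optimum-cli`, the samples reduce to a few `openvino_genai` calls. Below is a minimal sketch, assuming the converted `TinyLlama-1.1B-Chat-v1.0` directory from the commands above and the CPU device; the real samples add argument parsing and a streamer, so treat this as an illustration rather than the shipped code.

```python
import openvino_genai

# Assumes the openvino_genai wheel pulled in by deployment-requirements.txt and a
# model folder exported with optimum-cli, as in the README snippets above.
pipe = openvino_genai.LLMPipeline("TinyLlama-1.1B-Chat-v1.0", "CPU")

config = openvino_genai.GenerationConfig()
config.max_new_tokens = 100

pipe.start_chat()
try:
    # generate() returns the decoded completion for a single string prompt.
    print(pipe.generate("Why is the Sun yellow?", config))
finally:
    pipe.finish_chat()
```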
diff --git a/samples/python/greedy_causal_lm/README.md b/samples/python/greedy_causal_lm/README.md index 7aeabe9ac2..a634e21cb0 100644 --- a/samples/python/greedy_causal_lm/README.md +++ b/samples/python/greedy_causal_lm/README.md @@ -10,16 +10,18 @@ There are two sample files: The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. ```sh -pip install --upgrade-strategy eager -r ../../requirements.txt +pip install --upgrade-strategy eager -r ../../export-requirements.txt optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 ``` ## Run -`greedy_causal_lm.py TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"` +Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pip install -r ../../deployment-requirements.txt` and then, run a sample: + +`python greedy_causal_lm.py TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"` Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU. diff --git a/samples/python/multinomial_causal_lm/README.md b/samples/python/multinomial_causal_lm/README.md index 351773ec0d..69a3cd4008 100644 --- a/samples/python/multinomial_causal_lm/README.md +++ b/samples/python/multinomial_causal_lm/README.md @@ -8,16 +8,18 @@ This sample also contains example implementation of an iterable streamer with bu The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. ```sh -pip install --upgrade-strategy eager -r ../../requirements.txt +pip install --upgrade-strategy eager -r ../../export-requirements.txt optimum-cli export openvino --trust-remote-code --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 ``` ## Run -`multinomial_causal_lm.py TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"` +Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pip install -r ../../deployment-requirements.txt` and then, run a sample: + +`python multinomial_causal_lm.py TinyLlama-1.1B-Chat-v1.0 "Why is the Sun yellow?"` Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. For example, the model meta-llama/Llama-2-13b-chat-hf can benefit from being run on a dGPU. Modify the source code to change the device for inference to the GPU. diff --git a/samples/python/speculative_decoding_lm/README.md b/samples/python/speculative_decoding_lm/README.md index c0e38706f9..22df151af3 100644 --- a/samples/python/speculative_decoding_lm/README.md +++ b/samples/python/speculative_decoding_lm/README.md @@ -12,17 +12,20 @@ This example showcases inference of text-generation Large Language Models (LLMs) The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. 
-It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. + Download assisting and main model to run speculative decoding sample. ```sh -pip install --upgrade-strategy eager -r ../../requirements.txt +pip install --upgrade-strategy eager -r ../../export-requirements.txt optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b ``` ## Run + Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pip install -r ../../deployment-requirements.txt` and then, run a sample: + `python speculative_decoding_lm.py ./dolly-v2-7b ./dolly-v2-3b "Why is the Sun yellow?"` diff --git a/samples/python/speculative_decoding_lm/speculative_decoding_lm.py b/samples/python/speculative_decoding_lm/speculative_decoding_lm.py index 38dccbf1bb..857941d45b 100755 --- a/samples/python/speculative_decoding_lm/speculative_decoding_lm.py +++ b/samples/python/speculative_decoding_lm/speculative_decoding_lm.py @@ -36,7 +36,11 @@ def main(): config = openvino_genai.GenerationConfig() config.max_new_tokens = 100 + # Speculative decoding generation parameters like `num_assistant_tokens` and `assistant_confidence_threshold` are mutually exclusive + # add parameter to enable speculative decoding to generate `num_assistant_tokens` candidates by draft_model per iteration config.num_assistant_tokens = 5 + # add parameter to enable speculative decoding to generate candidates by draft_model while candidate probability is higher than `assistant_confidence_threshold` + # config.assistant_confidence_threshold = 0.4 # Since the streamer is set, the results will be printed # every time a new token is generated and put into the streamer queue. diff --git a/samples/python/text2image/README.md b/samples/python/text2image/README.md index 3be523b4a8..1a59107e85 100644 --- a/samples/python/text2image/README.md +++ b/samples/python/text2image/README.md @@ -19,15 +19,17 @@ Users can change the sample code and play with the following generation paramete The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. ```sh -pip install --upgrade-strategy eager -r ../../requirements.txt +pip install --upgrade-strategy eager -r ../../export-requirements.txt optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 dreamlike_anime_1_0_ov/FP16 ``` ## Run + Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pip install -r ../../deployment-requirements.txt` and then, run a sample: + `python main.py ./dreamlike_anime_1_0_ov/FP16 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"` ### Examples @@ -47,7 +49,7 @@ Here is an example how to run the sample with a single adapter.
First download a Then run `lora.py`: -`python lora.py ./lora_stable_diffusion dreamlike_anime_1_0_ov/FP16 "curly-haired unicorn in the forest, anime, line" soulcard.safetensors 0.7` +`python lora.py ./dreamlike_anime_1_0_ov/FP16 "curly-haired unicorn in the forest, anime, line" soulcard.safetensors 0.7` The sample generates two images with and without adapters applied using the same prompt: - `lora.bmp` with adapters applied diff --git a/samples/python/visual_language_chat/README.md b/samples/python/visual_language_chat/README.md index 06355d9ee5..e8744a8c8f 100644 --- a/samples/python/visual_language_chat/README.md +++ b/samples/python/visual_language_chat/README.md @@ -6,17 +6,20 @@ This example showcases inference of text-generation Vision Language Models (VLMs The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. ```sh -pip install --upgrade-strategy eager -r ../../requirements.txt +pip install --upgrade-strategy eager -r ../../export-requirements.txt optimum-cli export openvino --model openbmb/MiniCPM-V-2_6 --trust-remote-code MiniCPM-V-2_6 ``` ## Run: + [This image](https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11) can be used as a sample image. -`visual_language_chat.py ./miniCPM-V-2_6/ 319483352-d5fbbd1a-d484-415c-88cb-9986625b7b11.jpg` +Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pip install -r ../../deployment-requirements.txt` and then, run a sample: + +`python visual_language_chat.py ./miniCPM-V-2_6/ 319483352-d5fbbd1a-d484-415c-88cb-9986625b7b11.jpg` Discrete GPUs (dGPUs) usually provide better performance compared to CPUs. It is recommended to run larger models on a dGPU with 32GB+ RAM. # TODO: examples of larger models diff --git a/samples/python/whisper_speech_recognition/README.md b/samples/python/whisper_speech_recognition/README.md index e324abfb67..158bd18311 100644 --- a/samples/python/whisper_speech_recognition/README.md +++ b/samples/python/whisper_speech_recognition/README.md @@ -6,10 +6,10 @@ This example showcases inference of speech recognition Whisper Models. The appli The `--upgrade-strategy eager` option is needed to ensure `optimum-intel` is upgraded to the latest version. -It's not required to install [../../requirements.txt](../../requirements.txt) for deployment if the model has already been exported. +Install [../../export-requirements.txt](../../export-requirements.txt) to convert a model. 
```sh -pip install --upgrade-strategy eager -r ../../requirements.txt +pip install --upgrade-strategy eager -r ../../export-requirements.txt optimum-cli export openvino --trust-remote-code --model openai/whisper-base whisper-base ``` @@ -28,7 +28,9 @@ python recorder.py ## Run the Whisper model -`whisper_speech_recognition whisper-base how_are_you_doing_today.wav` +Install [deployment-requirements.txt](../../deployment-requirements.txt) via `pip install -r ../../deployment-requirements.txt` and then, run a sample: + +`python whisper_speech_recognition.py whisper-base how_are_you_doing_today.wav` Output: ``` diff --git a/samples/requirements.txt b/samples/requirements.txt index 2ccd59e609..ae7f6ebe43 100644 --- a/samples/requirements.txt +++ b/samples/requirements.txt @@ -1,9 +1,2 @@ ---extra-index-url https://download.pytorch.org/whl/cpu -optimum-intel @ git+https://github.com/huggingface/optimum-intel.git -numpy<2.0.0; sys_platform == 'darwin' -einops==0.8.0 # For Qwen -transformers_stream_generator==0.0.5 # For Qwen -diffusers==0.30.3 -librosa # For Whisper -torchvision # For visual language models -timm # For exporting InternVL2 +-r ./deployment-requirements.txt +-r ./export-requirements.txt diff --git a/src/cpp/include/openvino/genai/text2image/autoencoder_kl.hpp b/src/cpp/include/openvino/genai/text2image/autoencoder_kl.hpp index b1088f7448..1bb9bf97b4 100644 --- a/src/cpp/include/openvino/genai/text2image/autoencoder_kl.hpp +++ b/src/cpp/include/openvino/genai/text2image/autoencoder_kl.hpp @@ -24,6 +24,7 @@ class OPENVINO_GENAI_EXPORTS AutoencoderKL { size_t latent_channels = 4; size_t out_channels = 3; float scaling_factor = 0.18215f; + float shift_factor = 0.0609f; std::vector block_out_channels = { 64 }; explicit Config(const std::filesystem::path& config_path); @@ -48,6 +49,8 @@ class OPENVINO_GENAI_EXPORTS AutoencoderKL { AutoencoderKL& compile(const std::string& device, const ov::AnyMap& properties = {}); + const Config& get_config() const; + template ov::util::EnableIfAllStringAny compile( const std::string& device, diff --git a/src/cpp/include/openvino/genai/text2image/pipeline.hpp b/src/cpp/include/openvino/genai/text2image/pipeline.hpp index a8201cf6c9..54d540179b 100644 --- a/src/cpp/include/openvino/genai/text2image/pipeline.hpp +++ b/src/cpp/include/openvino/genai/text2image/pipeline.hpp @@ -19,6 +19,7 @@ #include "openvino/genai/text2image/clip_text_model_with_projection.hpp" #include "openvino/genai/text2image/unet2d_condition_model.hpp" #include "openvino/genai/text2image/autoencoder_kl.hpp" +#include "openvino/genai/text2image/sd3_transformer_2d_model.hpp" namespace ov { namespace genai { @@ -57,7 +58,8 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline { LCM, LMS_DISCRETE, DDIM, - EULER_DISCRETE + EULER_DISCRETE, + FLOW_MATCH_EULER_DISCRETE }; static std::shared_ptr from_config(const std::filesystem::path& scheduler_config_path, @@ -67,12 +69,13 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline { }; struct OPENVINO_GENAI_EXPORTS GenerationConfig { - // LCM: prompt only w/o negative prompt - // SD XL: prompt2 and negative_prompt2 - // FLUX: prompt2 (prompt if prompt2 is not defined explicitly) - // SD 3: prompt2, prompt3 (with fallback to prompt) and negative_prompt2, negative_prompt3 + // LCM: prompt only w/o negative_prompt + // SD XL: prompt_2 and negative_prompt2 + // FLUX: prompt_2 (prompt if prompt_2 is not defined explicitly) + // SD 3: prompt_2, prompt3 (with fallback to prompt) and negative_prompt_2, negative_prompt_3 std::optional prompt_2 = std::nullopt, 
prompt_3 = std::nullopt; - std::string negative_prompt, negative_prompt_2, negative_prompt_3; + std::string negative_prompt; + std::optional negative_prompt_2 = std::nullopt, negative_prompt_3 = std::nullopt; size_t num_images_per_prompt = 1; @@ -131,6 +134,14 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline { const UNet2DConditionModel& unet, const AutoencoderKL& vae_decoder); + // creates SD3 pipeline from building blocks + static Text2ImagePipeline stable_diffusion_3( + const std::shared_ptr& scheduler_type, + const CLIPTextModelWithProjection& clip_text_model_1, + const CLIPTextModelWithProjection& clip_text_model_2, + const SD3Transformer2DModel& transformer, + const AutoencoderKL& vae_decoder); + GenerationConfig get_generation_config() const; void set_generation_config(const GenerationConfig& generation_config); @@ -160,6 +171,7 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline { class StableDiffusionPipeline; class StableDiffusionXLPipeline; + class StableDiffusion3Pipeline; }; // diff --git a/src/cpp/include/openvino/genai/text2image/sd3_transformer_2d_model.hpp b/src/cpp/include/openvino/genai/text2image/sd3_transformer_2d_model.hpp new file mode 100644 index 0000000000..674f29168a --- /dev/null +++ b/src/cpp/include/openvino/genai/text2image/sd3_transformer_2d_model.hpp @@ -0,0 +1,77 @@ +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include +#include + +#include "openvino/core/any.hpp" +#include "openvino/runtime/infer_request.hpp" +#include "openvino/runtime/properties.hpp" +#include "openvino/runtime/tensor.hpp" + +#include "openvino/genai/visibility.hpp" + +namespace ov { +namespace genai { + +class OPENVINO_GENAI_EXPORTS SD3Transformer2DModel { +public: + struct Config { + size_t sample_size = 128; + size_t patch_size = 2; + size_t in_channels = 16; + size_t num_layers = 18; + size_t attention_head_dim = 64; + size_t num_attention_heads = 18; + size_t joint_attention_dim = 4096; + size_t caption_projection_dim = 1152; + size_t pooled_projection_dim = 2048; + size_t out_channels = 16; + size_t pos_embed_max_size = 96; + std::vector block_out_channels = { 128, 256, 512, 512 }; + + explicit Config(const std::filesystem::path& config_path); + }; + + explicit SD3Transformer2DModel(const std::filesystem::path& root_dir); + + SD3Transformer2DModel(const std::filesystem::path& root_dir, + const std::string& device, + const ov::AnyMap& properties = {}); + + template ::value, bool>::type = true> + SD3Transformer2DModel(const std::filesystem::path& root_dir, const std::string& device, Properties&&... properties) + : SD3Transformer2DModel(root_dir, device, ov::AnyMap{std::forward(properties)...}) {} + + SD3Transformer2DModel(const SD3Transformer2DModel&); + + const Config& get_config() const; + + SD3Transformer2DModel& reshape(int batch_size, int height, int width, int tokenizer_model_max_length); + + SD3Transformer2DModel& compile(const std::string& device, const ov::AnyMap& properties = {}); + + template + ov::util::EnableIfAllStringAny compile(const std::string& device, + Properties&&... 
properties) { + return compile(device, ov::AnyMap{std::forward(properties)...}); + } + + void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states); + + ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep); + + size_t get_vae_scale_factor() const; + +private: + Config m_config; + ov::InferRequest m_request; + std::shared_ptr m_model; + size_t m_vae_scale_factor; +}; + +} // namespace genai +} // namespace ov diff --git a/src/cpp/src/continuous_batching_impl.cpp b/src/cpp/src/continuous_batching_impl.cpp index 09abbe29ab..c56d02afef 100644 --- a/src/cpp/src/continuous_batching_impl.cpp +++ b/src/cpp/src/continuous_batching_impl.cpp @@ -123,7 +123,6 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::step() { static ManualTimer step_timer("step()"); step_timer.start(); - // Pull awaiting requests _pull_awaiting_requests(); m_pipeline_metrics.requests = m_requests.size(); @@ -148,8 +147,10 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::step() { if (scheduler_output.m_total_num_scheduled_tokens == 0) { for (size_t i = 0; i < m_requests.size(); ++i) { SequenceGroup::Ptr sequence_group = m_requests[i]; - sequence_group->set_out_of_memory(); - sequence_group->notify_handle(); + if (!sequence_group->is_waiting()) { + sequence_group->set_out_of_memory(); + sequence_group->notify_handle(); + } } _free_non_running_requests(); return; diff --git a/src/cpp/src/continuous_batching_impl.hpp b/src/cpp/src/continuous_batching_impl.hpp index 78e92d6c76..8276edb36b 100644 --- a/src/cpp/src/continuous_batching_impl.hpp +++ b/src/cpp/src/continuous_batching_impl.hpp @@ -49,7 +49,7 @@ class ContinuousBatchingPipeline::ContinuousBatchingImpl : public ContinuousBatc const DeviceConfig& device_config, ov::Core& core); - void _pull_awaiting_requests(); + virtual void _pull_awaiting_requests(); void _fill_prompt_log_probs(std::vector& sequence_groups, ov::Tensor& logits); public: diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp index cc7236af42..7a9653cd85 100644 --- a/src/cpp/src/generation_config.cpp +++ b/src/cpp/src/generation_config.cpp @@ -120,7 +120,7 @@ bool GenerationConfig::is_multinomial() const { } bool GenerationConfig::is_speculative_decoding() const { - return assistant_confidence_threshold > 0 || num_assistant_tokens > 0; + return (assistant_confidence_threshold > 0 || num_assistant_tokens > 0); } void GenerationConfig::validate() const { diff --git a/src/cpp/src/sampler.cpp b/src/cpp/src/sampler.cpp index 2657c79df7..38deb74186 100644 --- a/src/cpp/src/sampler.cpp +++ b/src/cpp/src/sampler.cpp @@ -630,8 +630,6 @@ stop_sample_tokens(Sequence::Ptr running_sequence, size_t& max_removed_tokens_per_request) { running_sequence->remove_last_tokens(token_idx); max_removed_tokens_per_request = std::max(max_removed_tokens_per_request, token_idx); - running_sequence->set_status(SequenceStatus::FINISHED); - running_sequence->set_finish_reason(GenerationFinishReason::STOP); } void @@ -652,26 +650,48 @@ align_all_sequence_len(SequenceGroup::Ptr& sequence_group, logit_processor.update_generated_len(min_generated_tokens); } -bool -validate_candidate(Sequence::Ptr running_sequence, - size_t& token_idx, - Token& sampled_token, - bool& is_extend_sequence, - size_t& max_removed_tokens) { - if (token_idx > 0) { - const auto& generated_tokens = running_sequence->get_generated_ids(); - auto it = generated_tokens.rbegin(); - std::advance(it, token_idx - 1); - // to validate candidates from assisting model and remove 
incorrect ones from generated sequence - if (*it != sampled_token.m_index) { - running_sequence->remove_last_tokens(token_idx); - max_removed_tokens = std::max(max_removed_tokens, token_idx); - is_extend_sequence = true; - return false; - } else { - sampled_token.m_index = *it; - } +bool Sampler::validate_candidate( + Sequence::Ptr running_sequence, + size_t& token_idx, + Token& sampled_token, + bool& is_extend_sequence, + size_t& max_removed_tokens, + bool do_sample) { + OPENVINO_ASSERT(token_idx > 0); + const auto& generated_tokens = running_sequence->get_generated_ids(); + auto it_token_id = generated_tokens.rbegin(); + std::advance(it_token_id, token_idx - 1); + + bool is_candidate_accepted = false; + // first tokens in case of speculative decoding should be generated by main model + if (do_sample && + running_sequence->get_generated_len() != running_sequence->get_sequence_group_ptr()->get_num_tokens_to_validate()) { + const auto& generated_log_probs = running_sequence->get_generated_log_probs(); + auto it_log_prob = generated_log_probs.rbegin(); + std::advance(it_log_prob, token_idx - 1); + + float p_i = std::exp(*it_log_prob), + q_i = std::exp(sampled_token.m_log_prob), + probability_ratio = p_i / q_i; + + auto dist = std::uniform_int_distribution<>(0, 100); // equivalent to multinomial with number of trials == 1 + float r_i = dist(rng_engine); + r_i /= 100; + is_candidate_accepted = r_i <= probability_ratio; + } else { + is_candidate_accepted = *it_token_id == sampled_token.m_index; } + + // to validate candidates from assisting model and remove incorrect ones from generated sequence + if (!is_candidate_accepted) { + running_sequence->remove_last_tokens(token_idx); + max_removed_tokens = std::max(max_removed_tokens, token_idx); + is_extend_sequence = true; + return false; + } else { + sampled_token.m_index = *it_token_id; + } + return true; } @@ -759,8 +779,9 @@ SamplerOutput Sampler::sample(std::vector & sequence_groups, // flag to add sampled token to generated sequence or extend logit processors only bool is_extend_sequence = token_offset == 0 || is_generate_n_tokens, is_validation_passed = true; - if (is_validation_mode_enabled && !is_generate_n_tokens) { - is_validation_passed = validate_candidate(running_sequences[running_sequence_id], token_offset, sampled_token_id, is_extend_sequence, max_removed_tokens_per_request); + if (is_validation_mode_enabled && !is_extend_sequence) { + is_validation_passed = validate_candidate(running_sequences[running_sequence_id], token_offset, sampled_token_id, + is_extend_sequence, max_removed_tokens_per_request, sampling_params.do_sample); // update log prob just while validation process if (!is_extend_sequence) { OPENVINO_ASSERT(generated_and_verified_len < running_sequences[running_sequence_id]->get_generated_len()); @@ -775,6 +796,7 @@ SamplerOutput Sampler::sample(std::vector & sequence_groups, } min_generated_len = std::min(min_generated_len, running_sequence->get_generated_len()); } + align_all_sequence_len(sequence_group, min_generated_len, logit_processor); for (const auto& dropped_seq_id : _try_finish_generation(sequence_group)) { sampler_output.m_dropped_sequences.push_back(dropped_seq_id); } @@ -799,7 +821,9 @@ SamplerOutput Sampler::sample(std::vector & sequence_groups, } // Notify handle after sampling is done. // For non-streaming this is effective only when the generation is finished. 
- sequence_group->notify_handle(); + OPENVINO_ASSERT(num_tokens_to_process >= max_removed_tokens_per_request); + size_t num_output_token_to_push = num_tokens_to_process - max_removed_tokens_per_request + 1; + sequence_group->notify_handle(num_output_token_to_push); } else { // we are in prompt processing phase when prompt is split into chunks and processed step by step } @@ -810,7 +834,6 @@ SamplerOutput Sampler::sample(std::vector & sequence_groups, sequence_group->finish_iteration(); // decrease sequence_group context in case of candidates generated by draft_model were not accepted by main_model if (max_removed_tokens_per_request) { - align_all_sequence_len(sequence_group, min_generated_len, logit_processor); auto min_processed_tokens = sequence_group->get_prompt_len() + min_generated_len - 1; sequence_group->update_processed_tokens_num(min_processed_tokens); logit_processor.update_generated_len(min_processed_tokens); diff --git a/src/cpp/src/sampler.hpp b/src/cpp/src/sampler.hpp index 83b2ddb692..dd7d7d4eb9 100644 --- a/src/cpp/src/sampler.hpp +++ b/src/cpp/src/sampler.hpp @@ -48,6 +48,9 @@ class Sampler { std::vector _multinomial_sample(const Logits& logits, size_t num_tokens_per_sequence); std::vector _try_finish_generation(SequenceGroup::Ptr & sequence_group); + bool validate_candidate(Sequence::Ptr running_sequence, size_t& token_idx, Token& sampled_token, + bool& is_extend_sequence, size_t& max_removed_tokens, bool do_sample); + // request ID => beam search tracking information std::map m_beam_search_info; diff --git a/src/cpp/src/sequence_group.hpp b/src/cpp/src/sequence_group.hpp index b2532b220c..c5be82f0f2 100644 --- a/src/cpp/src/sequence_group.hpp +++ b/src/cpp/src/sequence_group.hpp @@ -609,7 +609,7 @@ class SequenceGroup { m_generation_stream->push(std::move(outputs)); } - void notify_handle() { + void notify_handle(size_t num_output_token_to_push = 0) { if (out_of_memory()) { set_generation_status(GenerationStatus::IGNORED); } else if (has_finished()) { @@ -625,12 +625,8 @@ class SequenceGroup { // (after stop string is detected its tokens are already sent) if (num_total_seqs() == 1 && (m_sampling_params.stop_strings.empty() || m_sampling_params.include_stop_str_in_output)) { - auto previous_step_gen_len = get_num_processed_tokens() > 0 ? 
get_num_processed_tokens() - get_prompt_len() + 1 : 0; - auto generation_len = m_sequences.front()->get_generated_len(); - if (previous_step_gen_len < generation_len) { - auto token_to_print = generation_len - previous_step_gen_len; - push_partial_outputs(token_to_print); - } + if (num_output_token_to_push) + push_partial_outputs(num_output_token_to_push); } else if (has_finished() || out_of_memory()) { push_outputs(); } diff --git a/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.cpp b/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.cpp index fd30e9f608..c649c544a6 100644 --- a/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.cpp +++ b/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.cpp @@ -20,23 +20,16 @@ ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl::Contin void ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl::finish_request(SequenceGroup::Ptr request) { - - for (const auto& sequence : request->get_sequences()) { - m_scheduler->free_sequence(sequence->get_id()); + for (const auto& sequence: request->get_sequences()) { + if (m_scheduler->has_block_table(sequence->get_id())) { + m_scheduler->free_sequence(sequence->get_id()); + } } m_sampler->clear_request_info(request->get_request_id()); + request->set_generation_status(GenerationStatus::DROPPED_BY_HANDLE); } void ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl::finish_request(int64_t request_id) { - // finish all request s in case of -1 - if (request_id == -1) { - while (!m_requests.empty()) { - const auto& request = *m_requests.rbegin(); - finish_request(request); - m_requests.pop_back(); - } - return; - } for (size_t i = 0; i < m_requests.size(); ++i) { auto& request = m_requests[i]; if (request->get_request_id() != request_id) { @@ -50,8 +43,6 @@ void ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl::f GeneratedRequests ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl::get_generated_requests() { - _pull_awaiting_requests(); - GeneratedRequests result; for (const auto& request : m_requests) { const auto& request_id = request->get_request_id(); @@ -197,8 +188,6 @@ UpdateRequestResult ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl::init_request_by_candidate( uint64_t request_id, const GeneratedSequences& candidates) { - _pull_awaiting_requests(); - for (auto& request : m_requests) { if (request->get_request_id() != request_id) { continue; @@ -218,8 +207,6 @@ UpdateRequestResult ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl::update_request(uint64_t request_id, const GeneratedSequences& candidates, bool is_update_logit_processor) { - _pull_awaiting_requests(); - UpdateRequestResult result{0, 0}; for (auto& request : m_requests) { if (request_id != request->get_request_id()) { @@ -227,14 +214,9 @@ ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl::update } std::vector running_sequences = request->get_running_sequences(); + OPENVINO_ASSERT(running_sequences.size() > 0); size_t min_generated_tokens, min_candidate_len; - if (request->get_context_len() == 0 && !request->get_num_tokens_to_validate()) { - if (candidates.begin()->second.log_probs.empty()) { - // lock generation in case on empty generation - request->pause_generation(true); - return result; - } - // init request by sequences in case the pipeline 
was not started + if (running_sequences.front()->get_generated_len() == 0 && !request->get_num_tokens_to_validate()) { m_sampler->create_logit_processor(request_id, request->get_sampling_parameters(), request->get_prompt_ids()); auto& logit_processor = m_sampler->get_logit_processor(request_id); result.inserted_tokens_cnt = init_request(request, candidates, logit_processor, is_update_logit_processor); @@ -270,11 +252,21 @@ ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl::update // update request context information to provide correct scheduling phase const size_t num_processed_tokens = request->get_num_processed_tokens(), prompt_len = request->get_prompt_len(), - updated_context_len = min_candidate_len + prompt_len; - if (num_processed_tokens > 0) + updated_context_len = min_candidate_len + prompt_len, + max_new_tokens = request->get_sampling_parameters().max_new_tokens; + size_t generated_len = request->get_context_len() - request->get_prompt_len(); + if (num_processed_tokens > 0) { request->update_processed_tokens_num(num_processed_tokens - result.removed_tokens_cnt); + generated_len -= result.removed_tokens_cnt; + } request->set_num_validated_tokens(result.inserted_tokens_cnt); request->pause_generation(false); + generated_len += result.inserted_tokens_cnt; + + // to pause `draft_model` generation in case of `generated_len >= max_new_tokens - 1` to generate last token by `main_model` + if (!m_is_validation_mode_enabled && (generated_len >= max_new_tokens - 1 || result.inserted_tokens_cnt == 0)) { + request->pause_generation(true); + } break; } @@ -282,13 +274,8 @@ ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl::update } void -ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl::unlock_next_request_generation() { - for (auto& request : m_requests) { - if (!request->has_finished() && !request->can_generate_tokens()) { - request->pause_generation(false); - return; - } - } +ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl::pull_awaiting_requests() { + ContinuousBatchingImpl::_pull_awaiting_requests(); } void ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl::multistep() { @@ -308,13 +295,16 @@ void ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl::m request->pause_generation(true); } else if (request->get_num_processed_tokens() == 0 && sampling_params.num_return_sequences > 1) { request->pause_generation(true); - } else if (sampling_params.num_assistant_tokens <= generated_tokens_cnt) { + } else if (sampling_params.num_assistant_tokens <= generated_tokens_cnt && sampling_params.assistant_confidence_threshold == 0.f) { request->pause_generation(true); - } else if (request->get_num_processed_tokens() - request->get_prompt_len() + 1 >= sampling_params.max_new_tokens - 1) { + } else if (request->get_context_len() >= request->get_prompt_len() && + (request->get_context_len() - request->get_prompt_len()) >= sampling_params.max_new_tokens - 1) { + request->pause_generation(true); + } else if (sampling_params.max_new_tokens == 0) { request->pause_generation(true); } to_generate |= request->can_generate_tokens(); } } } -} \ No newline at end of file +} diff --git a/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.hpp b/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.hpp index a75a160f14..0040708b4b 100644 --- 
a/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.hpp +++ b/src/cpp/src/speculative_decoding/continuous_batching_for_speculative_decoding_impl.hpp @@ -23,9 +23,9 @@ class ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl : bool is_validation_mode_enabled); void multistep(); - void finish_request(int64_t request_id = -1); - void unlock_next_request_generation(); + void finish_request(int64_t request_id = -1); + void pull_awaiting_requests(); GeneratedRequests get_generated_requests(); UpdateRequestResult update_request(uint64_t request_id, const GeneratedSequences& candidates, bool is_update_logit_processor); @@ -33,5 +33,6 @@ class ContinuousBatchingPipeline::ContinuousBatchingForSpeculativeDecodingImpl : protected: void finish_request(SequenceGroup::Ptr request); + void _pull_awaiting_requests() override {}; }; } \ No newline at end of file diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp index 2008f1fb9a..864646d5cd 100644 --- a/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp +++ b/src/cpp/src/speculative_decoding/speculative_decoding_impl.cpp @@ -82,7 +82,8 @@ GenerationHandle ContinuousBatchingPipeline::SpeculativeDecodingImpl::add_request(uint64_t request_id, const ov::Tensor& input_ids, ov::genai::GenerationConfig sampling_params) { - m_draft_pipeline->add_request(request_id, input_ids, sampling_params); + std::lock_guard lock(m_draft_generations_mutex); + m_draft_generations.insert({request_id, m_draft_pipeline->add_request(request_id, input_ids, sampling_params)}); return m_main_pipeline->add_request(request_id, input_ids, sampling_params); }; @@ -90,7 +91,8 @@ GenerationHandle ContinuousBatchingPipeline::SpeculativeDecodingImpl::add_request(uint64_t request_id, const std::string& prompt, ov::genai::GenerationConfig sampling_params) { - m_draft_pipeline->add_request(request_id, prompt, sampling_params); + std::lock_guard lock(m_draft_generations_mutex); + m_draft_generations.insert({request_id, m_draft_pipeline->add_request(request_id, prompt, sampling_params)}); return m_main_pipeline->add_request(request_id, prompt, sampling_params); } @@ -112,12 +114,18 @@ void print_generated_request(const ov::genai::GeneratedRequests& requests) { } void ContinuousBatchingPipeline::SpeculativeDecodingImpl::step() { + // this blocks adding new requests during step as it may break coherence between main and draft models + std::lock_guard lock{m_draft_generations_mutex}; + m_draft_pipeline->pull_awaiting_requests(); + m_main_pipeline->pull_awaiting_requests(); + // generate candidates by draft model ManualTimer draft_timer("speculative_decoding: draft_model: multistep()"); draft_timer.start(); m_draft_pipeline->multistep(); draft_timer.end(); m_sd_metrics.draft_duration += draft_timer.get_duration(); + m_pipeline_metrics = m_main_pipeline->get_metrics(); // to generate num_matches statistic std::map update_sequence_info; @@ -133,6 +141,7 @@ void ContinuousBatchingPipeline::SpeculativeDecodingImpl::step() { m_main_pipeline->step(); main_timer.end(); m_sd_metrics.main_duration += main_timer.get_duration(); + m_pipeline_metrics = m_main_pipeline->get_metrics(); auto main_generated_requests = m_main_pipeline->get_generated_requests(); for (const auto& checked_sequence : main_generated_requests) { @@ -145,8 +154,8 @@ void ContinuousBatchingPipeline::SpeculativeDecodingImpl::step() { auto request_id = draft_request.first; if 
(!main_generated_requests.count(request_id)) { m_draft_pipeline->finish_request(request_id); - // in case of some requests not to started, unlock generation of next request - m_draft_pipeline->unlock_next_request_generation(); + // remove draft_generation_handle from queue + m_draft_generations.erase(request_id); } auto updated_seq_info = update_sequence_info[request_id]; float acceptance_rate = 1 - static_cast(updated_seq_info.removed_tokens_cnt) / updated_seq_info.inserted_tokens_cnt; @@ -175,18 +184,16 @@ ContinuousBatchingPipeline::SpeculativeDecodingImpl::generate(const std::vector< } }, streamer); - std::vector main_generations, draft_generations; + std::vector main_generations; for (size_t request_id = 0; request_id < input_ids.size(); ++request_id) { OPENVINO_ASSERT(1 == input_ids[request_id].get_shape().at(0), "Use multiple tensors to pass a batch."); main_generations.push_back(m_main_pipeline->add_request(request_id, input_ids[request_id], sampling_params[request_id])); auto draft_sampling_params = sampling_params[request_id]; // set the parameters do not stop draft generation without stopping of the same request for main pipeline - draft_sampling_params.max_new_tokens = draft_sampling_params.max_new_tokens + 1; - draft_sampling_params.min_new_tokens = draft_sampling_params.min_new_tokens + 1; draft_sampling_params.ignore_eos = true; - draft_generations.push_back(m_draft_pipeline->add_request(request_id, input_ids[request_id], draft_sampling_params)); - // decrease generation len to generate last token by main model + std::lock_guard lock(m_draft_generations_mutex); + m_draft_generations.insert({request_id, m_draft_pipeline->add_request(request_id, input_ids[request_id], draft_sampling_params)}); } std::vector results; @@ -210,7 +217,6 @@ ContinuousBatchingPipeline::SpeculativeDecodingImpl::generate(const std::vector< if (streamer_ptr) { streamer_ptr->end(); } - draft_generations.clear(); for (size_t generation_idx = 0; generation_idx < main_generations.size(); ++generation_idx) { const auto& generation = main_generations[generation_idx]; diff --git a/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp b/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp index b427e311b4..f854713b5e 100644 --- a/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp +++ b/src/cpp/src/speculative_decoding/speculative_decoding_impl.hpp @@ -30,6 +30,9 @@ class ContinuousBatchingPipeline::SpeculativeDecodingImpl : public ContinuousBat protected: std::shared_ptr m_main_pipeline, m_draft_pipeline; SpeculativeDecodingMetrics m_sd_metrics; + // Mutex protecting access to m_draft_generations, so add_request and step methods can be called from different threads + std::mutex m_draft_generations_mutex; + std::map m_draft_generations; public: SpeculativeDecodingImpl(const std::filesystem::path& main_models_path, diff --git a/src/cpp/src/text2image/models/autoencoder_kl.cpp b/src/cpp/src/text2image/models/autoencoder_kl.cpp index fca9c21050..c9d51cb844 100644 --- a/src/cpp/src/text2image/models/autoencoder_kl.cpp +++ b/src/cpp/src/text2image/models/autoencoder_kl.cpp @@ -32,6 +32,7 @@ AutoencoderKL::Config::Config(const std::filesystem::path& config_path) { read_json_param(data, "latent_channels", latent_channels); read_json_param(data, "out_channels", out_channels); read_json_param(data, "scaling_factor", scaling_factor); + read_json_param(data, "shift_factor", shift_factor); read_json_param(data, "block_out_channels", block_out_channels); } @@ -52,6 +53,10 @@ 
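The m_draft_generations_mutex added above serializes add_request() against step() so the map of draft generation handles stays coherent while a step rebalances it. A minimal standalone sketch of that locking pattern, with placeholder names (DraftHandleBook, handles) that are not part of the patch:

    #include <cstdint>
    #include <map>
    #include <mutex>

    // 'int' stands in for a generation handle.
    struct DraftHandleBook {
        std::mutex mutex;                 // plays the role of m_draft_generations_mutex
        std::map<uint64_t, int> handles;  // plays the role of m_draft_generations

        void add_request(uint64_t request_id, int handle) {
            std::lock_guard<std::mutex> lock(mutex);
            handles.emplace(request_id, handle);
        }

        void step() {
            std::lock_guard<std::mutex> lock(mutex);  // blocks add_request() for the whole step
            // ... pull awaiting requests for the draft and main pipelines here ...
        }
    };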
AutoencoderKL::AutoencoderKL(const std::filesystem::path& root_dir, AutoencoderKL::AutoencoderKL(const AutoencoderKL&) = default; +const AutoencoderKL::Config& AutoencoderKL::get_config() const { + return m_config; +} + AutoencoderKL& AutoencoderKL::reshape(int batch_size, int height, int width) { OPENVINO_ASSERT(m_model, "Model has been already compiled. Cannot reshape already compiled model"); diff --git a/src/cpp/src/text2image/models/sd3_transformer_2d_model.cpp b/src/cpp/src/text2image/models/sd3_transformer_2d_model.cpp new file mode 100644 index 0000000000..7db52f5e8b --- /dev/null +++ b/src/cpp/src/text2image/models/sd3_transformer_2d_model.cpp @@ -0,0 +1,132 @@ +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "openvino/genai/text2image/sd3_transformer_2d_model.hpp" + +#include + +#include "json_utils.hpp" +#include "utils.hpp" + +namespace ov { +namespace genai { + +SD3Transformer2DModel::Config::Config(const std::filesystem::path& config_path) { + std::ifstream file(config_path); + OPENVINO_ASSERT(file.is_open(), "Failed to open ", config_path); + + nlohmann::json data = nlohmann::json::parse(file); + using utils::read_json_param; + + read_json_param(data, "sample_size", sample_size); + read_json_param(data, "patch_size", patch_size); + read_json_param(data, "in_channels", in_channels); + read_json_param(data, "num_layers", num_layers); + read_json_param(data, "attention_head_dim", attention_head_dim); + read_json_param(data, "num_attention_heads", num_attention_heads); + read_json_param(data, "joint_attention_dim", joint_attention_dim); + read_json_param(data, "caption_projection_dim", caption_projection_dim); + read_json_param(data, "pooled_projection_dim", pooled_projection_dim); + read_json_param(data, "out_channels", out_channels); + read_json_param(data, "pos_embed_max_size", pos_embed_max_size); + + file.close(); + + // block_out_channels should be read from VAE encoder / decoder config to compute proper m_vae_scale_factor + std::filesystem::path vae_config_path = config_path.parent_path().parent_path() / "vae_decoder" / "config.json"; + file.open(vae_config_path); + OPENVINO_ASSERT(file.is_open(), "Failed to open ", vae_config_path); + data = nlohmann::json::parse(file); + read_json_param(data, "block_out_channels", block_out_channels); +} + +SD3Transformer2DModel::SD3Transformer2DModel(const std::filesystem::path& root_dir) + : m_config(root_dir / "config.json") { + m_model = utils::singleton_core().read_model((root_dir / "openvino_model.xml").string()); + + // compute VAE scale factor + m_vae_scale_factor = std::pow(2, m_config.block_out_channels.size() - 1); +} + +SD3Transformer2DModel::SD3Transformer2DModel(const std::filesystem::path& root_dir, + const std::string& device, + const ov::AnyMap& properties) + : SD3Transformer2DModel(root_dir) { + compile(device, properties); +} + +SD3Transformer2DModel::SD3Transformer2DModel(const SD3Transformer2DModel&) = default; + +const SD3Transformer2DModel::Config& SD3Transformer2DModel::get_config() const { + return m_config; +} + +SD3Transformer2DModel& SD3Transformer2DModel::reshape(int batch_size, + int height, + int width, + int tokenizer_model_max_length) { + OPENVINO_ASSERT(m_model, "Model has been already compiled. 
Cannot reshape already compiled model"); + + // hidden_states=latent_model_input, + // timestep=timestep, + // encoder_hidden_states=prompt_embeds, + // pooled_projections=pooled_prompt_embeds, + + height /= m_vae_scale_factor; + width /= m_vae_scale_factor; + + std::map name_to_shape; + + for (auto&& input : m_model->inputs()) { + std::string input_name = input.get_any_name(); + name_to_shape[input_name] = input.get_partial_shape(); + if (input_name == "timestep") { + name_to_shape[input_name][0] = batch_size; + } else if (input_name == "hidden_states") { + name_to_shape[input_name] = {batch_size, name_to_shape[input_name][1], height, width}; + } else if (input_name == "encoder_hidden_states") { + name_to_shape[input_name][0] = batch_size; + name_to_shape[input_name][1] = + tokenizer_model_max_length * + 2; // x2 is necessary because of the concatenation of prompt_embeds and t5_prompt_embeds + } else if (input_name == "pooled_projections") { + name_to_shape[input_name][0] = batch_size; + } + } + + m_model->reshape(name_to_shape); + + return *this; +} + +SD3Transformer2DModel& SD3Transformer2DModel::compile(const std::string& device, const ov::AnyMap& properties) { + OPENVINO_ASSERT(m_model, "Model has been already compiled. Cannot re-compile already compiled model"); + ov::CompiledModel compiled_model = utils::singleton_core().compile_model(m_model, device, properties); + m_request = compiled_model.create_infer_request(); + // release the original model + m_model.reset(); + + return *this; +} + +void SD3Transformer2DModel::set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states) { + OPENVINO_ASSERT(m_request, "Transformer model must be compiled first"); + m_request.set_tensor(tensor_name, encoder_hidden_states); +} + +size_t SD3Transformer2DModel::get_vae_scale_factor() const { + return m_vae_scale_factor; +} + +ov::Tensor SD3Transformer2DModel::infer(const ov::Tensor latent_model_input, const ov::Tensor timestep) { + OPENVINO_ASSERT(m_request, "Transformer model must be compiled first. 
Cannot infer non-compiled model"); + + m_request.set_tensor("hidden_states", latent_model_input); + m_request.set_tensor("timestep", timestep); + m_request.infer(); + + return m_request.get_output_tensor(); +} + +} // namespace genai +} // namespace ov diff --git a/src/cpp/src/text2image/numpy_utils.cpp b/src/cpp/src/text2image/numpy_utils.cpp index 9554681820..b263573e47 100644 --- a/src/cpp/src/text2image/numpy_utils.cpp +++ b/src/cpp/src/text2image/numpy_utils.cpp @@ -74,6 +74,85 @@ std::vector interp(const std::vector& x, const std::vector< return interp_res; } +void concat_3d_by_rows(const float* data_1, const float* data_2, float* res, const ov::Shape shape_1, const ov::Shape shape_2) { + OPENVINO_ASSERT( + shape_1[0] == shape_2[0] && shape_1[1] == shape_2[1], + "Tensors for concatenation must have the same dimensions"); + + for (size_t i = 0; i < shape_1[0]; ++i) { + for (size_t j = 0; j < shape_1[1]; ++j) { + size_t offset_1 = (i * shape_1[1] + j) * shape_1[2]; + size_t offset_2 = (i * shape_2[1] + j) * shape_2[2]; + + size_t step = (i * shape_1[1] + j) * (shape_1[2] + shape_2[2]); + + std::memcpy(res + step, data_1 + offset_1, shape_1[2] * sizeof(float)); + std::memcpy(res + step + shape_1[2], + data_2 + offset_2, + shape_2[2] * sizeof(float)); + } + } +} + +void concat_2d_by_rows(const float* data_1, const float* data_2, float* res, const ov::Shape shape_1, const ov::Shape shape_2) { + OPENVINO_ASSERT( + shape_1[0] == shape_2[0], + "Tensors for concatenation must have the same dimensions"); + + for (size_t i = 0; i < shape_1[0]; ++i) { + size_t offset_1 = i * shape_1[1]; + size_t offset_2 = i * shape_2[1]; + + size_t step = i * (shape_1[1] + shape_2[1]); + + std::memcpy(res + step, data_1 + offset_1, shape_1[1] * sizeof(float)); + std::memcpy(res + step + shape_1[1], + data_2 + offset_2, + shape_2[1] * sizeof(float)); + } +} + +void concat_3d_by_cols(const float* data_1, const float* data_2, float* res, const ov::Shape shape_1, const ov::Shape shape_2) { + OPENVINO_ASSERT( + shape_1[0] == shape_2[0] && shape_1[2] == shape_2[2], + "Tensors for concatenation must have the same dimensions"); + + for (size_t i = 0; i < shape_1[0]; ++i) { + size_t shift_1 = i * shape_1[1] * shape_1[2]; + size_t shift_2 = i * shape_2[1] * shape_2[2]; + + size_t step = shift_1 + shift_2; + + std::memcpy(res + step, data_1 + shift_1, shape_1[1] * shape_1[2] * sizeof(float)); + std::memcpy(res + step + shape_1[1] * shape_1[2], data_2 + shift_2, shape_2[1] * shape_2[2] * sizeof(float)); + } +} + +void concat_3d_by_channels(const float* data_1, const float* data_2, float* res, const ov::Shape shape_1, const ov::Shape shape_2) { + OPENVINO_ASSERT( + shape_1[1] == shape_2[1] && shape_1[2] == shape_2[2], + "Tensors for concatenation must have the same dimensions"); + + size_t size_1 = shape_1[0] * shape_1[1] * shape_1[2]; + size_t size_2 = shape_2[0] * shape_2[1] * shape_2[2]; + + std::memcpy(res, data_1, size_1 * sizeof(float)); + std::memcpy(res + size_1, data_2, size_2 * sizeof(float)); +} + +void concat_2d_by_channels(const float* data_1, const float* data_2, float* res, const ov::Shape shape_1, const ov::Shape shape_2) { + OPENVINO_ASSERT( + shape_1[1] == shape_2[1], + "Tensors for concatenation must have the same dimensions"); + + size_t size_1 = shape_1[0] * shape_1[1]; + size_t size_2 = shape_2[0] * shape_2[1]; + + std::memcpy(res, data_1, size_1 * sizeof(float)); + std::memcpy(res + size_1, data_2, size_2 * sizeof(float)); +} + + } // namespace ov } // namespace genai } // namespace numpy_utils diff 
--git a/src/cpp/src/text2image/numpy_utils.hpp b/src/cpp/src/text2image/numpy_utils.hpp index d6144eeb99..6c8c6da5ad 100644 --- a/src/cpp/src/text2image/numpy_utils.hpp +++ b/src/cpp/src/text2image/numpy_utils.hpp @@ -6,10 +6,13 @@ #include #include #include +#include #include #include #include +#include "openvino/core/shape.hpp" + namespace ov { namespace genai { namespace numpy_utils { @@ -42,6 +45,12 @@ void rescale_zero_terminal_snr(std::vector& betas); // np.interp(...) implementation std::vector interp(const std::vector& x, const std::vector& xp, const std::vector& fp); +void concat_3d_by_rows(const float* data_1, const float* data_2, float* res, const ov::Shape shape_1, const ov::Shape shape_2); +void concat_3d_by_cols(const float* data_1, const float* data_2, float* res, const ov::Shape shape_1, const ov::Shape shape_2); +void concat_3d_by_channels(const float* data_1, const float* data_2, float* res, const ov::Shape shape_1, const ov::Shape shape_2); +void concat_2d_by_rows(const float* data_1, const float* data_2, float* res, const ov::Shape shape_1, const ov::Shape shape_2); +void concat_2d_by_channels(const float* data_1, const float* data_2, float* res, const ov::Shape shape_1, const ov::Shape shape_2); + } // namespace ov } // namespace genai } // namespace numpy_utils diff --git a/src/cpp/src/text2image/schedulers/ddim.cpp b/src/cpp/src/text2image/schedulers/ddim.cpp index eaeb210cd1..9b1367d84f 100644 --- a/src/cpp/src/text2image/schedulers/ddim.cpp +++ b/src/cpp/src/text2image/schedulers/ddim.cpp @@ -189,6 +189,10 @@ std::vector DDIMScheduler::get_timesteps() const { return m_timesteps; } +std::vector DDIMScheduler::get_float_timesteps() const { + OPENVINO_THROW("DDIMScheduler doesn't support float timesteps"); +} + float DDIMScheduler::get_init_noise_sigma() const { return 1.0f; } diff --git a/src/cpp/src/text2image/schedulers/ddim.hpp b/src/cpp/src/text2image/schedulers/ddim.hpp index d0ab53d0f5..a3be88f9f3 100644 --- a/src/cpp/src/text2image/schedulers/ddim.hpp +++ b/src/cpp/src/text2image/schedulers/ddim.hpp @@ -39,6 +39,8 @@ class DDIMScheduler : public IScheduler { std::vector get_timesteps() const override; + std::vector get_float_timesteps() const override; + float get_init_noise_sigma() const override; void scale_model_input(ov::Tensor sample, size_t inference_step) override; diff --git a/src/cpp/src/text2image/schedulers/euler_discrete.cpp b/src/cpp/src/text2image/schedulers/euler_discrete.cpp index 6ac65177d8..ac4406ad7f 100644 --- a/src/cpp/src/text2image/schedulers/euler_discrete.cpp +++ b/src/cpp/src/text2image/schedulers/euler_discrete.cpp @@ -258,6 +258,10 @@ std::vector EulerDiscreteScheduler::get_timesteps() const { return m_timesteps; } +std::vector EulerDiscreteScheduler::get_float_timesteps() const { + OPENVINO_THROW("EulerDiscreteScheduler doesn't support float timesteps"); +} + float EulerDiscreteScheduler::get_init_noise_sigma() const { float max_sigma = *std::max_element(m_sigmas.begin(), m_sigmas.end()); diff --git a/src/cpp/src/text2image/schedulers/euler_discrete.hpp b/src/cpp/src/text2image/schedulers/euler_discrete.hpp index e6c826f739..21d0778479 100644 --- a/src/cpp/src/text2image/schedulers/euler_discrete.hpp +++ b/src/cpp/src/text2image/schedulers/euler_discrete.hpp @@ -41,6 +41,8 @@ class EulerDiscreteScheduler : public IScheduler { std::vector get_timesteps() const override; + std::vector get_float_timesteps() const override; + float get_init_noise_sigma() const override; void scale_model_input(ov::Tensor sample, size_t inference_step) 
override; diff --git a/src/cpp/src/text2image/schedulers/flow_match_euler_discrete.cpp b/src/cpp/src/text2image/schedulers/flow_match_euler_discrete.cpp new file mode 100644 index 0000000000..cc19ef490f --- /dev/null +++ b/src/cpp/src/text2image/schedulers/flow_match_euler_discrete.cpp @@ -0,0 +1,149 @@ +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include "text2image/schedulers/flow_match_euler_discrete.hpp" + +#include +#include +#include +#include + +#include "text2image/numpy_utils.hpp" +#include "utils.hpp" + +namespace ov { +namespace genai { + +FlowMatchEulerDiscreteScheduler::Config::Config(const std::filesystem::path& scheduler_config_path) { + std::ifstream file(scheduler_config_path); + OPENVINO_ASSERT(file.is_open(), "Failed to open ", scheduler_config_path); + + nlohmann::json data = nlohmann::json::parse(file); + using utils::read_json_param; + + read_json_param(data, "num_train_timesteps", num_train_timesteps); + read_json_param(data, "shift", shift); + read_json_param(data, "use_dynamic_shifting", use_dynamic_shifting); + read_json_param(data, "base_shift", base_shift); + read_json_param(data, "max_shift", max_shift); + read_json_param(data, "base_image_seq_len", base_image_seq_len); + read_json_param(data, "max_image_seq_len", max_image_seq_len); +} + +FlowMatchEulerDiscreteScheduler::FlowMatchEulerDiscreteScheduler(const std::filesystem::path& scheduler_config_path) + : FlowMatchEulerDiscreteScheduler(Config(scheduler_config_path)) {} + +FlowMatchEulerDiscreteScheduler::FlowMatchEulerDiscreteScheduler(const Config& scheduler_config) + : m_config(scheduler_config) { + using numpy_utils::linspace; + + int32_t num_train_timesteps = m_config.num_train_timesteps; + float shift = m_config.shift; + + auto linspaced = linspace(1.0f, static_cast(num_train_timesteps), num_train_timesteps, true); + for (auto it = linspaced.rbegin(); it != linspaced.rend(); ++it) { + m_timesteps.push_back(*it); + } + + std::transform(m_timesteps.begin(), + m_timesteps.end(), + std::back_inserter(m_sigmas), + [num_train_timesteps](float x) { + return x / num_train_timesteps; + }); + + if (!m_config.use_dynamic_shifting) { + std::transform(m_sigmas.begin(), m_sigmas.end(), m_sigmas.begin(), [shift](float x) { + return shift * x / (1 + (shift - 1) * x); + }); + } + + for (size_t i = 0; i < m_timesteps.size(); ++i) { + m_timesteps[i] = m_sigmas[i] * num_train_timesteps; + } + + m_step_index = -1, m_begin_index = -1; + m_sigma_max = m_sigmas[0], m_sigma_min = m_sigmas.back(); +} + +float FlowMatchEulerDiscreteScheduler::sigma_to_t(float sigma) { + return sigma * m_config.num_train_timesteps; +} + +void FlowMatchEulerDiscreteScheduler::set_timesteps(size_t num_inference_steps) { + m_timesteps.clear(); + m_sigmas.clear(); + + m_num_inference_steps = num_inference_steps; + int32_t num_train_timesteps = m_config.num_train_timesteps; + float shift = m_config.shift; + + using numpy_utils::linspace; + m_timesteps = linspace(sigma_to_t(m_sigma_max), sigma_to_t(m_sigma_min), m_num_inference_steps, true); + + for (const float& i : m_timesteps) { + m_sigmas.push_back(i / num_train_timesteps); + } + + OPENVINO_ASSERT(!m_config.use_dynamic_shifting, + "Parameter 'use_dynamic_shifting' is not supported. 
Please, add support."); + + for (size_t i = 0; i < m_sigmas.size(); ++i) { + m_sigmas[i] = shift * m_sigmas[i] / (1 + (shift - 1) * m_sigmas[i]); + m_timesteps[i] = m_sigmas[i] * num_train_timesteps; + } + m_sigmas.push_back(0); + + m_step_index = -1, m_begin_index = -1; +} + +std::map FlowMatchEulerDiscreteScheduler::step(ov::Tensor noise_pred, + ov::Tensor latents, + size_t inference_step) { + // noise_pred - model_output + // latents - sample + // inference_step + + float* model_output_data = noise_pred.data(); + float* sample_data = latents.data(); + + if (m_step_index == -1) + init_step_index(); + + ov::Tensor prev_sample(latents.get_element_type(), latents.get_shape()); + float* prev_sample_data = prev_sample.data(); + + float sigma_diff = m_sigmas[m_step_index + 1] - m_sigmas[m_step_index]; + + for (size_t i = 0; i < prev_sample.get_size(); ++i) { + prev_sample_data[i] = sample_data[i] + sigma_diff * model_output_data[i]; + } + + m_step_index++; + + return {{"latent", prev_sample}}; +} + +std::vector FlowMatchEulerDiscreteScheduler::get_timesteps() const { + OPENVINO_THROW("FlowMatchEulerDiscreteScheduler doesn't support int timesteps"); +} + +std::vector FlowMatchEulerDiscreteScheduler::get_float_timesteps() const { + return m_timesteps; +} + +float FlowMatchEulerDiscreteScheduler::get_init_noise_sigma() const { + return 1.0f; +} + +void FlowMatchEulerDiscreteScheduler::scale_model_input(ov::Tensor sample, size_t inference_step) { + return; +} + +void FlowMatchEulerDiscreteScheduler::init_step_index() { + // TODO: support index_for_timestep method + m_step_index = (m_begin_index == -1) ? 0 : m_begin_index; +} + +} // namespace genai +} // namespace ov diff --git a/src/cpp/src/text2image/schedulers/flow_match_euler_discrete.hpp b/src/cpp/src/text2image/schedulers/flow_match_euler_discrete.hpp new file mode 100644 index 0000000000..98a068bf01 --- /dev/null +++ b/src/cpp/src/text2image/schedulers/flow_match_euler_discrete.hpp @@ -0,0 +1,60 @@ +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#pragma once + +#include +#include + +#include "text2image/schedulers/types.hpp" +#include "text2image/schedulers/ischeduler.hpp" + +namespace ov { +namespace genai { + +class FlowMatchEulerDiscreteScheduler : public IScheduler { +public: + struct Config { + int32_t num_train_timesteps = 1000; + float shift = 1.0f; + bool use_dynamic_shifting = false; + float base_shift = 0.5f, max_shift = 1.15f; + int32_t base_image_seq_len = 256, max_image_seq_len = 4096; + + Config() = default; + explicit Config(const std::filesystem::path& scheduler_config_path); + }; + + explicit FlowMatchEulerDiscreteScheduler(const std::filesystem::path& scheduler_config_path); + explicit FlowMatchEulerDiscreteScheduler(const Config& scheduler_config); + + void set_timesteps(size_t num_inference_steps) override; + + std::vector get_timesteps() const override; + + std::vector get_float_timesteps() const override; + + float get_init_noise_sigma() const override; + + void scale_model_input(ov::Tensor sample, size_t inference_step) override; + + std::map step(ov::Tensor noise_pred, ov::Tensor latents, size_t inference_step) override; + + void set_begin_index(size_t begin_index); + +private: + Config m_config; + + std::vector m_sigmas; + std::vector m_timesteps; + + float m_sigma_min, m_sigma_max; + size_t m_step_index, m_begin_index; + size_t m_num_inference_steps; + + void init_step_index(); + float sigma_to_t(float simga); +}; + +} // namespace genai +} // namespace ov diff --git 
a/src/cpp/src/text2image/schedulers/ischeduler.hpp b/src/cpp/src/text2image/schedulers/ischeduler.hpp index 51039765bf..d625265df2 100644 --- a/src/cpp/src/text2image/schedulers/ischeduler.hpp +++ b/src/cpp/src/text2image/schedulers/ischeduler.hpp @@ -17,6 +17,8 @@ class IScheduler : public Text2ImagePipeline::Scheduler { virtual std::vector get_timesteps() const = 0; + virtual std::vector get_float_timesteps() const = 0; + virtual float get_init_noise_sigma() const = 0; virtual void scale_model_input(ov::Tensor sample, size_t inference_step) = 0; diff --git a/src/cpp/src/text2image/schedulers/lcm.cpp b/src/cpp/src/text2image/schedulers/lcm.cpp index c4f0f072a1..3dcc10d6fd 100644 --- a/src/cpp/src/text2image/schedulers/lcm.cpp +++ b/src/cpp/src/text2image/schedulers/lcm.cpp @@ -208,6 +208,10 @@ std::vector LCMScheduler::get_timesteps() const { return m_timesteps; } +std::vector LCMScheduler::get_float_timesteps() const { + OPENVINO_THROW("LCMScheduler doesn't support float timesteps"); +} + float LCMScheduler::get_init_noise_sigma() const { return 1.0f; } diff --git a/src/cpp/src/text2image/schedulers/lcm.hpp b/src/cpp/src/text2image/schedulers/lcm.hpp index 13b9d9406c..0353d1af0c 100644 --- a/src/cpp/src/text2image/schedulers/lcm.hpp +++ b/src/cpp/src/text2image/schedulers/lcm.hpp @@ -46,6 +46,8 @@ class LCMScheduler : public IScheduler { std::vector get_timesteps() const override; + std::vector get_float_timesteps() const override; + float get_init_noise_sigma() const override; void scale_model_input(ov::Tensor sample, size_t inference_step) override; diff --git a/src/cpp/src/text2image/schedulers/lms_discrete.cpp b/src/cpp/src/text2image/schedulers/lms_discrete.cpp index dbb1358373..e9f1fb5ae7 100644 --- a/src/cpp/src/text2image/schedulers/lms_discrete.cpp +++ b/src/cpp/src/text2image/schedulers/lms_discrete.cpp @@ -187,6 +187,10 @@ std::vector LMSDiscreteScheduler::get_timesteps() const { return m_timesteps; } +std::vector LMSDiscreteScheduler::get_float_timesteps() const { + OPENVINO_THROW("LMSDiscreteScheduler doesn't support float timesteps"); +} + std::map LMSDiscreteScheduler::step(ov::Tensor noise_pred, ov::Tensor latents, size_t inference_step) { const float sigma = m_sigmas[inference_step]; diff --git a/src/cpp/src/text2image/schedulers/lms_discrete.hpp b/src/cpp/src/text2image/schedulers/lms_discrete.hpp index 6c0a61a777..f87f1d8a91 100644 --- a/src/cpp/src/text2image/schedulers/lms_discrete.hpp +++ b/src/cpp/src/text2image/schedulers/lms_discrete.hpp @@ -35,6 +35,8 @@ class LMSDiscreteScheduler : public IScheduler { std::vector get_timesteps() const override; + std::vector get_float_timesteps() const override; + float get_init_noise_sigma() const override; void scale_model_input(ov::Tensor sample, size_t inference_step) override; diff --git a/src/cpp/src/text2image/schedulers/scheduler.cpp b/src/cpp/src/text2image/schedulers/scheduler.cpp index 2ee4c2adac..c15aad1626 100644 --- a/src/cpp/src/text2image/schedulers/scheduler.cpp +++ b/src/cpp/src/text2image/schedulers/scheduler.cpp @@ -11,6 +11,7 @@ #include "text2image/schedulers/lms_discrete.hpp" #include "text2image/schedulers/ddim.hpp" #include "text2image/schedulers/euler_discrete.hpp" +#include "text2image/schedulers/flow_match_euler_discrete.hpp" namespace ov { namespace genai { @@ -38,6 +39,8 @@ std::shared_ptr Text2ImagePipeline::Scheduler::fr scheduler = std::make_shared(scheduler_config_path); } else if (scheduler_type == Scheduler::Type::EULER_DISCRETE) { scheduler = std::make_shared(scheduler_config_path); + } else 
if (scheduler_type == Scheduler::Type::FLOW_MATCH_EULER_DISCRETE) { + scheduler = std::make_shared(scheduler_config_path); } else { OPENVINO_THROW("Unsupported scheduler type '", scheduler_type, ". Please, manually create scheduler via supported one"); } diff --git a/src/cpp/src/text2image/schedulers/types.cpp b/src/cpp/src/text2image/schedulers/types.cpp index 0ca970f359..aed46e7d70 100644 --- a/src/cpp/src/text2image/schedulers/types.cpp +++ b/src/cpp/src/text2image/schedulers/types.cpp @@ -51,6 +51,8 @@ void read_json_param(const nlohmann::json& data, const std::string& name, Text2I param = Text2ImagePipeline::Scheduler::LMS_DISCRETE; else if (scheduler_type_str == "EulerDiscreteScheduler") param = Text2ImagePipeline::Scheduler::EULER_DISCRETE; + else if (scheduler_type_str == "FlowMatchEulerDiscreteScheduler") + param = Text2ImagePipeline::Scheduler::FLOW_MATCH_EULER_DISCRETE; else if (!scheduler_type_str.empty()) { OPENVINO_THROW("Unsupported value for 'prediction_type' ", scheduler_type_str); } diff --git a/src/cpp/src/text2image/stable_diffusion_3_pipeline.hpp b/src/cpp/src/text2image/stable_diffusion_3_pipeline.hpp new file mode 100644 index 0000000000..8999f95306 --- /dev/null +++ b/src/cpp/src/text2image/stable_diffusion_3_pipeline.hpp @@ -0,0 +1,618 @@ +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 + +#include +#include + +#include "text2image/diffusion_pipeline.hpp" +#include "text2image/numpy_utils.hpp" +#include "utils.hpp" + +namespace { + +// src - input tensor with data for padding +// res - zeros tonsor with target shape +void padding_right(const float* src, float* res, const ov::Shape src_size, const ov::Shape res_size) { + OPENVINO_ASSERT(src_size[0] == res_size[0] && src_size[1] == res_size[1], + "Tensors for padding_right must have the same dimensions"); + + for (size_t i = 0; i < res_size[0]; ++i) { + for (size_t j = 0; j < res_size[1]; ++j) { + size_t offset_1 = (i * res_size[1] + j) * res_size[2]; + size_t offset_2 = (i * src_size[1] + j) * src_size[2]; + + std::memcpy(res + offset_1, src + offset_2, src_size[2] * sizeof(float)); + } + } +} + +ov::Tensor tensor_batch_copy(const ov::Tensor input, const size_t num_images_per_prompt, size_t batch_size_multiplier) { + ov::Shape repeated_shape = input.get_shape(); + repeated_shape[0] *= num_images_per_prompt; + ov::Tensor tensor_repeated(input.get_element_type(), repeated_shape); + + for (size_t n = 0; n < num_images_per_prompt; ++n) { + batch_copy(input, tensor_repeated, 0, n); + } + + return tensor_repeated; +} + +ov::Tensor split_2d_by_batch(const ov::Tensor input, size_t batch_num) { + ov::Tensor result(input.get_element_type(), {1, input.get_shape()[1]}); + + size_t shift = batch_num * input.get_shape()[1]; + std::memcpy(result.data(), input.data() + shift, result.get_shape()[1] * sizeof(float)); + + return result; +} + +ov::Tensor split_3d_by_batch(const ov::Tensor input, size_t batch_num) { + ov::Tensor result(input.get_element_type(), {1, input.get_shape()[1], input.get_shape()[2]}); + + size_t shift = batch_num * input.get_shape()[1] * input.get_shape()[2]; + std::memcpy(result.data(), + input.data() + shift, + result.get_shape()[1] * input.get_shape()[2] * sizeof(float)); + + return result; +} + +} // namespace + +namespace ov { +namespace genai { + +class Text2ImagePipeline::StableDiffusion3Pipeline : public Text2ImagePipeline::DiffusionPipeline { +public: + explicit StableDiffusion3Pipeline(const std::filesystem::path& root_dir) { + const std::filesystem::path 
model_index_path = root_dir / "model_index.json"; + std::ifstream file(model_index_path); + OPENVINO_ASSERT(file.is_open(), "Failed to open ", model_index_path); + + nlohmann::json data = nlohmann::json::parse(file); + using utils::read_json_param; + + set_scheduler(Scheduler::from_config(root_dir / "scheduler/scheduler_config.json")); + + const std::string text_encoder = data["text_encoder"][1].get(); + if (text_encoder == "CLIPTextModelWithProjection") { + m_clip_text_encoder_1 = std::make_shared(root_dir / "text_encoder"); + } else { + OPENVINO_THROW("Unsupported '", text_encoder, "' text encoder type"); + } + + const std::string text_encoder_2 = data["text_encoder_2"][1].get(); + if (text_encoder_2 == "CLIPTextModelWithProjection") { + m_clip_text_encoder_2 = std::make_shared(root_dir / "text_encoder_2"); + } else { + OPENVINO_THROW("Unsupported '", text_encoder_2, "' text encoder type"); + } + + // TODO: + // const std::string text_encoder_3 = data["text_encoder_3"][1].get(); + // if (text_encoder_2 == "T5EncoderModel") { + // m_t5_encoder_model = std::make_shared(root_dir + "/text_encoder_3"); + // } else { + // OPENVINO_THROW("Unsupported '", text_encoder, "' text encoder type"); + // } + + const std::string vae = data["vae"][1].get(); + if (vae == "AutoencoderKL") { + m_vae_decoder = std::make_shared(root_dir / "vae_decoder"); + } else { + OPENVINO_THROW("Unsupported '", vae, "' VAE decoder type"); + } + + const std::string transformer = data["transformer"][1].get(); + if (transformer == "SD3Transformer2DModel") { + m_transformer = std::make_shared(root_dir / "transformer"); + } else { + OPENVINO_THROW("Unsupported '", transformer, "'Transformer type"); + } + + // initialize generation config + initialize_generation_config(data["_class_name"].get()); + } + + StableDiffusion3Pipeline(const std::filesystem::path& root_dir, + const std::string& device, + const ov::AnyMap& properties) { + const std::filesystem::path model_index_path = root_dir / "model_index.json"; + std::ifstream file(model_index_path); + OPENVINO_ASSERT(file.is_open(), "Failed to open ", model_index_path); + + nlohmann::json data = nlohmann::json::parse(file); + using utils::read_json_param; + + set_scheduler(Scheduler::from_config(root_dir / "scheduler/scheduler_config.json")); + + const std::string text_encoder = data["text_encoder"][1].get(); + if (text_encoder == "CLIPTextModelWithProjection") { + m_clip_text_encoder_1 = + std::make_shared(root_dir / "text_encoder", device, properties); + } else { + OPENVINO_THROW("Unsupported '", text_encoder, "' text encoder type"); + } + + const std::string text_encoder_2 = data["text_encoder_2"][1].get(); + if (text_encoder_2 == "CLIPTextModelWithProjection") { + m_clip_text_encoder_2 = + std::make_shared(root_dir / "text_encoder_2", device, properties); + } else { + OPENVINO_THROW("Unsupported '", text_encoder_2, "' text encoder type"); + } + + // TODO: text_encoder_3 + + const std::string vae = data["vae"][1].get(); + if (vae == "AutoencoderKL") { + m_vae_decoder = std::make_shared(root_dir / "vae_decoder", device, properties); + } else { + OPENVINO_THROW("Unsupported '", vae, "' VAE decoder type"); + } + + const std::string transformer = data["transformer"][1].get(); + if (transformer == "SD3Transformer2DModel") { + m_transformer = std::make_shared(root_dir / "transformer", device, properties); + } else { + OPENVINO_THROW("Unsupported '", transformer, "'Transformer type"); + } + + // initialize generation config + initialize_generation_config(data["_class_name"].get()); + 
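The model_index.json lookups in both constructors read the second element of each entry because, in the diffusers layout this pipeline appears to follow, every component maps to a [library, class] pair. The fragment below is an assumed illustration only; the class names come from the checks above, while the library names ("transformers", "diffusers") are conventional placeholders:

    #include <iostream>
    #include <nlohmann/json.hpp>

    int main() {
        // Assumed model_index.json fragment, shown to motivate the data["..."][1] indexing.
        nlohmann::json data = nlohmann::json::parse(R"({
            "_class_name": "StableDiffusion3Pipeline",
            "text_encoder": ["transformers", "CLIPTextModelWithProjection"],
            "transformer": ["diffusers", "SD3Transformer2DModel"],
            "vae": ["diffusers", "AutoencoderKL"]
        })");
        std::cout << data["text_encoder"][1].get<std::string>() << std::endl;  // CLIPTextModelWithProjection
        return 0;
    }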
} + + StableDiffusion3Pipeline(const CLIPTextModelWithProjection& clip_text_model_1, + const CLIPTextModelWithProjection& clip_text_model_2, + const SD3Transformer2DModel& transformer, + const AutoencoderKL& vae_decoder) + : m_clip_text_encoder_1(std::make_shared(clip_text_model_1)), + m_clip_text_encoder_2(std::make_shared(clip_text_model_2)), + m_vae_decoder(std::make_shared(vae_decoder)), + m_transformer(std::make_shared(transformer)) {} + + void reshape(const int num_images_per_prompt, + const int height, + const int width, + const float guidance_scale) override { + check_image_size(height, width); + + const size_t batch_size_multiplier = + do_classifier_free_guidance(guidance_scale) ? 2 : 1; // Transformer accepts 2x batch in case of CFG + m_clip_text_encoder_1->reshape(batch_size_multiplier); + m_clip_text_encoder_2->reshape(batch_size_multiplier); + m_transformer->reshape(num_images_per_prompt * batch_size_multiplier, + height, + width, + m_clip_text_encoder_1->get_config().max_position_embeddings); + m_vae_decoder->reshape(num_images_per_prompt, height, width); + } + + void compile(const std::string& device, const ov::AnyMap& properties) override { + m_clip_text_encoder_1->compile(device, properties); + m_clip_text_encoder_2->compile(device, properties); + m_vae_decoder->compile(device, properties); + m_transformer->compile(device, properties); + } + + ov::Tensor generate(const std::string& positive_prompt, const ov::AnyMap& properties) override { + using namespace numpy_utils; + GenerationConfig generation_config = m_generation_config; + generation_config.update_generation_config(properties); + + const auto& transformer_config = m_transformer->get_config(); + const size_t batch_size_multiplier = do_classifier_free_guidance(generation_config.guidance_scale) + ? 2 + : 1; // Transformer accepts 2x batch in case of CFG + + const size_t vae_scale_factor = m_transformer->get_vae_scale_factor(); + + if (generation_config.height < 0) + generation_config.height = transformer_config.sample_size * vae_scale_factor; + if (generation_config.width < 0) + generation_config.width = transformer_config.sample_size * vae_scale_factor; + + check_inputs(generation_config); + + if (generation_config.random_generator == nullptr) { + uint32_t seed = time(NULL); + generation_config.random_generator = std::make_shared(seed); + } + + // Input tensors for transformer model + ov::Tensor prompt_embeds_inp, pooled_prompt_embeds_inp; + + // 1. Encode positive prompt: + std::string prompt_2_str = + generation_config.prompt_2 != std::nullopt ? *generation_config.prompt_2 : positive_prompt; + std::string prompt_3_str = + generation_config.prompt_3 != std::nullopt ? *generation_config.prompt_3 : positive_prompt; + + std::string negative_prompt_1_str = generation_config.negative_prompt; + std::string negative_prompt_2_str = generation_config.negative_prompt_2 != std::nullopt + ? *generation_config.negative_prompt_2 + : negative_prompt_1_str; + std::string negative_prompt_3_str = generation_config.negative_prompt_3 != std::nullopt + ? 
*generation_config.negative_prompt_3 + : negative_prompt_1_str; + + // text_encoder_1_output - stores positive and negative pooled_prompt_embeds + ov::Tensor text_encoder_1_output = + m_clip_text_encoder_1->infer(positive_prompt, + negative_prompt_1_str, + do_classifier_free_guidance(generation_config.guidance_scale)); + + // get positive pooled_prompt_embed_out + ov::Tensor pooled_prompt_embed_out = split_2d_by_batch(text_encoder_1_output, 1); + + // text_encoder_1_hidden_state - stores positive and negative prompt_embeds + size_t idx_hidden_state_1 = m_clip_text_encoder_1->get_config().num_hidden_layers + 1; + ov::Tensor text_encoder_1_hidden_state = m_clip_text_encoder_1->get_output_tensor(idx_hidden_state_1); + // get positive prompt_embed_out + ov::Tensor prompt_embed_out = split_3d_by_batch(text_encoder_1_hidden_state, 1); + + // text_encoder_2_output - stores positive and negative pooled_prompt_2_embeds + ov::Tensor text_encoder_2_output = + m_clip_text_encoder_2->infer(prompt_2_str, + negative_prompt_2_str, + do_classifier_free_guidance(generation_config.guidance_scale)); + + // get positive pooled_prompt_2_embed_out + ov::Tensor pooled_prompt_2_embed_out = split_2d_by_batch(text_encoder_2_output, 1); + + // text_encoder_2_hidden_state - stores positive and negative prompt_2_embeds + size_t idx_hidden_state_2 = m_clip_text_encoder_2->get_config().num_hidden_layers + 1; + ov::Tensor text_encoder_2_hidden_state = m_clip_text_encoder_2->get_output_tensor(idx_hidden_state_2); + // get positive prompt_2_embed_out + ov::Tensor prompt_2_embed_out = split_3d_by_batch(text_encoder_2_hidden_state, 1); + + ov::Tensor pooled_prompt_embed, prompt_embed, pooled_prompt_2_embed, prompt_2_embed; + if (generation_config.num_images_per_prompt == 1) { + pooled_prompt_embed = pooled_prompt_embed_out; + prompt_embed = prompt_embed_out; + pooled_prompt_2_embed = pooled_prompt_2_embed_out; + prompt_2_embed = prompt_2_embed_out; + } else { + pooled_prompt_embed = tensor_batch_copy(pooled_prompt_embed_out, + generation_config.num_images_per_prompt, + batch_size_multiplier); + prompt_embed = + tensor_batch_copy(prompt_embed_out, generation_config.num_images_per_prompt, batch_size_multiplier); + pooled_prompt_2_embed = tensor_batch_copy(pooled_prompt_2_embed_out, + generation_config.num_images_per_prompt, + batch_size_multiplier); + prompt_2_embed = + tensor_batch_copy(prompt_2_embed_out, generation_config.num_images_per_prompt, batch_size_multiplier); + } + + // concatenate hidden_states from two encoders + ov::Shape pr_emb_shape = prompt_embed.get_shape(); + ov::Shape pr_emb_2_shape = prompt_2_embed.get_shape(); + + ov::Shape clip_prompt_embeds_shape = {pr_emb_shape[0], pr_emb_shape[1], pr_emb_shape[2] + pr_emb_2_shape[2]}; + ov::Tensor clip_prompt_embeds(prompt_embed.get_element_type(), clip_prompt_embeds_shape); + + const float* pr_emb_1_data = prompt_embed.data(); + const float* pr_emb_2_data = prompt_2_embed.data(); + float* clip_prompt_embeds_data = clip_prompt_embeds.data(); + + concat_3d_by_rows(pr_emb_1_data, pr_emb_2_data, clip_prompt_embeds_data, pr_emb_shape, pr_emb_2_shape); + + // TODO: text_encoder_3 + ov::Shape t5_prompt_embed_shape = {generation_config.num_images_per_prompt, + m_clip_text_encoder_1->get_config().max_position_embeddings, + transformer_config.joint_attention_dim}; + + std::vector t5_prompt_embed( + t5_prompt_embed_shape[0] * t5_prompt_embed_shape[1] * t5_prompt_embed_shape[2], + 0.0f); + + // padding for clip_prompt_embeds + ov::Shape pad_embeds_shape = 
{clip_prompt_embeds_shape[0], + clip_prompt_embeds_shape[1], + t5_prompt_embed_shape[2]}; + + std::vector pad_embeds(pad_embeds_shape[0] * pad_embeds_shape[1] * pad_embeds_shape[2], 0.0f); + padding_right(clip_prompt_embeds_data, pad_embeds.data(), clip_prompt_embeds_shape, pad_embeds_shape); + + // prompt_embeds = torch.cat([pad_embeds, t5_prompt_embed], dim=-2) + ov::Shape prompt_embeds_shape = {pad_embeds_shape[0], + pad_embeds_shape[1] + t5_prompt_embed_shape[1], + pad_embeds_shape[2]}; + ov::Tensor prompt_embeds(ov::element::f32, prompt_embeds_shape); + float* prompt_embeds_data = prompt_embeds.data(); + concat_3d_by_cols(pad_embeds.data(), + t5_prompt_embed.data(), + prompt_embeds_data, + pad_embeds_shape, + t5_prompt_embed_shape); + + // pooled_prompt_embeds = torch.cat([pooled_prompt_embed, pooled_prompt_2_embed], dim=-1) + ov::Shape p_pr_emb_shape = pooled_prompt_embed.get_shape(); + ov::Shape p_pr_emb_2_shape = pooled_prompt_2_embed.get_shape(); + + const float* pooled_prompt_embed_data = pooled_prompt_embed.data(); + const float* pooled_prompt_2_embed_data = pooled_prompt_2_embed.data(); + + ov::Shape pooled_prompt_embeds_shape = {p_pr_emb_shape[0], p_pr_emb_shape[1] + p_pr_emb_2_shape[1]}; + ov::Tensor pooled_prompt_embeds(ov::element::f32, pooled_prompt_embeds_shape); + float* pooled_prompt_embeds_data = pooled_prompt_embeds.data(); + + concat_2d_by_rows(pooled_prompt_embed_data, + pooled_prompt_2_embed_data, + pooled_prompt_embeds_data, + p_pr_emb_shape, + p_pr_emb_2_shape); + // From steps above we'll use prompt_embeds and pooled_prompt_embeds tensors + + if (do_classifier_free_guidance(generation_config.guidance_scale)) { + // 2. Encode negative prompt: + + ov::Tensor negative_pooled_prompt_embed_out = split_2d_by_batch(text_encoder_1_output, 0); + ov::Tensor negative_prompt_embed_out = split_3d_by_batch(text_encoder_1_hidden_state, 0); + ov::Tensor negative_pooled_prompt_2_embed_out = split_2d_by_batch(text_encoder_2_output, 0); + ov::Tensor negative_prompt_2_embed_out = split_3d_by_batch(text_encoder_2_hidden_state, 0); + + ov::Tensor negative_pooled_prompt_embed, negative_prompt_embed, negative_pooled_prompt_2_embed, + negative_prompt_2_embed; + if (generation_config.num_images_per_prompt == 1) { + negative_pooled_prompt_embed = negative_pooled_prompt_embed_out; + negative_prompt_embed = negative_prompt_embed_out; + negative_pooled_prompt_2_embed = negative_pooled_prompt_2_embed_out; + negative_prompt_2_embed = negative_prompt_2_embed_out; + } else { + negative_pooled_prompt_embed = tensor_batch_copy(negative_pooled_prompt_embed_out, + generation_config.num_images_per_prompt, + batch_size_multiplier); + negative_prompt_embed = tensor_batch_copy(negative_prompt_embed_out, + generation_config.num_images_per_prompt, + batch_size_multiplier); + negative_pooled_prompt_2_embed = tensor_batch_copy(negative_pooled_prompt_2_embed_out, + generation_config.num_images_per_prompt, + batch_size_multiplier); + negative_prompt_2_embed = tensor_batch_copy(negative_prompt_2_embed_out, + generation_config.num_images_per_prompt, + batch_size_multiplier); + } + + // concatenate hidden_states from two encoders + ov::Shape n_pr_emb_1_shape = negative_prompt_embed.get_shape(); + ov::Shape n_pr_emb_2_shape = negative_prompt_2_embed.get_shape(); + + ov::Shape neg_clip_prompt_embeds_shape = {n_pr_emb_1_shape[0], + n_pr_emb_1_shape[1], + n_pr_emb_1_shape[2] + n_pr_emb_2_shape[2]}; + ov::Tensor neg_clip_prompt_embeds(prompt_embed.get_element_type(), neg_clip_prompt_embeds_shape); + + const float* 
neg_pr_emb_1_data = negative_prompt_embed.data(); + const float* neg_pr_emb_2_data = negative_prompt_2_embed.data(); + float* neg_clip_prompt_embeds_data = neg_clip_prompt_embeds.data(); + + concat_3d_by_rows(neg_pr_emb_1_data, + neg_pr_emb_2_data, + neg_clip_prompt_embeds_data, + n_pr_emb_1_shape, + n_pr_emb_2_shape); + + std::vector t5_neg_prompt_embed( + t5_prompt_embed_shape[0] * t5_prompt_embed_shape[1] * t5_prompt_embed_shape[2], + 0.0f); + + // padding for neg_clip_prompt_embeds + ov::Shape neg_pad_embeds_shape = {neg_clip_prompt_embeds_shape[0], + neg_clip_prompt_embeds_shape[1], + t5_prompt_embed_shape[2]}; + + std::vector neg_pad_embeds( + neg_pad_embeds_shape[0] * neg_pad_embeds_shape[1] * neg_pad_embeds_shape[2], + 0.0f); + + padding_right(neg_clip_prompt_embeds_data, + neg_pad_embeds.data(), + neg_clip_prompt_embeds_shape, + neg_pad_embeds_shape); + + // negative_prompt_embeds = torch.cat([negative_clip_prompt_embeds, t5_negative_prompt_embed], dim=-2) + ov::Shape neg_prompt_embeds_shape = {neg_pad_embeds_shape[0], + neg_pad_embeds_shape[1] + t5_prompt_embed_shape[1], + neg_pad_embeds_shape[2]}; + ov::Tensor neg_prompt_embeds(ov::element::f32, neg_prompt_embeds_shape); + float* neg_prompt_embeds_data = neg_prompt_embeds.data(); + + concat_3d_by_cols(neg_pad_embeds.data(), + t5_neg_prompt_embed.data(), + neg_prompt_embeds_data, + neg_pad_embeds_shape, + t5_prompt_embed_shape); + + // neg_pooled_prompt_embeds = torch.cat([negative_pooled_prompt_embed, negative_pooled_prompt_2_embed], + // dim=-1) + ov::Shape neg_pooled_pr_emb_shape = negative_pooled_prompt_embed.get_shape(); + ov::Shape neg_pooled_pr_2_emb_shape = negative_pooled_prompt_2_embed.get_shape(); + + const float* neg_pooled_pr_emb_data = negative_pooled_prompt_embed.data(); + const float* neg_pooled_pr_2_emb_data = negative_pooled_prompt_2_embed.data(); + + ov::Shape neg_pooled_prompt_embeds_shape = {neg_pooled_pr_emb_shape[0], + neg_pooled_pr_emb_shape[1] + neg_pooled_pr_2_emb_shape[1]}; + ov::Tensor neg_pooled_prompt_embeds(ov::element::f32, neg_pooled_prompt_embeds_shape); + float* neg_pooled_prompt_embeds_data = neg_pooled_prompt_embeds.data(); + + concat_2d_by_rows(neg_pooled_pr_emb_data, + neg_pooled_pr_2_emb_data, + neg_pooled_prompt_embeds_data, + neg_pooled_pr_emb_shape, + neg_pooled_pr_2_emb_shape); + // From steps above we'll use neg_prompt_embeds and neg_pooled_prompt_embeds tensors + + // Fill in transformer inputs: concat positive and negative prompt_embeds + ov::Shape prompt_embeds_inp_shape = {prompt_embeds_shape[0] + neg_prompt_embeds_shape[0], + prompt_embeds_shape[1], + prompt_embeds_shape[2]}; + prompt_embeds_inp = ov::Tensor(ov::element::f32, prompt_embeds_inp_shape); + float* prompt_embeds_inp_data = prompt_embeds_inp.data(); + concat_3d_by_channels(neg_prompt_embeds_data, + prompt_embeds_data, + prompt_embeds_inp_data, + neg_prompt_embeds_shape, + prompt_embeds_shape); + + ov::Shape pooled_prompt_embeds_inp_shape = { + neg_pooled_prompt_embeds_shape[0] + pooled_prompt_embeds_shape[0], + pooled_prompt_embeds_shape[1]}; + + pooled_prompt_embeds_inp = ov::Tensor(ov::element::f32, pooled_prompt_embeds_inp_shape); + float* pooled_prompt_embeds_input_data = pooled_prompt_embeds_inp.data(); + concat_2d_by_channels(neg_pooled_prompt_embeds_data, + pooled_prompt_embeds_data, + pooled_prompt_embeds_input_data, + neg_pooled_prompt_embeds_shape, + pooled_prompt_embeds_shape); + } else { + // Fill in transformer inputs + prompt_embeds_inp = prompt_embeds; + pooled_prompt_embeds_inp = pooled_prompt_embeds; + 
} + + // 3. Prepare timesteps + m_scheduler->set_timesteps(generation_config.num_inference_steps); + std::vector<float> timesteps = m_scheduler->get_float_timesteps(); + + // 4. Set model inputs + m_transformer->set_hidden_states("encoder_hidden_states", prompt_embeds_inp); + m_transformer->set_hidden_states("pooled_projections", pooled_prompt_embeds_inp); + + // 5. Prepare latent variables + size_t num_channels_latents = m_transformer->get_config().in_channels; + ov::Shape latent_shape{generation_config.num_images_per_prompt, + num_channels_latents, + generation_config.height / vae_scale_factor, + generation_config.width / vae_scale_factor}; + + ov::Shape latent_shape_cfg = latent_shape; + latent_shape_cfg[0] *= batch_size_multiplier; + + ov::Tensor latent(ov::element::f32, latent_shape), latent_cfg(ov::element::f32, latent_shape_cfg); + std::generate_n(latent.data<float>(), latent.get_size(), [&]() -> float { + return generation_config.random_generator->next() * m_scheduler->get_init_noise_sigma(); + }); + + // 6. Denoising loop + ov::Tensor noisy_residual_tensor(ov::element::f32, {}); + ov::Tensor timestep; + + for (size_t inference_step = 0; inference_step < generation_config.num_inference_steps; ++inference_step) { + // concat the same latent twice along a batch dimension in case of CFG + if (batch_size_multiplier > 1) { + batch_copy(latent, latent_cfg, 0, 0, generation_config.num_images_per_prompt); + batch_copy(latent, + latent_cfg, + 0, + generation_config.num_images_per_prompt, + generation_config.num_images_per_prompt); + + size_t timestep_size = generation_config.num_images_per_prompt * batch_size_multiplier; + timestep = ov::Tensor(ov::element::f32, {timestep_size}); + float* timestep_data = timestep.data<float>(); + std::fill_n(timestep_data, timestep_size, timesteps[inference_step]); + } else { + // just assign to save memory copy + latent_cfg = latent; + timestep = ov::Tensor(ov::element::f32, {1}, &timesteps[inference_step]); + } + + ov::Tensor noise_pred_tensor = m_transformer->infer(latent_cfg, timestep); + + ov::Shape noise_pred_shape = noise_pred_tensor.get_shape(); + noise_pred_shape[0] /= batch_size_multiplier; + noisy_residual_tensor.set_shape(noise_pred_shape); + + if (batch_size_multiplier > 1) { + // perform guidance + float* noisy_residual = noisy_residual_tensor.data<float>(); + const float* noise_pred_uncond = noise_pred_tensor.data<float>(); + const float* noise_pred_text = noise_pred_uncond + noisy_residual_tensor.get_size(); + + for (size_t i = 0; i < noisy_residual_tensor.get_size(); ++i) { + noisy_residual[i] = noise_pred_uncond[i] + + generation_config.guidance_scale * (noise_pred_text[i] - noise_pred_uncond[i]); + } + } else { + noisy_residual_tensor = noise_pred_tensor; + } + + auto scheduler_step_result = m_scheduler->step(noisy_residual_tensor, latent, inference_step); + latent = scheduler_step_result["latent"]; + } + + float* latent_data = latent.data<float>(); + for (size_t i = 0; i < latent.get_size(); ++i) { + latent_data[i] = (latent_data[i] / m_vae_decoder->get_config().scaling_factor) + + m_vae_decoder->get_config().shift_factor; + } + + return m_vae_decoder->infer(latent); + } + +private: + bool do_classifier_free_guidance(float guidance_scale) const { + return guidance_scale >= 1.0; + } + + void initialize_generation_config(const std::string& class_name) override { + assert(m_transformer != nullptr); + assert(m_vae_decoder != nullptr); + + const auto& transformer_config = m_transformer->get_config(); + const size_t vae_scale_factor = m_transformer->get_vae_scale_factor(); + +
m_generation_config.height = transformer_config.sample_size * vae_scale_factor; + m_generation_config.width = transformer_config.sample_size * vae_scale_factor; + + if (class_name == "StableDiffusion3Pipeline") { + m_generation_config.guidance_scale = 7.0f; + m_generation_config.num_inference_steps = 28; + } else { + OPENVINO_THROW("Unsupported class_name '", class_name, "'. Please contact OpenVINO GenAI developers"); + } + } + + void check_image_size(const int height, const int width) const override { + assert(m_transformer != nullptr); + const size_t vae_scale_factor = m_transformer->get_vae_scale_factor(); + const size_t patch_size = m_transformer->get_config().patch_size; + OPENVINO_ASSERT((height % (vae_scale_factor * patch_size) == 0 || height < 0) && + (width % (vae_scale_factor * patch_size) == 0 || width < 0), + "Both 'width' and 'height' must be divisible by ", + vae_scale_factor * patch_size); + } + + void check_inputs(const GenerationConfig& generation_config) const override { + check_image_size(generation_config.width, generation_config.height); + + const bool is_classifier_free_guidance = do_classifier_free_guidance(generation_config.guidance_scale); + const char* const pipeline_name = "Stable Diffusion 3"; + + OPENVINO_ASSERT( + generation_config.prompt_3 == std::nullopt || generation_config.negative_prompt_3 == std::nullopt, + "T5Encoder is not currently supported, so 'prompt_3' and 'negative_prompt_3' cannot be used."); + OPENVINO_ASSERT(is_classifier_free_guidance || generation_config.negative_prompt.empty(), + "Negative prompt is not used when guidance scale < 1.0"); + OPENVINO_ASSERT(is_classifier_free_guidance || generation_config.negative_prompt_2 == std::nullopt, + "Negative prompt 2 is not used when guidance scale < 1.0"); + OPENVINO_ASSERT(is_classifier_free_guidance || generation_config.negative_prompt_3 == std::nullopt, + "Negative prompt 3 is not used when guidance scale < 1.0"); + } + + std::shared_ptr<SD3Transformer2DModel> m_transformer; + std::shared_ptr<CLIPTextModelWithProjection> m_clip_text_encoder_1; + std::shared_ptr<CLIPTextModelWithProjection> m_clip_text_encoder_2; + // TODO: + // std::shared_ptr m_t5_encoder_model; + std::shared_ptr<AutoencoderKL> m_vae_decoder; +}; + +} // namespace genai +} // namespace ov diff --git a/src/cpp/src/text2image/stable_diffusion_pipeline.hpp b/src/cpp/src/text2image/stable_diffusion_pipeline.hpp index ed1508534f..6d0624adce 100644 --- a/src/cpp/src/text2image/stable_diffusion_pipeline.hpp +++ b/src/cpp/src/text2image/stable_diffusion_pipeline.hpp @@ -295,8 +295,8 @@ class Text2ImagePipeline::StableDiffusionPipeline : public Text2ImagePipeline::D } else if (!is_classifier_free_guidance) { OPENVINO_ASSERT(generation_config.negative_prompt.empty(), "Negative prompt is not used when guidance scale < 1.0"); } - OPENVINO_ASSERT(generation_config.negative_prompt_2.empty(), "Negative prompt 2 is not used by ", pipeline_name); - OPENVINO_ASSERT(generation_config.negative_prompt_3.empty(), "Negative prompt 3 is not used by ", pipeline_name); + OPENVINO_ASSERT(generation_config.negative_prompt_2 == std::nullopt, "Negative prompt 2 is not used by ", pipeline_name); + OPENVINO_ASSERT(generation_config.negative_prompt_3 == std::nullopt, "Negative prompt 3 is not used by ", pipeline_name); } std::shared_ptr<CLIPTextModel> m_clip_text_encoder; diff --git a/src/cpp/src/text2image/stable_diffusion_xl_pipeline.hpp b/src/cpp/src/text2image/stable_diffusion_xl_pipeline.hpp index 8a239f418f..8f8af97e52 100644 --- a/src/cpp/src/text2image/stable_diffusion_xl_pipeline.hpp +++ b/src/cpp/src/text2image/stable_diffusion_xl_pipeline.hpp @@
-358,8 +358,8 @@ class Text2ImagePipeline::StableDiffusionXLPipeline : public Text2ImagePipeline: OPENVINO_ASSERT(generation_config.prompt_3 == std::nullopt, "Prompt 3 is not used by ", pipeline_name); OPENVINO_ASSERT(is_classifier_free_guidance || generation_config.negative_prompt.empty(), "Negative prompt is not used when guidance scale < 1.0"); - OPENVINO_ASSERT(is_classifier_free_guidance || generation_config.negative_prompt_2.empty(), "Negative prompt 2 is not used when guidance scale < 1.0"); - OPENVINO_ASSERT(generation_config.negative_prompt_3.empty(), "Negative prompt 3 is not used by ", pipeline_name); + OPENVINO_ASSERT(is_classifier_free_guidance || generation_config.negative_prompt_2 == std::nullopt, "Negative prompt 2 is not used when guidance scale < 1.0"); + OPENVINO_ASSERT(generation_config.negative_prompt_3 == std::nullopt, "Negative prompt 3 is not used by ", pipeline_name); } ov::AnyMap properties_for_text_encoder(ov::AnyMap properties, const std::string& tensor_name_prefix) { diff --git a/src/cpp/src/text2image/text2image_pipeline.cpp b/src/cpp/src/text2image/text2image_pipeline.cpp index a50ca564af..7c92166171 100644 --- a/src/cpp/src/text2image/text2image_pipeline.cpp +++ b/src/cpp/src/text2image/text2image_pipeline.cpp @@ -3,6 +3,7 @@ #include "text2image/stable_diffusion_pipeline.hpp" #include "text2image/stable_diffusion_xl_pipeline.hpp" +#include "text2image/stable_diffusion_3_pipeline.hpp" #include #include @@ -57,6 +58,8 @@ void Text2ImagePipeline::GenerationConfig::update_generation_config(const ov::An void Text2ImagePipeline::GenerationConfig::validate() const { OPENVINO_ASSERT(guidance_scale >= 1.0f || negative_prompt.empty(), "Guidance scale < 1.0 ignores negative prompt"); + OPENVINO_ASSERT(guidance_scale >= 1.0f || negative_prompt_2 == std::nullopt, "Guidance scale < 1.0 ignores negative prompt"); + OPENVINO_ASSERT(guidance_scale >= 1.0f || negative_prompt_3 == std::nullopt, "Guidance scale < 1.0 ignores negative prompt"); } // @@ -71,6 +74,8 @@ Text2ImagePipeline::Text2ImagePipeline(const std::filesystem::path& root_dir) { m_impl = std::make_shared(root_dir); } else if (class_name == "StableDiffusionXLPipeline") { m_impl = std::make_shared(root_dir); + } else if (class_name == "StableDiffusion3Pipeline") { + m_impl = std::make_shared(root_dir); } else { OPENVINO_THROW("Unsupported text to image generation pipeline '", class_name, "'"); } @@ -84,6 +89,8 @@ Text2ImagePipeline::Text2ImagePipeline(const std::filesystem::path& root_dir, co m_impl = std::make_shared(root_dir, device, properties); } else if (class_name == "StableDiffusionXLPipeline") { m_impl = std::make_shared(root_dir, device, properties); + } else if (class_name == "StableDiffusion3Pipeline") { + m_impl = std::make_shared(root_dir, device, properties); } else { OPENVINO_THROW("Unsupported text to image generation pipeline '", class_name, "'"); } @@ -117,10 +124,10 @@ Text2ImagePipeline Text2ImagePipeline::latent_consistency_model( Text2ImagePipeline Text2ImagePipeline::stable_diffusion_xl( const std::shared_ptr& scheduler, - const CLIPTextModel& clip_text_model, - const CLIPTextModelWithProjection& clip_text_model_with_projection, - const UNet2DConditionModel& unet, - const AutoencoderKL& vae_decoder) { + const CLIPTextModel& clip_text_model, + const CLIPTextModelWithProjection& clip_text_model_with_projection, + const UNet2DConditionModel& unet, + const AutoencoderKL& vae_decoder) { auto impl = std::make_shared(clip_text_model, clip_text_model_with_projection, unet, vae_decoder); 
assert(scheduler != nullptr); @@ -129,6 +136,20 @@ Text2ImagePipeline Text2ImagePipeline::stable_diffusion_xl( return Text2ImagePipeline(impl); } +Text2ImagePipeline Text2ImagePipeline::stable_diffusion_3( + const std::shared_ptr& scheduler, + const CLIPTextModelWithProjection& clip_text_model_1, + const CLIPTextModelWithProjection& clip_text_model_2, + const SD3Transformer2DModel& transformer, + const AutoencoderKL& vae_decoder){ + auto impl = std::make_shared(clip_text_model_1, clip_text_model_2, transformer, vae_decoder); + + assert(scheduler != nullptr); + impl->set_scheduler(scheduler); + + return Text2ImagePipeline(impl); +} + Text2ImagePipeline::GenerationConfig Text2ImagePipeline::get_generation_config() const { return m_impl->get_generation_config(); } diff --git a/src/cpp/src/visual_language/input_embedder.cpp b/src/cpp/src/visual_language/input_embedder.cpp index 28f3e8661a..2f1924fd84 100644 --- a/src/cpp/src/visual_language/input_embedder.cpp +++ b/src/cpp/src/visual_language/input_embedder.cpp @@ -130,6 +130,46 @@ class InputsEmbedder::IInputsEmbedder { } return encoded_input_ids; } + + /** + * @brief Unpads an image tensor of a padded and resized image. + * Used for packing image features of llava_next models. + * + * @param tensor An image tensor with a shape (embed_dim, height, width) + * @param original_size A size of original image + * @return An unpadded image tensor with a shape (embed_dim, new_height, new_width) + */ + + /** + * @brief Converts a vector of batched images ([NHWC]) into a vector of individual image tensors ([1HWC]). + * + * @param images A vector of tensors representing the images. Each tensor can have a shape of either [NHWC] or [HWC]. + * @return A vector of tensors where each tensor represents a single image with a shape of [1, H, W, C]. 
+ */ + std::vector to_single_image_tensors(const std::vector& images) { + std::vector single_image_tensors; + for (const auto& image : images) { + ov::Tensor reshaped_image = image; + ov::Shape image_shape = image.get_shape(); + switch (image_shape.size()) { + case 3: + reshaped_image.set_shape({1, image_shape.at(0), image_shape.at(1), image_shape.at(2)}); + break; + case 4: break; + default: OPENVINO_THROW("Input image must have [NHWC] or [HWC] layout"); + } + ov::Shape reshaped_image_shape = reshaped_image.get_shape(); + for (size_t batch_idx = 0; batch_idx < reshaped_image_shape.at(0); ++batch_idx) { + ov::Tensor single_image{ + ov::element::u8, + {1, reshaped_image_shape.at(1), reshaped_image_shape.at(2), reshaped_image_shape.at(3)}, + reshaped_image.data() + batch_idx * reshaped_image_shape.at(1) * reshaped_image_shape.at(2) * reshaped_image_shape.at(3) + }; + single_image_tensors.push_back(std::move(single_image)); + } + } + return single_image_tensors; + } }; class InputsEmbedderMiniCPM : public InputsEmbedder::IInputsEmbedder { @@ -161,49 +201,35 @@ class InputsEmbedderMiniCPM : public InputsEmbedder::IInputsEmbedder { virtual ov::Tensor get_inputs_embeds(const std::string& prompt, const std::vector& images) override { std::string images_prompt; std::vector embeds; - for (const ov::Tensor& rgb : images) { - ov::Tensor reshaped = rgb; - ov::Shape rgb_shape = rgb.get_shape(); - switch (rgb_shape.size()) { - case 3: - reshaped.set_shape({1, rgb_shape.at(0), rgb_shape.at(1), rgb_shape.at(2)}); - break; - case 4: break; - default: OPENVINO_THROW("Input image must have [NHWC] or [HWC] layout"); + + std::vector single_images = to_single_image_tensors(images); + + for (const ov::Tensor& image : single_images) { + EncodedImage encoded_image = m_vision_encoder.encode(image); + if (m_vlm_config.use_image_id) { + images_prompt += m_vlm_config.im_id_start + std::to_string(m_image_id) + m_vlm_config.im_id_end; + ++m_image_id; } - ov::Shape reshaped_shape = reshaped.get_shape(); - for (size_t batch_idx = 0; batch_idx < reshaped_shape.at(0); ++batch_idx) { - ov::Tensor single_image{ - ov::element::u8, - {1, reshaped_shape.at(1), reshaped_shape.at(2), reshaped_shape.at(3)}, - reshaped.data() + batch_idx * reshaped_shape.at(1) * reshaped_shape.at(1) * reshaped_shape.at(1) - }; - EncodedImage encoded_image = m_vision_encoder.encode(single_image); - if (m_vlm_config.use_image_id) { - images_prompt += m_vlm_config.im_id_start + std::to_string(m_image_id) + m_vlm_config.im_id_end; - ++m_image_id; - } - std::string unk64; - for (size_t idx = 0; idx < m_vlm_config.query_num; ++idx) { - unk64 += m_vlm_config.unk; - } - images_prompt += m_vlm_config.im_start + unk64 + m_vlm_config.im_end; - if (encoded_image.slices) { - ov::Shape slices_shape = encoded_image.slices.get_shape(); - for (size_t row_idx = 0; row_idx < slices_shape.at(0); ++row_idx) { - for (size_t col_idx = 0; col_idx < slices_shape.at(1); ++col_idx) { - images_prompt += m_vlm_config.slice_start + unk64 + m_vlm_config.slice_end; - } - images_prompt += '\n'; + std::string unk64; + for (size_t idx = 0; idx < m_vlm_config.query_num; ++idx) { + unk64 += m_vlm_config.unk; + } + images_prompt += m_vlm_config.im_start + unk64 + m_vlm_config.im_end; + if (encoded_image.slices) { + ov::Shape slices_shape = encoded_image.slices.get_shape(); + for (size_t row_idx = 0; row_idx < slices_shape.at(0); ++row_idx) { + for (size_t col_idx = 0; col_idx < slices_shape.at(1); ++col_idx) { + images_prompt += m_vlm_config.slice_start + unk64 + m_vlm_config.slice_end; 
} - } - if ('\n' != *(images_prompt.end() - 1)) { - // Image wasn't sliced, add \n to the end of image anyway. - // Strangely, \n isn't placed between . images_prompt += '\n'; } - embeds.push_back(std::move(encoded_image)); } + if ('\n' != *(images_prompt.end() - 1)) { + // Image wasn't sliced, add \n to the end of image anyway. + // Strangely, \n isn't placed between . + images_prompt += '\n'; + } + embeds.push_back(std::move(encoded_image)); } images_prompt += prompt; @@ -461,69 +487,86 @@ class InputsEmbedderLLaVA : public InputsEmbedder::IInputsEmbedder { virtual ov::Tensor get_inputs_embeds(const std::string& prompt, const std::vector& images) override { std::string image_token = m_vlm_config.im_start; - std::string formatted_prompt = images.empty() ? prompt : image_token + "\n" + prompt; - - // std::string chat_template_fallback = m_templated_chat_history + " USER: " + formatted_prompt + " ASSISTANT: "; - // chat_template_fallback = chat_template_fallback.erase(0, chat_template_fallback.find_first_not_of(' ')); - // Adapted from llava-1.5-7b-hf chat_template.json std::string chat_template_fallback = "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + ' ' }}{% else %}{{ 'ASSISTANT: ' + message['content'] + ' ' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}"; - ov::Tensor input_ids = get_encoded_input_ids(formatted_prompt, chat_template_fallback); + + std::vector single_images = to_single_image_tensors(images); - if (images.empty()) { - return m_embedding.infer(input_ids); - } else { - OPENVINO_ASSERT(1 == images.size(), "Only a single image allowed"); - EncodedImage encoded_image = m_vision_encoder.encode(images.at(0)); - ov::Tensor image_embeds = encoded_image.resized_source; + std::string formatted_prompt; + std::vector image_embeds; + image_embeds.reserve(single_images.size()); - ov::Tensor text_embeds = m_embedding.infer(input_ids); + for (const auto& image : single_images) { + EncodedImage encoded_image = m_vision_encoder.encode(image); + image_embeds.push_back(std::move(encoded_image.resized_source)); + formatted_prompt += image_token + "\n"; + } + formatted_prompt += prompt; - ov::Tensor encoded_image_token = m_tokenizer.encode(image_token, ov::genai::add_special_tokens(false)).input_ids; - int64_t image_token_id = encoded_image_token.data()[encoded_image_token.get_size() - 1]; + ov::Tensor input_ids = get_encoded_input_ids(formatted_prompt, chat_template_fallback); + ov::Tensor text_embeds = m_embedding.infer(input_ids); - return merge_text_and_image_embeddings_llava(input_ids, text_embeds, image_embeds, image_token_id); + if (images.empty()) { + return text_embeds; } + + ov::Tensor encoded_image_token = m_tokenizer.encode(m_vlm_config.im_start, ov::genai::add_special_tokens(false)).input_ids; + int64_t image_token_id = encoded_image_token.data()[encoded_image_token.get_size() - 1]; + + return merge_text_and_image_embeddings_llava(input_ids, text_embeds, image_embeds, image_token_id); } protected: ov::Tensor merge_text_and_image_embeddings_llava( const ov::Tensor& input_ids, const ov::Tensor& text_embeds, - const ov::Tensor& image_embeds, + const std::vector& image_embeds, int64_t image_token_id ) { auto text_embeds_shape = text_embeds.get_shape(); - auto image_embeds_shape = image_embeds.get_shape(); + size_t text_embeds_seq_length = text_embeds_shape[1]; + size_t hidden_size = text_embeds_shape[2]; + const int64_t* input_ids_data = input_ids.data(); + const float* text_embeds_data = 
text_embeds.data(); + + size_t num_image_tokens = 0; + for (size_t s = 0; s < text_embeds_seq_length; ++s) { + if (input_ids_data[s] == image_token_id) { + num_image_tokens++; + } + } + auto num_images = image_embeds.size(); OPENVINO_ASSERT( - text_embeds_shape[2] == image_embeds_shape[2], - "Incompatible shapes between text_embeds and image_embeds" + num_image_tokens == num_images, + "Number of image tokens in input_ids different from num_images." ); - size_t text_embeds_seq_length = text_embeds_shape[1]; - size_t hidden_size = text_embeds_shape[2]; - size_t image_embeds_seq_length = image_embeds_shape[1]; - - size_t merged_seq_length = text_embeds_seq_length + (image_embeds_seq_length - 1); + size_t total_image_seq_length = 0; + for (const auto& single_image_embeds : image_embeds) { + OPENVINO_ASSERT( + text_embeds_shape[2] == single_image_embeds.get_shape().at(2), + "Incompatible shapes between text_embeds and image_embeds" + ); + total_image_seq_length += single_image_embeds.get_shape().at(1); + } + size_t merged_seq_length = text_embeds_seq_length + total_image_seq_length - num_image_tokens; ov::Tensor merged_embeds(text_embeds.get_element_type(), {BATCH_SIZE, merged_seq_length, hidden_size}); - - const int64_t* input_ids_data = input_ids.data(); - const float* text_embeds_data = text_embeds.data(); - const float* image_embeds_data = image_embeds.data(); float* merged_data = merged_embeds.data(); - size_t merged_idx = 0; + size_t image_idx = 0; for (size_t s = 0; s < text_embeds_seq_length; ++s) { if (input_ids_data[s] == image_token_id) { - for (size_t i = 0; i < image_embeds_seq_length; ++i) { - std::copy_n(image_embeds_data + i * hidden_size, - hidden_size, - merged_data + merged_idx * hidden_size); - merged_idx++; - } + const float* image_embeds_data = image_embeds[image_idx].data(); + size_t image_seq_length = image_embeds[image_idx].get_shape()[1]; + + std::copy_n(image_embeds_data, + image_seq_length * hidden_size, + merged_data + merged_idx * hidden_size); + merged_idx += image_seq_length; + image_idx++; } else { std::copy_n(text_embeds_data + s * hidden_size, hidden_size, @@ -547,35 +590,47 @@ class InputsEmbedderLLaVANext : public InputsEmbedderLLaVA { virtual ov::Tensor get_inputs_embeds(const std::string& prompt, const std::vector& images) override { std::string image_token = m_vlm_config.im_start; - std::string formatted_prompt = images.empty() ? 
prompt : image_token + "\n" + prompt; - // Adapted from llava-1.5-7b-hf chat_template.json std::string chat_template_fallback = "{% for message in messages %}{% if message['role'] == 'user' %}{{ 'USER: ' + message['content'] + ' ' }}{% else %}{{ 'ASSISTANT: ' + message['content'] + ' ' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ 'ASSISTANT:' }}{% endif %}"; - ov::Tensor input_ids = get_encoded_input_ids(formatted_prompt, chat_template_fallback); - if (images.empty()) { - return m_embedding.infer(input_ids); - } else { - OPENVINO_ASSERT(1 == images.size(), "Only a single image allowed"); - EncodedImage encoded_image = m_vision_encoder.encode(images.at(0)); + std::vector single_images = to_single_image_tensors(images); - // Create image_newline tensor with data from config - size_t embed_dim = encoded_image.resized_source.get_shape().at(2); - ov::Tensor image_newline(encoded_image.resized_source.get_element_type(), {embed_dim}); - float* image_newline_data = image_newline.data(); - std::copy(m_vlm_config.image_newline.begin(), m_vlm_config.image_newline.end(), image_newline_data); + std::string formatted_prompt; + std::vector image_embeds; + image_embeds.reserve(single_images.size()); + + ov::Tensor image_newline; - ImageSize original_image_size{images.at(0).get_shape().at(1), images.at(0).get_shape().at(2)}; // [height, width] + for (const auto& image : single_images) { + EncodedImage encoded_image = m_vision_encoder.encode(image); - ov::Tensor image_features = pack_image_features_llava_next(encoded_image, original_image_size, image_newline); + if (!image_newline) { + size_t embed_dim = encoded_image.resized_source.get_shape().at(2); + image_newline = ov::Tensor(encoded_image.resized_source.get_element_type(), {embed_dim}); + float* image_newline_data = image_newline.data(); + std::copy(m_vlm_config.image_newline.begin(), m_vlm_config.image_newline.end(), image_newline_data); + } - ov::Tensor text_embeds = m_embedding.infer(input_ids); + ImageSize original_image_size{image.get_shape().at(1), image.get_shape().at(2)}; // [height, width] - ov::Tensor encoded_image_token = m_tokenizer.encode(image_token, ov::genai::add_special_tokens(false)).input_ids; - int64_t image_token_id = encoded_image_token.data()[encoded_image_token.get_size() - 1]; + ov::Tensor packed_features = pack_image_features_llava_next(encoded_image, original_image_size, image_newline); - return merge_text_and_image_embeddings_llava(input_ids, text_embeds, image_features, image_token_id); + image_embeds.push_back(std::move(packed_features)); + formatted_prompt += image_token + "\n"; } + formatted_prompt += prompt; + + ov::Tensor input_ids = get_encoded_input_ids(formatted_prompt, chat_template_fallback); + ov::Tensor text_embeds = m_embedding.infer(input_ids); + + if (images.empty()) { + return text_embeds; + } + + ov::Tensor encoded_image_token = m_tokenizer.encode(m_vlm_config.im_start, ov::genai::add_special_tokens(false)).input_ids; + int64_t image_token_id = encoded_image_token.data()[encoded_image_token.get_size() - 1]; + + return merge_text_and_image_embeddings_llava(input_ids, text_embeds, image_embeds, image_token_id); } private: diff --git a/src/cpp/src/visual_language/vision_encoder.cpp b/src/cpp/src/visual_language/vision_encoder.cpp index 5f2b9232a8..0b6b169f18 100644 --- a/src/cpp/src/visual_language/vision_encoder.cpp +++ b/src/cpp/src/visual_language/vision_encoder.cpp @@ -657,10 +657,13 @@ EncodedImage VisionEncoder::encode_llava(const ov::Tensor& image, const Processo 
m_vision_encoder.set_tensor("pixel_values", pixel_values); m_vision_encoder.infer(); - ov::Tensor image_features = m_vision_encoder.get_output_tensor(); + const ov::Tensor& infer_output = m_vision_encoder.get_output_tensor(); + ov::Tensor image_features(infer_output.get_element_type(), infer_output.get_shape()); + std::memcpy(image_features.data(), infer_output.data(), infer_output.get_byte_size()); + ImageSize resized_source_size{config.crop_size_height / config.patch_size, config.crop_size_width / config.patch_size}; - return {image_features, resized_source_size}; + return {std::move(image_features), resized_source_size}; } EncodedImage VisionEncoder::encode_llava_next(const ov::Tensor& image, const ProcessorConfig& config) { @@ -669,7 +672,10 @@ EncodedImage VisionEncoder::encode_llava_next(const ov::Tensor& image, const Pro m_vision_encoder.set_tensor("pixel_values", pixel_values); m_vision_encoder.infer(); - ov::Tensor image_features = m_vision_encoder.get_output_tensor(); + const ov::Tensor& infer_output = m_vision_encoder.get_output_tensor(); + ov::Tensor image_features(infer_output.get_element_type(), infer_output.get_shape()); + std::memcpy(image_features.data(), infer_output.data(), infer_output.get_byte_size()); + ImageSize resized_source_size{config.crop_size_height / config.patch_size, config.crop_size_width / config.patch_size}; // Gen number of patches @@ -679,7 +685,7 @@ EncodedImage VisionEncoder::encode_llava_next(const ov::Tensor& image, const Pro int num_patches_h = best_resolution.second / config.size_shortest_edge; EncodedImage encoded_image; - encoded_image.resized_source = image_features; + encoded_image.resized_source = std::move(image_features); encoded_image.resized_source_size = resized_source_size; encoded_image.patches_grid = {num_patches_h, num_patches_w}; return encoded_image; @@ -691,8 +697,11 @@ EncodedImage VisionEncoder::encode_internvl(const ov::Tensor& image, const Proce m_vision_encoder.set_tensor("pixel_values", pixel_values); m_vision_encoder.infer(); - ov::Tensor image_features = m_vision_encoder.get_output_tensor(); + const ov::Tensor& infer_output = m_vision_encoder.get_output_tensor(); + ov::Tensor image_features(infer_output.get_element_type(), infer_output.get_shape()); + std::memcpy(image_features.data(), infer_output.data(), infer_output.get_byte_size()); + ImageSize resized_source_size{config.crop_size_height / config.patch_size, config.crop_size_width / config.patch_size}; - return {image_features, resized_source_size}; + return {std::move(image_features), resized_source_size}; } diff --git a/src/docs/SUPPORTED_MODELS.md b/src/docs/SUPPORTED_MODELS.md index 6412ec39d5..e1f2483d8e 100644 --- a/src/docs/SUPPORTED_MODELS.md +++ b/src/docs/SUPPORTED_MODELS.md @@ -193,6 +193,14 @@ The pipeline can work with other similar topologies produced by `optimum-intel` + + Stable Diffusion 3 + + + + diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index 527dccf55a..774175dd95 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -40,5 +40,4 @@ Generator, CppStdGenerator, draft_model - ) diff --git a/tests/cpp/speculative_decoding.cpp b/tests/cpp/speculative_decoding.cpp index 08ce6aaf66..bb10c2cc8f 100644 --- a/tests/cpp/speculative_decoding.cpp +++ b/tests/cpp/speculative_decoding.cpp @@ -28,6 +28,7 @@ class CBForSDTest : public testing::Test, public ov::genai::ContinuousBatchingPi std::lock_guard lock{m_awaiting_requests_mutex}; 
m_awaiting_requests.push_back(sequence_group); } + pull_awaiting_requests(); return std::make_shared(sequence_group->get_generation_stream(), sampling_params); }; diff --git a/tools/continuous_batching/benchmark/continuous_batching_benchmark.cpp b/tools/continuous_batching/benchmark/continuous_batching_benchmark.cpp index 7c3e75eafa..27c64d04a8 100644 --- a/tools/continuous_batching/benchmark/continuous_batching_benchmark.cpp +++ b/tools/continuous_batching/benchmark/continuous_batching_benchmark.cpp @@ -256,8 +256,15 @@ class GenerationInfoCollector { this->start_time = start_time; } - void add_generation(ov::genai::ContinuousBatchingPipeline* pipe, Dataset* dataset, size_t request_id) { - ov::genai::GenerationHandle generation_handle = pipe->add_request(request_id, dataset->m_prompts[request_id], dataset->m_sampling_params[request_id]); + void add_generation(ov::genai::ContinuousBatchingPipeline* pipe, Dataset* dataset, size_t request_id, bool is_speculative_decoding_enabled) { + auto sampling_params = dataset->m_sampling_params[request_id]; + if (is_speculative_decoding_enabled) { + // to enable static speculative decoding + sampling_params.num_assistant_tokens = 5; + // to enable dynamic speculative decoding + // sampling_params.assistant_confidence_threshold = 0.4f; + } + ov::genai::GenerationHandle generation_handle = pipe->add_request(request_id, dataset->m_prompts[request_id], sampling_params); std::lock_guard lock(mutex); generations_info.emplace_back(std::move(generation_handle), dataset->m_input_lens[request_id]); } @@ -306,7 +313,7 @@ class GenerationInfoCollector { } }; -void trafficSimulator(ov::genai::ContinuousBatchingPipeline* pipe, Dataset* dataset, std::string request_rate, GenerationInfoCollector* generation_info_collector) { +void trafficSimulator(ov::genai::ContinuousBatchingPipeline* pipe, Dataset* dataset, std::string request_rate, GenerationInfoCollector* generation_info_collector, bool is_speculative_decoding_enabled) { double numeric_request_rate; std::random_device rd; std::mt19937 gen(rd()); @@ -333,7 +340,7 @@ void trafficSimulator(ov::genai::ContinuousBatchingPipeline* pipe, Dataset* data generation_info_collector->set_start_time(std::chrono::steady_clock::now()); for (size_t request_id = 0; request_id < dataset->size(); ++request_id) { std::cout << "Traffic thread adding request to the queue..." 
<< std::endl; - generation_info_collector->add_generation(pipe, dataset, request_id); + generation_info_collector->add_generation(pipe, dataset, request_id, is_speculative_decoding_enabled); if (numeric_request_rate > 0) std::this_thread::sleep_for(std::chrono::milliseconds(int(distribution(gen) * 1000))); } @@ -434,6 +441,7 @@ int main(int argc, char* argv[]) try { ("b,max_batch_size", "A maximum number of batched tokens", cxxopts::value()->default_value("256")) ("dynamic_split_fuse", "Whether to use dynamic split-fuse or vLLM scheduling", cxxopts::value()->default_value("true")) ("m,model", "Path to model and tokenizers base directory", cxxopts::value()->default_value(".")) + ("draft_model", "Path to assistant model directory", cxxopts::value()->default_value("")) ("dataset", "Path to dataset .json file", cxxopts::value()->default_value("./ShareGPT_V3_unfiltered_cleaned_split.json")) ("max_input_len", "Max input length take from dataset", cxxopts::value()->default_value("1024")) ("max_output_len", "Max output length", cxxopts::value()->default_value("2048")) @@ -462,6 +470,7 @@ int main(int argc, char* argv[]) try { const size_t max_batch_size = result["max_batch_size"].as(); const bool dynamic_split_fuse = result["dynamic_split_fuse"].as(); const std::string models_path = result["model"].as(); + const std::string draft_model_path = result["draft_model"].as(); const std::string dataset_path = result["dataset"].as(); const size_t max_input_len = result["max_input_len"].as(); const size_t max_output_len = result["max_output_len"].as(); @@ -471,6 +480,8 @@ int main(int argc, char* argv[]) try { const size_t cache_size = result["cache_size"].as(); const bool use_cache_eviction = result["use_cache_eviction"].as(); + bool is_speculative_decoding_enabled = !draft_model_path.empty(); + // Create requests for generation Dataset dataset = filtered_dataset(models_path, dataset_path, num_prompts, max_input_len, max_output_len); @@ -509,6 +520,9 @@ int main(int argc, char* argv[]) try { std::cout << "\tPlugin configuration JSON: " << device_config << std::endl; ov::AnyMap device_config_map = {}; + if (is_speculative_decoding_enabled) { + device_config_map.insert({ ov::genai::draft_model(draft_model_path) }); + } if (!parse_plugin_config_string(device_config, device_config_map)) { std::cout << "ERROR: Wrong json parameter in device_config." 
<< std::endl; return EXIT_FAILURE; @@ -524,14 +538,14 @@ std::atomic<bool> finishGenerationThread{false}; if (request_rate == "inf") { - std::thread trafficSimulatorThread(trafficSimulator, &pipe, &dataset, request_rate, &generation_info_collector); + std::thread trafficSimulatorThread(trafficSimulator, &pipe, &dataset, request_rate, &generation_info_collector, is_speculative_decoding_enabled); trafficSimulatorThread.join(); } std::thread lmmEngineThread(llmEngineLoop, &pipe, &dataset, &finishGenerationThread); std::thread statisticsReporterThread(statisticsReporter, &generation_info_collector, num_prompts); if (request_rate != "inf") { - std::thread trafficSimulatorThread(trafficSimulator, &pipe, &dataset, request_rate, &generation_info_collector); + std::thread trafficSimulatorThread(trafficSimulator, &pipe, &dataset, request_rate, &generation_info_collector, is_speculative_decoding_enabled); trafficSimulatorThread.join(); } statisticsReporterThread.join(); diff --git a/tools/who_what_benchmark/README.md b/tools/who_what_benchmark/README.md index b5cad666c8..012782bad3 100644 --- a/tools/who_what_benchmark/README.md +++ b/tools/who_what_benchmark/README.md @@ -1,7 +1,12 @@ -# Simple Accuracy Benchmark for Generative AI models +# Who What Benchmark (WWB) - Simple Accuracy Benchmarking Tool for Generative AI models +The main idea of the benchmark is to estimate a similarity score between embeddings computed for data generated by two models, for example, a baseline model and its optimized version. In general, the data can also be produced by the same model run with different inference tools, so the similarity score shows how much the outputs differ overall. + +WWB provides default datasets for the supported use cases. However, it is relatively easy to plug in and use custom datasets. + ## Features +* Command-line interface for Hugging Face and OpenVINO models, plus a Python API to support broader inference backends. * Simple and quick accuracy test for compressed, quantized, pruned, distilled LLMs. It works with any model that supports HuggingFace Transformers text generation API including: * HuggingFace Transformers compressed models via [Bitsandbytes](https://huggingface.co/docs/transformers/main_classes/quantization#transformers.BitsAndBytesConfig) * [GPTQ](https://huggingface.co/docs/transformers/main_classes/quantization#transformers.GPTQConfig) via HuggingFace API @@ -11,8 +16,46 @@ * Validation of text-to-image pipelines. Computes similarity score between generated images: * Supports Diffusers library and Optimum-Intel via `Text2ImageEvaluator` class. -The main idea is to compare similarity of text generation between baseline and optimized LLMs. +## Installation +Install WWB and its requirements from source using `pip` or any other package manager.
For example, + +* `python -m venv eval_env` +* `source eval_env/bin/activate` +* `pip install -r requirements.txt` +* `pip install openvino-genai` to validate with OpenVINO GenAI API +* `pip install .` + +## Usage +### Compare Text-generation Models (LLMs) +```sh +# Collect ground truth from the baseline Hugging Face Transformers model +wwb --base-model microsoft/Phi-3-mini-4k-instruct --gt-data gt.csv --model-type text --hf + +# Convert model to Optimum-Intel (quantized to 8-bit by default) +optimum-cli export openvino -m microsoft/Phi-3-mini-4k-instruct phi-3-openvino + +# Measure similarity metric for Optimum-OpenVINO inference backend +wwb --target-model phi-3-openvino --gt-data gt.csv --model-type text +# Measure similarity metric for OpenVINO GenAI inference backend +wwb --target-model phi-3-openvino --gt-data gt.csv --model-type text --genai +``` + +### Compare Text-to-image Models (Diffusers) +```sh +# Export FP16 model to OpenVINO +optimum-cli export openvino -m SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 sd-lcm-fp16 +# Export model with 8-bit quantized weights to OpenVINO +optimum-cli export openvino -m SimianLuo/LCM_Dreamshaper_v7 --weight-format int8 sd-lcm-int8 +# Collect the references and save the mapping in the .json file. +# Reference images will be stored in the "reference" subfolder next to the .json file. +wwb --base-model sd-lcm-fp16 --gt-data lcm_test/sd_xl.json --model-type text-to-image +# Compute the metric +# Target images will be stored in the "target" subfolder next to the .json file. +wwb --target-model sd-lcm-int8 --gt-data lcm_test/sd_xl.json --model-type text-to-image +``` + +### API The API provides a way to investigate the worst generated text examples. ```python @@ -49,13 +92,7 @@ prompts = val["text"] metrics_per_prompt, metrics = evaluator.score(optimized_model, test_data=prompts) ``` -### Installing - -* python -m venv eval_env -* source eval_env/bin/activate -* pip install -r requirements.txt - -### CLI example for text-generation models +### Advanced CLI usage ```sh wwb --help @@ -92,18 +129,6 @@ wwb --base-model meta-llama/Llama-2-7b-chat-hf --gt-data llama_2_7b_wwb_gt.csv - wwb --base-model meta-llama/Llama-2-7b-chat-hf --gt-data llama_2_7b_wwb_gt.csv --hf ``` -### Example of Stable Diffusion comparison -```sh -# Export FP16 model -optimum-cli export openvino -m SimianLuo/LCM_Dreamshaper_v7 --weight-format fp16 sd-lcm-fp16 -# Export INT8 WOQ model -optimum-cli export openvino -m SimianLuo/LCM_Dreamshaper_v7 --weight-format int8 sd-lcm-int8 -# Collect the references -wwb --base-model sd-lcm-fp16 --gt-data lcm_test/sd_xl.json --model-type text-to-image -# Compute the metric -wwb --target-model sd-lcm-int8 --gt-data lcm_test/sd_xl.json --model-type text-to-image -``` - ### Supported metrics * `similarity` - averaged similarity measured by a neural network trained for sentence embeddings. The best is 1.0, the minimum is 0.0, higher-better.
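The snippet below is an illustrative sketch of the idea behind the `similarity` metric, not WWB's internal implementation: both generations are embedded with the default `--data-encoder` model and compared by cosine similarity. The example strings are invented for the illustration.

```python
# Hypothetical illustration of the `similarity` metric: embed two generations
# with a sentence-embedding model and compare them by cosine similarity.
from sentence_transformers import SentenceTransformer, util

# Same encoder as the default --data-encoder value
encoder = SentenceTransformer("sentence-transformers/all-mpnet-base-v2")

base_output = "The Eiffel Tower is located in Paris, France."      # e.g. from base_model
target_output = "The Eiffel Tower can be found in Paris, France."  # e.g. from target_model

embeddings = encoder.encode([base_output, target_output], convert_to_tensor=True)
score = util.cos_sim(embeddings[0], embeddings[1]).item()
print(f"similarity: {score:.3f}")  # close to 1.0 means the two outputs agree
```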
diff --git a/tools/who_what_benchmark/requirements.txt b/tools/who_what_benchmark/requirements.txt index 0ffa906756..bea6453c6b 100644 --- a/tools/who_what_benchmark/requirements.txt +++ b/tools/who_what_benchmark/requirements.txt @@ -7,3 +7,4 @@ openvino-tokenizers pandas>=2.0.3 numpy>=1.23.5 tqdm>=4.66.1 +diffusers diff --git a/tools/who_what_benchmark/whowhatbench/wwb.py b/tools/who_what_benchmark/whowhatbench/wwb.py index 6e65e477b9..4d8c52fe21 100644 --- a/tools/who_what_benchmark/whowhatbench/wwb.py +++ b/tools/who_what_benchmark/whowhatbench/wwb.py @@ -251,61 +251,61 @@ def parse_args(): parser.add_argument( "--base-model", default=None, - help="Model to ground truth generation.", + help="Model for ground truth generation.", ) parser.add_argument( "--target-model", default=None, - help="Model to comparison with base_model. Usually it is compressed, quantized version of base_model.", + help="Model to compare against the base_model. Usually it is a compressed or quantized version of base_model.", ) parser.add_argument( "--tokenizer", default=None, - help="Tokenizer for divergency metric. If not defined then will be load from base_model or target_model.", + help="Tokenizer for divergency metric. If not provided, it will be loaded from base_model or target_model.", ) parser.add_argument( "--gt-data", default=None, - help="CSV file with base_model generation. If defined and exists then base_model will not used." - "I defined and not exists them will be generated by base_model evaluation.", + help="CSV file containing ground-truth outputs from base_model. If the file is defined and exists, base_model will not be used." + " If the file does not exist, it will be generated by base_model evaluation.", ) parser.add_argument( "--model-type", type=str, choices=["text", "text-to-image", "visual-text"], default="text", - help="Indicated the model type, e.g. 'text' - for causal text generation, 'text-to-image' - for image generation.", + help="Indicates the model type: 'text' for causal text generation, 'text-to-image' for image generation.", ) parser.add_argument( "--data-encoder", type=str, default="sentence-transformers/all-mpnet-base-v2", help="Model for measurement of similarity between base_model and target_model." - "By default it is sentence-transformers/all-mpnet-base-v2," - "but for Chinese LLMs better to use sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2.", + " By default it is sentence-transformers/all-mpnet-base-v2," + " but for Chinese LLMs it is better to use sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2.", ) parser.add_argument( "--dataset", type=str, default=None, help="Name of the dataset with prompts. The interface for dataset is load_dataset from datasets library." - "Please provide this argument in format path,name (for example wikitext,wikitext-2-v1)." - "If None then internal list of prompts will be used.", + " Please provide this argument in format path,name (for example wikitext,wikitext-2-v1)." + " If None, the internal list of prompts will be used.", ) parser.add_argument( "--dataset-field", type=str, default="text", help="The name of field in dataset for prompts. For example question or context in squad." - "Will be used only if dataset is defined.", + " Will be used only if dataset is defined.", ) parser.add_argument( "--split", type=str, default=None, help="Split of prompts from dataset (for example train, validation, train[:32])."
- "Will be used only if dataset is defined.", + " Will be used only if dataset is defined.", ) parser.add_argument( "--output", @@ -377,8 +377,12 @@ def parse_args(): def check_args(args): - assert not (args.base_model is None and args.target_model is None) - assert not (args.base_model is None and args.gt_data is None) + if args.base_model is None and args.target_model is None: + raise ValueError("Wether --base-model or --target-model should be provided") + if args.base_model is None and args.gt_data is None: + raise ValueError("Wether --base-model or --gt-data should be provided") + if args.target_model is None and args.gt_data is None: + raise ValueError("Wether --target-model or --gt-data should be provided") def load_tokenizer(args):