diff --git a/tests/python_tests/common.py b/tests/python_tests/common.py
index 3a3f94fb2b..dc58d1ad2f 100644
--- a/tests/python_tests/common.py
+++ b/tests/python_tests/common.py
@@ -388,10 +388,9 @@ def compare_generation_results(prompts: List[str], hf_results: List[GenerationRe
         compare_generation_result(ref_result, ov_result, generation_config)
 
 
-def get_hugging_face_models(model_id: str, use_optimum = True):
+def get_hugging_face_models(model_id: str):
     hf_tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-    opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, ov_config=get_default_properties()) if use_optimum else \
-                AutoModelForCausalLM.from_pretrained(model_id)
+    opt_model = OVModelForCausalLM.from_pretrained(model_id, export=True, trust_remote_code=True, ov_config=get_default_properties())
     return opt_model, hf_tokenizer
 
 
@@ -414,15 +413,13 @@ def convert_models(opt_model : OVModelForCausalLM, hf_tokenizer : AutoTokenizer,
 
 
 def run_llm_pipeline_with_ref(model_id: str, prompts: List[str], generation_config: GenerationConfig | dict, tmp_path: Path, use_cb : bool = False):
-    use_optimum = True
     models_path : Path = tmp_path / model_id
-    opt_model, hf_tokenizer = get_hugging_face_models(model_id, use_optimum)
+    opt_model, hf_tokenizer = get_hugging_face_models(model_id)
 
     if type(generation_config) is dict:
         generation_config = GenerationConfig(**generation_config)
 
-    if use_optimum:
-        convert_models(opt_model, hf_tokenizer, models_path)
+    convert_models(opt_model, hf_tokenizer, models_path)
 
     ov_results = run_llm_pipeline(models_path, prompts, generation_config, use_cb)
     hf_results = run_hugging_face(opt_model, hf_tokenizer, prompts, generation_config)
@@ -440,12 +437,10 @@ def run_cb_pipeline_with_ref(tmp_path: str, model_id: str, scheduler_params: dic
             generation_config = GenerationConfig(**generation_config)
         generation_configs = [generation_config] * len(prompts)
 
-    use_optimum = True
     models_path : Path = tmp_path / model_id
-    opt_model, hf_tokenizer = get_hugging_face_models(model_id, use_optimum)
+    opt_model, hf_tokenizer = get_hugging_face_models(model_id)
 
-    if use_optimum:
-        convert_models(opt_model, hf_tokenizer, models_path)
+    convert_models(opt_model, hf_tokenizer, models_path)
 
     hf_results = run_hugging_face(opt_model, hf_tokenizer, prompts, generation_configs)
     ov_results = run_continuous_batching(models_path, scheduler_config, prompts, generation_configs)
diff --git a/tests/python_tests/test_continuous_batching.py b/tests/python_tests/test_continuous_batching.py
index 1482856fcb..fabcf06b71 100644
--- a/tests/python_tests/test_continuous_batching.py
+++ b/tests/python_tests/test_continuous_batching.py
@@ -150,7 +150,7 @@ def test_post_oom_health(tmp_path, sampling_config):
     scheduler_config.num_kv_blocks = 10 # Low cache size to trigger OOM quickly
 
     model_id : str = "facebook/opt-125m"
-    opt_model, hf_tokenizer = get_hugging_face_models(model_id, use_optimum=True)
+    opt_model, hf_tokenizer = get_hugging_face_models(model_id)
 
     models_path : Path = tmp_path / model_id
     convert_models(opt_model, hf_tokenizer, models_path)
@@ -251,7 +251,7 @@ def test_preemption_with_multinomial(tmp_path, dynamic_split_fuse):
     for config in generation_configs:
         config.max_new_tokens = 30
     model_id : str = "facebook/opt-125m"
-    model, hf_tokenizer = get_hugging_face_models(model_id, use_optimum=True)
+    model, hf_tokenizer = get_hugging_face_models(model_id)
 
     models_path : Path = tmp_path / model_id
     convert_models(model, hf_tokenizer, models_path)
@@ -329,7 +329,7 @@ def test_preemption_with_multinomial(tmp_path, dynamic_split_fuse):
 @pytest.mark.skip(reason="Random sampling results are non deterministic due to: discrete_distribution impl depends on platform, model inference results may depend on CPU. Test passes on CI but fails locally.")
 def test_preemption_with_multinomial_n_seq(tmp_path, dynamic_split_fuse):
     model_id : str = "facebook/opt-125m"
-    opt_model, hf_tokenizer = get_hugging_face_models(model_id, use_optimum=True)
+    opt_model, hf_tokenizer = get_hugging_face_models(model_id)
 
     models_path : Path = tmp_path / model_id
     convert_models(opt_model, hf_tokenizer, models_path)
diff --git a/tests/python_tests/test_sampling.py b/tests/python_tests/test_sampling.py
index 3df4223219..004d4f9d9d 100644
--- a/tests/python_tests/test_sampling.py
+++ b/tests/python_tests/test_sampling.py
@@ -320,7 +320,7 @@ def test_multinomial_sampling_against_reference(tmp_path, test_struct: RandomSam
     generation_config.rng_seed = 0
     generation_configs = generation_config
     model_id : str = "facebook/opt-125m"
-    model, hf_tokenizer = get_hugging_face_models(model_id, use_optimum=True)
+    model, hf_tokenizer = get_hugging_face_models(model_id)
 
     models_path : Path = tmp_path / model_id
     convert_models(model, hf_tokenizer, models_path)
diff --git a/tools/who_what_benchmark/tests/test_cli_image.py b/tools/who_what_benchmark/tests/test_cli_image.py
index 7b966f049e..fec9e96f4c 100644
--- a/tools/who_what_benchmark/tests/test_cli_image.py
+++ b/tools/who_what_benchmark/tests/test_cli_image.py
@@ -42,8 +42,8 @@ def teardown_module():
         ("hf-internal-testing/tiny-stable-diffusion-torch", "text-to-image", "hf"),
         ("hf-internal-testing/tiny-stable-diffusion-torch", "text-to-image", "openvino"),
         ("hf-internal-testing/tiny-stable-diffusion-xl-pipe", "text-to-image", "hf"),
-        ("hf-internal-testing/tiny-stable-diffusion-torch", "image-inpainting", "hf"),
-        ("hf-internal-testing/tiny-stable-diffusion-xl-pipe", "image-inpainting", "hf"),
+        # ("hf-internal-testing/tiny-stable-diffusion-torch", "image-inpainting", "hf"),
+        # ("hf-internal-testing/tiny-stable-diffusion-xl-pipe", "image-inpainting", "hf"),
     ],
 )
 def test_image_model_types(model_id, model_type, backend):
@@ -88,7 +88,10 @@ def test_image_model_types(model_id, model_type, backend):
 @pytest.mark.parametrize(
     ("model_id", "model_type"),
     list(itertools.product(OV_IMAGE_MODELS,
-                           ["image-to-image", "text-to-image", "image-inpainting"])),
+                           ["image-to-image",
+                            "text-to-image",
+                            # "image-inpainting"
+                            ])),
 )
 def test_image_model_genai(model_id, model_type):
     with tempfile.TemporaryDirectory() as temp_dir: