diff --git a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py index c6748ad418fe..6c36ec173539 100644 --- a/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py +++ b/src/diffusers/pipelines/stable_diffusion_gligen/pipeline_stable_diffusion_gligen_text_image.py @@ -446,13 +446,14 @@ def prepare_extra_step_kwargs(self, generator, eta): extra_step_kwargs["generator"] = generator return extra_step_kwargs - # Copied from diffusers.pipelines.stable_diffusion_k_diffusion.pipeline_stable_diffusion_k_diffusion.StableDiffusionKDiffusionPipeline.check_inputs def check_inputs( self, prompt, height, width, callback_steps, + gligen_images, + gligen_phrases, negative_prompt=None, prompt_embeds=None, negative_prompt_embeds=None, @@ -499,6 +500,13 @@ def check_inputs( f" {negative_prompt_embeds.shape}." ) + if gligen_images is not None and gligen_phrases is not None: + if len(gligen_images) != len(gligen_phrases): + raise ValueError( + "`gligen_images` and `gligen_phrases` must have the same length when both are provided, but" + f" got: `gligen_images` with length {len(gligen_images)} != `gligen_phrases` with length {len(gligen_phrases)}." + ) + # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.prepare_latents def prepare_latents(self, batch_size, num_channels_latents, height, width, dtype, device, generator, latents=None): shape = ( @@ -814,6 +822,8 @@ def __call__( height, width, callback_steps, + gligen_images, + gligen_phrases, negative_prompt, prompt_embeds, negative_prompt_embeds,