Skip to content

Commit

Permalink
fix a few issues in controlnet inpaint pipelines (huggingface#5470)
Browse files Browse the repository at this point in the history
* add

* Update docs/source/en/api/pipelines/controlnet_sdxl.md

Co-authored-by: Patrick von Platen <[email protected]>

---------

Co-authored-by: yiyixuxu <yixu310@gmail.com>
Co-authored-by: Patrick von Platen <[email protected]>
  • Loading branch information
3 people authored and linoytsaban committed Oct 24, 2023
1 parent c520bee commit 8890540
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 26 deletions.
9 changes: 9 additions & 0 deletions docs/source/en/api/pipelines/controlnet_sdxl.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,15 @@ Make sure to check out the Schedulers [guide](../../using-diffusers/schedulers)
- all
- __call__

## StableDiffusionXLControlNetImg2ImgPipeline
[[autodoc]] StableDiffusionXLControlNetImg2ImgPipeline
- all
- __call__

## StableDiffusionXLControlNetInpaintPipeline
[[autodoc]] StableDiffusionXLControlNetInpaintPipeline
- all
- __call__

## StableDiffusionPipelineOutput

[[autodoc]] pipelines.stable_diffusion.StableDiffusionPipelineOutput
Original file line number Diff line number Diff line change
Expand Up @@ -68,18 +68,16 @@
>>> mask_image = mask_image.resize((512, 512))
>>> def make_inpaint_condition(image, image_mask):
... image = np.array(image.convert("RGB")).astype(np.float32) / 255.0
... image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0
... assert image.shape[0:1] == image_mask.shape[0:1], "image and image_mask must have the same image size"
... image[image_mask > 0.5] = -1.0 # set as masked pixel
... image = np.expand_dims(image, 0).transpose(0, 3, 1, 2)
... image = torch.from_numpy(image)
>>> def make_canny_condition(image):
... image = np.array(image)
... image = cv2.Canny(image, 100, 200)
... image = image[:, :, None]
... image = np.concatenate([image, image, image], axis=2)
... image = Image.fromarray(image)
... return image
>>> control_image = make_inpaint_condition(init_image, mask_image)
>>> control_image = make_canny_condition(init_image)
>>> controlnet = ControlNetModel.from_pretrained(
... "lllyasviel/control_v11p_sd15_inpaint", torch_dtype=torch.float16
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,27 +75,24 @@
>>> mask_image = mask_image.resize((1024, 1024))
>>> def make_inpaint_condition(image, image_mask):
... image = np.array(image.convert("RGB")).astype(np.float32) / 255.0
... image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0
... assert image.shape[0:1] == image_mask.shape[0:1], "image and image_mask must have the same image size"
... image[image_mask < 0.5] = 0 # set as masked pixel
... image = np.expand_dims(image, 0).transpose(0, 3, 1, 2)
... image = torch.from_numpy(image)
>>> def make_canny_condition(image):
... image = np.array(image)
... image = cv2.Canny(image, 100, 200)
... image = image[:, :, None]
... image = np.concatenate([image, image, image], axis=2)
... image = Image.fromarray(image)
... return image
>>> control_image = make_inpaint_condition(init_image, mask_image)
>>> control_image = make_canny_condition(init_image)
>>> controlnet = ControlNetModel.from_pretrained(
... "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float32
... "diffusers/controlnet-canny-sdxl-1.0", torch_dtype=torch.float16
... )
>>> pipe = StableDiffusionXLControlNetInpaintPipeline.from_pretrained(
... "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, torch_dtype=torch.float32
... "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, torch_dtype=torch.float16
... )
>>> pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
>>> pipe.enable_model_cpu_offload()
>>> # generate image
Expand Down Expand Up @@ -777,13 +774,14 @@ def prepare_latents(
"However, either the image or the noise timestep has not been provided."
)

if image.shape[1] == 4:
image_latents = image.to(device=device, dtype=dtype)
elif return_image_latents or (latents is None and not is_strength_max):
if return_image_latents or (latents is None and not is_strength_max):
image = image.to(device=device, dtype=dtype)
image_latents = self._encode_vae_image(image=image, generator=generator)

image_latents = image_latents.repeat(batch_size // image_latents.shape[0], 1, 1, 1)
if image.shape[1] == 4:
image_latents = image
else:
image_latents = self._encode_vae_image(image=image, generator=generator)
image_latents = image_latents.repeat(batch_size // image_latents.shape[0], 1, 1, 1)

if latents is None and add_noise:
noise = randn_tensor(shape, generator=generator, device=device, dtype=dtype)
Expand Down

0 comments on commit 8890540

Please sign in to comment.