Skip to content

Commit

Permalink
LoRA test fixes (huggingface#6163)
Browse files (browse the repository at this point in the history)
* update

* update

* update

* update

---------

Co-authored-by: Sayak Paul <[email protected]>
  • Loading branch information
2 people authored and Jimmy committed Apr 26, 2024
1 parent 74f278d commit 69e39f2
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 30 deletions.
29 changes: 22 additions & 7 deletions tests/lora/test_lora_layers_old_backend.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -343,6 +343,21 @@ def test_stable_diffusion_attn_processors(self):
image = sd_pipe(**inputs).images
assert image.shape == (1, 64, 64, 3)

@unittest.skipIf(not torch.cuda.is_available() or not is_xformers_available(), reason="xformers requires cuda")
def test_stable_diffusion_set_xformers_attn_processors(self):
# disable_full_determinism()
device = "cuda" # ensure determinism for the device-dependent torch.Generator
components, _ = self.get_dummy_components()
sd_pipe = StableDiffusionPipeline(**components)
sd_pipe = sd_pipe.to(device)
sd_pipe.set_progress_bar_config(disable=None)

_, _, inputs = self.get_dummy_inputs()

# run normal sd pipe
image = sd_pipe(**inputs).images
assert image.shape == (1, 64, 64, 3)

# run lora xformers attention
attn_processors, _ = create_unet_lora_layers(sd_pipe.unet)
attn_processors = {
Expand Down Expand Up @@ -607,7 +622,7 @@ def test_unload_lora_sd(self):
orig_image_slice, orig_image_slice_two, atol=1e-3
), "Unloading LoRA parameters should lead to results similar to what was obtained with the pipeline without any LoRA parameters."

@unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU")
@unittest.skipIf(torch_device != "cuda" or not is_xformers_available(), "This test is supposed to run on GPU")
def test_lora_unet_attn_processors_with_xformers(self):
with tempfile.TemporaryDirectory() as tmpdirname:
self.create_lora_weight_file(tmpdirname)
Expand Down Expand Up @@ -644,7 +659,7 @@ def test_lora_unet_attn_processors_with_xformers(self):
if isinstance(module, Attention):
self.assertIsInstance(module.processor, XFormersAttnProcessor)

@unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU")
@unittest.skipIf(torch_device != "cuda" or not is_xformers_available(), "This test is supposed to run on GPU")
def test_lora_save_load_with_xformers(self):
pipeline_components, lora_components = self.get_dummy_components()
sd_pipe = StableDiffusionPipeline(**pipeline_components)
Expand Down Expand Up @@ -2270,8 +2285,8 @@ def test_sdxl_1_0_lora_fusion_efficiency(self):
lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"

pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()

start_time = time.time()
Expand All @@ -2284,13 +2299,13 @@ def test_sdxl_1_0_lora_fusion_efficiency(self):

del pipe

pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.float16)
pipe.fuse_lora()
pipe.enable_model_cpu_offload()

start_time = time.time()
generator = torch.Generator().manual_seed(0)
start_time = time.time()
for _ in range(3):
pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
Expand Down
46 changes: 23 additions & 23 deletions tests/lora/test_lora_layers_peft.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -46,6 +46,7 @@
floats_tensor,
load_image,
nightly,
numpy_cosine_similarity_distance,
require_peft_backend,
require_torch_gpu,
slow,
Expand Down Expand Up @@ -1713,7 +1714,7 @@ def test_sdxl_0_9_lora_three(self):
release_memory(pipe)

def test_sdxl_1_0_lora(self):
generator = torch.Generator().manual_seed(0)
generator = torch.Generator("cpu").manual_seed(0)

pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.enable_model_cpu_offload()
Expand All @@ -1736,7 +1737,7 @@ def test_sdxl_lcm_lora(self):
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()

generator = torch.Generator().manual_seed(0)
generator = torch.Generator("cpu").manual_seed(0)

lora_model_id = "latent-consistency/lcm-lora-sdxl"

Expand All @@ -1753,7 +1754,8 @@ def test_sdxl_lcm_lora(self):
image_np = pipe.image_processor.pil_to_numpy(image)
expected_image_np = pipe.image_processor.pil_to_numpy(expected_image)

self.assertTrue(np.allclose(image_np, expected_image_np, atol=1e-2))
max_diff = numpy_cosine_similarity_distance(image_np.flatten(), expected_image_np.flatten())
assert max_diff < 1e-4

pipe.unload_lora_weights()

Expand All @@ -1764,7 +1766,7 @@ def test_sdv1_5_lcm_lora(self):
pipe.to("cuda")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)

generator = torch.Generator().manual_seed(0)
generator = torch.Generator("cpu").manual_seed(0)

lora_model_id = "latent-consistency/lcm-lora-sdv1-5"
pipe.load_lora_weights(lora_model_id)
Expand All @@ -1780,7 +1782,8 @@ def test_sdv1_5_lcm_lora(self):
image_np = pipe.image_processor.pil_to_numpy(image)
expected_image_np = pipe.image_processor.pil_to_numpy(expected_image)

self.assertTrue(np.allclose(image_np, expected_image_np, atol=1e-2))
max_diff = numpy_cosine_similarity_distance(image_np.flatten(), expected_image_np.flatten())
assert max_diff < 1e-4

pipe.unload_lora_weights()

Expand All @@ -1795,7 +1798,7 @@ def test_sdv1_5_lcm_lora_img2img(self):
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/img2img/fantasy_landscape.png"
)

generator = torch.Generator().manual_seed(0)
generator = torch.Generator("cpu").manual_seed(0)

lora_model_id = "latent-consistency/lcm-lora-sdv1-5"
pipe.load_lora_weights(lora_model_id)
Expand All @@ -1816,7 +1819,8 @@ def test_sdv1_5_lcm_lora_img2img(self):
image_np = pipe.image_processor.pil_to_numpy(image)
expected_image_np = pipe.image_processor.pil_to_numpy(expected_image)

self.assertTrue(np.allclose(image_np, expected_image_np, atol=1e-2))
max_diff = numpy_cosine_similarity_distance(image_np.flatten(), expected_image_np.flatten())
assert max_diff < 1e-4

pipe.unload_lora_weights()

Expand Down Expand Up @@ -1849,7 +1853,7 @@ def test_sdxl_1_0_lora_fusion(self):
release_memory(pipe)

def test_sdxl_1_0_lora_unfusion(self):
generator = torch.Generator().manual_seed(0)
generator = torch.Generator("cpu").manual_seed(0)

pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
Expand All @@ -1860,16 +1864,16 @@ def test_sdxl_1_0_lora_unfusion(self):
pipe.enable_model_cpu_offload()

images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=3
).images
images_with_fusion = images[0, -3:, -3:, -1].flatten()
images_with_fusion = images.flatten()

pipe.unfuse_lora()
generator = torch.Generator().manual_seed(0)
generator = torch.Generator("cpu").manual_seed(0)
images = pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=3
).images
images_without_fusion = images[0, -3:, -3:, -1].flatten()
images_without_fusion = images.flatten()

self.assertTrue(np.allclose(images_with_fusion, images_without_fusion, atol=1e-3))
release_memory(pipe)
Expand Down Expand Up @@ -1913,10 +1917,8 @@ def test_sdxl_1_0_lora_fusion_efficiency(self):
lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"

pipe = DiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16
)
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16)
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()

start_time = time.time()
Expand All @@ -1929,19 +1931,17 @@ def test_sdxl_1_0_lora_fusion_efficiency(self):

del pipe

pipe = DiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16
)
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16)
pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.float16)
pipe.fuse_lora()

# We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being
# silently deleted - otherwise this will CPU OOM
pipe.unload_lora_weights()

pipe.enable_model_cpu_offload()

start_time = time.time()
generator = torch.Generator().manual_seed(0)
start_time = time.time()
for _ in range(3):
pipe(
"masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
Expand Down

0 comments on commit 69e39f2

Please sign in to comment.