diff --git a/tests/lora/test_lora_layers_old_backend.py b/tests/lora/test_lora_layers_old_backend.py
index 19505a1d906d4..3d3b858fa0fdd 100644
--- a/tests/lora/test_lora_layers_old_backend.py
+++ b/tests/lora/test_lora_layers_old_backend.py
@@ -343,6 +343,21 @@ def test_stable_diffusion_attn_processors(self):
         image = sd_pipe(**inputs).images
         assert image.shape == (1, 64, 64, 3)
 
+    @unittest.skipIf(not torch.cuda.is_available() or not is_xformers_available(), reason="xformers requires cuda")
+    def test_stable_diffusion_set_xformers_attn_processors(self):
+        # disable_full_determinism()
+        device = "cuda"  # ensure determinism for the device-dependent torch.Generator
+        components, _ = self.get_dummy_components()
+        sd_pipe = StableDiffusionPipeline(**components)
+        sd_pipe = sd_pipe.to(device)
+        sd_pipe.set_progress_bar_config(disable=None)
+
+        _, _, inputs = self.get_dummy_inputs()
+
+        # run normal sd pipe
+        image = sd_pipe(**inputs).images
+        assert image.shape == (1, 64, 64, 3)
+
         # run lora xformers attention
         attn_processors, _ = create_unet_lora_layers(sd_pipe.unet)
         attn_processors = {
@@ -607,7 +622,7 @@ def test_unload_lora_sd(self):
             orig_image_slice, orig_image_slice_two, atol=1e-3
         ), "Unloading LoRA parameters should lead to results similar to what was obtained with the pipeline without any LoRA parameters."
 
-    @unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU")
+    @unittest.skipIf(torch_device != "cuda" or not is_xformers_available(), "This test is supposed to run on GPU")
     def test_lora_unet_attn_processors_with_xformers(self):
         with tempfile.TemporaryDirectory() as tmpdirname:
             self.create_lora_weight_file(tmpdirname)
@@ -644,7 +659,7 @@ def test_lora_unet_attn_processors_with_xformers(self):
                 if isinstance(module, Attention):
                     self.assertIsInstance(module.processor, XFormersAttnProcessor)
 
-    @unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU")
+    @unittest.skipIf(torch_device != "cuda" or not is_xformers_available(), "This test is supposed to run on GPU")
     def test_lora_save_load_with_xformers(self):
         pipeline_components, lora_components = self.get_dummy_components()
         sd_pipe = StableDiffusionPipeline(**pipeline_components)
@@ -2270,8 +2285,8 @@ def test_sdxl_1_0_lora_fusion_efficiency(self):
         lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
         lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
 
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
+        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
+        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.float16)
         pipe.enable_model_cpu_offload()
 
         start_time = time.time()
@@ -2284,13 +2299,13 @@ def test_sdxl_1_0_lora_fusion_efficiency(self):
         del pipe
 
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
+        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
+        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.float16)
         pipe.fuse_lora()
         pipe.enable_model_cpu_offload()
 
-        start_time = time.time()
         generator = torch.Generator().manual_seed(0)
+        start_time = time.time()
         for _ in range(3):
             pipe(
                 "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
             )
diff --git a/tests/lora/test_lora_layers_peft.py b/tests/lora/test_lora_layers_peft.py
index 48ae5d197273e..6d3ac8b4592a1 100644
--- a/tests/lora/test_lora_layers_peft.py
+++ b/tests/lora/test_lora_layers_peft.py
@@ -46,6 +46,7 @@
     floats_tensor,
     load_image,
     nightly,
+    numpy_cosine_similarity_distance,
    require_peft_backend,
     require_torch_gpu,
     slow,
@@ -1713,7 +1714,7 @@ def test_sdxl_0_9_lora_three(self):
         release_memory(pipe)
 
     def test_sdxl_1_0_lora(self):
-        generator = torch.Generator().manual_seed(0)
+        generator = torch.Generator("cpu").manual_seed(0)
 
         pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
         pipe.enable_model_cpu_offload()
@@ -1736,7 +1737,7 @@ def test_sdxl_lcm_lora(self):
         pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
         pipe.enable_model_cpu_offload()
 
-        generator = torch.Generator().manual_seed(0)
+        generator = torch.Generator("cpu").manual_seed(0)
 
         lora_model_id = "latent-consistency/lcm-lora-sdxl"
 
@@ -1753,7 +1754,8 @@ def test_sdxl_lcm_lora(self):
 
         image_np = pipe.image_processor.pil_to_numpy(image)
         expected_image_np = pipe.image_processor.pil_to_numpy(expected_image)
-        self.assertTrue(np.allclose(image_np, expected_image_np, atol=1e-2))
+        max_diff = numpy_cosine_similarity_distance(image_np.flatten(), expected_image_np.flatten())
+        assert max_diff < 1e-4
 
         pipe.unload_lora_weights()
 
@@ -1764,7 +1766,7 @@ def test_sdv1_5_lcm_lora(self):
         pipe.to("cuda")
         pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
 
-        generator = torch.Generator().manual_seed(0)
+        generator = torch.Generator("cpu").manual_seed(0)
 
         lora_model_id = "latent-consistency/lcm-lora-sdv1-5"
         pipe.load_lora_weights(lora_model_id)
@@ -1780,7 +1782,8 @@ def test_sdv1_5_lcm_lora(self):
 
         image_np = pipe.image_processor.pil_to_numpy(image)
         expected_image_np = pipe.image_processor.pil_to_numpy(expected_image)
-        self.assertTrue(np.allclose(image_np, expected_image_np, atol=1e-2))
+        max_diff = numpy_cosine_similarity_distance(image_np.flatten(), expected_image_np.flatten())
+        assert max_diff < 1e-4
 
         pipe.unload_lora_weights()
 
@@ -1795,7 +1798,7 @@ def test_sdv1_5_lcm_lora_img2img(self):
             "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/img2img/fantasy_landscape.png"
         )
 
-        generator = torch.Generator().manual_seed(0)
+        generator = torch.Generator("cpu").manual_seed(0)
 
         lora_model_id = "latent-consistency/lcm-lora-sdv1-5"
         pipe.load_lora_weights(lora_model_id)
@@ -1816,7 +1819,8 @@ def test_sdv1_5_lcm_lora_img2img(self):
 
         image_np = pipe.image_processor.pil_to_numpy(image)
         expected_image_np = pipe.image_processor.pil_to_numpy(expected_image)
-        self.assertTrue(np.allclose(image_np, expected_image_np, atol=1e-2))
+        max_diff = numpy_cosine_similarity_distance(image_np.flatten(), expected_image_np.flatten())
+        assert max_diff < 1e-4
 
         pipe.unload_lora_weights()
 
@@ -1849,7 +1853,7 @@ def test_sdxl_1_0_lora_fusion(self):
         release_memory(pipe)
 
     def test_sdxl_1_0_lora_unfusion(self):
-        generator = torch.Generator().manual_seed(0)
+        generator = torch.Generator("cpu").manual_seed(0)
 
         pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
         lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
@@ -1860,16 +1864,16 @@ def test_sdxl_1_0_lora_unfusion(self):
         pipe.enable_model_cpu_offload()
 
         images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
+            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=3
         ).images
-        images_with_fusion = images[0, -3:, -3:, -1].flatten()
+        images_with_fusion = images.flatten()
 
         pipe.unfuse_lora()
-        generator = torch.Generator().manual_seed(0)
+        generator = torch.Generator("cpu").manual_seed(0)
         images = pipe(
-            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
+            "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=3
         ).images
-        images_without_fusion = images[0, -3:, -3:, -1].flatten()
+        images_without_fusion = images.flatten()
 
         self.assertTrue(np.allclose(images_with_fusion, images_without_fusion, atol=1e-3))
         release_memory(pipe)
@@ -1913,10 +1917,8 @@ def test_sdxl_1_0_lora_fusion_efficiency(self):
         lora_model_id = "hf-internal-testing/sdxl-1.0-lora"
         lora_filename = "sd_xl_offset_example-lora_1.0.safetensors"
 
-        pipe = DiffusionPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16
-        )
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16)
+        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
+        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.float16)
         pipe.enable_model_cpu_offload()
 
         start_time = time.time()
@@ -1929,19 +1931,17 @@ def test_sdxl_1_0_lora_fusion_efficiency(self):
 
         del pipe
 
-        pipe = DiffusionPipeline.from_pretrained(
-            "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.bfloat16
-        )
-        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.bfloat16)
+        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16)
+        pipe.load_lora_weights(lora_model_id, weight_name=lora_filename, torch_dtype=torch.float16)
         pipe.fuse_lora()
+
         # We need to unload the lora weights since in the previous API `fuse_lora` led to lora weights being
         # silently deleted - otherwise this will CPU OOM
         pipe.unload_lora_weights()
-
         pipe.enable_model_cpu_offload()
 
-        start_time = time.time()
         generator = torch.Generator().manual_seed(0)
+        start_time = time.time()
         for _ in range(3):
             pipe(
                 "masterpiece, best quality, mountain", output_type="np", generator=generator, num_inference_steps=2
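
Note on the assertion change above: the LCM LoRA tests now compare generated and reference images with numpy_cosine_similarity_distance, imported from diffusers' testing utilities, instead of an element-wise np.allclose check. The sketch below is an illustrative reimplementation of that kind of metric, not the library code; the function name and array names are placeholders.

import numpy as np

def cosine_similarity_distance(a: np.ndarray, b: np.ndarray) -> float:
    # Flatten both images and return 1 - cosine similarity:
    # 0.0 for identical outputs, growing as the two images diverge.
    a = a.astype(np.float64).flatten()
    b = b.astype(np.float64).flatten()
    similarity = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
    return float(1.0 - similarity)

# Mirrors the updated test assertion (threshold taken from the diff):
# max_diff = cosine_similarity_distance(image_np, expected_image_np)
# assert max_diff < 1e-4

A direction-based distance like this tolerates small per-pixel numerical drift across hardware while still failing when the generated image genuinely changes, which is presumably why it replaces the stricter atol=1e-2 allclose comparison.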
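The fusion-efficiency hunks in both files also move start_time = time.time() to just after the generator is created, so the measured window covers only the repeated pipeline calls. A minimal sketch of that timing pattern, assuming access to the SDXL base checkpoint and enough GPU memory; it is not the test itself:

import time

import torch
from diffusers import DiffusionPipeline

# Prepare everything that should not be timed (fp16 weights plus CPU offload,
# as in the updated tests), then start the clock right before the generation loop.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()

generator = torch.Generator().manual_seed(0)
start_time = time.time()
for _ in range(3):
    pipe(
        "masterpiece, best quality, mountain",
        output_type="np",
        generator=generator,
        num_inference_steps=2,
    )
elapsed_time = time.time() - start_time
print(f"3 runs took {elapsed_time:.2f}s")

Creating the seeded generator outside the timed region keeps the two measured configurations (LoRA fused vs. unfused) comparable, since both windows now cover exactly the same work.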