From d5991456ca4b36cb72d846741c3c287990151fd6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=2E=20Tolga=20Cang=C3=B6z?=
Date: Thu, 2 Nov 2023 16:22:35 +0300
Subject: [PATCH] Fix typos, improve, update; better visualization

---
 .../en/training/distributed_inference.md     | 15 +++++++++-
 .../en/using-diffusers/control_brightness.md | 15 +++++-----
 docs/source/en/using-diffusers/freeu.md      | 14 ++++-----
 .../en/using-diffusers/reusing_seeds.md      | 30 +++++++++----------
 .../textual_inversion_inference.md           | 28 +++++------------
 .../en/using-diffusers/weighted_prompts.md   | 12 ++++++--
 6 files changed, 60 insertions(+), 54 deletions(-)

diff --git a/docs/source/en/training/distributed_inference.md b/docs/source/en/training/distributed_inference.md
index 99c6acfe8d96..72bb5f5fd7fe 100644
--- a/docs/source/en/training/distributed_inference.md
+++ b/docs/source/en/training/distributed_inference.md
@@ -1,3 +1,15 @@
+<!--Copyright 2023 The HuggingFace Team. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
+the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
+an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
+specific language governing permissions and limitations under the License.
+-->
+
# Distributed inference with multiple GPUs

On distributed setups, you can run inference across multiple GPUs with 🤗 [Accelerate](https://huggingface.co/docs/accelerate/index) or [PyTorch Distributed](https://pytorch.org/tutorials/beginner/dist_overview.html), which is useful for generating with multiple prompts in parallel.
@@ -13,6 +25,7 @@ To begin, create a Python file and initialize an [`accelerate.PartialState`] to
Now use the [`~accelerate.PartialState.split_between_processes`] utility as a context manager to automatically distribute the prompts between the number of processes.

```py
+import torch
from accelerate import PartialState
from diffusers import DiffusionPipeline

@@ -92,4 +105,4 @@ Once you've completed the inference script, use the `--nproc_per_node` argument

```bash
-torchrun run_distributed.py --nproc_per_node=2
-```
\ No newline at end of file
+torchrun --nproc_per_node=2 run_distributed.py
+```
diff --git a/docs/source/en/using-diffusers/control_brightness.md b/docs/source/en/using-diffusers/control_brightness.md
index 17c107ba57b8..c5f9870776dc 100644
--- a/docs/source/en/using-diffusers/control_brightness.md
+++ b/docs/source/en/using-diffusers/control_brightness.md
@@ -34,15 +34,15 @@ Next, configure the following parameters in the [`DDIMScheduler`]:
2. `timestep_spacing="trailing"`, starts sampling from the last timestep

```py
->>> from diffusers import DiffusionPipeline, DDIMScheduler
+from diffusers import DiffusionPipeline, DDIMScheduler

->>> pipeline = DiffusionPipeline.from_pretrained("ptx0/pseudo-journey-v2", use_safetensors=True)
-# switch the scheduler in the pipeline to use the DDIMScheduler
+pipeline = DiffusionPipeline.from_pretrained("ptx0/pseudo-journey-v2", use_safetensors=True)

->>> pipeline.scheduler = DDIMScheduler.from_config(
-...     pipeline.scheduler.config, rescale_betas_zero_snr=True, timestep_spacing="trailing"
-... )
->>> pipeline.to("cuda")
+# switch the scheduler in the pipeline to use the DDIMScheduler
+pipeline.scheduler = DDIMScheduler.from_config(
+    pipeline.scheduler.config, rescale_betas_zero_snr=True, timestep_spacing="trailing"
+)
+pipeline.to("cuda")
```

Finally, in your call to the pipeline, set `guidance_rescale` to prevent overexposure:
@@ -50,6 +50,7 @@ Finally, in your call to the pipeline, set `guidance_rescale` to prevent overexp
```py
prompt = "A lion in galaxies, spirals, nebulae, stars, smoke, iridescent, intricate detail, octane render, 8k"
image = pipeline(prompt, guidance_rescale=0.7).images[0]
+image
```
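For readers applying the `control_brightness.md` changes above by hand, here is a minimal end-to-end sketch of the documented setup; it is not part of the patch itself. It assumes the same `ptx0/pseudo-journey-v2` checkpoint and a CUDA device, and the `lion.png` output path is only illustrative.

```py
# Minimal sketch of the zero-SNR brightness fix documented above (assumed setup, not from the patch).
from diffusers import DiffusionPipeline, DDIMScheduler

pipeline = DiffusionPipeline.from_pretrained("ptx0/pseudo-journey-v2", use_safetensors=True)

# rescale_betas_zero_snr rescales the noise schedule to zero terminal SNR,
# and timestep_spacing="trailing" starts sampling from the last timestep
pipeline.scheduler = DDIMScheduler.from_config(
    pipeline.scheduler.config, rescale_betas_zero_snr=True, timestep_spacing="trailing"
)
pipeline.to("cuda")

prompt = "A lion in galaxies, spirals, nebulae, stars, smoke, iridescent, intricate detail, octane render, 8k"
# guidance_rescale counteracts the overexposure that zero-SNR sampling can introduce
image = pipeline(prompt, guidance_rescale=0.7).images[0]
image.save("lion.png")  # hypothetical output path
```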
diff --git a/docs/source/en/using-diffusers/freeu.md b/docs/source/en/using-diffusers/freeu.md
index 4f3c64096705..c5f3577ae3aa 100644
--- a/docs/source/en/using-diffusers/freeu.md
+++ b/docs/source/en/using-diffusers/freeu.md
@@ -23,7 +23,7 @@ However, the skip connection can sometimes introduce unnatural image details. [F

FreeU is applied during inference and it does not require any additional training. The technique works for different tasks such as text-to-image, image-to-image, and text-to-video.

-In this guide, you will apply FreeU to the [`StableDiffusionPipeline`], [`StableDiffusionXLPipeline`], and [`TextToVideoSDPipeline`].
+In this guide, you will apply FreeU to the [`StableDiffusionPipeline`], [`StableDiffusionXLPipeline`], and [`TextToVideoSDPipeline`]. You need to install Diffusers from source to run the examples below.

## StableDiffusionPipeline

@@ -58,6 +58,7 @@ And then run inference:
prompt = "A squirrel eating a burger"
seed = 2023
image = pipeline(prompt, generator=torch.manual_seed(seed)).images[0]
+image
```

The figure below compares non-FreeU and FreeU results respectively for the same hyperparameters used above (`prompt` and `seed`):
@@ -80,9 +81,9 @@ seed = 2023

pipeline.enable_freeu(s1=0.9, s2=0.2, b1=1.1, b2=1.2)
image = pipeline(prompt, generator=torch.manual_seed(seed)).images[0]
+image
```

-
![](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/freeu/sdv2_1_freeu.jpg)

## Stable Diffusion XL

@@ -100,13 +101,13 @@ pipeline = DiffusionPipeline.from_pretrained(
prompt = "A squirrel eating a burger"
seed = 2023

-# Comes from 
+# Comes from
# https://wandb.ai/nasirk24/UNET-FreeU-SDXL/reports/FreeU-SDXL-Optimal-Parameters--Vmlldzo1NDg4NTUw
pipeline.enable_freeu(s1=0.6, s2=0.4, b1=1.1, b2=1.2)
image = pipeline(prompt, generator=torch.manual_seed(seed)).images[0]
+image
```

-
![](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/freeu/sdxl_freeu.jpg)

## Text-to-video generation

@@ -119,8 +120,7 @@ from diffusers.utils import export_to_video
import torch

model_id = "cerspense/zeroscope_v2_576w"
-pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch.float16).to("cuda")
-pipe = pipe.to("cuda")
+pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")

prompt = "an astronaut riding a horse on mars"
seed = 2023
@@ -132,4 +132,4 @@ video_frames = pipe(prompt, height=320, width=576, num_frames=30, generator=torc
export_to_video(video_frames, "astronaut_rides_horse.mp4")
```

-Thanks to [kadirnar](https://github.com/kadirnar/) for helping to integrate the feature, and to [justindujardin](https://github.com/justindujardin) for the helpful discussions.
\ No newline at end of file
+Thanks to [kadirnar](https://github.com/kadirnar/) for helping to integrate the feature, and to [justindujardin](https://github.com/justindujardin) for the helpful discussions.
diff --git a/docs/source/en/using-diffusers/reusing_seeds.md b/docs/source/en/using-diffusers/reusing_seeds.md
index 7cbaf2643202..d2638b469e30 100644
--- a/docs/source/en/using-diffusers/reusing_seeds.md
+++ b/docs/source/en/using-diffusers/reusing_seeds.md
@@ -16,7 +16,7 @@ specific language governing permissions and limitations under the License.

A common way to improve the quality of generated images is with *deterministic batch generation*: generate a batch of images and select one image to improve with a more detailed prompt in a second round of inference.
The key is to pass a list of [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html#generator)s to the pipeline for batched image generation, and tie each `Generator` to a seed so you can reuse it for an image.

-Let's use [`runwayml/stable-diffusion-v1-5`](runwayml/stable-diffusion-v1-5) for example, and generate several versions of the following prompt:
+Let's use [`runwayml/stable-diffusion-v1-5`](https://huggingface.co/runwayml/stable-diffusion-v1-5) as an example, and generate several versions of the following prompt:

```py
prompt = "Labrador in the style of Vermeer"
@@ -25,27 +25,27 @@ prompt = "Labrador in the style of Vermeer"
Instantiate a pipeline with [`DiffusionPipeline.from_pretrained`] and place it on a GPU (if available):

```python
->>> from diffusers import DiffusionPipeline
-
->>> pipe = DiffusionPipeline.from_pretrained(
-...     "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True
-... )
->>> pipe = pipe.to("cuda")
+import torch
+from diffusers import DiffusionPipeline
+from diffusers.utils import make_image_grid
+
+pipe = DiffusionPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True
+)
+pipe = pipe.to("cuda")
```

-Now, define four different `Generator`'s and assign each `Generator` a seed (`0` to `3`) so you can reuse a `Generator` later for a specific image:
+Now, define four different `Generator`s and assign each `Generator` a seed (`0` to `3`) so you can reuse a `Generator` later for a specific image:

```python
->>> import torch
-
->>> generator = [torch.Generator(device="cuda").manual_seed(i) for i in range(4)]
+generator = [torch.Generator(device="cuda").manual_seed(i) for i in range(4)]
```

Generate the images and have a look:

```python
->>> images = pipe(prompt, generator=generator, num_images_per_prompt=4).images
->>> images
+images = pipe(prompt, generator=generator, num_images_per_prompt=4).images
+make_image_grid(images, rows=2, cols=2)
```

![img](https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/reusabe_seeds.jpg)
@@ -60,8 +60,8 @@ generator = [torch.Generator(device="cuda").manual_seed(0) for i in range(4)]
Create four generators with seed `0`, and generate another batch of images, all of which should look like the first image from the previous round!

```python
->>> images = pipe(prompt, generator=generator).images
->>> images
+images = pipe(prompt, generator=generator).images
+make_image_grid(images, rows=2, cols=2)
```

![img](https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/reusabe_seeds_2.jpg)
diff --git a/docs/source/en/using-diffusers/textual_inversion_inference.md b/docs/source/en/using-diffusers/textual_inversion_inference.md
index 6e690c62f76a..7583dee63e3b 100644
--- a/docs/source/en/using-diffusers/textual_inversion_inference.md
+++ b/docs/source/en/using-diffusers/textual_inversion_inference.md
@@ -18,26 +18,12 @@ The [`StableDiffusionPipeline`] supports textual inversion, a technique that ena
This guide will show you how to run inference with textual inversion using a pre-learned concept from the Stable Diffusion Conceptualizer. If you're interested in teaching a model new concepts with textual inversion, take a look at the [Textual Inversion](../training/text_inversion) training guide.
-Login to your Hugging Face account:
-
-```py
-from huggingface_hub import notebook_login
-
-notebook_login()
-```
-
Import the necessary libraries:

```py
-import os
import torch
-
-import PIL
-from PIL import Image
-
from diffusers import StableDiffusionPipeline
from diffusers.utils import make_image_grid
-from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
```

## Stable Diffusion 1 and 2
@@ -64,7 +50,7 @@ Create a prompt with the pre-learned concept by using the special placeholder to
```py
prompt = "a grafitti in a favela wall with a <cat-toy> on it"
-num_samples = 2
+num_samples_per_row = 2
num_rows = 2
```

@@ -73,10 +59,10 @@ Then run the pipeline (feel free to adjust the parameters like `num_inference_st
```py
all_images = []
for _ in range(num_rows):
-    images = pipe(prompt, num_images_per_prompt=num_samples, num_inference_steps=50, guidance_scale=7.5).images
+    images = pipeline(prompt, num_images_per_prompt=num_samples_per_row, num_inference_steps=50, guidance_scale=7.5).images
    all_images.extend(images)

-grid = make_image_grid(all_images, num_samples, num_rows)
+grid = make_image_grid(all_images, num_rows, num_samples_per_row)
grid
```
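The hunks above show only the edited lines of the Stable Diffusion 1 and 2 example. As a hedged sketch of how the pieces fit together after this patch: the `sd-concepts-library/cat-toy` concept and the prompt come from the surrounding guide, while the checkpoint choice and dtype are assumptions here.

```py
import torch
from diffusers import StableDiffusionPipeline
from diffusers.utils import make_image_grid

# assumed pipeline setup; the guide's elided lines may differ
pipeline = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True
).to("cuda")
# load the pre-learned <cat-toy> concept from the Hub
pipeline.load_textual_inversion("sd-concepts-library/cat-toy")

prompt = "a grafitti in a favela wall with a <cat-toy> on it"
num_samples_per_row = 2
num_rows = 2

# generate a num_rows x num_samples_per_row grid of images
all_images = []
for _ in range(num_rows):
    images = pipeline(prompt, num_images_per_prompt=num_samples_per_row, num_inference_steps=50, guidance_scale=7.5).images
    all_images.extend(images)

grid = make_image_grid(all_images, num_rows, num_samples_per_row)
grid
```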
-
## Stable Diffusion XL

Stable Diffusion XL (SDXL) can also use textual inversion vectors for inference. In contrast to Stable Diffusion 1 and 2, SDXL has two text encoders so you'll need two textual inversion embeddings - one for each text encoder model.
@@ -109,9 +94,9 @@ state_dict
[ 0.0475, -0.0508, -0.0145, ..., 0.0070, -0.0089, -0.0163]],
```

-There are two tensors, `"clip-g"` and `"clip-l"`.
-`"clip-g"` corresponds to the bigger text encoder in SDXL and refers to
-`pipe.text_encoder_2` and `"clip-l"` refers to `pipe.text_encoder`.
+There are two tensors, `"clip_g"` and `"clip_l"`.
+`"clip_g"` corresponds to the bigger text encoder in SDXL and refers to
+`pipe.text_encoder_2` and `"clip_l"` refers to `pipe.text_encoder`.

Now you can load each tensor separately by passing it along with the correct text encoder and tokenizer to [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`]:

@@ -129,4 +114,5 @@ pipe.load_textual_inversion(state_dict["clip_l"], token="unaestheticXLv31", text
# the embedding should be used as a negative embedding, so we pass it as a negative prompt
generator = torch.Generator().manual_seed(33)
image = pipe("a woman standing in front of a mountain", negative_prompt="unaestheticXLv31", generator=generator).images[0]
+image
```
diff --git a/docs/source/en/using-diffusers/weighted_prompts.md b/docs/source/en/using-diffusers/weighted_prompts.md
index ede2c7f35169..5007d235ae99 100644
--- a/docs/source/en/using-diffusers/weighted_prompts.md
+++ b/docs/source/en/using-diffusers/weighted_prompts.md
@@ -41,6 +41,7 @@ import torch

pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_safetensors=True)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
+pipe.to("cuda")

prompt = "a red cat playing with a ball"
@@ -165,7 +166,9 @@ import torch
from diffusers import StableDiffusionPipeline
from compel import Compel, DiffusersTextualInversionManager

-pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True, variant="fp16").to("cuda")
+pipe = StableDiffusionPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16,
+    use_safetensors=True, variant="fp16").to("cuda")
pipe.load_textual_inversion("sd-concepts-library/midjourney-style")
```

@@ -173,7 +176,7 @@ Compel provides a `DiffusersTextualInversionManager` class to simplify prompt we

```py
textual_inversion_manager = DiffusersTextualInversionManager(pipe)
-compel = Compel(
+compel_proc = Compel(
    tokenizer=pipe.tokenizer,
    text_encoder=pipe.text_encoder,
    textual_inversion_manager=textual_inversion_manager)
@@ -225,6 +228,8 @@ Stable Diffusion XL (SDXL) has two tokenizers and text encoders so it's usage is
```py
from compel import Compel, ReturnedEmbeddingsType
from diffusers import DiffusionPipeline
+from diffusers.utils import make_image_grid
+import torch

pipeline = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
@@ -251,6 +256,7 @@ conditioning, pooled = compel(prompt)
# generate image
generator = [torch.Generator().manual_seed(33) for _ in range(len(prompt))]
images = pipeline(prompt_embeds=conditioning, pooled_prompt_embeds=pooled, generator=generator, num_inference_steps=30).images
+make_image_grid(images, rows=1, cols=2)
```
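Between the weighted_prompts.md hunks above, the guide constructs the `compel` object that the last hunk calls; those lines are untouched by this patch. For orientation, here is a sketch of a dual-encoder setup following Compel's documented SDXL pattern, with the prompt pair implied by the guide's figure captions; the exact wording of the guide's own lines may differ.

```py
# sketch only: assumes the SDXL `pipeline` and imports from the hunk above are in scope
compel = Compel(
    tokenizer=[pipeline.tokenizer, pipeline.tokenizer_2],
    text_encoder=[pipeline.text_encoder, pipeline.text_encoder_2],
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=[False, True],  # only the second text encoder produces pooled embeddings
)

# upweight "ball" in the first prompt and downweight it in the second
prompt = ["a red cat playing with a (ball)1.5", "a red cat playing with a (ball)0.6"]
conditioning, pooled = compel(prompt)
```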
@@ -262,4 +268,4 @@ images = pipeline(prompt_embeds=conditioning, pooled_prompt_embeds=pooled, gener
"a red cat playing with a (ball)0.6"
-</div>
\ No newline at end of file
+</div>
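The caption in the final hunk uses Compel's down-weighting syntax. As a hedged sketch of how such an image is produced, reusing the `pipe` and `compel_proc` objects set up earlier in weighted_prompts.md; the seed matches the guide's other examples, while the step count is illustrative.

```py
import torch

# "ball" is weighted down to 0.6 of its normal attention weight
prompt_embeds = compel_proc("a red cat playing with a (ball)0.6")
generator = torch.Generator(device="cuda").manual_seed(33)
image = pipe(prompt_embeds=prompt_embeds, generator=generator, num_inference_steps=20).images[0]
image
```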