Fix typos, improve, update; better visualization
tolgacangoz committed Nov 2, 2023
1 parent 442017c commit d599145
Showing 6 changed files with 60 additions and 54 deletions.
15 changes: 14 additions & 1 deletion docs/source/en/training/distributed_inference.md
@@ -1,3 +1,15 @@
<!--Copyright 2023 The HuggingFace Team. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.
-->

# Distributed inference with multiple GPUs

On distributed setups, you can run inference across multiple GPUs with 🤗 [Accelerate](https://huggingface.co/docs/accelerate/index) or [PyTorch Distributed](https://pytorch.org/tutorials/beginner/dist_overview.html), which is useful for generating with multiple prompts in parallel.
@@ -13,6 +25,7 @@ To begin, create a Python file and initialize an [`accelerate.PartialState`] to create a distributed environment.
Now use the [`~accelerate.PartialState.split_between_processes`] utility as a context manager to automatically distribute the prompts between the number of processes.

```py
import torch
from accelerate import PartialState
from diffusers import DiffusionPipeline
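
# --- not part of this diff: a hedged sketch of how the snippet typically continues, ---
# --- using the standard `split_between_processes` pattern; the checkpoint and prompts are placeholders ---
pipeline = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True
)
distributed_state = PartialState()
pipeline.to(distributed_state.device)

# each process receives its own slice of the prompt list and generates independently
with distributed_state.split_between_processes(["a dog", "a cat"]) as prompt:
    result = pipeline(prompt).images[0]
    result.save(f"result_{distributed_state.process_index}.png")
```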

@@ -92,4 +105,4 @@ Once you've completed the inference script, use the `--nproc_per_node` argument to specify the number of GPUs and call `torchrun` to run the script:

```bash
torchrun --nproc_per_node=2 run_distributed.py
```
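
If you prefer the Accelerate launcher over `torchrun`, an equivalent invocation is sketched below (same script name as above; `accelerate launch` and `--num_processes` are standard Accelerate CLI options, not something added by this commit):

```bash
accelerate launch --num_processes=2 run_distributed.py
```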
15 changes: 8 additions & 7 deletions docs/source/en/using-diffusers/control_brightness.md
@@ -34,22 +34,23 @@ Next, configure the following parameters in the [`DDIMScheduler`]:
2. `timestep_spacing="trailing"`, starts sampling from the last timestep

```py
from diffusers import DiffusionPipeline, DDIMScheduler

pipeline = DiffusionPipeline.from_pretrained("ptx0/pseudo-journey-v2", use_safetensors=True)

# switch the scheduler in the pipeline to use the DDIMScheduler
pipeline.scheduler = DDIMScheduler.from_config(
    pipeline.scheduler.config, rescale_betas_zero_snr=True, timestep_spacing="trailing"
)
pipeline.to("cuda")
```

Finally, in your call to the pipeline, set `guidance_rescale` to prevent overexposure:

```py
prompt = "A lion in galaxies, spirals, nebulae, stars, smoke, iridescent, intricate detail, octane render, 8k"
image = pipeline(prompt, guidance_rescale=0.7).images[0]
image
```

14 changes: 7 additions & 7 deletions docs/source/en/using-diffusers/freeu.md
@@ -23,7 +23,7 @@ However, the skip connection can sometimes introduce unnatural image details.

FreeU is applied during inference and it does not require any additional training. The technique works for different tasks such as text-to-image, image-to-image, and text-to-video.

In this guide, you will apply FreeU to the [`StableDiffusionPipeline`], [`StableDiffusionXLPipeline`], and [`TextToVideoSDPipeline`]. You need to install Diffusers from source to run the examples below.
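
One common way to do that is a pip install straight from the GitHub repository (shown here as a convenience; it is not part of the page being edited):

```bash
pip install git+https://github.com/huggingface/diffusers.git
```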

## StableDiffusionPipeline

@@ -58,6 +58,7 @@ And then run inference:

```py
prompt = "A squirrel eating a burger"
seed = 2023
image = pipeline(prompt, generator=torch.manual_seed(seed)).images[0]
image
```
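
For context, the code that precedes this hunk loads the pipeline and turns FreeU on. A minimal sketch is below; the checkpoint name is an assumption, and `s1=0.9, s2=0.2, b1=1.2, b2=1.4` are the scaling factors commonly recommended for Stable Diffusion 1.x rather than values taken from this diff:

```py
import torch
from diffusers import DiffusionPipeline

pipeline = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# b1/b2 scale the UNet backbone features, s1/s2 damp the skip-connection features
pipeline.enable_freeu(s1=0.9, s2=0.2, b1=1.2, b2=1.4)
```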

The figure below compares the non-FreeU and FreeU results for the same hyperparameters used above (`prompt` and `seed`):
@@ -80,9 +81,9 @@ seed = 2023

```py

pipeline.enable_freeu(s1=0.9, s2=0.2, b1=1.1, b2=1.2)
image = pipeline(prompt, generator=torch.manual_seed(seed)).images[0]
image
```


![](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/freeu/sdv2_1_freeu.jpg)

## Stable Diffusion XL
@@ -100,13 +101,13 @@ pipeline = DiffusionPipeline.from_pretrained(

```py
prompt = "A squirrel eating a burger"
seed = 2023

# Comes from
# https://wandb.ai/nasirk24/UNET-FreeU-SDXL/reports/FreeU-SDXL-Optimal-Parameters--Vmlldzo1NDg4NTUw
pipeline.enable_freeu(s1=0.6, s2=0.4, b1=1.1, b2=1.2)
image = pipeline(prompt, generator=torch.manual_seed(seed)).images[0]
image
```


![](https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/freeu/sdxl_freeu.jpg)

## Text-to-video generation
@@ -119,8 +120,7 @@ from diffusers.utils import export_to_video

```py
import torch

model_id = "cerspense/zeroscope_v2_576w"
pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch.float16).to("cuda")
pipe = pipe.to("cuda")
pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")

prompt = "an astronaut riding a horse on mars"
seed = 2023
```

@@ -132,4 +132,4 @@ video_frames = pipe(prompt, height=320, width=576, num_frames=30, generator=torch.manual_seed(seed)).frames

```py
export_to_video(video_frames, "astronaut_rides_horse.mp4")
```
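
As a small usage note (not from this diff), FreeU can be switched off again on the same pipeline before running further inference:

```py
# undo the scaling factors set by enable_freeu()
pipe.disable_freeu()
```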

Thanks to [kadirnar](https://github.com/kadirnar/) for helping to integrate the feature, and to [justindujardin](https://github.com/justindujardin) for the helpful discussions.
30 changes: 15 additions & 15 deletions docs/source/en/using-diffusers/reusing_seeds.md
@@ -16,7 +16,7 @@ specific language governing permissions and limitations under the License.

A common way to improve the quality of generated images is with *deterministic batch generation*: generate a batch of images and select one to improve with a more detailed prompt in a second round of inference. The key is to pass a list of [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html#generator)s to the pipeline for batched image generation, and to tie each `Generator` to a seed so you can reuse it for a specific image.

Let's use [`runwayml/stable-diffusion-v1-5`](https://huggingface.co/runwayml/stable-diffusion-v1-5) as an example and generate several versions of the following prompt:

```py
prompt = "Labrador in the style of Vermeer"
```

@@ -25,27 +25,27 @@
Instantiate a pipeline with [`DiffusionPipeline.from_pretrained`] and place it on a GPU (if available):

```python
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import make_image_grid

pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True
)
pipe = pipe.to("cuda")
```

Now, define four different `Generator`s and assign each `Generator` a seed (`0` to `3`) so you can reuse a `Generator` later for a specific image:

```python
generator = [torch.Generator(device="cuda").manual_seed(i) for i in range(4)]
```

Generate the images and have a look:

```python
images = pipe(prompt, generator=generator, num_images_per_prompt=4).images
make_image_grid(images, rows=2, cols=2)
```

![img](https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/reusabe_seeds.jpg)
@@ -60,8 +60,8 @@ generator = [torch.Generator(device="cuda").manual_seed(0) for i in range(4)]
Create four generators with seed `0`, and generate another batch of images, all of which should look like the first image from the previous round!

```python
>>> images = pipe(prompt, generator=generator).images
>>> images
images = pipe(prompt, generator=generator).images
make_image_grid(images, rows=2, cols=2)
```

![img](https://huggingface.co/datasets/diffusers/diffusers-images-docs/resolve/main/reusabe_seeds_2.jpg)
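
To finish the workflow described at the top of the guide (pick the best image and refine it with a more detailed prompt), here is a hedged sketch; the refined prompt wording is an assumption, not text from the guide:

```python
# suppose the image generated with seed 0 looked best; keep its seed and enrich the prompt
prompt = "Labrador in the style of Vermeer, highly detailed oil painting, soft window light"
generator = [torch.Generator(device="cuda").manual_seed(0) for _ in range(4)]
images = pipe(prompt, generator=generator, num_images_per_prompt=4).images
make_image_grid(images, rows=2, cols=2)
```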
28 changes: 7 additions & 21 deletions docs/source/en/using-diffusers/textual_inversion_inference.md
@@ -18,26 +18,12 @@ The [`StableDiffusionPipeline`] supports textual inversion, a technique that enables a model like Stable Diffusion to learn a new concept from just a few sample images.

This guide will show you how to run inference with textual inversion using a pre-learned concept from the Stable Diffusion Conceptualizer. If you're interested in teaching a model new concepts with textual inversion, take a look at the [Textual Inversion](../training/text_inversion) training guide.

Import the necessary libraries:

```py
import torch

from diffusers import StableDiffusionPipeline
from diffusers.utils import make_image_grid
```

## Stable Diffusion 1 and 2
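
The loading step that precedes this hunk is elided by the diff. A minimal sketch (continuing from the imports above), assuming the commonly used `runwayml/stable-diffusion-v1-5` checkpoint and the `sd-concepts-library/cat-toy` concept that the `<cat-toy>` placeholder below relies on:

```py
pipeline = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# pull the learned embedding from the Hub and register its <cat-toy> placeholder token
pipeline.load_textual_inversion("sd-concepts-library/cat-toy")
```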
@@ -64,7 +50,7 @@ Create a prompt with the pre-learned concept by using the special placeholder token `<cat-toy>`:
```py
prompt = "a grafitti in a favela wall with a <cat-toy> on it"

num_samples_per_row = 2
num_rows = 2
```

@@ -73,18 +59,17 @@ Then run the pipeline (feel free to adjust the parameters like `num_inference_steps`):
```py
all_images = []
for _ in range(num_rows):
    images = pipeline(prompt, num_images_per_prompt=num_samples_per_row, num_inference_steps=50, guidance_scale=7.5).images
    all_images.extend(images)

grid = make_image_grid(all_images, num_rows, num_samples_per_row)
grid
```

<div class="flex justify-center">
<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/textual_inversion_inference.png">
</div>


## Stable Diffusion XL

Stable Diffusion XL (SDXL) can also use textual inversion vectors for inference. In contrast to Stable Diffusion 1 and 2, SDXL has two text encoders so you'll need two textual inversion embeddings - one for each text encoder model.
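
The guide loads such an embedding into a `state_dict` before the inspection shown below; that loading step is elided by this hunk. A sketch of the typical pattern — the repository id and filename here are placeholders, not values from the guide:

```py
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file

# hypothetical repo/filename; substitute the SDXL textual inversion embedding you actually want
file = hf_hub_download("your-username/your-sdxl-textual-inversion", filename="embedding.safetensors")
state_dict = load_file(file)
```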
@@ -109,9 +94,9 @@ state_dict
[ 0.0475, -0.0508, -0.0145, ..., 0.0070, -0.0089, -0.0163]],
```

There are two tensors, `"clip_g"` and `"clip_l"`. `"clip_g"` corresponds to the bigger text encoder in SDXL and refers to `pipe.text_encoder_2`, while `"clip_l"` refers to `pipe.text_encoder`.

Now you can load each tensor separately by passing them along with the correct text encoder and tokenizer
to [`~loaders.TextualInversionLoaderMixin.load_textual_inversion`]:
@@ -129,4 +114,5 @@ pipe.load_textual_inversion(state_dict["clip_l"], token="unaestheticXLv31", text_encoder=pipe.text_encoder, tokenizer=pipe.tokenizer)

```py
# the embedding should be used as a negative embedding, so we pass it as a negative prompt
generator = torch.Generator().manual_seed(33)
image = pipe("a woman standing in front of a mountain", negative_prompt="unaestheticXLv31", generator=generator).images[0]
image
```
12 changes: 9 additions & 3 deletions docs/source/en/using-diffusers/weighted_prompts.md
@@ -41,6 +41,7 @@ import torch

pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", use_safetensors=True)
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")

prompt = "a red cat playing with a ball"

@@ -165,15 +166,17 @@ import torch

```py
from diffusers import StableDiffusionPipeline
from compel import Compel, DiffusersTextualInversionManager

pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16, use_safetensors=True, variant="fp16").to("cuda")
pipe = StableDiffusionPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16,
use_safetensors=True, variant="fp16").to("cuda")
pipe.load_textual_inversion("sd-concepts-library/midjourney-style")
```

Compel provides a `DiffusersTextualInversionManager` class to simplify prompt weighting with textual inversion. Instantiate `DiffusersTextualInversionManager` and pass it to the `Compel` class:

```py
textual_inversion_manager = DiffusersTextualInversionManager(pipe)
compel_proc = Compel(
    tokenizer=pipe.tokenizer,
    text_encoder=pipe.text_encoder,
    textual_inversion_manager=textual_inversion_manager)
```

@@ -225,6 +228,8 @@ Stable Diffusion XL (SDXL) has two tokenizers and text encoders so its usage is a bit different:
```py
from compel import Compel, ReturnedEmbeddingsType
from diffusers import DiffusionPipeline
from diffusers.utils import make_image_grid
import torch

pipeline = DiffusionPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
```

@@ -251,6 +256,7 @@ conditioning, pooled = compel(prompt)

```py
# generate image
generator = [torch.Generator().manual_seed(33) for _ in range(len(prompt))]
images = pipeline(prompt_embeds=conditioning, pooled_prompt_embeds=pooled, generator=generator, num_inference_steps=30).images
make_image_grid(images, rows=1, cols=2)
```

<div class="flex gap-4">
Expand All @@ -262,4 +268,4 @@ images = pipeline(prompt_embeds=conditioning, pooled_prompt_embeds=pooled, gener
<img class="rounded-xl" src="https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/compel/sdxl_ball2.png"/>
<figcaption class="mt-2 text-center text-sm text-gray-500">"a red cat playing with a (ball)0.6"</figcaption>
</div>
</div>
