From f79bebed8698a77b795511a438db7d9d8dbf0950 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 5 Nov 2022 09:49:13 -0700 Subject: [PATCH 001/199] =?UTF-8?q?lint(ldm.invoke.generator):=20?= =?UTF-8?q?=F0=9F=9A=AE=20remove=20unused=20imports?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ldm/invoke/generator/base.py | 24 ++++++++++---------- ldm/invoke/generator/embiggen.py | 14 +++++++----- ldm/invoke/generator/img2img.py | 10 ++++----- ldm/invoke/generator/inpaint.py | 34 ++++++++++++++--------------- ldm/invoke/generator/omnibus.py | 16 ++++++-------- ldm/invoke/generator/txt2img.py | 3 +-- ldm/invoke/generator/txt2img2img.py | 11 +++++----- 7 files changed, 57 insertions(+), 55 deletions(-) diff --git a/ldm/invoke/generator/base.py b/ldm/invoke/generator/base.py index ba3172e9dc8..1e303530664 100644 --- a/ldm/invoke/generator/base.py +++ b/ldm/invoke/generator/base.py @@ -2,17 +2,19 @@ Base class for ldm.invoke.generator.* including img2img, txt2img, and inpaint ''' -import torch -import numpy as np -import random import os import os.path as osp +import random import traceback -from tqdm import tqdm, trange + +import numpy as np +import torch from PIL import Image, ImageFilter, ImageChops import cv2 as cv -from einops import rearrange, repeat +from einops import rearrange from pytorch_lightning import seed_everything +from tqdm import trange + from ldm.invoke.devices import choose_autocast from ldm.util import rand_perlin_2d @@ -103,7 +105,7 @@ def generate(self,prompt,init_image,width,height,sampler, iterations=1,seed=None seed = self.new_seed() return results - + def sample_to_image(self,samples)->Image.Image: """ Given samples returned from a sampler, converts @@ -166,12 +168,12 @@ def repaste_and_color_correct(self, result: Image.Image, init_image: Image.Image blurred_init_mask = pil_init_mask multiplied_blurred_init_mask = ImageChops.multiply(blurred_init_mask, self.pil_image.split()[-1]) - + # Paste original on color-corrected generation (using blurred mask) matched_result.paste(init_image, (0,0), mask = multiplied_blurred_init_mask) return matched_result - + def sample_to_lowres_estimated_image(self,samples): # origingally adapted from code by @erucipe and @keturn here: @@ -219,11 +221,11 @@ def get_noise(self,width,height): (txt2img) or from the latent image (img2img, inpaint) """ raise NotImplementedError("get_noise() must be implemented in a descendent class") - + def get_perlin_noise(self,width,height): fixdevice = 'cpu' if (self.model.device.type == 'mps') else self.model.device return torch.stack([rand_perlin_2d((height, width), (8, 8), device = self.model.device).to(fixdevice) for _ in range(self.latent_channels)], dim=0).to(self.model.device) - + def new_seed(self): self.seed = random.randrange(0, np.iinfo(np.uint32).max) return self.seed @@ -325,4 +327,4 @@ def save_sample(self, sample, filepath): os.makedirs(dirname, exist_ok=True) image.save(filepath,'PNG') - + diff --git a/ldm/invoke/generator/embiggen.py b/ldm/invoke/generator/embiggen.py index dc6af35a6c9..0b9fda7ac29 100644 --- a/ldm/invoke/generator/embiggen.py +++ b/ldm/invoke/generator/embiggen.py @@ -3,14 +3,16 @@ and generates with ldm.invoke.generator.img2img ''' +import numpy as np import torch -import numpy as np +from PIL import Image from tqdm import trange -from PIL import Image -from ldm.invoke.generator.base import Generator -from ldm.invoke.generator.img2img import Img2Img + from ldm.invoke.devices import 
choose_autocast -from ldm.models.diffusion.ddim import DDIMSampler +from ldm.invoke.generator.base import Generator +from ldm.invoke.generator.img2img import Img2Img +from ldm.models.diffusion.ddim import DDIMSampler + class Embiggen(Generator): def __init__(self, model, precision): @@ -493,7 +495,7 @@ def make_image(): # Layer tile onto final image outputsuperimage.alpha_composite(intileimage, (left, top)) else: - print(f'Error: could not find all Embiggen output tiles in memory? Something must have gone wrong with img2img generation.') + print('Error: could not find all Embiggen output tiles in memory? Something must have gone wrong with img2img generation.') # after internal loops and patching up return Embiggen image return outputsuperimage diff --git a/ldm/invoke/generator/img2img.py b/ldm/invoke/generator/img2img.py index 1981b4eacb6..edcc855a290 100644 --- a/ldm/invoke/generator/img2img.py +++ b/ldm/invoke/generator/img2img.py @@ -2,15 +2,15 @@ ldm.invoke.generator.img2img descends from ldm.invoke.generator ''' -import torch -import numpy as np import PIL -from torch import Tensor +import numpy as np +import torch from PIL import Image +from torch import Tensor + from ldm.invoke.devices import choose_autocast from ldm.invoke.generator.base import Generator -from ldm.models.diffusion.ddim import DDIMSampler -from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent + class Img2Img(Generator): def __init__(self, model, precision): diff --git a/ldm/invoke/generator/inpaint.py b/ldm/invoke/generator/inpaint.py index 7798ed00c5e..02bac7c999b 100644 --- a/ldm/invoke/generator/inpaint.py +++ b/ldm/invoke/generator/inpaint.py @@ -3,21 +3,21 @@ ''' import math -import torch -import torchvision.transforms as T -import numpy as np -import cv2 as cv + import PIL +import cv2 as cv +import numpy as np +import torch from PIL import Image, ImageFilter, ImageOps, ImageChops -from skimage.exposure.histogram_matching import match_histograms -from einops import rearrange, repeat -from ldm.invoke.devices import choose_autocast -from ldm.invoke.generator.img2img import Img2Img -from ldm.models.diffusion.ddim import DDIMSampler -from ldm.models.diffusion.ksampler import KSampler +from einops import repeat + +from ldm.invoke.devices import choose_autocast from ldm.invoke.generator.base import downsampling -from ldm.util import debug_image +from ldm.invoke.generator.img2img import Img2Img from ldm.invoke.globals import Globals +from ldm.models.diffusion.ddim import DDIMSampler +from ldm.models.diffusion.ksampler import KSampler +from ldm.util import debug_image infill_methods: list[str] = list() @@ -59,7 +59,7 @@ def get_tile_images(self, image: np.ndarray, width=8, height=8): writeable=False ) - def infill_patchmatch(self, im: Image.Image) -> Image: + def infill_patchmatch(self, im: Image.Image) -> Image: if im.mode != 'RGBA': return im @@ -128,7 +128,7 @@ def mask_edge(self, mask: Image, edge_size: int, edge_blur: int) -> Image: # Combine npmask = npgradient + npedge - # Expand + # Expand npmask = cv.dilate(npmask, np.ones((3,3), np.uint8), iterations = int(edge_size / 2)) new_mask = Image.fromarray(npmask) @@ -221,7 +221,7 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, init_filled = init_filled.resize((inpaint_width, inpaint_height)) debug_image(init_filled, "init_filled", debug_status=self.enable_image_debugging) - + # Create init tensor init_image = self._image_to_tensor(init_filled.convert('RGB')) @@ -251,10 +251,10 @@ def 
get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, # klms samplers not supported yet, so ignore previous sampler if isinstance(sampler,KSampler): print( - f">> Using recommended DDIM sampler for inpainting." + ">> Using recommended DDIM sampler for inpainting." ) sampler = DDIMSampler(self.model, device=self.model.device) - + sampler.make_schedule( ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False ) @@ -353,7 +353,7 @@ def sample_to_image(self, samples)->Image.Image: if self.pil_image is None or self.pil_mask is None: return gen_result - + corrected_result = super().repaste_and_color_correct(gen_result, self.pil_image, self.pil_mask, self.mask_blur_radius) debug_image(corrected_result, "corrected_result", debug_status=self.enable_image_debugging) diff --git a/ldm/invoke/generator/omnibus.py b/ldm/invoke/generator/omnibus.py index 277e8970210..3a17062d013 100644 --- a/ldm/invoke/generator/omnibus.py +++ b/ldm/invoke/generator/omnibus.py @@ -1,14 +1,14 @@ """omnibus module to be used with the runwayml 9-channel custom inpainting model""" import torch -import numpy as np +from PIL import Image, ImageOps from einops import repeat -from PIL import Image, ImageOps, ImageChops + from ldm.invoke.devices import choose_autocast -from ldm.invoke.generator.base import downsampling from ldm.invoke.generator.img2img import Img2Img from ldm.invoke.generator.txt2img import Txt2Img + class Omnibus(Img2Img,Txt2Img): def __init__(self, model, precision): super().__init__(model, precision) @@ -58,11 +58,9 @@ def get_make_image( self.mask_blur_radius = mask_blur_radius - t_enc = steps - if init_image is not None and mask_image is not None: # inpainting masked_image = init_image * (1 - mask_image) # masked image is the image masked by mask - masked regions zero - + elif init_image is not None: # img2img scope = choose_autocast(self.precision) @@ -99,7 +97,7 @@ def make_image(x_T): device=model.device, num_samples=num_samples, ) - + c = model.cond_stage_model.encode(batch["txt"]) c_cat = list() for ck in model.concat_keys: @@ -164,10 +162,10 @@ def get_noise(self, width:int, height:int): def sample_to_image(self, samples)->Image.Image: gen_result = super().sample_to_image(samples).convert('RGB') - + if self.pil_image is None or self.pil_mask is None: return gen_result corrected_result = super(Img2Img, self).repaste_and_color_correct(gen_result, self.pil_image, self.pil_mask, self.mask_blur_radius) - + return corrected_result diff --git a/ldm/invoke/generator/txt2img.py b/ldm/invoke/generator/txt2img.py index ba49d2ef558..a04207259b8 100644 --- a/ldm/invoke/generator/txt2img.py +++ b/ldm/invoke/generator/txt2img.py @@ -3,9 +3,8 @@ ''' import torch -import numpy as np + from ldm.invoke.generator.base import Generator -from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent class Txt2Img(Generator): diff --git a/ldm/invoke/generator/txt2img2img.py b/ldm/invoke/generator/txt2img2img.py index 759ba2dba4e..3da42ebb8af 100644 --- a/ldm/invoke/generator/txt2img2img.py +++ b/ldm/invoke/generator/txt2img2img.py @@ -2,14 +2,15 @@ ldm.invoke.generator.txt2img inherits from ldm.invoke.generator ''' -import torch -import numpy as np import math + +import torch +from PIL import Image + from ldm.invoke.generator.base import Generator -from ldm.models.diffusion.ddim import DDIMSampler from ldm.invoke.generator.omnibus import Omnibus -from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent -from PIL import Image +from ldm.models.diffusion.ddim import 
DDIMSampler + class Txt2Img2Img(Generator): def __init__(self, model, precision): From 1d43512d64b94db261db49287d516c392e815df0 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 9 Nov 2022 10:08:49 -0800 Subject: [PATCH 002/199] initial commit of DiffusionPipeline class --- ldm/invoke/generator/diffusers_pipeline.py | 325 +++++++++++++++++++++ 1 file changed, 325 insertions(+) create mode 100644 ldm/invoke/generator/diffusers_pipeline.py diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py new file mode 100644 index 00000000000..b13f85b645e --- /dev/null +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -0,0 +1,325 @@ +import secrets +from dataclasses import dataclass +from typing import List, Optional, Union + +import torch +from diffusers.models import AutoencoderKL, UNet2DConditionModel +from diffusers.pipeline_utils import DiffusionPipeline +from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput +from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker +from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler +from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer + + +@dataclass +class PipelineIntermediateState: + run_id: str + step: int + timestep: int + latents: torch.Tensor + predicted_original: Optional[torch.Tensor] = None + + +class StableDiffusionGeneratorPipeline(DiffusionPipeline): + r""" + Pipeline for text-to-image generation using Stable Diffusion. + + This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the + library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.) + + Implementation note: This class started as a refactored copy of diffusers.StableDiffusionPipeline. + Hopefully future versions of diffusers provide access to more of these functions so that we don't + need to duplicate them here: https://github.com/huggingface/diffusers/issues/551#issuecomment-1281508384 + + Args: + vae ([`AutoencoderKL`]): + Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations. + text_encoder ([`CLIPTextModel`]): + Frozen text-encoder. Stable Diffusion uses the text portion of + [CLIP](https://huggingface.co/docs/transformers/model_doc/clip#transformers.CLIPTextModel), specifically + the [clip-vit-large-patch14](https://huggingface.co/openai/clip-vit-large-patch14) variant. + tokenizer (`CLIPTokenizer`): + Tokenizer of class + [CLIPTokenizer](https://huggingface.co/docs/transformers/v4.21.0/en/model_doc/clip#transformers.CLIPTokenizer). + unet ([`UNet2DConditionModel`]): Conditional U-Net architecture to denoise the encoded image latents. + scheduler ([`SchedulerMixin`]): + A scheduler to be used in combination with `unet` to denoise the encoded image latens. Can be one of + [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`]. + safety_checker ([`StableDiffusionSafetyChecker`]): + Classification module that estimates whether generated images could be considered offsensive or harmful. + Please, refer to the [model card](https://huggingface.co/CompVis/stable-diffusion-v1-4) for details. + feature_extractor ([`CLIPFeatureExtractor`]): + Model that extracts features from generated images to be used as inputs for the `safety_checker`. 
+ """ + + ID_LENGTH = 8 + + def __init__( + self, + vae: AutoencoderKL, + text_encoder: CLIPTextModel, + tokenizer: CLIPTokenizer, + unet: UNet2DConditionModel, + scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], + safety_checker: StableDiffusionSafetyChecker, + feature_extractor: CLIPFeatureExtractor, + ): + super().__init__() + + self.register_modules( + vae=vae, + text_encoder=text_encoder, + tokenizer=tokenizer, + unet=unet, + scheduler=scheduler, + safety_checker=safety_checker, + feature_extractor=feature_extractor, + ) + + def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): + r""" + Enable sliced attention computation. + + When this option is enabled, the attention module will split the input tensor in slices, to compute attention + in several steps. This is useful to save some memory in exchange for a small speed decrease. + + Args: + slice_size (`str` or `int`, *optional*, defaults to `"auto"`): + When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If + a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case, + `attention_head_dim` must be a multiple of `slice_size`. + """ + if slice_size == "auto": + # half the attention head size is usually a good trade-off between + # speed and memory + slice_size = self.unet.config.attention_head_dim // 2 + self.unet.set_attention_slice(slice_size) + + def disable_attention_slicing(self): + r""" + Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go + back to computing attention in one step. + """ + # set slice_size = `None` to disable `attention slicing` + self.enable_attention_slicing(None) + + def enable_xformers_memory_efficient_attention(self): + r""" + Enable memory efficient attention as implemented in xformers. + + When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference + time. Speed up at training time is not guaranteed. + + Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention + is used. + """ + self.unet.set_use_memory_efficient_attention_xformers(True) + + def disable_xformers_memory_efficient_attention(self): + r""" + Disable memory efficient attention as implemented in xformers. + """ + self.unet.set_use_memory_efficient_attention_xformers(False) + + @torch.no_grad() + def __call__( + self, + prompt: Union[str, List[str]], + height: Optional[int] = 512, + width: Optional[int] = 512, + num_inference_steps: Optional[int] = 50, + guidance_scale: Optional[float] = 7.5, + generator: Optional[torch.Generator] = None, + latents: Optional[torch.FloatTensor] = None, + **extra_step_kwargs, + ): + r""" + Function invoked when calling the pipeline for generation. + + Args: + prompt (`str` or `List[str]`): + The prompt or prompts to guide the image generation. + height (`int`, *optional*, defaults to 512): + The height in pixels of the generated image. + width (`int`, *optional*, defaults to 512): + The width in pixels of the generated image. + num_inference_steps (`int`, *optional*, defaults to 50): + The number of denoising steps. More denoising steps usually lead to a higher quality image at the + expense of slower inference. + guidance_scale (`float`, *optional*, defaults to 7.5): + Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). + `guidance_scale` is defined as `w` of equation 2. 
of [Imagen + Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > + 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, + usually at the expense of lower image quality. + generator (`torch.Generator`, *optional*): + A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation + deterministic. + latents (`torch.FloatTensor`, *optional*): + Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image + generation. Can be used to tweak the same generation with different prompts. If not provided, a latents + tensor will ge generated by sampling using the supplied random `generator`. + + Returns: + [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: + [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple. + When returning a tuple, the first element is a list with the generated images, and the second element is a + list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" + (nsfw) content, according to the `safety_checker`. + """ + result = None + for result in self.generate( + prompt, height=height, width=width, num_inference_steps=num_inference_steps, + guidance_scale=guidance_scale, generator=generator, latents=latents, + **extra_step_kwargs): + pass # discarding intermediates + if result is None: + raise AssertionError("why was that an empty generator?") + return result + + def generate( + self, + prompt: Union[str, List[str]], + *, + opposing_prompt: Union[str, List[str]] = None, + height: Optional[int] = 512, + width: Optional[int] = 512, + num_inference_steps: Optional[int] = 50, + guidance_scale: Optional[float] = 7.5, + generator: Optional[torch.Generator] = None, + latents: Optional[torch.FloatTensor] = None, + run_id: str = None, + **extra_step_kwargs, + ): + if isinstance(prompt, str): + batch_size = 1 + else: + batch_size = len(prompt) + + if height % 8 != 0 or width % 8 != 0: + raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") + + if run_id is None: + run_id = secrets.token_urlsafe(self.ID_LENGTH) + + # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) + # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` + # corresponds to doing no classifier free guidance. 
+ do_classifier_free_guidance = guidance_scale > 1.0 + text_embeddings = self.get_text_embeddings(prompt, opposing_prompt, do_classifier_free_guidance, batch_size)\ + .to(self.unet.device) + self.scheduler.set_timesteps(num_inference_steps) + latents = self.prepare_latents(latents, batch_size, height, width, + generator, self.unet.dtype) + + yield PipelineIntermediateState(run_id=run_id, step=-1, timestep=self.scheduler.num_train_timesteps, + latents=latents) + for i, t in enumerate(self.progress_bar(self.scheduler.timesteps)): + step_output = self.step(t, latents, guidance_scale, text_embeddings, **extra_step_kwargs) + latents = step_output.prev_sample + yield PipelineIntermediateState(run_id=run_id, step=i, timestep=int(t), latents=latents, + predicted_original=step_output.pred_original_sample) + + # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 + torch.cuda.empty_cache() + + image = self.decode_to_image(latents) + output = StableDiffusionPipelineOutput(images=image, nsfw_content_detected=[]) + yield self.check_for_safety(output) + + @torch.inference_mode() + def step(self, t, latents: torch.Tensor, guidance_scale, text_embeddings: torch.Tensor, **extra_step_kwargs): + do_classifier_free_guidance = guidance_scale > 1.0 + + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # predict the noise residual + noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample + + # perform guidance + if do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + + # compute the previous noisy sample x_t -> x_t-1 + return self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs) + + @torch.inference_mode() + def check_for_safety(self, output): + if not getattr(self, 'feature_extractor') or not getattr(self, 'safety_checker'): + return output + images = output.images + safety_checker_output = self.feature_extractor(self.numpy_to_pil(images), + return_tensors="pt").to(self.device) + screened_images, has_nsfw_concept = self.safety_checker( + images=images, clip_input=safety_checker_output.pixel_values) + return StableDiffusionPipelineOutput(screened_images, has_nsfw_concept) + + @torch.inference_mode() + def decode_to_image(self, latents): + # scale and decode the image latents with vae + latents = 1 / 0.18215 * latents + image = self.vae.decode(latents).sample + image = (image / 2 + 0.5).clamp(0, 1) + image = image.cpu().permute(0, 2, 3, 1).numpy() + return image + + @torch.inference_mode() + def get_text_embeddings(self, + prompt: Union[str, List[str]], + opposing_prompt: Union[str, List[str]], + do_classifier_free_guidance: bool, + batch_size: int): + # get prompt text embeddings + text_input = self._tokenize(prompt) + + text_embeddings = self.text_encoder(text_input.input_ids)[0] + # get unconditional embeddings for classifier free guidance + if do_classifier_free_guidance: + # opposing prompt defaults to blank caption for everything in the batch + text_anti_input = self._tokenize(opposing_prompt or [""] * batch_size) + uncond_embeddings = self.text_encoder(text_anti_input.input_ids)[0] + + # For classifier free guidance, we need to do two forward passes. 
+ # Here we concatenate the unconditional and text embeddings into a single batch + # to avoid doing two forward passes + # FIXME: assert these two are the same size + text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) + return text_embeddings + + @torch.inference_mode() + def _tokenize(self, prompt: Union[str, List[str]]): + return self.tokenizer( + prompt, + padding="max_length", + max_length=self.tokenizer.model_max_length, + truncation=True, + return_tensors="pt", + ) + + def prepare_latents(self, latents, batch_size, height, width, generator, dtype): + # get the initial random noise unless the user supplied it + # Unlike in other pipelines, latents need to be generated in the target device + # for 1-to-1 results reproducibility with the CompVis implementation. + # However this currently doesn't work in `mps`. + latents_shape = (batch_size, self.unet.in_channels, height // 8, width // 8) + if latents is None: + latents = torch.randn( + latents_shape, + generator=generator, + device=self.unet.device, + dtype=dtype + ) + else: + if latents.shape != latents_shape: + raise ValueError( + f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}") + if latents.device != self.unet.device: + raise ValueError(f"Unexpected latents device, got {latents.device}, " + f"expected {self.unet.device}") + + # scale the initial noise by the standard deviation required by the scheduler + latents *= self.scheduler.init_noise_sigma + return latents From 58ea3bf4c8e5dc5475c17fa17bb94988f133fd1d Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 9 Nov 2022 11:33:19 -0800 Subject: [PATCH 003/199] spike: proof of concept using diffusers for txt2img --- .../environment-lin-cuda.yml | 10 +++ .../requirements-base.txt | 2 +- ldm/invoke/generator/diffusers_pipeline.py | 36 +++++++--- ldm/invoke/generator/txt2img.py | 66 ++++++++++--------- 4 files changed, 75 insertions(+), 39 deletions(-) diff --git a/environments-and-requirements/environment-lin-cuda.yml b/environments-and-requirements/environment-lin-cuda.yml index 17b2cddc95d..64bbabe902d 100644 --- a/environments-and-requirements/environment-lin-cuda.yml +++ b/environments-and-requirements/environment-lin-cuda.yml @@ -12,6 +12,7 @@ dependencies: - pytorch=1.12.1 - cudatoolkit=11.6 - pip: + - accelerate~=0.13 - albumentations==0.4.3 - dependency_injector==4.40.0 - diffusers==0.6.0 @@ -39,6 +40,15 @@ dependencies: - torch-fidelity==0.3.0 - torchmetrics==0.7.0 - transformers==4.21.3 + - diffusers~=0.7 + - torchmetrics==0.7.0 + - flask==2.1.3 + - flask_socketio==5.3.0 + - flask_cors==3.0.10 + - dependency_injector==4.40.0 + - eventlet + - getpass_asterisk + - kornia==0.6.0 - git+https://github.com/openai/CLIP.git@main#egg=clip - git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k-diffusion - git+https://github.com/invoke-ai/clipseg.git@relaxed-python-requirement#egg=clipseg diff --git a/environments-and-requirements/requirements-base.txt b/environments-and-requirements/requirements-base.txt index 9c306c42cab..8b7d83aa94a 100644 --- a/environments-and-requirements/requirements-base.txt +++ b/environments-and-requirements/requirements-base.txt @@ -1,7 +1,7 @@ # pip will resolve the version which matches torch albumentations dependency_injector==4.40.0 -diffusers +diffusers[torch]~=0.7 einops eventlet facexlib diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index b13f85b645e..d4d60b761a7 100644 --- 
a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -1,6 +1,6 @@ import secrets from dataclasses import dataclass -from typing import List, Optional, Union +from typing import List, Optional, Union, Callable import torch from diffusers.models import AutoencoderKL, UNet2DConditionModel @@ -131,6 +131,7 @@ def __call__( guidance_scale: Optional[float] = 7.5, generator: Optional[torch.Generator] = None, latents: Optional[torch.FloatTensor] = None, + callback: Optional[Callable[[PipelineIntermediateState], None]] = None, **extra_step_kwargs, ): r""" @@ -172,7 +173,22 @@ def __call__( prompt, height=height, width=width, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale, generator=generator, latents=latents, **extra_step_kwargs): - pass # discarding intermediates + if callback is not None: + callback(result) + if result is None: + raise AssertionError("why was that an empty generator?") + return result + + def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, + text_embeddings: torch.Tensor, guidance_scale: float, + *, callback: Callable[[PipelineIntermediateState], None]=None, run_id=None, + **extra_step_kwargs) -> StableDiffusionPipelineOutput: + self.scheduler.set_timesteps(num_inference_steps) + result = None + for result in self.generate_from_embeddings( + latents, text_embeddings, guidance_scale, run_id, **extra_step_kwargs): + if callback is not None: + callback(result) if result is None: raise AssertionError("why was that an empty generator?") return result @@ -199,9 +215,6 @@ def generate( if height % 8 != 0 or width % 8 != 0: raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") - if run_id is None: - run_id = secrets.token_urlsafe(self.ID_LENGTH) - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` # corresponds to doing no classifier free guidance. @@ -209,16 +222,23 @@ def generate( text_embeddings = self.get_text_embeddings(prompt, opposing_prompt, do_classifier_free_guidance, batch_size)\ .to(self.unet.device) self.scheduler.set_timesteps(num_inference_steps) - latents = self.prepare_latents(latents, batch_size, height, width, - generator, self.unet.dtype) + latents = self.prepare_latents(latents, batch_size, height, width, generator, self.unet.dtype) + yield from self.generate_from_embeddings(latents, text_embeddings, guidance_scale, run_id, **extra_step_kwargs) + + def generate_from_embeddings(self, latents: torch.Tensor, text_embeddings: torch.Tensor, guidance_scale: float, + run_id: str = None, **extra_step_kwargs): + if run_id is None: + run_id = secrets.token_urlsafe(self.ID_LENGTH) yield PipelineIntermediateState(run_id=run_id, step=-1, timestep=self.scheduler.num_train_timesteps, latents=latents) + # NOTE: Depends on scheduler being already initialized! 
for i, t in enumerate(self.progress_bar(self.scheduler.timesteps)): step_output = self.step(t, latents, guidance_scale, text_embeddings, **extra_step_kwargs) latents = step_output.prev_sample + predicted_original = getattr(step_output, 'pred_original_sample', None) yield PipelineIntermediateState(run_id=run_id, step=i, timestep=int(t), latents=latents, - predicted_original=step_output.pred_original_sample) + predicted_original=predicted_original) # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 torch.cuda.empty_cache() diff --git a/ldm/invoke/generator/txt2img.py b/ldm/invoke/generator/txt2img.py index a04207259b8..a882b156713 100644 --- a/ldm/invoke/generator/txt2img.py +++ b/ldm/invoke/generator/txt2img.py @@ -1,10 +1,11 @@ ''' ldm.invoke.generator.txt2img inherits from ldm.invoke.generator ''' - +import PIL.Image import torch -from ldm.invoke.generator.base import Generator +from .base import Generator +from .diffusers_pipeline import StableDiffusionGeneratorPipeline class Txt2Img(Generator): @@ -13,7 +14,8 @@ def __init__(self, model, precision): @torch.no_grad() def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, - conditioning,width,height,step_callback=None,threshold=0.0,perlin=0.0,**kwargs): + conditioning,width,height,step_callback=None,threshold=0.0,perlin=0.0, + **kwargs): """ Returns a function returning an image derived from the prompt and the initial image Return value depends on the seed at the time you call it @@ -22,38 +24,42 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, self.perlin = perlin uc, c, extra_conditioning_info = conditioning - @torch.no_grad() - def make_image(x_T): - shape = [ - self.latent_channels, - height // self.downsampling_factor, - width // self.downsampling_factor, - ] + # FIXME: this should probably be either passed in to __init__ instead of model & precision, + # or be constructed in __init__ from those inputs. 
+ pipeline = StableDiffusionGeneratorPipeline.from_pretrained( + "runwayml/stable-diffusion-v1-5", + revision="fp16", torch_dtype=torch.float16, + safety_checker=None, # TODO + # scheduler=sampler + ddim_eta, # TODO + # TODO: local_files_only=True + ) + pipeline.unet.to("cuda") + pipeline.vae.to("cuda") + + def make_image(x_T) -> PIL.Image.Image: + # FIXME: restore free_gpu_mem functionality + # if self.free_gpu_mem and self.model.model.device != self.model.device: + # self.model.model.to(self.model.device) - if self.free_gpu_mem and self.model.model.device != self.model.device: - self.model.model.to(self.model.device) - - sampler.make_schedule(ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False) + # FIXME: how the embeddings are combined should be internal to the pipeline + combined_text_embeddings = torch.cat([uc, c]) - samples, _ = sampler.sample( - batch_size = 1, - S = steps, - x_T = x_T, - conditioning = c, - shape = shape, - verbose = False, - unconditional_guidance_scale = cfg_scale, - unconditional_conditioning = uc, - extra_conditioning_info = extra_conditioning_info, - eta = ddim_eta, - img_callback = step_callback, - threshold = threshold, + pipeline_output = pipeline.image_from_embeddings( + latents=x_T, + num_inference_steps=steps, + text_embeddings=combined_text_embeddings, + guidance_scale=cfg_scale, + callback=step_callback, + # TODO: extra_conditioning_info = extra_conditioning_info, + # TODO: eta = ddim_eta, + # TODO: threshold = threshold, ) - if self.free_gpu_mem: - self.model.model.to("cpu") + # FIXME: restore free_gpu_mem functionality + # if self.free_gpu_mem: + # self.model.model.to("cpu") - return self.sample_to_image(samples) + return pipeline.numpy_to_pil(pipeline_output.images)[0] return make_image From dcfdb83513d8db6ece04a2b71bbe8354709f5ac1 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 9 Nov 2022 15:23:42 -0800 Subject: [PATCH 004/199] doc: type hints for Generator --- ldm/invoke/generator/base.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ldm/invoke/generator/base.py b/ldm/invoke/generator/base.py index 1e303530664..0a60fea45f8 100644 --- a/ldm/invoke/generator/base.py +++ b/ldm/invoke/generator/base.py @@ -16,13 +16,19 @@ from tqdm import trange from ldm.invoke.devices import choose_autocast +from ldm.models.diffusion.ddpm import DiffusionWrapper from ldm.util import rand_perlin_2d downsampling = 8 CAUTION_IMG = 'assets/caution.png' -class Generator(): - def __init__(self, model, precision): +class Generator: + downsampling_factor: int + latent_channels: int + precision: str + model: DiffusionWrapper + + def __init__(self, model: DiffusionWrapper, precision: str): self.model = model self.precision = precision self.seed = None From 9b274bd57c5e4711729532a8d9ac9aac691fc4fd Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 9 Nov 2022 15:25:56 -0800 Subject: [PATCH 005/199] refactor(model_cache): factor out load_ckpt --- ldm/invoke/model_cache.py | 90 +++++++++++++++++++++------------------ 1 file changed, 48 insertions(+), 42 deletions(-) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 7d1654718a3..c730c7b7759 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -1,5 +1,5 @@ ''' -Manage a cache of Stable Diffusion model files for fast switching. +Manage a cache of Stable Diffusion model files for fast switching. They are moved between GPU and CPU as necessary. 
If CPU memory falls below a preset minimum, the least recently used model will be cleared and loaded from disk when next needed. @@ -51,7 +51,7 @@ def valid_model(self, model_name:str)->bool: identifier. ''' return model_name in self.config - + def get_model(self, model_name:str): ''' Given a model named identified in models.yaml, return @@ -66,7 +66,7 @@ def get_model(self, model_name:str): if model_name not in self.models: # make room for a new one self._make_cache_room() self.offload_model(self.current_model) - + if model_name in self.models: requested_model = self.models[model_name]['model'] print(f'>> Retrieving model {model_name} from system RAM cache') @@ -92,7 +92,7 @@ def get_model(self, model_name:str): print(f'** restoring {self.current_model}') self.get_model(self.current_model) return - + self.current_model = model_name self._push_newest_model(model_name) return { @@ -102,7 +102,7 @@ def get_model(self, model_name:str): 'hash': hash } - def default_model(self) -> str: + def default_model(self) -> str | None: ''' Returns the name of the default model, or None if none is defined. @@ -191,25 +191,13 @@ def add_model(self, model_name:str, model_attributes:dict, clobber=False) -> Non omega[model_name] = config if clobber: self._invalidate_cached_model(model_name) - + def _load_model(self, model_name:str): """Load and initialize the model from configuration variables passed at object creation time""" if model_name not in self.config: print(f'"{model_name}" is not a known model name. Please check your models.yaml file') mconfig = self.config[model_name] - config = mconfig.config - weights = mconfig.weights - vae = mconfig.get('vae') - width = mconfig.width - height = mconfig.height - - if not os.path.isabs(weights): - weights = os.path.normpath(os.path.join(Globals.root,weights)) - # scan model - self.scan_model(model_name, weights) - - print(f'>> Loading {model_name} from {weights}') # for usage statistics if self._has_cuda(): @@ -219,12 +207,39 @@ def _load_model(self, model_name:str): tic = time.time() # this does the work - if not os.path.isabs(config): - config = os.path.join(Globals.root,config) - omega_config = OmegaConf.load(config) - with open(weights,'rb') as f: + model_format = mconfig.get('format', 'ckpt') + if model_format == 'ckpt': + weights = mconfig.weights + print(f'>> Loading {model_name} from {weights}') + model, width, height, model_hash = self._load_ckpt_model(mconfig) + elif model_format == 'diffusers': + model, width, height, model_hash = self._load_diffusers_model(mconfig) + else: + raise NotImplementedError(f"Unknown model format {model_name}: {model_format}") + + # usage statistics + toc = time.time() + print(f'>> Model loaded in', '%4.2fs' % (toc - tic)) + if self._has_cuda(): + print( + '>> Max VRAM used to load the model:', + '%4.2fG' % (torch.cuda.max_memory_allocated() / 1e9), + '\n>> Current VRAM usage:' + '%4.2fG' % (torch.cuda.memory_allocated() / 1e9), + ) + return model, width, height, model_hash + + def _load_ckpt_model(self, mconfig): + config = mconfig.config + weights = mconfig.weights + vae = mconfig.get('vae', None) + width = mconfig.width + height = mconfig.height + + c = OmegaConf.load(config) + with open(weights, 'rb') as f: weight_bytes = f.read() - model_hash = self._cached_sha256(weights,weight_bytes) + model_hash = self._cached_sha256(weights, weight_bytes) sd = torch.load(io.BytesIO(weight_bytes), map_location='cpu') del weight_bytes sd = sd['state_dict'] @@ -252,28 +267,19 @@ def _load_model(self, model_name:str): 
model.to(self.device) # model.to doesn't change the cond_stage_model.device used to move the tokenizer output, so set it here model.cond_stage_model.device = self.device - + model.eval() for module in model.modules(): if isinstance(module, (torch.nn.Conv2d, torch.nn.ConvTranspose2d)): module._orig_padding_mode = module.padding_mode - # usage statistics - toc = time.time() - print(f'>> Model loaded in', '%4.2fs' % (toc - tic)) + return model, width, height, model_hash - if self._has_cuda(): - print( - '>> Max VRAM used to load the model:', - '%4.2fG' % (torch.cuda.max_memory_allocated() / 1e9), - '\n>> Current VRAM usage:' - '%4.2fG' % (torch.cuda.memory_allocated() / 1e9), - ) + def _load_diffusers_model(self, mconfig): + raise NotImplementedError() # return pipeline, width, height, model_hash - return model, width, height, model_hash - - def offload_model(self, model_name:str) -> None: + def offload_model(self, model_name:str): ''' Offload the indicated model to CPU. Will call _make_cache_room() to free space if needed. @@ -288,7 +294,7 @@ def offload_model(self, model_name:str) -> None: gc.collect() if self._has_cuda(): torch.cuda.empty_cache() - + def scan_model(self, model_name, checkpoint): # scan model print(f'>> Scanning Model: {model_name}') @@ -318,7 +324,7 @@ def _make_cache_room(self) -> None: if least_recent_model is not None: del self.models[least_recent_model] gc.collect() - + def print_vram_usage(self) -> None: if self._has_cuda: print('>> Current VRAM usage: ','%4.2fG' % (torch.cuda.memory_allocated() / 1e9)) @@ -353,12 +359,12 @@ def _invalidate_cached_model(self,model_name:str) -> None: if model_name in self.stack: self.stack.remove(model_name) self.models.pop(model_name,None) - + def _model_to_cpu(self,model): if self.device != 'cpu': model.cond_stage_model.device = 'cpu' model.first_stage_model.to('cpu') - model.cond_stage_model.to('cpu') + model.cond_stage_model.to('cpu') model.model.to('cpu') return model.to('cpu') else: @@ -388,7 +394,7 @@ def _push_newest_model(self,model_name:str) -> None: with contextlib.suppress(ValueError): self.stack.remove(model_name) self.stack.append(model_name) - + def _has_cuda(self) -> bool: return self.device.type == 'cuda' From 4c3858e079255e096327e53ad2f7fbd653755782 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 9 Nov 2022 17:17:52 -0800 Subject: [PATCH 006/199] model_cache: add ability to load a diffusers model pipeline and update associated things in Generate & Generator to not instantly fail when that happens --- ldm/generate.py | 53 +++++++++++++++++++++- ldm/invoke/generator/base.py | 8 ++-- ldm/invoke/generator/diffusers_pipeline.py | 28 ++++++++++++ ldm/invoke/generator/txt2img.py | 13 +----- ldm/invoke/model_cache.py | 43 +++++++++++++++++- 5 files changed, 126 insertions(+), 19 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index 8d68314f386..04805439891 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -18,6 +18,8 @@ import hashlib import cv2 import skimage +from diffusers import DiffusionPipeline, DDIMScheduler, LMSDiscreteScheduler, EulerDiscreteScheduler, \ + EulerAncestralDiscreteScheduler from omegaconf import OmegaConf from ldm.invoke.generator.base import downsampling @@ -402,7 +404,10 @@ def process_image(image,seed): width = width or self.width height = height or self.height - configure_model_padding(model, seamless, seamless_axes) + if isinstance(model, DiffusionPipeline): + configure_model_padding(model.unet, seamless, seamless_axes) + else: + 
configure_model_padding(model, seamless, seamless_axes) assert cfg_scale > 1.0, 'CFG_Scale (-C) must be >1.0' assert threshold >= 0.0, '--threshold must be >=0.0' @@ -962,9 +967,15 @@ def sample_to_image(self, samples): def sample_to_lowres_estimated_image(self, samples): return self._make_base().sample_to_lowres_estimated_image(samples) + def _set_sampler(self): + if isinstance(self.model, DiffusionPipeline): + return self._set_scheduler() + else: + return self._set_sampler_legacy() + # very repetitive code - can this be simplified? The KSampler names are # consistent, at least - def _set_sampler(self): + def _set_sampler_legacy(self): msg = f'>> Setting Sampler to {self.sampler_name}' if self.sampler_name == 'plms': self.sampler = PLMSSampler(self.model, device=self.device) @@ -992,6 +1003,44 @@ def _set_sampler(self): print(msg) + def _set_scheduler(self): + msg = f'>> Setting Sampler to {self.sampler_name}' + default = self.model.scheduler + # TODO: Test me! Not all schedulers take the same args. + scheduler_args = dict( + num_train_timesteps=default.num_train_timesteps, + beta_start=default.beta_start, + beta_end=default.beta_end, + beta_schedule=default.beta_schedule, + ) + trained_betas = getattr(self.model.scheduler, 'trained_betas') + if trained_betas is not None: + scheduler_args.update(trained_betas=trained_betas) + if self.sampler_name == 'plms': + raise NotImplementedError("What's the diffusers implementation of PLMS?") + elif self.sampler_name == 'ddim': + self.sampler = DDIMScheduler(**scheduler_args) + elif self.sampler_name == 'k_dpm_2_a': + raise NotImplementedError("no diffusers implementation of dpm_2 samplers") + elif self.sampler_name == 'k_dpm_2': + raise NotImplementedError("no diffusers implementation of dpm_2 samplers") + elif self.sampler_name == 'k_euler_a': + self.sampler = EulerAncestralDiscreteScheduler(**scheduler_args) + elif self.sampler_name == 'k_euler': + self.sampler = EulerDiscreteScheduler(**scheduler_args) + elif self.sampler_name == 'k_heun': + raise NotImplementedError("no diffusers implementation of Heun's sampler") + elif self.sampler_name == 'k_lms': + self.sampler = LMSDiscreteScheduler(**scheduler_args) + else: + msg = f'>> Unsupported Sampler: {self.sampler_name}, Defaulting to {default}' + + print(msg) + + if not hasattr(self.sampler, 'uses_inpainting_model'): + # FIXME: terrible kludge! 
+ self.sampler.uses_inpainting_model = lambda: False + def _load_img(self, img)->Image: if isinstance(img, Image.Image): image = img diff --git a/ldm/invoke/generator/base.py b/ldm/invoke/generator/base.py index 0a60fea45f8..c8b79f24e14 100644 --- a/ldm/invoke/generator/base.py +++ b/ldm/invoke/generator/base.py @@ -9,8 +9,8 @@ import numpy as np import torch -from PIL import Image, ImageFilter, ImageChops -import cv2 as cv +from PIL import Image, ImageFilter +from diffusers import DiffusionPipeline from einops import rearrange from pytorch_lightning import seed_everything from tqdm import trange @@ -26,9 +26,9 @@ class Generator: downsampling_factor: int latent_channels: int precision: str - model: DiffusionWrapper + model: DiffusionWrapper | DiffusionPipeline - def __init__(self, model: DiffusionWrapper, precision: str): + def __init__(self, model: DiffusionWrapper | DiffusionPipeline, precision: str): self.model = model self.precision = precision self.seed = None diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index d4d60b761a7..c9fff12e0cb 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -1,4 +1,5 @@ import secrets +import warnings from dataclasses import dataclass from typing import List, Optional, Union, Callable @@ -309,6 +310,28 @@ def get_text_embeddings(self, text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) return text_embeddings + def get_learned_conditioning(self, c: List[List[str]], return_tokens=True, + fragment_weights=None, **kwargs): + """ + Compatibility function for ldm.models.diffusion.ddpm.LatentDiffusion. + """ + assert return_tokens == True + if fragment_weights: + weights = fragment_weights[0] + if any(weight != 1.0 for weight in weights): + warnings.warn(f"fragment weights not implemented yet {fragment_weights}", stacklevel=2) + + if kwargs: + warnings.warn(f"unsupported args {kwargs}", stacklevel=2) + + text_fragments = c[0] + text_input = self._tokenize(text_fragments) + + with torch.inference_mode(): + token_ids = text_input.input_ids.to(self.text_encoder.device) + text_embeddings = self.text_encoder(token_ids)[0] + return text_embeddings, text_input.input_ids + @torch.inference_mode() def _tokenize(self, prompt: Union[str, List[str]]): return self.tokenizer( @@ -319,6 +342,11 @@ def _tokenize(self, prompt: Union[str, List[str]]): return_tensors="pt", ) + @property + def channels(self) -> int: + """Compatible with DiffusionWrapper""" + return self.unet.in_channels + def prepare_latents(self, latents, batch_size, height, width, generator, dtype): # get the initial random noise unless the user supplied it # Unlike in other pipelines, latents need to be generated in the target device diff --git a/ldm/invoke/generator/txt2img.py b/ldm/invoke/generator/txt2img.py index a882b156713..7b36d37df44 100644 --- a/ldm/invoke/generator/txt2img.py +++ b/ldm/invoke/generator/txt2img.py @@ -24,17 +24,8 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, self.perlin = perlin uc, c, extra_conditioning_info = conditioning - # FIXME: this should probably be either passed in to __init__ instead of model & precision, - # or be constructed in __init__ from those inputs. 
- pipeline = StableDiffusionGeneratorPipeline.from_pretrained( - "runwayml/stable-diffusion-v1-5", - revision="fp16", torch_dtype=torch.float16, - safety_checker=None, # TODO - # scheduler=sampler + ddim_eta, # TODO - # TODO: local_files_only=True - ) - pipeline.unet.to("cuda") - pipeline.vae.to("cuda") + pipeline = self.model + # TODO: customize a new pipeline for the given sampler (Scheduler) def make_image(x_T) -> PIL.Image.Image: # FIXME: restore free_gpu_mem functionality diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index c730c7b7759..56c0f74c109 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -4,6 +4,7 @@ below a preset minimum, the least recently used model will be cleared and loaded from disk when next needed. ''' +from pathlib import Path import torch import os @@ -20,6 +21,8 @@ from typing import Union from omegaconf import OmegaConf from omegaconf.errors import ConfigAttributeError + +from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline from ldm.util import instantiate_from_config, ask_user from ldm.invoke.globals import Globals from picklescan.scanner import scan_file_path @@ -91,7 +94,7 @@ def get_model(self, model_name:str): assert self.current_model,'** FATAL: no current model to restore to' print(f'** restoring {self.current_model}') self.get_model(self.current_model) - return + return None self.current_model = model_name self._push_newest_model(model_name) @@ -277,7 +280,43 @@ def _load_ckpt_model(self, mconfig): return model, width, height, model_hash def _load_diffusers_model(self, mconfig): - raise NotImplementedError() # return pipeline, width, height, model_hash + pipeline_args = {} + + if 'repo_name' in mconfig: + name_or_path = mconfig['repo_name'] + model_hash = "FIXME" + # model_hash = huggingface_hub.get_hf_file_metadata(url).commit_hash + elif 'path' in mconfig: + name_or_path = Path(mconfig['path']) + # FIXME: What should the model_hash be? A hash of the unet weights? Of all files of all + # the submodels hashed together? The commit ID from the repo? + model_hash = "FIXME TOO" + else: + raise ValueError("Model config must specify either repo_name or path.") + + print(f'>> Loading diffusers model from {name_or_path}') + + if self.precision == 'float16': + print(' | Using faster float16 precision') + pipeline_args.update(revision="fp16", torch_dtype=torch.float16) + else: + # TODO: more accurately, "using the model's default precision." + # How do we find out what that is? 
+ print(' | Using more accurate float32 precision') + + pipeline = StableDiffusionGeneratorPipeline.from_pretrained( + name_or_path, + safety_checker=None, # TODO + # TODO: alternate VAE + # TODO: local_files_only=True + **pipeline_args + ) + pipeline.to(self.device) + + width = pipeline.vae.sample_size + height = pipeline.vae.sample_size + + return pipeline, width, height, model_hash def offload_model(self, model_name:str): ''' From ae9b482acf6da63d8670942651a9aa573af4fb9a Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 9 Nov 2022 19:21:58 -0800 Subject: [PATCH 007/199] model_cache: fix model default image dimensions --- ldm/invoke/generator/txt2img.py | 1 - ldm/invoke/model_cache.py | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/ldm/invoke/generator/txt2img.py b/ldm/invoke/generator/txt2img.py index 7b36d37df44..15650ccbd98 100644 --- a/ldm/invoke/generator/txt2img.py +++ b/ldm/invoke/generator/txt2img.py @@ -5,7 +5,6 @@ import torch from .base import Generator -from .diffusers_pipeline import StableDiffusionGeneratorPipeline class Txt2Img(Generator): diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 56c0f74c109..6d8ce990f05 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -313,8 +313,8 @@ def _load_diffusers_model(self, mconfig): ) pipeline.to(self.device) - width = pipeline.vae.sample_size - height = pipeline.vae.sample_size + width = pipeline.vae.block_out_channels[-1] + height = pipeline.vae.block_out_channels[-1] return pipeline, width, height, model_hash From d55e22981aee5994eb5589e9ed7bb9a422b31e4f Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Thu, 10 Nov 2022 14:36:45 -0800 Subject: [PATCH 008/199] txt2img: support switching diffusers schedulers --- backend/modules/parameters.py | 3 ++ ldm/generate.py | 59 +++++++++++++++++---------------- ldm/invoke/args.py | 3 ++ ldm/invoke/generator/txt2img.py | 2 +- ldm/invoke/model_cache.py | 12 +++++++ 5 files changed, 50 insertions(+), 29 deletions(-) diff --git a/backend/modules/parameters.py b/backend/modules/parameters.py index 10af5ece3a6..130d04d64ac 100644 --- a/backend/modules/parameters.py +++ b/backend/modules/parameters.py @@ -12,6 +12,9 @@ "k_heun", "k_lms", "plms", + # diffusers: + "ipndm", + "pndm", ] diff --git a/ldm/generate.py b/ldm/generate.py index 04805439891..9300458611c 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -19,7 +19,7 @@ import cv2 import skimage from diffusers import DiffusionPipeline, DDIMScheduler, LMSDiscreteScheduler, EulerDiscreteScheduler, \ - EulerAncestralDiscreteScheduler + EulerAncestralDiscreteScheduler, PNDMScheduler, IPNDMScheduler from omegaconf import OmegaConf from ldm.invoke.generator.base import downsampling @@ -1004,36 +1004,39 @@ def _set_sampler_legacy(self): print(msg) def _set_scheduler(self): - msg = f'>> Setting Sampler to {self.sampler_name}' default = self.model.scheduler - # TODO: Test me! Not all schedulers take the same args. 
- scheduler_args = dict( - num_train_timesteps=default.num_train_timesteps, - beta_start=default.beta_start, - beta_end=default.beta_end, - beta_schedule=default.beta_schedule, + + higher_order_samplers = [ + 'k_dpm_2', + 'k_dpm_2_a', + 'k_heun', + 'plms', # Its first step is like Heun + ] + scheduler_map = dict( + ddim=DDIMScheduler, + ipndm=IPNDMScheduler, + k_euler=EulerDiscreteScheduler, + k_euler_a=EulerAncestralDiscreteScheduler, + k_lms=LMSDiscreteScheduler, + pndm=PNDMScheduler, ) - trained_betas = getattr(self.model.scheduler, 'trained_betas') - if trained_betas is not None: - scheduler_args.update(trained_betas=trained_betas) - if self.sampler_name == 'plms': - raise NotImplementedError("What's the diffusers implementation of PLMS?") - elif self.sampler_name == 'ddim': - self.sampler = DDIMScheduler(**scheduler_args) - elif self.sampler_name == 'k_dpm_2_a': - raise NotImplementedError("no diffusers implementation of dpm_2 samplers") - elif self.sampler_name == 'k_dpm_2': - raise NotImplementedError("no diffusers implementation of dpm_2 samplers") - elif self.sampler_name == 'k_euler_a': - self.sampler = EulerAncestralDiscreteScheduler(**scheduler_args) - elif self.sampler_name == 'k_euler': - self.sampler = EulerDiscreteScheduler(**scheduler_args) - elif self.sampler_name == 'k_heun': - raise NotImplementedError("no diffusers implementation of Heun's sampler") - elif self.sampler_name == 'k_lms': - self.sampler = LMSDiscreteScheduler(**scheduler_args) + + if self.sampler_name in scheduler_map: + sampler_class = scheduler_map[self.sampler_name] + msg = f'>> Setting Sampler to {self.sampler_name} ({sampler_class.__name__})' + self.sampler = sampler_class.from_config( + self.model_cache.model_name_or_path(self.model_name), + subfolder="scheduler" + ) + elif self.sampler_name in higher_order_samplers: + msg = (f'>> Unsupported Sampler: {self.sampler_name} ' + f'— diffusers does not yet support higher-order samplers, ' + f'Defaulting to {default}') + self.sampler = default else: - msg = f'>> Unsupported Sampler: {self.sampler_name}, Defaulting to {default}' + msg = (f'>> Unsupported Sampler: {self.sampler_name} ' + f'Defaulting to {default}') + self.sampler = default print(msg) diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index e746e5bab3c..a58617a511a 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -108,6 +108,9 @@ 'k_heun', 'k_lms', 'plms', + # diffusers: + "ipndm", + "pndm", ] PRECISION_CHOICES = [ diff --git a/ldm/invoke/generator/txt2img.py b/ldm/invoke/generator/txt2img.py index 15650ccbd98..219e8131724 100644 --- a/ldm/invoke/generator/txt2img.py +++ b/ldm/invoke/generator/txt2img.py @@ -24,7 +24,7 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, uc, c, extra_conditioning_info = conditioning pipeline = self.model - # TODO: customize a new pipeline for the given sampler (Scheduler) + pipeline.scheduler = sampler def make_image(x_T) -> PIL.Image.Image: # FIXME: restore free_gpu_mem functionality diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 6d8ce990f05..8ad8e3913b1 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -318,6 +318,18 @@ def _load_diffusers_model(self, mconfig): return pipeline, width, height, model_hash + def model_name_or_path(self, model_name:str) -> str | Path: + if model_name not in self.config: + raise ValueError(f'"{model_name}" is not a known model name. 
Please check your models.yaml file') + + mconfig = self.config[model_name] + if 'repo_name' in mconfig: + return mconfig['repo_name'] + elif 'path' in mconfig: + return Path(mconfig['path']) + else: + raise ValueError("Model config must specify either repo_name or path.") + def offload_model(self, model_name:str): ''' Offload the indicated model to CPU. Will call From 1e98f4bafce68ab71d46f48f4e1340e58f14300d Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Thu, 10 Nov 2022 15:27:25 -0800 Subject: [PATCH 009/199] diffusers: let the scheduler do its scaling of the initial latents Remove IPNDM scheduler; it is not behaving. --- backend/modules/parameters.py | 1 - ldm/invoke/args.py | 1 - ldm/invoke/generator/diffusers_pipeline.py | 2 ++ 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/modules/parameters.py b/backend/modules/parameters.py index 130d04d64ac..9055297671d 100644 --- a/backend/modules/parameters.py +++ b/backend/modules/parameters.py @@ -13,7 +13,6 @@ "k_lms", "plms", # diffusers: - "ipndm", "pndm", ] diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index a58617a511a..f1b5e9029d8 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -109,7 +109,6 @@ 'k_lms', 'plms', # diffusers: - "ipndm", "pndm", ] diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index c9fff12e0cb..bad21b09565 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -231,6 +231,8 @@ def generate_from_embeddings(self, latents: torch.Tensor, text_embeddings: torch run_id: str = None, **extra_step_kwargs): if run_id is None: run_id = secrets.token_urlsafe(self.ID_LENGTH) + # scale the initial noise by the standard deviation required by the scheduler + latents *= self.scheduler.init_noise_sigma yield PipelineIntermediateState(run_id=run_id, step=-1, timestep=self.scheduler.num_train_timesteps, latents=latents) # NOTE: Depends on scheduler being already initialized! 
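Note on the init-noise scaling added above: diffusers follows the convention that the caller draws unit-variance Gaussian noise and multiplies it by scheduler.init_noise_sigma before the first step. That is a no-op for DDIM and PNDM (init_noise_sigma is 1.0) and a scale-up to the scheduler's starting sigma for the k-style discrete schedulers. A minimal sketch of the convention, outside the InvokeAI pipeline and assuming the usual Stable Diffusion beta settings:

    import torch
    from diffusers import DDIMScheduler, LMSDiscreteScheduler

    betas = dict(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear")
    for scheduler_class in (DDIMScheduler, LMSDiscreteScheduler):
        scheduler = scheduler_class(**betas)
        scheduler.set_timesteps(30)
        noise = torch.randn(1, 4, 64, 64)             # unit-variance starting noise
        latents = noise * scheduler.init_noise_sigma  # the scaling the patch above adds to generate_from_embeddings
        print(scheduler_class.__name__, float(scheduler.init_noise_sigma))

The per-step scale_model_input() call is a separate concern and remains inside the pipeline's step() method.
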
From 05a1d68ef4d6ea39688c8fd8d4eb30d726eb6b94 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Thu, 10 Nov 2022 15:28:22 -0800 Subject: [PATCH 010/199] web server: update image_progress callback for diffusers data --- backend/invoke_ai_web_server.py | 5 ++++- ldm/invoke/generator/diffusers_pipeline.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/invoke_ai_web_server.py b/backend/invoke_ai_web_server.py index ac8edc6a324..d525cf87f89 100644 --- a/backend/invoke_ai_web_server.py +++ b/backend/invoke_ai_web_server.py @@ -19,6 +19,7 @@ from threading import Event from ldm.invoke.args import Args, APP_ID, APP_VERSION, calculate_init_img_hash +from ldm.invoke.generator.diffusers_pipeline import PipelineIntermediateState from ldm.invoke.pngwriter import PngWriter, retrieve_metadata from ldm.invoke.prompt_parser import split_weighted_subprompts from ldm.invoke.generator.inpaint import infill_methods @@ -847,7 +848,9 @@ def generate_images( init_img_path = self.get_image_path_from_url(init_img_url) generation_parameters["init_img"] = Image.open(init_img_path).convert('RGB') - def image_progress(sample, step): + def image_progress(progress_state: PipelineIntermediateState): + step = progress_state.step + sample = progress_state.latents if self.canceled.is_set(): raise CanceledException diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index bad21b09565..20caecada0b 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -188,7 +188,7 @@ def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, result = None for result in self.generate_from_embeddings( latents, text_embeddings, guidance_scale, run_id, **extra_step_kwargs): - if callback is not None: + if callback is not None and isinstance(result, PipelineIntermediateState): callback(result) if result is None: raise AssertionError("why was that an empty generator?") From e99faeb8d7d633c8a7017b7b6f26002127f3920e Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Fri, 11 Nov 2022 13:16:09 -0800 Subject: [PATCH 011/199] diffusers: restore prompt weighting feature --- ldm/invoke/generator/diffusers_pipeline.py | 29 ++++++++-------------- ldm/modules/encoders/modules.py | 14 +++++++---- 2 files changed, 19 insertions(+), 24 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 20caecada0b..6846ff84567 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -1,5 +1,4 @@ import secrets -import warnings from dataclasses import dataclass from typing import List, Optional, Union, Callable @@ -11,6 +10,8 @@ from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer +from ldm.modules.encoders.modules import WeightedFrozenCLIPEmbedder + @dataclass class PipelineIntermediateState: @@ -76,6 +77,11 @@ def __init__( safety_checker=safety_checker, feature_extractor=feature_extractor, ) + # InvokeAI's interface for text embeddings and whatnot + self.clip_embedder = WeightedFrozenCLIPEmbedder( + tokenizer=self.tokenizer, + transformer=self.text_encoder + ) def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): r""" @@ -312,27 +318,12 @@ def get_text_embeddings(self, text_embeddings = 
torch.cat([uncond_embeddings, text_embeddings]) return text_embeddings - def get_learned_conditioning(self, c: List[List[str]], return_tokens=True, - fragment_weights=None, **kwargs): + @torch.inference_mode() + def get_learned_conditioning(self, c: List[List[str]], *, return_tokens=True, fragment_weights=None): """ Compatibility function for ldm.models.diffusion.ddpm.LatentDiffusion. """ - assert return_tokens == True - if fragment_weights: - weights = fragment_weights[0] - if any(weight != 1.0 for weight in weights): - warnings.warn(f"fragment weights not implemented yet {fragment_weights}", stacklevel=2) - - if kwargs: - warnings.warn(f"unsupported args {kwargs}", stacklevel=2) - - text_fragments = c[0] - text_input = self._tokenize(text_fragments) - - with torch.inference_mode(): - token_ids = text_input.input_ids.to(self.text_encoder.device) - text_embeddings = self.text_encoder(token_ids)[0] - return text_embeddings, text_input.input_ids + return self.clip_embedder.encode(c, return_tokens=return_tokens, fragment_weights=fragment_weights) @torch.inference_mode() def _tokenize(self, prompt: Union[str, List[str]]): diff --git a/ldm/modules/encoders/modules.py b/ldm/modules/encoders/modules.py index be9f88cdd2e..ca9a027f138 100644 --- a/ldm/modules/encoders/modules.py +++ b/ldm/modules/encoders/modules.py @@ -239,22 +239,22 @@ class FrozenCLIPEmbedder(AbstractEncoder): def __init__( self, version='openai/clip-vit-large-patch14', - device=choose_torch_device(), max_length=77, + tokenizer=None, + transformer=None, ): super().__init__() cache = os.path.join(Globals.root,'models',version) - self.tokenizer = CLIPTokenizer.from_pretrained( + self.tokenizer = tokenizer or CLIPTokenizer.from_pretrained( version, cache_dir=cache, local_files_only=True ) - self.transformer = CLIPTextModel.from_pretrained( + self.transformer = transformer or CLIPTextModel.from_pretrained( version, cache_dir=cache, local_files_only=True ) - self.device = device self.max_length = max_length self.freeze() @@ -460,6 +460,10 @@ def forward(self, text, **kwargs): def encode(self, text, **kwargs): return self(text, **kwargs) + @property + def device(self): + return self.transformer.device + class WeightedFrozenCLIPEmbedder(FrozenCLIPEmbedder): fragment_weights_key = "fragment_weights" @@ -548,7 +552,7 @@ def forward(self, text: list, **kwargs): #print(f"assembled tokens for '{fragments}' into tensor of shape {lerped_embeddings.shape}") - # append to batch + # append to batch batch_z = lerped_embeddings.unsqueeze(0) if batch_z is None else torch.cat([batch_z, lerped_embeddings.unsqueeze(0)], dim=1) batch_tokens = tokens.unsqueeze(0) if batch_tokens is None else torch.cat([batch_tokens, tokens.unsqueeze(0)], dim=1) From 97dd4a25898a0bc5faffbec7615ccc22f3fcd2fc Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Fri, 11 Nov 2022 13:17:36 -0800 Subject: [PATCH 012/199] diffusers: fix set-sampler error following model switch --- ldm/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ldm/generate.py b/ldm/generate.py index 9300458611c..d99954c2497 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -864,8 +864,8 @@ def set_model(self,model_name): self.embedding_path, self.precision == 'float32' or self.precision == 'autocast' ) - self._set_sampler() self.model_name = model_name + self._set_sampler() # requires self.model_name to be set first return self.model def load_concepts(self,concepts:list[str]): From b6b1a8d97ca55f179f0080e331328daa8bcb16ec Mon Sep 17 
00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Fri, 11 Nov 2022 16:25:27 -0800 Subject: [PATCH 013/199] diffusers: use InvokeAIDiffuserComponent for conditioning --- ldm/invoke/generator/diffusers_pipeline.py | 160 +++++++++--------- ldm/invoke/generator/txt2img.py | 12 +- .../diffusion/shared_invokeai_diffusion.py | 1 + 3 files changed, 83 insertions(+), 90 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 6846ff84567..0bd096ff6b2 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -1,4 +1,5 @@ import secrets +import warnings from dataclasses import dataclass from typing import List, Optional, Union, Callable @@ -10,6 +11,7 @@ from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer +from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent from ldm.modules.encoders.modules import WeightedFrozenCLIPEmbedder @@ -82,6 +84,7 @@ def __init__( tokenizer=self.tokenizer, transformer=self.text_encoder ) + self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward) def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): r""" @@ -128,72 +131,36 @@ def disable_xformers_memory_efficient_attention(self): """ self.unet.set_use_memory_efficient_attention_xformers(False) - @torch.no_grad() - def __call__( - self, - prompt: Union[str, List[str]], - height: Optional[int] = 512, - width: Optional[int] = 512, - num_inference_steps: Optional[int] = 50, - guidance_scale: Optional[float] = 7.5, - generator: Optional[torch.Generator] = None, - latents: Optional[torch.FloatTensor] = None, - callback: Optional[Callable[[PipelineIntermediateState], None]] = None, - **extra_step_kwargs, - ): + def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, + text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, + guidance_scale: float, + *, callback: Callable[[PipelineIntermediateState], None]=None, + extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo=None, + run_id=None, + **extra_step_kwargs) -> StableDiffusionPipelineOutput: r""" Function invoked when calling the pipeline for generation. - Args: - prompt (`str` or `List[str]`): - The prompt or prompts to guide the image generation. - height (`int`, *optional*, defaults to 512): - The height in pixels of the generated image. - width (`int`, *optional*, defaults to 512): - The width in pixels of the generated image. - num_inference_steps (`int`, *optional*, defaults to 50): - The number of denoising steps. More denoising steps usually lead to a higher quality image at the - expense of slower inference. - guidance_scale (`float`, *optional*, defaults to 7.5): - Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). - `guidance_scale` is defined as `w` of equation 2. of [Imagen - Paper](https://arxiv.org/pdf/2205.11487.pdf). Guidance scale is enabled by setting `guidance_scale > - 1`. Higher guidance scale encourages to generate images that are closely linked to the text `prompt`, - usually at the expense of lower image quality. - generator (`torch.Generator`, *optional*): - A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation - deterministic. 
- latents (`torch.FloatTensor`, *optional*): - Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image - generation. Can be used to tweak the same generation with different prompts. If not provided, a latents - tensor will ge generated by sampling using the supplied random `generator`. - - Returns: - [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`: - [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] if `return_dict` is True, otherwise a `tuple. - When returning a tuple, the first element is a list with the generated images, and the second element is a - list of `bool`s denoting whether the corresponding generated image likely represents "not-safe-for-work" - (nsfw) content, according to the `safety_checker`. + :param latents: Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for + image generation. Can be used to tweak the same generation with different prompts. + :param num_inference_steps: The number of denoising steps. More denoising steps usually lead to a higher quality + image at the expense of slower inference. + :param text_embeddings: + :param guidance_scale: Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). + `guidance_scale` is defined as `w` of equation 2. of [Imagen Paper](https://arxiv.org/pdf/2205.11487.pdf). + Guidance scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate + images that are closely linked to the text `prompt`, usually at the expense of lower image quality. + :param callback: + :param extra_conditioning_info: + :param run_id: + :param extra_step_kwargs: """ - result = None - for result in self.generate( - prompt, height=height, width=width, num_inference_steps=num_inference_steps, - guidance_scale=guidance_scale, generator=generator, latents=latents, - **extra_step_kwargs): - if callback is not None: - callback(result) - if result is None: - raise AssertionError("why was that an empty generator?") - return result - - def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, - text_embeddings: torch.Tensor, guidance_scale: float, - *, callback: Callable[[PipelineIntermediateState], None]=None, run_id=None, - **extra_step_kwargs) -> StableDiffusionPipelineOutput: - self.scheduler.set_timesteps(num_inference_steps) + self.scheduler.set_timesteps(num_inference_steps, device=self.unet.device) result = None for result in self.generate_from_embeddings( - latents, text_embeddings, guidance_scale, run_id, **extra_step_kwargs): + latents, text_embeddings, unconditioned_embeddings, guidance_scale, + extra_conditioning_info=extra_conditioning_info, + run_id=run_id, **extra_step_kwargs): if callback is not None and isinstance(result, PipelineIntermediateState): callback(result) if result is None: @@ -226,24 +193,40 @@ def generate( # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` # corresponds to doing no classifier free guidance. 
do_classifier_free_guidance = guidance_scale > 1.0 - text_embeddings = self.get_text_embeddings(prompt, opposing_prompt, do_classifier_free_guidance, batch_size)\ + text_embeddings, unconditioned_embeddings = self.get_text_embeddings(prompt, opposing_prompt, do_classifier_free_guidance, batch_size)\ .to(self.unet.device) self.scheduler.set_timesteps(num_inference_steps) latents = self.prepare_latents(latents, batch_size, height, width, generator, self.unet.dtype) - yield from self.generate_from_embeddings(latents, text_embeddings, guidance_scale, run_id, **extra_step_kwargs) - - def generate_from_embeddings(self, latents: torch.Tensor, text_embeddings: torch.Tensor, guidance_scale: float, - run_id: str = None, **extra_step_kwargs): + yield from self.generate_from_embeddings(latents, text_embeddings, unconditioned_embeddings, + guidance_scale, run_id=run_id, **extra_step_kwargs) + + def generate_from_embeddings( + self, + latents: torch.Tensor, + text_embeddings: torch.Tensor, + unconditioned_embeddings: torch.Tensor, + guidance_scale: float, + *, + run_id: str = None, + extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, + **extra_step_kwargs): if run_id is None: run_id = secrets.token_urlsafe(self.ID_LENGTH) # scale the initial noise by the standard deviation required by the scheduler latents *= self.scheduler.init_noise_sigma yield PipelineIntermediateState(run_id=run_id, step=-1, timestep=self.scheduler.num_train_timesteps, latents=latents) + + batch_size = latents.shape[0] + batched_t = torch.full((batch_size,), self.scheduler.timesteps[0], + dtype=self.scheduler.timesteps.dtype, device=self.unet.device) # NOTE: Depends on scheduler being already initialized! for i, t in enumerate(self.progress_bar(self.scheduler.timesteps)): - step_output = self.step(t, latents, guidance_scale, text_embeddings, **extra_step_kwargs) + batched_t.fill_(t) + step_output = self.step(batched_t, latents, guidance_scale, + text_embeddings, unconditioned_embeddings, + i, **extra_step_kwargs) latents = step_output.prev_sample predicted_original = getattr(step_output, 'pred_original_sample', None) yield PipelineIntermediateState(run_id=run_id, step=i, timestep=int(t), latents=latents, @@ -257,23 +240,30 @@ def generate_from_embeddings(self, latents: torch.Tensor, text_embeddings: torch yield self.check_for_safety(output) @torch.inference_mode() - def step(self, t, latents: torch.Tensor, guidance_scale, text_embeddings: torch.Tensor, **extra_step_kwargs): - do_classifier_free_guidance = guidance_scale > 1.0 + def step(self, t: torch.Tensor, latents: torch.Tensor, guidance_scale: float, + text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, + step_index:int | None = None, + **extra_step_kwargs): + # invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value + timestep = t[0] - # expand the latents if we are doing classifier free guidance - latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents - latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + # TODO: should this scaling happen here or inside self._unet_forward? + # i.e. 
before or after passing it to InvokeAIDiffuserComponent + latent_model_input = self.scheduler.scale_model_input(latents, timestep) # predict the noise residual - noise_pred = self.unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample - - # perform guidance - if do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond) + noise_pred = self.invokeai_diffuser.do_diffusion_step( + latent_model_input, t, + unconditioned_embeddings, text_embeddings, + guidance_scale, + step_index=step_index) # compute the previous noisy sample x_t -> x_t-1 - return self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs) + return self.scheduler.step(noise_pred, timestep, latents, **extra_step_kwargs) + + def _unet_forward(self, latents, t, text_embeddings): + # predict the noise residual + return self.unet(latents, t, encoder_hidden_states=text_embeddings).sample @torch.inference_mode() def check_for_safety(self, output): @@ -310,13 +300,10 @@ def get_text_embeddings(self, # opposing prompt defaults to blank caption for everything in the batch text_anti_input = self._tokenize(opposing_prompt or [""] * batch_size) uncond_embeddings = self.text_encoder(text_anti_input.input_ids)[0] + else: + uncond_embeddings = None - # For classifier free guidance, we need to do two forward passes. - # Here we concatenate the unconditional and text embeddings into a single batch - # to avoid doing two forward passes - # FIXME: assert these two are the same size - text_embeddings = torch.cat([uncond_embeddings, text_embeddings]) - return text_embeddings + return text_embeddings, uncond_embeddings @torch.inference_mode() def get_learned_conditioning(self, c: List[List[str]], *, return_tokens=True, fragment_weights=None): @@ -325,6 +312,11 @@ def get_learned_conditioning(self, c: List[List[str]], *, return_tokens=True, fr """ return self.clip_embedder.encode(c, return_tokens=return_tokens, fragment_weights=fragment_weights) + @property + def cond_stage_model(self): + warnings.warn("legacy compatibility layer", DeprecationWarning) + return self.clip_embedder + @torch.inference_mode() def _tokenize(self, prompt: Union[str, List[str]]): return self.tokenizer( diff --git a/ldm/invoke/generator/txt2img.py b/ldm/invoke/generator/txt2img.py index 219e8131724..f9af1ac3ed7 100644 --- a/ldm/invoke/generator/txt2img.py +++ b/ldm/invoke/generator/txt2img.py @@ -5,6 +5,7 @@ import torch from .base import Generator +from .diffusers_pipeline import StableDiffusionGeneratorPipeline class Txt2Img(Generator): @@ -23,7 +24,8 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, self.perlin = perlin uc, c, extra_conditioning_info = conditioning - pipeline = self.model + # noinspection PyTypeChecker + pipeline: StableDiffusionGeneratorPipeline = self.model pipeline.scheduler = sampler def make_image(x_T) -> PIL.Image.Image: @@ -31,16 +33,14 @@ def make_image(x_T) -> PIL.Image.Image: # if self.free_gpu_mem and self.model.model.device != self.model.device: # self.model.model.to(self.model.device) - # FIXME: how the embeddings are combined should be internal to the pipeline - combined_text_embeddings = torch.cat([uc, c]) - pipeline_output = pipeline.image_from_embeddings( latents=x_T, num_inference_steps=steps, - text_embeddings=combined_text_embeddings, + text_embeddings=c, + unconditioned_embeddings=uc, guidance_scale=cfg_scale, callback=step_callback, - # TODO: extra_conditioning_info = 
extra_conditioning_info, + extra_conditioning_info=extra_conditioning_info, # TODO: eta = ddim_eta, # TODO: threshold = threshold, ) diff --git a/ldm/models/diffusion/shared_invokeai_diffusion.py b/ldm/models/diffusion/shared_invokeai_diffusion.py index d748c9a6735..d6ec1ea44bf 100644 --- a/ldm/models/diffusion/shared_invokeai_diffusion.py +++ b/ldm/models/diffusion/shared_invokeai_diffusion.py @@ -35,6 +35,7 @@ def __init__(self, model, model_forward_callback: :param model: the unet model to pass through to cross attention control :param model_forward_callback: a lambda with arguments (x, sigma, conditioning_to_apply). will be called repeatedly. most likely, this should simply call model.forward(x, sigma, conditioning) """ + self.conditioning = None self.model = model self.model_forward_callback = model_forward_callback self.cross_attention_control_context = None From 95db6e80ee8c20196e637a2d1aa1421d3b931044 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 12 Nov 2022 10:10:46 -0800 Subject: [PATCH 014/199] cross_attention_control: stub (no-op) implementations for diffusers --- ldm/invoke/generator/diffusers_pipeline.py | 7 ++++ .../diffusion/cross_attention_control.py | 40 +++++++++++++------ 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 0bd096ff6b2..861bf22a7a7 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -213,6 +213,13 @@ def generate_from_embeddings( **extra_step_kwargs): if run_id is None: run_id = secrets.token_urlsafe(self.ID_LENGTH) + + if extra_conditioning_info is not None and extra_conditioning_info.wants_cross_attention_control: + self.invokeai_diffuser.setup_cross_attention_control(extra_conditioning_info, + step_count=len(self.scheduler.timesteps)) + else: + self.invokeai_diffuser.remove_cross_attention_control() + # scale the initial noise by the standard deviation required by the scheduler latents *= self.scheduler.init_noise_sigma yield PipelineIntermediateState(run_id=run_id, step=-1, timestep=self.scheduler.num_train_timesteps, diff --git a/ldm/models/diffusion/cross_attention_control.py b/ldm/models/diffusion/cross_attention_control.py index a4362e07704..3284f990cef 100644 --- a/ldm/models/diffusion/cross_attention_control.py +++ b/ldm/models/diffusion/cross_attention_control.py @@ -1,4 +1,5 @@ import enum +import warnings from typing import Optional import torch @@ -244,19 +245,32 @@ def attention_slice_wrangler(module, suggested_attention_slice:torch.Tensor, dim return attention_slice - for name, module in unet.named_modules(): - module_name = type(module).__name__ - if module_name == "CrossAttention": - module.identifier = name - module.set_attention_slice_wrangler(attention_slice_wrangler) - module.set_slicing_strategy_getter(lambda module, module_identifier=name: \ - context.get_slicing_strategy(module_identifier)) + cross_attention_modules = [(name, module) for (name, module) in unet.named_modules() + if type(module).__name__ == "CrossAttention"] + for identifier, module in cross_attention_modules: + module.identifier = identifier + try: + module.set_attention_slice_wrangler(attention_slice_wrangler) + module.set_slicing_strategy_getter( + lambda module: context.get_slicing_strategy(identifier) + ) + except AttributeError as e: + if e.name == 'set_attention_slice_wrangler': + warnings.warn(f"TODO: implement for {type(module)}") # TODO + else: + 
raise def remove_attention_function(unet): - # clear wrangler callback - for name, module in unet.named_modules(): - module_name = type(module).__name__ - if module_name == "CrossAttention": - module.set_attention_slice_wrangler(None) - module.set_slicing_strategy_getter(None) + cross_attention_modules = [module for (_, module) in unet.named_modules() + if type(module).__name__ == "CrossAttention"] + for module in cross_attention_modules: + try: + # clear wrangler callback + module.set_attention_slice_wrangler(None) + module.set_slicing_strategy_getter(None) + except AttributeError as e: + if e.name == 'set_attention_slice_wrangler': + warnings.warn(f"TODO: implement for {type(module)}") # TODO + else: + raise From 01ff1cff587861941f9822781bcfe80290bcc880 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 12 Nov 2022 10:11:50 -0800 Subject: [PATCH 015/199] model_cache: let offload_model work with DiffusionPipeline, sorta. --- ldm/invoke/model_cache.py | 12 ++++++++---- ldm/modules/encoders/modules.py | 16 ++++++++++++---- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 8ad8e3913b1..181c4d39b46 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -4,6 +4,7 @@ below a preset minimum, the least recently used model will be cleared and loaded from disk when next needed. ''' +import warnings from pathlib import Path import torch @@ -413,10 +414,13 @@ def _invalidate_cached_model(self,model_name:str) -> None: def _model_to_cpu(self,model): if self.device != 'cpu': - model.cond_stage_model.device = 'cpu' - model.first_stage_model.to('cpu') - model.cond_stage_model.to('cpu') - model.model.to('cpu') + try: + model.cond_stage_model.device = 'cpu' + model.first_stage_model.to('cpu') + model.cond_stage_model.to('cpu') + model.model.to('cpu') + except AttributeError as e: + warnings.warn(f"TODO: clean up legacy model-management: {e}") return model.to('cpu') else: return model diff --git a/ldm/modules/encoders/modules.py b/ldm/modules/encoders/modules.py index ca9a027f138..cf460519333 100644 --- a/ldm/modules/encoders/modules.py +++ b/ldm/modules/encoders/modules.py @@ -1,5 +1,7 @@ import math import os.path +from typing import Optional + import torch import torch.nn as nn from functools import partial @@ -235,13 +237,15 @@ def encode(self, x): class FrozenCLIPEmbedder(AbstractEncoder): """Uses the CLIP transformer encoder for text (from Hugging Face)""" + tokenizer: CLIPTokenizer + transformer: CLIPTextModel def __init__( self, - version='openai/clip-vit-large-patch14', - max_length=77, - tokenizer=None, - transformer=None, + version:str='openai/clip-vit-large-patch14', + max_length:int=77, + tokenizer:Optional[CLIPTokenizer]=None, + transformer:Optional[CLIPTextModel]=None, ): super().__init__() cache = os.path.join(Globals.root,'models',version) @@ -464,6 +468,10 @@ def encode(self, text, **kwargs): def device(self): return self.transformer.device + @device.setter + def device(self, device): + self.transformer.to(device=device) + class WeightedFrozenCLIPEmbedder(FrozenCLIPEmbedder): fragment_weights_key = "fragment_weights" From aca6d213d77cb25c1ac24b2955d3a071dbe4c7d7 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sun, 13 Nov 2022 15:19:38 -0800 Subject: [PATCH 016/199] models.yaml.example: add diffusers-format model, set as default --- configs/models.yaml.example | 5 +++++ 1 file changed, 5 insertions(+) diff --git 
a/configs/models.yaml.example b/configs/models.yaml.example index 31401cd02d8..1eb2781f4a8 100644 --- a/configs/models.yaml.example +++ b/configs/models.yaml.example @@ -5,6 +5,11 @@ # model requires a model config file, a weights file, # and the width and height of the images it # was trained on. +diffusers-1.5: + description: Diffusers version of Stable Diffusion version 1.5 + format: diffusers + repo_name: runwayml/stable-diffusion-v1-5 + default: true stable-diffusion-1.5: description: The newest Stable Diffusion version 1.5 weight file (4.27 GB) weights: models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt From c5274e6e50078497edafc20f5c8a0b7e8a935ba5 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sun, 13 Nov 2022 15:36:51 -0800 Subject: [PATCH 017/199] test-invoke-conda: use diffusers-format model test-invoke-conda: put huggingface-token where the library can use it --- .github/workflows/test-invoke-conda.yml | 71 +++++++++++++------------ 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml index 41f72c5a84d..009df909e00 100644 --- a/.github/workflows/test-invoke-conda.yml +++ b/.github/workflows/test-invoke-conda.yml @@ -1,17 +1,16 @@ name: Test invoke.py -on: - push: - branches: - - 'main' - - 'development' - - 'fix-gh-actions-fork' - pull_request: - branches: - - 'main' - - 'development' +on: [push, pull_request] jobs: matrix: + # Run on: + # - pull requests + # - pushes to forks (will run in the forked project with that fork's secrets) + # - pushes to branches that are *not* pull requests + if: | + github.event_name == 'pull_request' + || github.repository != 'invoke-ai/InvokeAI' + || github.ref_protected strategy: matrix: stable-diffusion-model: @@ -30,15 +29,19 @@ jobs: - environment-yaml: environment-mac.yml os: macos-12 default-shell: bash -l {0} - - stable-diffusion-model: stable-diffusion-1.5 - stable-diffusion-model-url: https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.ckpt - stable-diffusion-model-dl-path: models/ldm/stable-diffusion-v1 - stable-diffusion-model-dl-name: v1-5-pruned-emaonly.ckpt - name: ${{ matrix.environment-yaml }} on ${{ matrix.os }} + # - stable-diffusion-model: https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/resolve/main/sd-v1-4.ckpt + # stable-diffusion-model-dl-path: models/ldm/stable-diffusion-v1/sd-v1-4.ckpt + # stable-diffusion-model-switch: stable-diffusion-1.4 + - stable-diffusion-model: https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.ckpt + stable-diffusion-model-dl-path: models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt + stable-diffusion-model-switch: diffusers-1.5 + name: ${{ matrix.os }} with ${{ matrix.stable-diffusion-model-switch }} runs-on: ${{ matrix.os }} env: CONDA_ENV_NAME: invokeai INVOKEAI_ROOT: '${{ github.workspace }}/invokeai' + PYTHONUNBUFFERED: 1 + HAVE_SECRETS: ${{ secrets.HUGGINGFACE_TOKEN != '' }} defaults: run: shell: ${{ matrix.default-shell }} @@ -55,6 +58,19 @@ jobs: - name: create environment.yml run: cp "environments-and-requirements/${{ matrix.environment-yaml }}" environment.yml + - name: Use Cached Stable Diffusion Model + id: cache-sd-model + uses: actions/cache@v3 + env: + cache-name: huggingface-${{ matrix.stable-diffusion-model-switch }} + with: + path: ~/.cache/huggingface + key: ${{ env.cache-name }} + + - name: Check model availability + if: 
steps.cache-sd-model.outputs.cache-hit != true && env.HAVE_SECRETS != 'true' + run: echo -e '\a ⛔ GitHub model cache not found, and no HUGGINGFACE_TOKEN is available. Will not be able to load Stable Diffusion.' ; exit 1 + - name: Use cached conda packages id: use-cached-conda-packages uses: actions/cache@v3 @@ -82,28 +98,13 @@ jobs: if: ${{ github.ref != 'refs/heads/main' && github.ref != 'refs/heads/development' }} run: echo "TEST_PROMPTS=tests/validate_pr_prompt.txt" >> $GITHUB_ENV - - name: Use Cached Stable Diffusion Model - id: cache-sd-model - uses: actions/cache@v3 - env: - cache-name: cache-${{ matrix.stable-diffusion-model }} - with: - path: ${{ env.INVOKEAI_ROOT }}/${{ matrix.stable-diffusion-model-dl-path }} - key: ${{ env.cache-name }} - - - name: Download ${{ matrix.stable-diffusion-model }} - id: download-stable-diffusion-model - if: ${{ steps.cache-sd-model.outputs.cache-hit != 'true' }} - run: | - mkdir -p "${{ env.INVOKEAI_ROOT }}/${{ matrix.stable-diffusion-model-dl-path }}" - curl \ - -H "Authorization: Bearer ${{ secrets.HUGGINGFACE_TOKEN }}" \ - -o "${{ env.INVOKEAI_ROOT }}/${{ matrix.stable-diffusion-model-dl-path }}/${{ matrix.stable-diffusion-model-dl-name }}" \ - -L ${{ matrix.stable-diffusion-model-url }} - - - name: run configure_invokeai.py + - name: run preload_models.py id: run-preload-models run: | + if [ "${HAVE_SECRETS}" == true ] ; then + mkdir -p ~/.huggingface + echo -n '${{ secrets.HUGGINGFACE_TOKEN }}' > ~/.huggingface/token + fi python scripts/configure_invokeai.py --no-interactive --yes - name: cat ~/.invokeai From 94f57345e3b8637a6596e5108f6e16482067e1f8 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sun, 13 Nov 2022 23:35:33 -0800 Subject: [PATCH 018/199] environment-mac: upgrade to diffusers 0.7 (from 0.6) this was already done for linux; mac must have been lost in the merge. --- environments-and-requirements/environment-mac.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments-and-requirements/environment-mac.yml b/environments-and-requirements/environment-mac.yml index ca1dcc78868..95f714a73d6 100644 --- a/environments-and-requirements/environment-mac.yml +++ b/environments-and-requirements/environment-mac.yml @@ -22,7 +22,7 @@ dependencies: - albumentations=1.2 - coloredlogs=15.0 - - diffusers=0.6 + - diffusers~=0.7 - einops=0.3 - eventlet - grpcio=1.46 From a6a766dfa2bdbfff35759546453fada09b4ced76 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Tue, 15 Nov 2022 22:04:38 -0800 Subject: [PATCH 019/199] preload_models: explicitly load diffusers models In non-interactive mode too, as long as you're logged in. 
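Being logged in here only means the Hugging Face Hub client can find a token. The workflow writes the repository secret to ~/.huggingface/token (visible in the diff below), and the test step later runs with the HF_*_OFFLINE variables set so everything must come from the local cache. A rough sketch of the underlying mechanism, with a placeholder token, not the actual preload_models.py code:

    from huggingface_hub import HfFolder
    from diffusers import StableDiffusionPipeline

    HfFolder.save_token("hf_xxx")  # placeholder; same effect as writing ~/.huggingface/token
    # the first load authenticates with that token and fills ~/.cache/huggingface,
    # which is the directory the actions/cache step preserves between CI runs
    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", use_auth_token=True
    )
    # later runs with HF_HUB_OFFLINE=1 / TRANSFORMERS_OFFLINE=1 in the environment are
    # then served from that cache and fail loudly if the cache is missing
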
--- .github/workflows/test-invoke-conda.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml index 009df909e00..b690f190454 100644 --- a/.github/workflows/test-invoke-conda.yml +++ b/.github/workflows/test-invoke-conda.yml @@ -105,7 +105,9 @@ jobs: mkdir -p ~/.huggingface echo -n '${{ secrets.HUGGINGFACE_TOKEN }}' > ~/.huggingface/token fi - python scripts/configure_invokeai.py --no-interactive --yes + python scripts/preload_models.py \ + --no-interactive \ + --full-precision # can't use fp16 weights without a GPU - name: cat ~/.invokeai id: cat-invokeai @@ -113,6 +115,10 @@ jobs: - name: Run the tests id: run-tests + env: + HF_HUB_OFFLINE: 1 + HF_DATASETS_OFFLINE: 1 + TRANSFORMERS_OFFLINE: 1 run: | time python scripts/invoke.py \ --no-patchmatch \ From f3f6213b972867df13b41406a6b16e770d521052 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Mon, 21 Nov 2022 16:46:32 -0800 Subject: [PATCH 020/199] fix(model_cache): don't check `model.config` in diffusers format clean-up from recent merge. --- ldm/invoke/model_cache.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 181c4d39b46..199ef4307d3 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -4,29 +4,28 @@ below a preset minimum, the least recently used model will be cleared and loaded from disk when next needed. ''' +import gc +import hashlib +import io +import os +import sys +import time +import traceback import warnings from pathlib import Path import torch -import os -import io -import time -import gc -import hashlib -import psutil -import sys import transformers -import traceback import textwrap import contextlib from typing import Union from omegaconf import OmegaConf from omegaconf.errors import ConfigAttributeError +from picklescan.scanner import scan_file_path from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline -from ldm.util import instantiate_from_config, ask_user from ldm.invoke.globals import Globals -from picklescan.scanner import scan_file_path +from ldm.util import instantiate_from_config, ask_user DEFAULT_MAX_MODELS=2 @@ -240,6 +239,13 @@ def _load_ckpt_model(self, mconfig): width = mconfig.width height = mconfig.height + if not os.path.isabs(config): + config = os.path.join(Globals.root,config) + if not os.path.isabs(weights): + weights = os.path.normpath(os.path.join(Globals.root,weights)) + # scan model + self._scan_model(model_name, weights) + c = OmegaConf.load(config) with open(weights, 'rb') as f: weight_bytes = f.read() From efbb807905c3005d789bcfe4f2dbaf4241edac29 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 23 Nov 2022 17:38:31 -0800 Subject: [PATCH 021/199] diffusers integration: support img2img --- ldm/invoke/generator/diffusers_pipeline.py | 74 +++++++++++++++++- ldm/invoke/generator/img2img.py | 89 +++++++++------------- 2 files changed, 106 insertions(+), 57 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 861bf22a7a7..911de67601a 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -3,10 +3,12 @@ from dataclasses import dataclass from typing import List, Optional, Union, Callable +import PIL.Image import torch from diffusers.models 
import AutoencoderKL, UNet2DConditionModel from diffusers.pipeline_utils import DiffusionPipeline from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput +from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import preprocess from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer @@ -210,6 +212,7 @@ def generate_from_embeddings( *, run_id: str = None, extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, + timesteps = None, **extra_step_kwargs): if run_id is None: run_id = secrets.token_urlsafe(self.ID_LENGTH) @@ -220,16 +223,19 @@ def generate_from_embeddings( else: self.invokeai_diffuser.remove_cross_attention_control() + if timesteps is None: + timesteps = self.scheduler.timesteps + # scale the initial noise by the standard deviation required by the scheduler latents *= self.scheduler.init_noise_sigma yield PipelineIntermediateState(run_id=run_id, step=-1, timestep=self.scheduler.num_train_timesteps, latents=latents) batch_size = latents.shape[0] - batched_t = torch.full((batch_size,), self.scheduler.timesteps[0], - dtype=self.scheduler.timesteps.dtype, device=self.unet.device) + batched_t = torch.full((batch_size,), timesteps[0], + dtype=timesteps.dtype, device=self.unet.device) # NOTE: Depends on scheduler being already initialized! - for i, t in enumerate(self.progress_bar(self.scheduler.timesteps)): + for i, t in enumerate(self.progress_bar(timesteps)): batched_t.fill_(t) step_output = self.step(batched_t, latents, guidance_scale, text_embeddings, unconditioned_embeddings, @@ -272,6 +278,68 @@ def _unet_forward(self, latents, t, text_embeddings): # predict the noise residual return self.unet(latents, t, encoder_hidden_states=text_embeddings).sample + def img2img_from_embeddings(self, + init_image: Union[torch.FloatTensor, PIL.Image.Image], + strength: float, + num_inference_steps: int, + text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, + guidance_scale: float, + *, callback: Callable[[PipelineIntermediateState], None] = None, + extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, + run_id=None, + noise_func=None, + **extra_step_kwargs) -> StableDiffusionPipelineOutput: + device = self.unet.device + latents_dtype = text_embeddings.dtype + batch_size = 1 + num_images_per_prompt = 1 + + if isinstance(init_image, PIL.Image.Image): + init_image = preprocess(init_image.convert('RGB')) + + self.scheduler.set_timesteps(num_inference_steps, device=device) + timesteps = self._diffusers08_get_timesteps(num_inference_steps, strength) + latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) + + # 6. 
Prepare latent variables + latents = self.prepare_latents_from_image(init_image, latent_timestep, latents_dtype, device, noise_func) + + result = None + for result in self.generate_from_embeddings( + latents, text_embeddings, unconditioned_embeddings, guidance_scale, + extra_conditioning_info=extra_conditioning_info, + timesteps=timesteps, + run_id=run_id, **extra_step_kwargs): + if callback is not None and isinstance(result, PipelineIntermediateState): + callback(result) + if result is None: + raise AssertionError("why was that an empty generator?") + return result + + def prepare_latents_from_image(self, init_image, timestep, dtype, device, noise_func) -> torch.FloatTensor: + # can't quite use upstream StableDiffusionImg2ImgPipeline.prepare_latents + # because we have our own noise function + init_image = init_image.to(device=device, dtype=dtype) + with torch.inference_mode(): + init_latent_dist = self.vae.encode(init_image).latent_dist + init_latents = init_latent_dist.sample() # FIXME: uses torch.randn. make reproducible! + init_latents = 0.18215 * init_latents + + noise = noise_func(init_latents) + + return self.scheduler.add_noise(init_latents, noise, timestep) + + def _diffusers08_get_timesteps(self, num_inference_steps, strength): + # get the original timestep using init_timestep + offset = self.scheduler.config.get("steps_offset", 0) + init_timestep = int(num_inference_steps * strength) + offset + init_timestep = min(init_timestep, num_inference_steps) + + t_start = max(num_inference_steps - init_timestep + offset, 0) + timesteps = self.scheduler.timesteps[t_start:] + + return timesteps + @torch.inference_mode() def check_for_safety(self, output): if not getattr(self, 'feature_extractor') or not getattr(self, 'safety_checker'): diff --git a/ldm/invoke/generator/img2img.py b/ldm/invoke/generator/img2img.py index edcc855a290..6ea41fda33c 100644 --- a/ldm/invoke/generator/img2img.py +++ b/ldm/invoke/generator/img2img.py @@ -2,14 +2,10 @@ ldm.invoke.generator.img2img descends from ldm.invoke.generator ''' -import PIL -import numpy as np import torch -from PIL import Image -from torch import Tensor -from ldm.invoke.devices import choose_autocast from ldm.invoke.generator.base import Generator +from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline class Img2Img(Generator): @@ -25,66 +21,51 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, """ self.perlin = perlin - sampler.make_schedule( - ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False - ) - - if isinstance(init_image, PIL.Image.Image): - init_image = self._image_to_tensor(init_image.convert('RGB')) - - scope = choose_autocast(self.precision) - with scope(self.model.device.type): - self.init_latent = self.model.get_first_stage_encoding( - self.model.encode_first_stage(init_image) - ) # move to latent space - - t_enc = int(strength * steps) uc, c, extra_conditioning_info = conditioning + # noinspection PyTypeChecker + pipeline: StableDiffusionGeneratorPipeline = self.model + pipeline.scheduler = sampler + def make_image(x_T): - # encode (scaled latent) - z_enc = sampler.stochastic_encode( - self.init_latent, - torch.tensor([t_enc]).to(self.model.device), - noise=x_T - ) - # decode it - samples = sampler.decode( - z_enc, - c, - t_enc, - img_callback = step_callback, - unconditional_guidance_scale=cfg_scale, - unconditional_conditioning=uc, - init_latent = self.init_latent, # changes how noising is performed in ksampler - extra_conditioning_info = extra_conditioning_info, - 
all_timesteps_count = steps + # FIXME: use x_T for initial seeded noise + pipeline_output = pipeline.img2img_from_embeddings( + init_image, strength, steps, c, uc, cfg_scale, + extra_conditioning_info=extra_conditioning_info, + noise_func=self.get_noise_like, + callback=step_callback ) - return self.sample_to_image(samples) + return pipeline.numpy_to_pil(pipeline_output.images)[0] return make_image - def get_noise(self,width,height): - device = self.model.device - init_latent = self.init_latent - assert init_latent is not None,'call to get_noise() when init_latent not set' + def get_noise_like(self, like: torch.Tensor): + device = like.device if device.type == 'mps': - x = torch.randn_like(init_latent, device='cpu').to(device) + x = torch.randn_like(like, device='cpu').to(device) else: - x = torch.randn_like(init_latent, device=device) + x = torch.randn_like(like, device=device) if self.perlin > 0.0: - shape = init_latent.shape + shape = like.shape x = (1-self.perlin)*x + self.perlin*self.get_perlin_noise(shape[3], shape[2]) return x - def _image_to_tensor(self, image:Image, normalize:bool=True)->Tensor: - image = np.array(image).astype(np.float32) / 255.0 - if len(image.shape) == 2: # 'L' image, as in a mask - image = image[None,None] - else: # 'RGB' image - image = image[None].transpose(0, 3, 1, 2) - image = torch.from_numpy(image) - if normalize: - image = 2.0 * image - 1.0 - return image.to(self.model.device) + def get_noise(self,width,height): + # copy of the Txt2Img.get_noise + device = self.model.device + if self.use_mps_noise or device.type == 'mps': + x = torch.randn([1, + self.latent_channels, + height // self.downsampling_factor, + width // self.downsampling_factor], + device='cpu').to(device) + else: + x = torch.randn([1, + self.latent_channels, + height // self.downsampling_factor, + width // self.downsampling_factor], + device=device) + if self.perlin > 0.0: + x = (1-self.perlin)*x + self.perlin*self.get_perlin_noise(width // self.downsampling_factor, height // self.downsampling_factor) + return x From ceb53ccdfbe8cd0be89ec379a8f38fbf088a12ca Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 23 Nov 2022 14:46:41 -0800 Subject: [PATCH 022/199] dev: upgrade to diffusers 0.8 (from 0.7.1) We get to remove some code by using methods that were factored out in the base class. 
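Concretely, the diff below makes StableDiffusionGeneratorPipeline inherit from diffusers' StableDiffusionPipeline, so the hand-rolled attention-slicing toggles, prompt encoding, latent preparation, VAE decoding and safety checking can be dropped in favor of the inherited _encode_prompt, prepare_latents, decode_latents and run_safety_checker helpers. A toy illustration of the same pattern, not InvokeAI code, with the model name shown only as an example:

    from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline

    class MinimalPipeline(StableDiffusionPipeline):
        """Only the custom sampling loop needs to live in the subclass."""

        def latents_to_pil(self, latents):
            images = self.decode_latents(latents)  # helper inherited from the base class
            return self.numpy_to_pil(images)       # ditto

    # shared-component reuse also comes from the base class:
    #   pipe = MinimalPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")
    #   img2img = StableDiffusionImg2ImgPipeline(**pipe.components)
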
--- .../environment-lin-amd.yml | 2 +- .../environment-lin-cuda.yml | 2 +- .../environment-mac.yml | 2 +- .../environment-win-cuda.yml | 2 +- .../requirements-base.txt | 2 +- ldm/invoke/generator/diffusers_pipeline.py | 142 +++--------------- 6 files changed, 28 insertions(+), 124 deletions(-) diff --git a/environments-and-requirements/environment-lin-amd.yml b/environments-and-requirements/environment-lin-amd.yml index 93344666d20..8f9beb44418 100644 --- a/environments-and-requirements/environment-lin-amd.yml +++ b/environments-and-requirements/environment-lin-amd.yml @@ -11,7 +11,7 @@ dependencies: - --extra-index-url https://download.pytorch.org/whl/rocm5.2/ - albumentations==0.4.3 - dependency_injector==4.40.0 - - diffusers==0.6.0 + - diffusers~=0.8 - einops==0.3.0 - eventlet - flask==2.1.3 diff --git a/environments-and-requirements/environment-lin-cuda.yml b/environments-and-requirements/environment-lin-cuda.yml index 64bbabe902d..9a6f3da25fa 100644 --- a/environments-and-requirements/environment-lin-cuda.yml +++ b/environments-and-requirements/environment-lin-cuda.yml @@ -15,7 +15,7 @@ dependencies: - accelerate~=0.13 - albumentations==0.4.3 - dependency_injector==4.40.0 - - diffusers==0.6.0 + - diffusers~=0.8 - einops==0.3.0 - eventlet - flask==2.1.3 diff --git a/environments-and-requirements/environment-mac.yml b/environments-and-requirements/environment-mac.yml index 95f714a73d6..3409ae3c117 100644 --- a/environments-and-requirements/environment-mac.yml +++ b/environments-and-requirements/environment-mac.yml @@ -22,7 +22,7 @@ dependencies: - albumentations=1.2 - coloredlogs=15.0 - - diffusers~=0.7 + - diffusers~=0.8 - einops=0.3 - eventlet - grpcio=1.46 diff --git a/environments-and-requirements/environment-win-cuda.yml b/environments-and-requirements/environment-win-cuda.yml index f71ca666191..e9d293cee71 100644 --- a/environments-and-requirements/environment-win-cuda.yml +++ b/environments-and-requirements/environment-win-cuda.yml @@ -15,7 +15,7 @@ dependencies: - albumentations==0.4.3 - basicsr==1.4.1 - dependency_injector==4.40.0 - - diffusers==0.6.0 + - diffusers~=0.8 - einops==0.3.0 - eventlet - flask==2.1.3 diff --git a/environments-and-requirements/requirements-base.txt b/environments-and-requirements/requirements-base.txt index 8b7d83aa94a..f3284474eb0 100644 --- a/environments-and-requirements/requirements-base.txt +++ b/environments-and-requirements/requirements-base.txt @@ -1,7 +1,7 @@ # pip will resolve the version which matches torch albumentations dependency_injector==4.40.0 -diffusers[torch]~=0.7 +diffusers[torch]~=0.8 einops eventlet facexlib diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 911de67601a..4a4700232ca 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -5,8 +5,8 @@ import PIL.Image import torch +from diffusers import StableDiffusionPipeline from diffusers.models import AutoencoderKL, UNet2DConditionModel -from diffusers.pipeline_utils import DiffusionPipeline from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import preprocess from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker @@ -26,7 +26,7 @@ class PipelineIntermediateState: predicted_original: Optional[torch.Tensor] = None -class StableDiffusionGeneratorPipeline(DiffusionPipeline): +class 
StableDiffusionGeneratorPipeline(StableDiffusionPipeline): r""" Pipeline for text-to-image generation using Stable Diffusion. @@ -67,10 +67,10 @@ def __init__( tokenizer: CLIPTokenizer, unet: UNet2DConditionModel, scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], - safety_checker: StableDiffusionSafetyChecker, - feature_extractor: CLIPFeatureExtractor, + safety_checker: Optional[StableDiffusionSafetyChecker], + feature_extractor: Optional[CLIPFeatureExtractor], ): - super().__init__() + super().__init__(vae, text_encoder, tokenizer, unet, scheduler, safety_checker, feature_extractor) self.register_modules( vae=vae, @@ -88,51 +88,6 @@ def __init__( ) self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward) - def enable_attention_slicing(self, slice_size: Optional[Union[str, int]] = "auto"): - r""" - Enable sliced attention computation. - - When this option is enabled, the attention module will split the input tensor in slices, to compute attention - in several steps. This is useful to save some memory in exchange for a small speed decrease. - - Args: - slice_size (`str` or `int`, *optional*, defaults to `"auto"`): - When `"auto"`, halves the input to the attention heads, so attention will be computed in two steps. If - a number is provided, uses as many slices as `attention_head_dim // slice_size`. In this case, - `attention_head_dim` must be a multiple of `slice_size`. - """ - if slice_size == "auto": - # half the attention head size is usually a good trade-off between - # speed and memory - slice_size = self.unet.config.attention_head_dim // 2 - self.unet.set_attention_slice(slice_size) - - def disable_attention_slicing(self): - r""" - Disable sliced attention computation. If `enable_attention_slicing` was previously invoked, this method will go - back to computing attention in one step. - """ - # set slice_size = `None` to disable `attention slicing` - self.enable_attention_slicing(None) - - def enable_xformers_memory_efficient_attention(self): - r""" - Enable memory efficient attention as implemented in xformers. - - When this option is enabled, you should observe lower GPU memory usage and a potential speed up at inference - time. Speed up at training time is not guaranteed. - - Warning: When Memory Efficient Attention and Sliced attention are both enabled, the Memory Efficient Attention - is used. - """ - self.unet.set_use_memory_efficient_attention_xformers(True) - - def disable_xformers_memory_efficient_attention(self): - r""" - Disable memory efficient attention as implemented in xformers. - """ - self.unet.set_use_memory_efficient_attention_xformers(False) - def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, guidance_scale: float, @@ -195,10 +150,17 @@ def generate( # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` # corresponds to doing no classifier free guidance. 
do_classifier_free_guidance = guidance_scale > 1.0 - text_embeddings, unconditioned_embeddings = self.get_text_embeddings(prompt, opposing_prompt, do_classifier_free_guidance, batch_size)\ - .to(self.unet.device) + + combined_embeddings = self._encode_prompt(prompt, device=self._execution_device, num_images_per_prompt=1, + do_classifier_free_guidance=do_classifier_free_guidance, + negative_prompt=opposing_prompt) + text_embeddings, unconditioned_embeddings = combined_embeddings.chunk(2) self.scheduler.set_timesteps(num_inference_steps) - latents = self.prepare_latents(latents, batch_size, height, width, generator, self.unet.dtype) + latents = self.prepare_latents(batch_size=batch_size, num_channels_latents=self.unet.in_channels, + height=height, width=width, + dtype=self.unet.dtype, device=self._execution_device, + generator=generator, + latents=latents) yield from self.generate_from_embeddings(latents, text_embeddings, unconditioned_embeddings, guidance_scale, run_id=run_id, **extra_step_kwargs) @@ -248,9 +210,10 @@ def generate_from_embeddings( # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 torch.cuda.empty_cache() - image = self.decode_to_image(latents) - output = StableDiffusionPipelineOutput(images=image, nsfw_content_detected=[]) - yield self.check_for_safety(output) + with torch.inference_mode(): + image = self.decode_latents(latents) + output = StableDiffusionPipelineOutput(images=image, nsfw_content_detected=[]) + yield self.check_for_safety(output, dtype=text_embeddings.dtype) @torch.inference_mode() def step(self, t: torch.Tensor, latents: torch.Tensor, guidance_scale: float, @@ -340,46 +303,12 @@ def _diffusers08_get_timesteps(self, num_inference_steps, strength): return timesteps - @torch.inference_mode() - def check_for_safety(self, output): - if not getattr(self, 'feature_extractor') or not getattr(self, 'safety_checker'): - return output - images = output.images - safety_checker_output = self.feature_extractor(self.numpy_to_pil(images), - return_tensors="pt").to(self.device) - screened_images, has_nsfw_concept = self.safety_checker( - images=images, clip_input=safety_checker_output.pixel_values) + def check_for_safety(self, output, dtype): + with torch.inference_mode(): + screened_images, has_nsfw_concept = self.run_safety_checker( + output.images, device=self._execution_device, dtype=dtype) return StableDiffusionPipelineOutput(screened_images, has_nsfw_concept) - @torch.inference_mode() - def decode_to_image(self, latents): - # scale and decode the image latents with vae - latents = 1 / 0.18215 * latents - image = self.vae.decode(latents).sample - image = (image / 2 + 0.5).clamp(0, 1) - image = image.cpu().permute(0, 2, 3, 1).numpy() - return image - - @torch.inference_mode() - def get_text_embeddings(self, - prompt: Union[str, List[str]], - opposing_prompt: Union[str, List[str]], - do_classifier_free_guidance: bool, - batch_size: int): - # get prompt text embeddings - text_input = self._tokenize(prompt) - - text_embeddings = self.text_encoder(text_input.input_ids)[0] - # get unconditional embeddings for classifier free guidance - if do_classifier_free_guidance: - # opposing prompt defaults to blank caption for everything in the batch - text_anti_input = self._tokenize(opposing_prompt or [""] * batch_size) - uncond_embeddings = self.text_encoder(text_anti_input.input_ids)[0] - else: - uncond_embeddings = None - - return text_embeddings, uncond_embeddings - @torch.inference_mode() def get_learned_conditioning(self, c: List[List[str]], *, 
return_tokens=True, fragment_weights=None): """ @@ -406,28 +335,3 @@ def _tokenize(self, prompt: Union[str, List[str]]): def channels(self) -> int: """Compatible with DiffusionWrapper""" return self.unet.in_channels - - def prepare_latents(self, latents, batch_size, height, width, generator, dtype): - # get the initial random noise unless the user supplied it - # Unlike in other pipelines, latents need to be generated in the target device - # for 1-to-1 results reproducibility with the CompVis implementation. - # However this currently doesn't work in `mps`. - latents_shape = (batch_size, self.unet.in_channels, height // 8, width // 8) - if latents is None: - latents = torch.randn( - latents_shape, - generator=generator, - device=self.unet.device, - dtype=dtype - ) - else: - if latents.shape != latents_shape: - raise ValueError( - f"Unexpected latents shape, got {latents.shape}, expected {latents_shape}") - if latents.device != self.unet.device: - raise ValueError(f"Unexpected latents device, got {latents.device}, " - f"expected {self.unet.device}") - - # scale the initial noise by the standard deviation required by the scheduler - latents *= self.scheduler.init_noise_sigma - return latents From b7864aa1a7924a2dc558b7dedaaaae788d2e86b5 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 23 Nov 2022 20:30:06 -0800 Subject: [PATCH 023/199] refactor: remove backported img2img.get_timesteps now that we can use it directly from diffusers 0.8.1 --- ldm/invoke/generator/diffusers_pipeline.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 4a4700232ca..5d6b66ccf01 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -5,7 +5,7 @@ import PIL.Image import torch -from diffusers import StableDiffusionPipeline +from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import preprocess @@ -260,8 +260,9 @@ def img2img_from_embeddings(self, if isinstance(init_image, PIL.Image.Image): init_image = preprocess(init_image.convert('RGB')) - self.scheduler.set_timesteps(num_inference_steps, device=device) - timesteps = self._diffusers08_get_timesteps(num_inference_steps, strength) + img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components) + img2img_pipeline.scheduler.set_timesteps(num_inference_steps, device=device) + timesteps = img2img_pipeline.get_timesteps(num_inference_steps, strength, device=device) latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) # 6. 
Prepare latent variables @@ -292,17 +293,6 @@ def prepare_latents_from_image(self, init_image, timestep, dtype, device, noise_ return self.scheduler.add_noise(init_latents, noise, timestep) - def _diffusers08_get_timesteps(self, num_inference_steps, strength): - # get the original timestep using init_timestep - offset = self.scheduler.config.get("steps_offset", 0) - init_timestep = int(num_inference_steps * strength) + offset - init_timestep = min(init_timestep, num_inference_steps) - - t_start = max(num_inference_steps - init_timestep + offset, 0) - timesteps = self.scheduler.timesteps[t_start:] - - return timesteps - def check_for_safety(self, output, dtype): with torch.inference_mode(): screened_images, has_nsfw_concept = self.run_safety_checker( From 5cee2111be450d22e6a3c9b83b0d5773f17b73ee Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Thu, 24 Nov 2022 18:38:08 -0800 Subject: [PATCH 024/199] ci: use diffusers model --- .github/workflows/test-invoke-conda.yml | 15 ++---- .github/workflows/test-invoke-pip.yml | 71 +++++++++++++------------ 2 files changed, 42 insertions(+), 44 deletions(-) diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml index b690f190454..e1c5928b2ea 100644 --- a/.github/workflows/test-invoke-conda.yml +++ b/.github/workflows/test-invoke-conda.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: stable-diffusion-model: - - 'stable-diffusion-1.5' + - 'diffusers-1.5' environment-yaml: - environment-lin-amd.yml - environment-lin-cuda.yml @@ -29,13 +29,7 @@ jobs: - environment-yaml: environment-mac.yml os: macos-12 default-shell: bash -l {0} - # - stable-diffusion-model: https://huggingface.co/CompVis/stable-diffusion-v-1-4-original/resolve/main/sd-v1-4.ckpt - # stable-diffusion-model-dl-path: models/ldm/stable-diffusion-v1/sd-v1-4.ckpt - # stable-diffusion-model-switch: stable-diffusion-1.4 - - stable-diffusion-model: https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.ckpt - stable-diffusion-model-dl-path: models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt - stable-diffusion-model-switch: diffusers-1.5 - name: ${{ matrix.os }} with ${{ matrix.stable-diffusion-model-switch }} + name: ${{ matrix.environment-yaml }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} env: CONDA_ENV_NAME: invokeai @@ -62,7 +56,7 @@ jobs: id: cache-sd-model uses: actions/cache@v3 env: - cache-name: huggingface-${{ matrix.stable-diffusion-model-switch }} + cache-name: huggingface-${{ matrix.stable-diffusion-model }} with: path: ~/.cache/huggingface key: ${{ env.cache-name }} @@ -106,7 +100,7 @@ jobs: echo -n '${{ secrets.HUGGINGFACE_TOKEN }}' > ~/.huggingface/token fi python scripts/preload_models.py \ - --no-interactive \ + --no-interactive --yes \ --full-precision # can't use fp16 weights without a GPU - name: cat ~/.invokeai @@ -116,6 +110,7 @@ jobs: - name: Run the tests id: run-tests env: + # Set offline mode to make sure configure preloaded successfully. 
HF_HUB_OFFLINE: 1 HF_DATASETS_OFFLINE: 1 TRANSFORMERS_OFFLINE: 1 diff --git a/.github/workflows/test-invoke-pip.yml b/.github/workflows/test-invoke-pip.yml index 3866a4c32c1..baaa6f92a82 100644 --- a/.github/workflows/test-invoke-pip.yml +++ b/.github/workflows/test-invoke-pip.yml @@ -1,20 +1,20 @@ name: Test invoke.py pip -on: - push: - branches: - - 'main' - - 'development' - pull_request: - branches: - - 'main' - - 'development' +on: [push, pull_request] jobs: matrix: + # Run on: + # - pull requests + # - pushes to forks (will run in the forked project with that fork's secrets) + # - pushes to branches that are *not* pull requests + if: | + github.event_name == 'pull_request' + || github.repository != 'invoke-ai/InvokeAI' + || github.ref_protected strategy: matrix: stable-diffusion-model: - - stable-diffusion-1.5 + - diffusers-1.5 requirements-file: - requirements-lin-cuda.txt - requirements-lin-amd.txt @@ -32,10 +32,6 @@ jobs: - requirements-file: requirements-mac-mps-cpu.txt os: macOS-12 default-shell: bash -l {0} - - stable-diffusion-model: stable-diffusion-1.5 - stable-diffusion-model-url: https://huggingface.co/runwayml/stable-diffusion-v1-5/resolve/main/v1-5-pruned-emaonly.ckpt - stable-diffusion-model-dl-path: models/ldm/stable-diffusion-v1 - stable-diffusion-model-dl-name: v1-5-pruned-emaonly.ckpt name: ${{ matrix.requirements-file }} on ${{ matrix.python-version }} runs-on: ${{ matrix.os }} defaults: @@ -43,6 +39,8 @@ jobs: shell: ${{ matrix.default-shell }} env: INVOKEAI_ROOT: '${{ github.workspace }}/invokeai' + PYTHONUNBUFFERED: 1 + HAVE_SECRETS: ${{ secrets.HUGGINGFACE_TOKEN != '' }} steps: - name: Checkout sources id: checkout-sources @@ -53,6 +51,19 @@ jobs: mkdir -p ${{ env.INVOKEAI_ROOT }}/configs cp configs/models.yaml.example ${{ env.INVOKEAI_ROOT }}/configs/models.yaml + - name: Use Cached Stable Diffusion Model + id: cache-sd-model + uses: actions/cache@v3 + env: + cache-name: huggingface-${{ matrix.stable-diffusion-model }} + with: + path: ~/.cache/huggingface + key: ${{ env.cache-name }} + + - name: Check model availability + if: steps.cache-sd-model.outputs.cache-hit != true && env.HAVE_SECRETS != 'true' + run: echo -e '\a ⛔ GitHub model cache not found, and no HUGGINGFACE_TOKEN is available. Will not be able to load Stable Diffusion.' 
; exit 1 + - name: set test prompt to main branch validation if: ${{ github.ref == 'refs/heads/main' }} run: echo "TEST_PROMPTS=tests/preflight_prompts.txt" >> $GITHUB_ENV @@ -81,29 +92,16 @@ jobs: - name: install requirements run: ${{ env.pythonLocation }}/bin/pip install -r '${{ matrix.requirements-file }}' - - name: Use Cached Stable Diffusion Model - id: cache-sd-model - uses: actions/cache@v3 - env: - cache-name: cache-${{ matrix.stable-diffusion-model }} - with: - path: ${{ env.INVOKEAI_ROOT }}/${{ matrix.stable-diffusion-model-dl-path }} - key: ${{ env.cache-name }} - - - name: Download ${{ matrix.stable-diffusion-model }} - id: download-stable-diffusion-model - if: ${{ steps.cache-sd-model.outputs.cache-hit != 'true' }} - run: | - mkdir -p "${{ env.INVOKEAI_ROOT }}/${{ matrix.stable-diffusion-model-dl-path }}" - curl \ - -H "Authorization: Bearer ${{ secrets.HUGGINGFACE_TOKEN }}" \ - -o "${{ env.INVOKEAI_ROOT }}/${{ matrix.stable-diffusion-model-dl-path }}/${{ matrix.stable-diffusion-model-dl-name }}" \ - -L ${{ matrix.stable-diffusion-model-url }} - - name: run configure_invokeai.py id: run-preload-models run: | - ${{ env.pythonLocation }}/bin/python scripts/configure_invokeai.py --no-interactive --yes + if [ "${HAVE_SECRETS}" == true ] ; then + mkdir -p ~/.huggingface + echo -n '${{ secrets.HUGGINGFACE_TOKEN }}' > ~/.huggingface/token + fi + ${{ env.pythonLocation }}/bin/python scripts/configure_invokeai.py \ + --no-interactive --yes \ + --full-precision # can't use fp16 weights without a GPU - name: cat ~/.invokeai id: cat-invokeai @@ -111,6 +109,11 @@ jobs: - name: Run the tests id: run-tests + env: + # Set offline mode to make sure configure preloaded successfully. + HF_HUB_OFFLINE: 1 + HF_DATASETS_OFFLINE: 1 + TRANSFORMERS_OFFLINE: 1 run: | time ${{ env.pythonLocation }}/bin/python scripts/invoke.py \ --no-patchmatch \ From 716ca8f8b99ca4c6903e51d43e500152480193ea Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Fri, 25 Nov 2022 13:31:56 -0800 Subject: [PATCH 025/199] dev: upgrade to diffusers 0.9 (from 0.8.1) --- environments-and-requirements/environment-lin-amd.yml | 2 +- environments-and-requirements/environment-lin-cuda.yml | 2 +- environments-and-requirements/environment-mac.yml | 2 +- environments-and-requirements/environment-win-cuda.yml | 2 +- environments-and-requirements/requirements-base.txt | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/environments-and-requirements/environment-lin-amd.yml b/environments-and-requirements/environment-lin-amd.yml index 8f9beb44418..8d9336e0ba9 100644 --- a/environments-and-requirements/environment-lin-amd.yml +++ b/environments-and-requirements/environment-lin-amd.yml @@ -11,7 +11,7 @@ dependencies: - --extra-index-url https://download.pytorch.org/whl/rocm5.2/ - albumentations==0.4.3 - dependency_injector==4.40.0 - - diffusers~=0.8 + - diffusers~=0.9 - einops==0.3.0 - eventlet - flask==2.1.3 diff --git a/environments-and-requirements/environment-lin-cuda.yml b/environments-and-requirements/environment-lin-cuda.yml index 9a6f3da25fa..83ff8b5bb02 100644 --- a/environments-and-requirements/environment-lin-cuda.yml +++ b/environments-and-requirements/environment-lin-cuda.yml @@ -15,7 +15,7 @@ dependencies: - accelerate~=0.13 - albumentations==0.4.3 - dependency_injector==4.40.0 - - diffusers~=0.8 + - diffusers~=0.9 - einops==0.3.0 - eventlet - flask==2.1.3 diff --git a/environments-and-requirements/environment-mac.yml b/environments-and-requirements/environment-mac.yml index 
3409ae3c117..0584a82392a 100644 --- a/environments-and-requirements/environment-mac.yml +++ b/environments-and-requirements/environment-mac.yml @@ -22,7 +22,7 @@ dependencies: - albumentations=1.2 - coloredlogs=15.0 - - diffusers~=0.8 + - diffusers~=0.9 - einops=0.3 - eventlet - grpcio=1.46 diff --git a/environments-and-requirements/environment-win-cuda.yml b/environments-and-requirements/environment-win-cuda.yml index e9d293cee71..4a2d47dcd67 100644 --- a/environments-and-requirements/environment-win-cuda.yml +++ b/environments-and-requirements/environment-win-cuda.yml @@ -15,7 +15,7 @@ dependencies: - albumentations==0.4.3 - basicsr==1.4.1 - dependency_injector==4.40.0 - - diffusers~=0.8 + - diffusers~=0.9 - einops==0.3.0 - eventlet - flask==2.1.3 diff --git a/environments-and-requirements/requirements-base.txt b/environments-and-requirements/requirements-base.txt index f3284474eb0..f125544f535 100644 --- a/environments-and-requirements/requirements-base.txt +++ b/environments-and-requirements/requirements-base.txt @@ -1,7 +1,7 @@ # pip will resolve the version which matches torch albumentations dependency_injector==4.40.0 -diffusers[torch]~=0.8 +diffusers[torch]~=0.9 einops eventlet facexlib From 56153c2ebf834087656d0e80de8cf6e789eb74a6 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Fri, 25 Nov 2022 13:52:29 -0800 Subject: [PATCH 026/199] lint: correct annotations for Python 3.9. --- ldm/invoke/generator/base.py | 2 ++ ldm/invoke/generator/diffusers_pipeline.py | 2 ++ ldm/invoke/model_cache.py | 5 +++++ 3 files changed, 9 insertions(+) diff --git a/ldm/invoke/generator/base.py b/ldm/invoke/generator/base.py index c8b79f24e14..7325e1334e0 100644 --- a/ldm/invoke/generator/base.py +++ b/ldm/invoke/generator/base.py @@ -2,6 +2,8 @@ Base class for ldm.invoke.generator.* including img2img, txt2img, and inpaint ''' +from __future__ import annotations + import os import os.path as osp import random diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 5d6b66ccf01..2d3f694687a 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import secrets import warnings from dataclasses import dataclass diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 199ef4307d3..94eb8e9cea1 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -4,15 +4,20 @@ below a preset minimum, the least recently used model will be cleared and loaded from disk when next needed. ''' +from __future__ import annotations + +import contextlib import gc import hashlib import io import os import sys +import textwrap import time import traceback import warnings from pathlib import Path +from typing import Union import torch import transformers From 09728dd1e082a559845b5b72f3b3d7f1e2d380fe Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Fri, 25 Nov 2022 14:11:19 -0800 Subject: [PATCH 027/199] lint: correct AttributeError.name reference for Python 3.9. 
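
AttributeError.name only exists on Python 3.10 and newer, so the 3.9 fallback has to
match the attribute name against the error message instead. A minimal illustration of
the version difference (example only, not part of the diff below):

    import sys

    try:
        object().set_attention_slice_wrangler  # deliberately missing attribute
    except AttributeError as e:
        if sys.version_info >= (3, 10):
            print(e.name)                                    # 'set_attention_slice_wrangler'
        else:
            print('set_attention_slice_wrangler' in str(e))  # True
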
--- ldm/models/diffusion/cross_attention_control.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/ldm/models/diffusion/cross_attention_control.py b/ldm/models/diffusion/cross_attention_control.py index 3284f990cef..08145b1e76e 100644 --- a/ldm/models/diffusion/cross_attention_control.py +++ b/ldm/models/diffusion/cross_attention_control.py @@ -4,6 +4,7 @@ import torch + # adapted from bloc97's CrossAttentionControl colab # https://github.com/bloc97/CrossAttentionControl @@ -255,7 +256,7 @@ def attention_slice_wrangler(module, suggested_attention_slice:torch.Tensor, dim lambda module: context.get_slicing_strategy(identifier) ) except AttributeError as e: - if e.name == 'set_attention_slice_wrangler': + if is_attribute_error_about(e, 'set_attention_slice_wrangler'): warnings.warn(f"TODO: implement for {type(module)}") # TODO else: raise @@ -270,7 +271,14 @@ def remove_attention_function(unet): module.set_attention_slice_wrangler(None) module.set_slicing_strategy_getter(None) except AttributeError as e: - if e.name == 'set_attention_slice_wrangler': + if is_attribute_error_about(e, 'set_attention_slice_wrangler'): warnings.warn(f"TODO: implement for {type(module)}") # TODO else: raise + + +def is_attribute_error_about(error: AttributeError, attribute: str): + if hasattr(error, 'name'): # Python 3.10 + return error.name == attribute + else: # Python 3.9 + return attribute in str(error) From ea073980b0d3ed14e0db26d3b4a68bb85a554b92 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Fri, 25 Nov 2022 14:24:00 -0800 Subject: [PATCH 028/199] CI: prefer diffusers-1.4 because it no longer requires a token The RunwayML models still do. --- .github/workflows/test-invoke-conda.yml | 6 +----- .github/workflows/test-invoke-pip.yml | 6 +----- configs/models.yaml.example | 6 +++++- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml index e1c5928b2ea..fd2c9c10e98 100644 --- a/.github/workflows/test-invoke-conda.yml +++ b/.github/workflows/test-invoke-conda.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: stable-diffusion-model: - - 'diffusers-1.5' + - diffusers-1.4 environment-yaml: - environment-lin-amd.yml - environment-lin-cuda.yml @@ -61,10 +61,6 @@ jobs: path: ~/.cache/huggingface key: ${{ env.cache-name }} - - name: Check model availability - if: steps.cache-sd-model.outputs.cache-hit != true && env.HAVE_SECRETS != 'true' - run: echo -e '\a ⛔ GitHub model cache not found, and no HUGGINGFACE_TOKEN is available. Will not be able to load Stable Diffusion.' ; exit 1 - - name: Use cached conda packages id: use-cached-conda-packages uses: actions/cache@v3 diff --git a/.github/workflows/test-invoke-pip.yml b/.github/workflows/test-invoke-pip.yml index baaa6f92a82..1803743e289 100644 --- a/.github/workflows/test-invoke-pip.yml +++ b/.github/workflows/test-invoke-pip.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: stable-diffusion-model: - - diffusers-1.5 + - diffusers-1.4 requirements-file: - requirements-lin-cuda.txt - requirements-lin-amd.txt @@ -60,10 +60,6 @@ jobs: path: ~/.cache/huggingface key: ${{ env.cache-name }} - - name: Check model availability - if: steps.cache-sd-model.outputs.cache-hit != true && env.HAVE_SECRETS != 'true' - run: echo -e '\a ⛔ GitHub model cache not found, and no HUGGINGFACE_TOKEN is available. Will not be able to load Stable Diffusion.' 
; exit 1 - - name: set test prompt to main branch validation if: ${{ github.ref == 'refs/heads/main' }} run: echo "TEST_PROMPTS=tests/preflight_prompts.txt" >> $GITHUB_ENV diff --git a/configs/models.yaml.example b/configs/models.yaml.example index 1eb2781f4a8..87bc13645d1 100644 --- a/configs/models.yaml.example +++ b/configs/models.yaml.example @@ -5,11 +5,15 @@ # model requires a model config file, a weights file, # and the width and height of the images it # was trained on. +diffusers-1.4: + description: Diffusers version of Stable Diffusion version 1.4 + format: diffusers + repo_name: CompVis/stable-diffusion-v1-4 + default: true diffusers-1.5: description: Diffusers version of Stable Diffusion version 1.5 format: diffusers repo_name: runwayml/stable-diffusion-v1-5 - default: true stable-diffusion-1.5: description: The newest Stable Diffusion version 1.5 weight file (4.27 GB) weights: models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt From c21660a6df4683426762b848d77bd65bccf44d6c Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 26 Nov 2022 10:52:40 -0800 Subject: [PATCH 029/199] build: there's yet another place to update requirements? --- environments-and-requirements/requirements-base.txt | 2 +- installer/requirements.in | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/environments-and-requirements/requirements-base.txt b/environments-and-requirements/requirements-base.txt index f125544f535..867273928ee 100644 --- a/environments-and-requirements/requirements-base.txt +++ b/environments-and-requirements/requirements-base.txt @@ -31,7 +31,7 @@ taming-transformers-rom1504 test-tube>=0.7.5 torch-fidelity torchmetrics -transformers==4.21.* +transformers~=4.24 picklescan git+https://github.com/invoke-ai/GFPGAN@basicsr-1.4.1#egg=gfpgan ; platform_system == 'Windows' git+https://github.com/invoke-ai/GFPGAN@basicsr-1.4.2#egg=gfpgan ; platform_system != 'Windows' diff --git a/installer/requirements.in b/installer/requirements.in index 86f6be44349..8699ae88615 100644 --- a/installer/requirements.in +++ b/installer/requirements.in @@ -3,7 +3,8 @@ --trusted-host https://download.pytorch.org accelerate~=0.14 albumentations -diffusers +diffusers[torch]~=0.9 +einops eventlet flask_cors flask_socketio From 8ce1ae550ba617635b6cedfeda468a9f10977d5b Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 26 Nov 2022 10:58:12 -0800 Subject: [PATCH 030/199] configure: try to download models even without token Models in the CompVis and stabilityai repos no longer require them. (But runwayml still does.) 
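
For reference, the behaviour this change leans on, sketched against the diffusers 0.9-era
API (the auth keyword shown is an assumption for illustration, not something this patch adds):

    from diffusers import StableDiffusionPipeline

    # Public repos resolve anonymously; no ~/.huggingface/token needed:
    pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")

    # Gated repos such as runwayml/* still require an accepted license and a token,
    # either cached by `huggingface-cli login` or passed explicitly:
    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5", use_auth_token=True
    )
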
--- scripts/configure_invokeai.py | 134 ++++++++++++++++++++++++---------- 1 file changed, 97 insertions(+), 37 deletions(-) diff --git a/scripts/configure_invokeai.py b/scripts/configure_invokeai.py index 2bfefaa28c6..7c410e3ead2 100644 --- a/scripts/configure_invokeai.py +++ b/scripts/configure_invokeai.py @@ -8,26 +8,28 @@ # print('Loading Python libraries...\n') import argparse -import sys import os import re -import warnings import shutil -from urllib import request -from tqdm import tqdm -from omegaconf import OmegaConf -from huggingface_hub import HfFolder, hf_hub_url +import sys +import traceback +import warnings from pathlib import Path +from typing import Dict +from urllib import request + +import requests +import transformers +from diffusers import StableDiffusionPipeline, AutoencoderKL from getpass_asterisk import getpass_asterisk +from huggingface_hub import HfFolder, hf_hub_url, whoami as hf_whoami +from omegaconf import OmegaConf +from tqdm import tqdm from transformers import CLIPTokenizer, CLIPTextModel + from ldm.invoke.globals import Globals from ldm.invoke.readline import generic_completer -import traceback -import requests -import clip -import transformers -import warnings warnings.filterwarnings('ignore') import torch transformers.logging.set_verbosity_error() @@ -65,7 +67,7 @@ def introduction(): def postscript(): print( '''\n** Model Installation Successful **\nYou're all set! You may now launch InvokeAI using one of these two commands: -Web version: +Web version: python scripts/invoke.py --web (connect to http://localhost:9090) Command-line version: python scripts/invoke.py @@ -127,7 +129,7 @@ def select_datasets(action:str): if action == 'customized': print(''' -Choose the weight file(s) you wish to download. Before downloading you +Choose the weight file(s) you wish to download. Before downloading you will be given the option to view and change your selections. ''' ) @@ -142,7 +144,7 @@ def select_datasets(action:str): if Datasets[ds]['recommended']: datasets[ds]=counter counter += 1 - + print('The following weight files will be downloaded:') for ds in datasets: dflt = '*' if dflt is None else '' @@ -166,11 +168,11 @@ def recommended_datasets()->dict: if Datasets[ds]['recommended']: datasets[ds]=True return datasets - + #-------------------------------Authenticate against Hugging Face def authenticate(): print(''' -To download the Stable Diffusion weight files from the official Hugging Face +To download the Stable Diffusion weight files from the official Hugging Face repository, you need to read and accept the CreativeML Responsible AI license. This involves a few easy steps. @@ -203,18 +205,18 @@ def authenticate(): access_token = HfFolder.get_token() if access_token is not None: print('found') - + if access_token is None: print('not found') print(''' 4. Thank you! The last step is to enter your HuggingFace access token so that this script is authorized to initiate the download. Go to the access tokens - page of your Hugging Face account and create a token by clicking the + page of your Hugging Face account and create a token by clicking the "New token" button: https://huggingface.co/settings/tokens - (You can enter anything you like in the token creation field marked "Name". + (You can enter anything you like in the token creation field marked "Name". "Role" should be "read"). 
Now copy the token to your clipboard and paste it here: ''' @@ -235,7 +237,7 @@ def migrate_models_ckpt(): if rename: print(f'model.ckpt => {new_name}') os.replace(os.path.join(model_path,'model.ckpt'),os.path.join(model_path,new_name)) - + #--------------------------------------------- def download_weight_datasets(models:dict, access_token:str): migrate_models_ckpt() @@ -262,9 +264,9 @@ def download_weight_datasets(models:dict, access_token:str): HfFolder.save_token(access_token) keys = ', '.join(successful.keys()) - print(f'Successfully installed {keys}') + print(f'Successfully installed {keys}') return successful - + #--------------------------------------------- def hf_download_with_resume(repo_id:str, model_dir:str, model_name:str, access_token:str=None)->bool: model_dest = os.path.join(model_dir, model_name) @@ -275,7 +277,7 @@ def hf_download_with_resume(repo_id:str, model_dir:str, model_name:str, access_t header = {"Authorization": f'Bearer {access_token}'} if access_token else {} open_mode = 'wb' exist_size = 0 - + if os.path.exists(model_dest): exist_size = os.path.getsize(model_dest) header['Range'] = f'bytes={exist_size}-' @@ -283,7 +285,7 @@ def hf_download_with_resume(repo_id:str, model_dir:str, model_name:str, access_t resp = requests.get(url, headers=header, stream=True) total = int(resp.headers.get('content-length', 0)) - + if resp.status_code==416: # "range not satisfiable", which means nothing to return print(f'* {model_name}: complete file found. Skipping.') return True @@ -331,12 +333,64 @@ def download_with_progress_bar(model_url:str, model_dest:str, label:str='the'): print(f'Error downloading {label} model') print(traceback.format_exc()) - + +#--------------------------------------------- +def download_diffusers(models: Dict, full_precision: bool): + # This is a minimal implementation until https://github.com/invoke-ai/InvokeAI/pull/1490 lands, + # which moves a bunch of stuff. + # We can be more complete after we know it won't be all merge conflicts. + diffusers_repos = { + 'CompVis/stable-diffusion-v1-4-original': 'CompVis/stable-diffusion-v1-4', + 'runwayml/stable-diffusion-v1-5': 'runwayml/stable-diffusion-v1-5', + 'runwayml/stable-diffusion-inpainting': 'runwayml/stable-diffusion-inpainting', + 'hakurei/waifu-diffusion-v1-3': 'hakurei/waifu-diffusion' + } + vae_repos = { + 'stabilityai/sd-vae-ft-mse-original': 'stabilityai/sd-vae-ft-mse', + } + precision_args = {} + if not full_precision: + precision_args.update(revision='fp16') + + for model_name, model in models.items(): + repo_id = model['repo_id'] + if repo_id in vae_repos: + print(f" * Downloading diffusers VAE {model_name}...") + # TODO: can we autodetect when a repo has no fp16 revision? + AutoencoderKL.from_pretrained(repo_id) + elif repo_id not in diffusers_repos: + print(f" * Downloading diffusers {model_name}...") + StableDiffusionPipeline.from_pretrained(repo_id, **precision_args) + else: + warnings.warn(f" ⚠ FIXME: add diffusers repo for {repo_id}") + continue + + +def download_diffusers_in_config(config_path: Path, full_precision: bool): + # This is a minimal implementation until https://github.com/invoke-ai/InvokeAI/pull/1490 lands, + # which moves a bunch of stuff. + # We can be more complete after we know it won't be all merge conflicts. 
+ if not is_huggingface_authenticated(): + print("*⚠ No Hugging Face access token; some downloads may be blocked.") + + precision = 'full' if full_precision else 'float16' + cache = ModelCache(OmegaConf.load(config_path), precision=precision, + device_type='cpu', max_loaded_models=1) + for model_name in cache.list_models(): + # TODO: download model without loading it. + # https://github.com/huggingface/diffusers/issues/1301 + model_config = cache.config[model_name] + if model_config.get('format') == 'diffusers': + print(f" * Downloading diffusers {model_name}...") + cache.get_model(model_name) + cache.offload_model(model_name) + + #--------------------------------------------- def update_config_file(successfully_downloaded:dict,opt:dict): config_file = opt.config_file or Default_config_file config_file = os.path.normpath(os.path.join(Globals.root,config_file)) - + yaml = new_config_file_contents(successfully_downloaded,config_file) try: @@ -355,8 +409,8 @@ def update_config_file(successfully_downloaded:dict,opt:dict): print(f'Successfully created new configuration file {config_file}') - -#--------------------------------------------- + +#--------------------------------------------- def new_config_file_contents(successfully_downloaded:dict, config_file:str)->str: if os.path.exists(config_file): conf = OmegaConf.load(config_file) @@ -366,19 +420,19 @@ def new_config_file_contents(successfully_downloaded:dict, config_file:str)->str # find the VAE file, if there is one vaes = {} default_selected = False - + for model in successfully_downloaded: a = Datasets[model]['config'].split('/') if a[0] != 'VAE': continue vae_target = a[1] if len(a)>1 else 'default' vaes[vae_target] = Datasets[model]['file'] - + for model in successfully_downloaded: if Datasets[model]['config'].startswith('VAE'): # skip VAE entries continue stanza = conf[model] if model in conf else { } - + stanza['description'] = Datasets[model]['description'] stanza['weights'] = os.path.join(Model_dir,Weights_dir,Datasets[model]['file']) stanza['config'] = os.path.normpath(os.path.join(SD_Configs, Datasets[model]['config'])) @@ -397,14 +451,14 @@ def new_config_file_contents(successfully_downloaded:dict, config_file:str)->str default_selected = True conf[model] = stanza return OmegaConf.to_yaml(conf) - + #--------------------------------------------- # this will preload the Bert tokenizer fles def download_bert(): print('Installing bert tokenizer (ignore deprecation errors)...', end='',file=sys.stderr) with warnings.catch_warnings(): warnings.filterwarnings('ignore', category=DeprecationWarning) - from transformers import BertTokenizerFast, AutoFeatureExtractor + from transformers import BertTokenizerFast download_from_hf(BertTokenizerFast,'bert-base-uncased') print('...success',file=sys.stderr) @@ -467,7 +521,7 @@ def download_clipseg(): model_url = 'https://owncloud.gwdg.de/index.php/s/ioHbRzFx6th32hn/download' model_dest = os.path.join(Globals.root,'models/clipseg/clipseg_weights') weights_zip = 'models/clipseg/weights.zip' - + if not os.path.exists(model_dest): os.makedirs(os.path.dirname(model_dest), exist_ok=True) if not os.path.exists(f'{model_dest}/rd64-uni-refined.pth'): @@ -586,7 +640,7 @@ def select_outputs(root:str,yes_to_all:bool=False): #------------------------------------- def initialize_rootdir(root:str,yes_to_all:bool=False): assert os.path.exists('./configs'),'Run this script from within the InvokeAI source code directory, "InvokeAI" or the runtime directory "invokeai".' 
- + print(f'** INITIALIZING INVOKEAI RUNTIME DIRECTORY **') root_selected = False while not root_selected: @@ -670,7 +724,7 @@ def initialize_rootdir(root:str,yes_to_all:bool=False): # -Ak_euler_a -C10.0 # ''') - + #------------------------------------- class ProgressBar(): def __init__(self,model_name='file'): @@ -727,6 +781,12 @@ def main(): if opt.interactive: print('** DOWNLOADING DIFFUSION WEIGHTS **') download_weights(opt) + else: + config_path = Path(opt.config_file or Default_config_file) + if config_path.exists(): + download_diffusers_in_config(config_path, full_precision=opt.full_precision) + else: + print("*⚠ No config file found; downloading no weights.") print('\n** DOWNLOADING SUPPORT MODELS **') download_bert() download_clip() @@ -741,7 +801,7 @@ def main(): except Exception as e: print(f'\nA problem occurred during initialization.\nThe error was: "{str(e)}"') print(traceback.format_exc()) - + #------------------------------------- if __name__ == '__main__': main() From 3898336509f40e56f852776d5928efa99dccb963 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sun, 27 Nov 2022 08:27:03 -0800 Subject: [PATCH 031/199] configure: add troubleshooting info for config-not-found --- scripts/configure_invokeai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/configure_invokeai.py b/scripts/configure_invokeai.py index 7c410e3ead2..2d8e83720ef 100644 --- a/scripts/configure_invokeai.py +++ b/scripts/configure_invokeai.py @@ -786,7 +786,7 @@ def main(): if config_path.exists(): download_diffusers_in_config(config_path, full_precision=opt.full_precision) else: - print("*⚠ No config file found; downloading no weights.") + print(f"*⚠ No config file found; downloading no weights. Looked in {config_path}") print('\n** DOWNLOADING SUPPORT MODELS **') download_bert() download_clip() From 73ddde7d3f78e3c3b92449ea74ad05dcfa138ec0 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sun, 27 Nov 2022 09:09:38 -0800 Subject: [PATCH 032/199] fix(configure): prepend root to config path --- scripts/configure_invokeai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/configure_invokeai.py b/scripts/configure_invokeai.py index 2d8e83720ef..fd593bd5840 100644 --- a/scripts/configure_invokeai.py +++ b/scripts/configure_invokeai.py @@ -782,7 +782,7 @@ def main(): print('** DOWNLOADING DIFFUSION WEIGHTS **') download_weights(opt) else: - config_path = Path(opt.config_file or Default_config_file) + config_path = Path(Globals.root, opt.config_file or Default_config_file) if config_path.exists(): download_diffusers_in_config(config_path, full_precision=opt.full_precision) else: From 35c4d263e6b98744519d2e35951571c64229809c Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sun, 27 Nov 2022 10:14:40 -0800 Subject: [PATCH 033/199] fix(configure): remove second `default: true` from models example --- configs/models.yaml.example | 1 - 1 file changed, 1 deletion(-) diff --git a/configs/models.yaml.example b/configs/models.yaml.example index 87bc13645d1..dbb62f2e32b 100644 --- a/configs/models.yaml.example +++ b/configs/models.yaml.example @@ -21,7 +21,6 @@ stable-diffusion-1.5: width: 512 height: 512 vae: ./models/ldm/stable-diffusion-v1/vae-ft-mse-840000-ema-pruned.ckpt - default: true stable-diffusion-1.4: description: Stable Diffusion inference model version 1.4 config: configs/stable-diffusion/v1-inference.yaml From 
f9dcc9a9b439e7a79db353a2c6dff23e1e3aeb3b Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Tue, 29 Nov 2022 13:23:50 -0800 Subject: [PATCH 034/199] CI: simplify test-on-push logic now that we don't need secrets The "test on push but only in forks" logic was only necessary when tests didn't work for PRs-from-forks. --- .github/workflows/test-invoke-conda.yml | 16 +++++++--------- .github/workflows/test-invoke-pip.yml | 15 ++++++--------- 2 files changed, 13 insertions(+), 18 deletions(-) diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml index fd2c9c10e98..5812b63fccd 100644 --- a/.github/workflows/test-invoke-conda.yml +++ b/.github/workflows/test-invoke-conda.yml @@ -1,16 +1,14 @@ name: Test invoke.py -on: [push, pull_request] +on: + push: + branches: + - 'main' + - 'development' + - 'fix-gh-actions-fork' + pull_request: jobs: matrix: - # Run on: - # - pull requests - # - pushes to forks (will run in the forked project with that fork's secrets) - # - pushes to branches that are *not* pull requests - if: | - github.event_name == 'pull_request' - || github.repository != 'invoke-ai/InvokeAI' - || github.ref_protected strategy: matrix: stable-diffusion-model: diff --git a/.github/workflows/test-invoke-pip.yml b/.github/workflows/test-invoke-pip.yml index 1803743e289..178c3a72022 100644 --- a/.github/workflows/test-invoke-pip.yml +++ b/.github/workflows/test-invoke-pip.yml @@ -1,16 +1,13 @@ name: Test invoke.py pip -on: [push, pull_request] +on: + push: + branches: + - 'main' + - 'development' + pull_request: jobs: matrix: - # Run on: - # - pull requests - # - pushes to forks (will run in the forked project with that fork's secrets) - # - pushes to branches that are *not* pull requests - if: | - github.event_name == 'pull_request' - || github.repository != 'invoke-ai/InvokeAI' - || github.ref_protected strategy: matrix: stable-diffusion-model: From ca1f76b7ba04e1ebe0e473d7c178403aee082383 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Mon, 28 Nov 2022 18:26:52 -0800 Subject: [PATCH 035/199] create an embedding_manager for diffusers --- ldm/invoke/generator/diffusers_pipeline.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 2d3f694687a..714d07faacf 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -16,6 +16,7 @@ from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent +from ldm.modules.embedding_manager import EmbeddingManager from ldm.modules.encoders.modules import WeightedFrozenCLIPEmbedder @@ -28,6 +29,16 @@ class PipelineIntermediateState: predicted_original: Optional[torch.Tensor] = None +# copied from configs/stable-diffusion/v1-inference.yaml +_default_personalization_config_params = dict( + placeholder_strings=["*"], + initializer_wods=["sculpture"], + per_image_tokens=False, + num_vectors_per_token=8, + progressive_words=False +) + + class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): r""" Pipeline for text-to-image generation using Stable Diffusion. 
@@ -89,6 +100,7 @@ def __init__( transformer=self.text_encoder ) self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward) + self.embedding_manager = EmbeddingManager(self.clip_embedder, **_default_personalization_config_params) def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, From 8157bff4bc5ddf444182e066f950f67afaad0ebc Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Tue, 29 Nov 2022 19:02:45 -0800 Subject: [PATCH 036/199] internal: avoid importing diffusers DummyObject see https://github.com/huggingface/diffusers/issues/1479 --- ldm/generate.py | 53 +++++++++++----------- ldm/invoke/generator/diffusers_pipeline.py | 5 +- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index d99954c2497..63d9a7a6efe 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -1,48 +1,47 @@ # Copyright (c) 2022 Lincoln D. Stein (https://github.com/lstein) -import pyparsing # Derived from source code carrying the following copyrights # Copyright (c) 2022 Machine Vision and Learning Group, LMU Munich # Copyright (c) 2022 Robin Rombach and Patrick Esser and contributors -import torch -import numpy as np -import random +import gc import os -import time +import random import re import sys +import time import traceback -import transformers -import io -import gc -import hashlib + import cv2 +import numpy as np import skimage -from diffusers import DiffusionPipeline, DDIMScheduler, LMSDiscreteScheduler, EulerDiscreteScheduler, \ - EulerAncestralDiscreteScheduler, PNDMScheduler, IPNDMScheduler - -from omegaconf import OmegaConf -from ldm.invoke.generator.base import downsampling +import torch +import transformers from PIL import Image, ImageOps -from torch import nn +from diffusers.pipeline_utils import DiffusionPipeline +from diffusers.schedulers.scheduling_ddim import DDIMScheduler +from diffusers.schedulers.scheduling_euler_ancestral_discrete import EulerAncestralDiscreteScheduler +from diffusers.schedulers.scheduling_euler_discrete import EulerDiscreteScheduler +from diffusers.schedulers.scheduling_ipndm import IPNDMScheduler +from diffusers.schedulers.scheduling_lms_discrete import LMSDiscreteScheduler +from diffusers.schedulers.scheduling_pndm import PNDMScheduler +from omegaconf import OmegaConf from pytorch_lightning import seed_everything, logging -from ldm.invoke.prompt_parser import PromptParser -from ldm.util import instantiate_from_config -from ldm.invoke.globals import Globals -from ldm.models.diffusion.ddim import DDIMSampler -from ldm.models.diffusion.plms import PLMSSampler -from ldm.models.diffusion.ksampler import KSampler -from ldm.invoke.pngwriter import PngWriter from ldm.invoke.args import metadata_from_png -from ldm.invoke.image_util import InitImageResizer -from ldm.invoke.devices import choose_torch_device, choose_precision +from ldm.invoke.concepts_lib import Concepts from ldm.invoke.conditioning import get_uc_and_c_and_ec +from ldm.invoke.devices import choose_torch_device, choose_precision +from ldm.invoke.globals import Globals +from ldm.invoke.image_util import InitImageResizer from ldm.invoke.model_cache import ModelCache +from ldm.invoke.pngwriter import PngWriter from ldm.invoke.seamless import configure_model_padding -from ldm.invoke.txt2mask import Txt2Mask, SegmentedGrayscale -from ldm.invoke.concepts_lib import Concepts - +from ldm.invoke.txt2mask import Txt2Mask 
+from ldm.models.diffusion.ddim import DDIMSampler +from ldm.models.diffusion.ksampler import KSampler +from ldm.models.diffusion.plms import PLMSSampler + + def fix_func(orig): if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available(): def new_func(*args, **kw): diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 714d07faacf..e9d1d052ffc 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -7,10 +7,11 @@ import PIL.Image import torch -from diffusers import StableDiffusionPipeline, StableDiffusionImg2ImgPipeline from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput -from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import preprocess +from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipeline +from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import preprocess, \ + StableDiffusionImg2ImgPipeline from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer From 7c558d5056a1fc2e1a7a06757c0e91f1dfb677e9 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Tue, 29 Nov 2022 19:07:13 -0800 Subject: [PATCH 037/199] =?UTF-8?q?fix=20"config=20attributes=E2=80=A6not?= =?UTF-8?q?=20expected"=20diffusers=20warnings.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ldm/invoke/generator/diffusers_pipeline.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index e9d1d052ffc..0a689ece7b3 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -83,8 +83,10 @@ def __init__( scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], safety_checker: Optional[StableDiffusionSafetyChecker], feature_extractor: Optional[CLIPFeatureExtractor], + requires_safety_checker: bool = False ): - super().__init__(vae, text_encoder, tokenizer, unet, scheduler, safety_checker, feature_extractor) + super().__init__(vae, text_encoder, tokenizer, unet, scheduler, + safety_checker, feature_extractor, requires_safety_checker) self.register_modules( vae=vae, From e9a0f07033dd6d1b3b55e808b755d5df6ac385fb Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Tue, 29 Nov 2022 19:35:13 -0800 Subject: [PATCH 038/199] fix deprecated scheduler construction --- ldm/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ldm/generate.py b/ldm/generate.py index 63d9a7a6efe..04cdaea89d5 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -1023,7 +1023,7 @@ def _set_scheduler(self): if self.sampler_name in scheduler_map: sampler_class = scheduler_map[self.sampler_name] msg = f'>> Setting Sampler to {self.sampler_name} ({sampler_class.__name__})' - self.sampler = sampler_class.from_config( + self.sampler = sampler_class.from_pretrained( self.model_cache.model_name_or_path(self.model_name), subfolder="scheduler" ) From adaa1c7c3e4994834e276b87e98d06e3f675b161 Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Wed, 30 Nov 2022 22:05:11 +0100 
Subject: [PATCH 039/199] work around an apparent MPS torch bug that causes conditioning to have no effect --- ldm/models/diffusion/shared_invokeai_diffusion.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ldm/models/diffusion/shared_invokeai_diffusion.py b/ldm/models/diffusion/shared_invokeai_diffusion.py index d6ec1ea44bf..538410045fe 100644 --- a/ldm/models/diffusion/shared_invokeai_diffusion.py +++ b/ldm/models/diffusion/shared_invokeai_diffusion.py @@ -102,8 +102,11 @@ def apply_standard_conditioning(self, x, sigma, unconditioning, conditioning): x_twice = torch.cat([x] * 2) sigma_twice = torch.cat([sigma] * 2) both_conditionings = torch.cat([unconditioning, conditioning]) - unconditioned_next_x, conditioned_next_x = self.model_forward_callback(x_twice, sigma_twice, - both_conditionings).chunk(2) + both_results = self.model_forward_callback(x_twice, sigma_twice, both_conditionings) + unconditioned_next_x, conditioned_next_x = both_results.chunk(2) + if conditioned_next_x.device.type == 'mps': + # prevent a result filled with zeros. seems to be a torch bug. + conditioned_next_x = conditioned_next_x.clone() return unconditioned_next_x, conditioned_next_x From 494936a8d24a09cb278a8e4520db3238b0e63125 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 30 Nov 2022 15:25:47 -0800 Subject: [PATCH 040/199] =?UTF-8?q?=F0=9F=9A=A7=20post-rebase=20repair?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/test-invoke-conda.yml | 4 +- backend/invoke_ai_web_server.py | 42 +++++++++-------- .../environment-lin-cuda.yml | 9 ---- ldm/generate.py | 32 ++++++------- ldm/invoke/generator/base.py | 7 ++- ldm/invoke/model_cache.py | 45 ++++++++++++++----- .../diffusion/cross_attention_control.py | 44 +++++++++--------- scripts/configure_invokeai.py | 25 +++++++++++ 8 files changed, 122 insertions(+), 86 deletions(-) diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml index 5812b63fccd..f37512d2bbc 100644 --- a/.github/workflows/test-invoke-conda.yml +++ b/.github/workflows/test-invoke-conda.yml @@ -86,14 +86,14 @@ jobs: if: ${{ github.ref != 'refs/heads/main' && github.ref != 'refs/heads/development' }} run: echo "TEST_PROMPTS=tests/validate_pr_prompt.txt" >> $GITHUB_ENV - - name: run preload_models.py + - name: run configure_invokeai.py id: run-preload-models run: | if [ "${HAVE_SECRETS}" == true ] ; then mkdir -p ~/.huggingface echo -n '${{ secrets.HUGGINGFACE_TOKEN }}' > ~/.huggingface/token fi - python scripts/preload_models.py \ + python scripts/configure_invokeai.py \ --no-interactive --yes \ --full-precision # can't use fp16 weights without a GPU diff --git a/backend/invoke_ai_web_server.py b/backend/invoke_ai_web_server.py index d525cf87f89..a66e12a1a16 100644 --- a/backend/invoke_ai_web_server.py +++ b/backend/invoke_ai_web_server.py @@ -1,33 +1,31 @@ -import eventlet +import base64 import glob +import io +import json +import math +import mimetypes import os import shutil -import mimetypes import traceback -import math -import io -import base64 -import os -import json +from threading import Event +from uuid import uuid4 -from werkzeug.utils import secure_filename +import eventlet +from PIL import Image +from PIL.Image import Image as ImageType from flask import Flask, redirect, send_from_directory, request, make_response from flask_socketio import SocketIO -from PIL import Image, ImageOps -from PIL.Image import Image as 
ImageType -from uuid import uuid4 -from threading import Event +from werkzeug.utils import secure_filename +from backend.modules.get_canvas_generation_mode import ( + get_canvas_generation_mode, +) +from backend.modules.parameters import parameters_to_command from ldm.invoke.args import Args, APP_ID, APP_VERSION, calculate_init_img_hash from ldm.invoke.generator.diffusers_pipeline import PipelineIntermediateState +from ldm.invoke.generator.inpaint import infill_methods from ldm.invoke.pngwriter import PngWriter, retrieve_metadata from ldm.invoke.prompt_parser import split_weighted_subprompts -from ldm.invoke.generator.inpaint import infill_methods - -from backend.modules.parameters import parameters_to_command -from backend.modules.get_canvas_generation_mode import ( - get_canvas_generation_mode, -) # Loading Arguments opt = Args() @@ -251,7 +249,7 @@ def find_frontend(self): return candidate assert "Frontend files cannot be found. Cannot continue" - + def setup_app(self): self.result_url = "outputs/" self.init_image_url = "outputs/init-images/" @@ -776,10 +774,10 @@ def generate_images( ).convert("RGBA") """ - The outpaint image and mask are pre-cropped by the UI, so the bounding box we pass + The outpaint image and mask are pre-cropped by the UI, so the bounding box we pass to the generator should be: { - "x": 0, + "x": 0, "y": 0, "width": original_bounding_box["width"], "height": original_bounding_box["height"] @@ -799,7 +797,7 @@ def generate_images( ) """ - Apply the mask to the init image, creating a "mask" image with + Apply the mask to the init image, creating a "mask" image with transparency where inpainting should occur. This is the kind of mask that prompt2image() needs. """ diff --git a/environments-and-requirements/environment-lin-cuda.yml b/environments-and-requirements/environment-lin-cuda.yml index 83ff8b5bb02..4397f95371c 100644 --- a/environments-and-requirements/environment-lin-cuda.yml +++ b/environments-and-requirements/environment-lin-cuda.yml @@ -40,15 +40,6 @@ dependencies: - torch-fidelity==0.3.0 - torchmetrics==0.7.0 - transformers==4.21.3 - - diffusers~=0.7 - - torchmetrics==0.7.0 - - flask==2.1.3 - - flask_socketio==5.3.0 - - flask_cors==3.0.10 - - dependency_injector==4.40.0 - - eventlet - - getpass_asterisk - - kornia==0.6.0 - git+https://github.com/openai/CLIP.git@main#egg=clip - git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k-diffusion - git+https://github.com/invoke-ai/clipseg.git@relaxed-python-requirement#egg=clipseg diff --git a/ldm/generate.py b/ldm/generate.py index 04cdaea89d5..375b72b362a 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -236,7 +236,7 @@ def __init__( except Exception: print('** An error was encountered while installing the safety checker:') print(traceback.format_exc()) - + def prompt2png(self, prompt, outdir, **kwargs): """ Takes a prompt and an output directory, writes out the requested number @@ -330,7 +330,7 @@ def prompt2image( infill_method = infill_methods[0], # The infill method to use force_outpaint: bool = False, enable_image_debugging = False, - + **args, ): # eat up additional cruft """ @@ -373,7 +373,7 @@ def prompt2image( def process_image(image,seed): image.save(f{'images/seed.png'}) - The code used to save images to a directory can be found in ldm/invoke/pngwriter.py. + The code used to save images to a directory can be found in ldm/invoke/pngwriter.py. It contains code to create the requested output directory, select a unique informative name for each image, and write the prompt into the PNG metadata. 
""" @@ -593,7 +593,7 @@ def apply_postprocessor( seed = opt.seed or args.seed if seed is None or seed < 0: seed = random.randrange(0, np.iinfo(np.uint32).max) - + prompt = opt.prompt or args.prompt or '' print(f'>> using seed {seed} and prompt "{prompt}" for {image_path}') @@ -645,7 +645,7 @@ def apply_postprocessor( opt.seed = seed opt.prompt = prompt - + if len(extend_instructions) > 0: restorer = Outcrop(image,self,) return restorer.process ( @@ -687,7 +687,7 @@ def apply_postprocessor( image_callback = callback, prefix = prefix ) - + elif tool is None: print(f'* please provide at least one postprocessing option, such as -G or -U') return None @@ -710,13 +710,13 @@ def select_generator( if embiggen is not None: return self._make_embiggen() - + if inpainting_model_in_use: return self._make_omnibus() if ((init_image is not None) and (mask_image is not None)) or force_outpaint: return self._make_inpaint() - + if init_image is not None: return self._make_img2img() @@ -747,7 +747,7 @@ def _make_images( if self._has_transparency(image): self._transparency_check_and_warning(image, mask, force_outpaint) init_mask = self._create_init_mask(image, width, height, fit=fit) - + if (image.width * image.height) > (self.width * self.height) and self.size_matters: print(">> This input is larger than your defaults. If you run out of memory, please use a smaller image.") self.size_matters = False @@ -763,7 +763,7 @@ def _make_images( if invert_mask: init_mask = ImageOps.invert(init_mask) - + return init_image,init_mask # lots o' repeated code here! Turn into a make_func() @@ -822,7 +822,7 @@ def load_model(self): self.set_model(self.model_name) def set_model(self,model_name): - """ + """ Given the name of a model defined in models.yaml, will load and initialize it and return the model object. Previously-used models will be cached. """ @@ -834,7 +834,7 @@ def set_model(self,model_name): if not cache.valid_model(model_name): print(f'** "{model_name}" is not a known model name. Please check your models.yaml file') return self.model - + cache.print_vram_usage() # have to get rid of all references to model in order @@ -843,7 +843,7 @@ def set_model(self,model_name): self.sampler = None self.generators = {} gc.collect() - + model_data = cache.get_model(model_name) if model_data is None: # restore previous model_data = cache.get_model(self.model_name) @@ -856,7 +856,7 @@ def set_model(self,model_name): # uncache generators so they pick up new models self.generators = {} - + seed_everything(random.randrange(0, np.iinfo(np.uint32).max)) if self.embedding_path is not None: self.model.embedding_manager.load( @@ -905,7 +905,7 @@ def upscale_and_reconstruct(self, image_callback = None, prefix = None, ): - + for r in image_list: image, seed = r try: @@ -915,7 +915,7 @@ def upscale_and_reconstruct(self, if self.gfpgan is None: print('>> GFPGAN not found. Face restoration is disabled.') else: - image = self.gfpgan.process(image, strength, seed) + image = self.gfpgan.process(image, strength, seed) if facetool == 'codeformer': if self.codeformer is None: print('>> CodeFormer not found. 
Face restoration is disabled.') diff --git a/ldm/invoke/generator/base.py b/ldm/invoke/generator/base.py index 7325e1334e0..da2d2900ca9 100644 --- a/ldm/invoke/generator/base.py +++ b/ldm/invoke/generator/base.py @@ -9,9 +9,10 @@ import random import traceback +import cv2 import numpy as np import torch -from PIL import Image, ImageFilter +from PIL import Image, ImageFilter, ImageChops from diffusers import DiffusionPipeline from einops import rearrange from pytorch_lightning import seed_everything @@ -169,7 +170,7 @@ def repaste_and_color_correct(self, result: Image.Image, init_image: Image.Image # Blur the mask out (into init image) by specified amount if mask_blur_radius > 0: nm = np.asarray(pil_init_mask, dtype=np.uint8) - nmd = cv.erode(nm, kernel=np.ones((3,3), dtype=np.uint8), iterations=int(mask_blur_radius / 2)) + nmd = cv2.erode(nm, kernel=np.ones((3,3), dtype=np.uint8), iterations=int(mask_blur_radius / 2)) pmd = Image.fromarray(nmd, mode='L') blurred_init_mask = pmd.filter(ImageFilter.BoxBlur(mask_blur_radius)) else: @@ -181,8 +182,6 @@ def repaste_and_color_correct(self, result: Image.Image, init_image: Image.Image matched_result.paste(init_image, (0,0), mask = multiplied_blurred_init_mask) return matched_result - - def sample_to_lowres_estimated_image(self,samples): # origingally adapted from code by @erucipe and @keturn here: # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7 diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 94eb8e9cea1..4efa7aca0d2 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -21,9 +21,6 @@ import torch import transformers -import textwrap -import contextlib -from typing import Union from omegaconf import OmegaConf from omegaconf.errors import ConfigAttributeError from picklescan.scanner import scan_file_path @@ -99,7 +96,7 @@ def get_model(self, model_name:str): assert self.current_model,'** FATAL: no current model to restore to' print(f'** restoring {self.current_model}') self.get_model(self.current_model) - return None + return self.current_model = model_name self._push_newest_model(model_name) @@ -219,7 +216,7 @@ def _load_model(self, model_name:str): if model_format == 'ckpt': weights = mconfig.weights print(f'>> Loading {model_name} from {weights}') - model, width, height, model_hash = self._load_ckpt_model(mconfig) + model, width, height, model_hash = self._load_ckpt_model(model_name, mconfig) elif model_format == 'diffusers': model, width, height, model_hash = self._load_diffusers_model(mconfig) else: @@ -237,10 +234,10 @@ def _load_model(self, model_name:str): ) return model, width, height, model_hash - def _load_ckpt_model(self, mconfig): + def _load_ckpt_model(self, model_name, mconfig): config = mconfig.config weights = mconfig.weights - vae = mconfig.get('vae', None) + vae = mconfig.get('vae') width = mconfig.width height = mconfig.height @@ -249,10 +246,22 @@ def _load_ckpt_model(self, mconfig): if not os.path.isabs(weights): weights = os.path.normpath(os.path.join(Globals.root,weights)) # scan model - self._scan_model(model_name, weights) + self.scan_model(model_name, weights) + + print(f'>> Loading {model_name} from {weights}') + + # for usage statistics + if self._has_cuda(): + torch.cuda.reset_peak_memory_stats() + torch.cuda.empty_cache() + + tic = time.time() - c = OmegaConf.load(config) - with open(weights, 'rb') as f: + # this does the work + if not os.path.isabs(config): + config = os.path.join(Globals.root,config) + omega_config = 
OmegaConf.load(config) + with open(weights,'rb') as f: weight_bytes = f.read() model_hash = self._cached_sha256(weights, weight_bytes) sd = torch.load(io.BytesIO(weight_bytes), map_location='cpu') @@ -289,6 +298,18 @@ def _load_ckpt_model(self, mconfig): if isinstance(module, (torch.nn.Conv2d, torch.nn.ConvTranspose2d)): module._orig_padding_mode = module.padding_mode + # usage statistics + toc = time.time() + print(f'>> Model loaded in', '%4.2fs' % (toc - tic)) + + if self._has_cuda(): + print( + '>> Max VRAM used to load the model:', + '%4.2fG' % (torch.cuda.max_memory_allocated() / 1e9), + '\n>> Current VRAM usage:' + '%4.2fG' % (torch.cuda.memory_allocated() / 1e9), + ) + return model, width, height, model_hash def _load_diffusers_model(self, mconfig): @@ -308,6 +329,8 @@ def _load_diffusers_model(self, mconfig): print(f'>> Loading diffusers model from {name_or_path}') + # TODO: scan weights maybe? + if self.precision == 'float16': print(' | Using faster float16 precision') pipeline_args.update(revision="fp16", torch_dtype=torch.float16) @@ -342,7 +365,7 @@ def model_name_or_path(self, model_name:str) -> str | Path: else: raise ValueError("Model config must specify either repo_name or path.") - def offload_model(self, model_name:str): + def offload_model(self, model_name:str) -> None: ''' Offload the indicated model to CPU. Will call _make_cache_room() to free space if needed. diff --git a/ldm/models/diffusion/cross_attention_control.py b/ldm/models/diffusion/cross_attention_control.py index 08145b1e76e..ec7c3c215cc 100644 --- a/ldm/models/diffusion/cross_attention_control.py +++ b/ldm/models/diffusion/cross_attention_control.py @@ -248,33 +248,33 @@ def attention_slice_wrangler(module, suggested_attention_slice:torch.Tensor, dim cross_attention_modules = [(name, module) for (name, module) in unet.named_modules() if type(module).__name__ == "CrossAttention"] - for identifier, module in cross_attention_modules: - module.identifier = identifier - try: - module.set_attention_slice_wrangler(attention_slice_wrangler) - module.set_slicing_strategy_getter( - lambda module: context.get_slicing_strategy(identifier) - ) - except AttributeError as e: - if is_attribute_error_about(e, 'set_attention_slice_wrangler'): - warnings.warn(f"TODO: implement for {type(module)}") # TODO - else: - raise + for identifier, module in cross_attention_modules: + module.identifier = identifier + try: + module.set_attention_slice_wrangler(attention_slice_wrangler) + module.set_slicing_strategy_getter( + lambda module: context.get_slicing_strategy(identifier) + ) + except AttributeError as e: + if is_attribute_error_about(e, 'set_attention_slice_wrangler'): + warnings.warn(f"TODO: implement for {type(module)}") # TODO + else: + raise def remove_attention_function(unet): cross_attention_modules = [module for (_, module) in unet.named_modules() if type(module).__name__ == "CrossAttention"] - for module in cross_attention_modules: - try: - # clear wrangler callback - module.set_attention_slice_wrangler(None) - module.set_slicing_strategy_getter(None) - except AttributeError as e: - if is_attribute_error_about(e, 'set_attention_slice_wrangler'): - warnings.warn(f"TODO: implement for {type(module)}") # TODO - else: - raise + for module in cross_attention_modules: + try: + # clear wrangler callback + module.set_attention_slice_wrangler(None) + module.set_slicing_strategy_getter(None) + except AttributeError as e: + if is_attribute_error_about(e, 'set_attention_slice_wrangler'): + warnings.warn(f"TODO: implement for 
{type(module)}") # TODO + else: + raise def is_attribute_error_about(error: AttributeError, attribute: str): diff --git a/scripts/configure_invokeai.py b/scripts/configure_invokeai.py index fd593bd5840..af3489d737a 100644 --- a/scripts/configure_invokeai.py +++ b/scripts/configure_invokeai.py @@ -34,6 +34,12 @@ import torch transformers.logging.set_verbosity_error() +try: + from ldm.invoke.model_cache import ModelCache +except ImportError: + sys.path.append('.') + from ldm.invoke.model_cache import ModelCache + #--------------------------globals----------------------- Model_dir = 'models' Weights_dir = 'ldm/stable-diffusion-v1/' @@ -267,6 +273,19 @@ def download_weight_datasets(models:dict, access_token:str): print(f'Successfully installed {keys}') return successful +#--------------------------------------------- +def is_huggingface_authenticated(): + # huggingface_hub 0.10 API isn't great for this, it could be OSError, ValueError, + # maybe other things, not all end-user-friendly. + # noinspection PyBroadException + try: + response = hf_whoami() + if response.get('id') is not None: + return True + except Exception: + pass + return False + #--------------------------------------------- def hf_download_with_resume(repo_id:str, model_dir:str, model_name:str, access_token:str=None)->bool: model_dest = os.path.join(model_dir, model_name) @@ -749,6 +768,12 @@ def main(): action=argparse.BooleanOptionalAction, default=True, help='run in interactive mode (default)') + parser.add_argument('--full-precision', + dest='full_precision', + action=argparse.BooleanOptionalAction, + type=bool, + default=False, + help='use 32-bit weights instead of faster 16-bit weights') parser.add_argument('--yes','-y', dest='yes_to_all', action='store_true', From ea1cf83c20910b3be6ec7d5df3bb13e1a4bedfdd Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 30 Nov 2022 22:05:58 -0800 Subject: [PATCH 041/199] preliminary support for outpainting (no masking yet) --- ldm/invoke/generator/diffusers_pipeline.py | 76 +++++++++++- ldm/invoke/generator/inpaint.py | 128 +++++++++------------ 2 files changed, 127 insertions(+), 77 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 0a689ece7b3..989d43546e6 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -6,12 +6,13 @@ from typing import List, Optional, Union, Callable import PIL.Image +import einops import torch +import torchvision.transforms as T from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipeline -from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import preprocess, \ - StableDiffusionImg2ImgPipeline +from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer @@ -40,6 +41,25 @@ class PipelineIntermediateState: ) +def image_resized_to_grid_as_tensor(image: PIL.Image.Image, normalize: bool=True, multiple_of=8) -> torch.FloatTensor: + """ + + :param image: input image + :param normalize: scale the 
range to [-1, 1] instead of [0, 1] + :param multiple_of: resize the input so both dimensions are a multiple of this + """ + w, h = image.size + w, h = map(lambda x: x - x % 8, (w, h)) # resize to integer multiple of 8 + transformation = T.Compose([ + T.Resize((h, w), T.InterpolationMode.LANCZOS), + T.ToTensor(), + ]) + tensor = transformation(image) + if normalize: + tensor = tensor * 2.0 - 1.0 + return tensor + + class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): r""" Pipeline for text-to-image generation using Stable Diffusion. @@ -270,12 +290,15 @@ def img2img_from_embeddings(self, noise_func=None, **extra_step_kwargs) -> StableDiffusionPipelineOutput: device = self.unet.device - latents_dtype = text_embeddings.dtype + latents_dtype = self.unet.dtype batch_size = 1 num_images_per_prompt = 1 if isinstance(init_image, PIL.Image.Image): - init_image = preprocess(init_image.convert('RGB')) + init_image = image_resized_to_grid_as_tensor(init_image.convert('RGB')) + + if init_image.dim() == 3: + init_image = einops.rearrange(init_image, 'c h w -> 1 c h w') img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components) img2img_pipeline.scheduler.set_timesteps(num_inference_steps, device=device) @@ -297,6 +320,51 @@ def img2img_from_embeddings(self, raise AssertionError("why was that an empty generator?") return result + def inpaint_from_embeddings( + self, + init_image: torch.FloatTensor, + mask_image: torch.FloatTensor, + strength: float, + num_inference_steps: int, + text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, + guidance_scale: float, + *, callback: Callable[[PipelineIntermediateState], None] = None, + extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, + run_id=None, + noise_func=None, + **extra_step_kwargs) -> StableDiffusionPipelineOutput: + device = self.unet.device + latents_dtype = self.unet.dtype + batch_size = 1 + num_images_per_prompt = 1 + + if isinstance(init_image, PIL.Image.Image): + init_image = image_resized_to_grid_as_tensor(init_image.convert('RGB')) + + if init_image.dim() == 3: + init_image = einops.rearrange(init_image, 'c h w -> 1 c h w') + + img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components) + img2img_pipeline.scheduler.set_timesteps(num_inference_steps, device=device) + timesteps = img2img_pipeline.get_timesteps(num_inference_steps, strength, device=device) + + # 6. 
Prepare latent variables + latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) + latents = self.prepare_latents_from_image(init_image, latent_timestep, latents_dtype, device, noise_func) + + result = None + for result in self.generate_from_embeddings( + latents, text_embeddings, unconditioned_embeddings, guidance_scale, + extra_conditioning_info=extra_conditioning_info, + timesteps=timesteps, + run_id=run_id, **extra_step_kwargs): + if callback is not None and isinstance(result, PipelineIntermediateState): + callback(result) + if result is None: + raise AssertionError("why was that an empty generator?") + return result + + def prepare_latents_from_image(self, init_image, timestep, dtype, device, noise_func) -> torch.FloatTensor: # can't quite use upstream StableDiffusionImg2ImgPipeline.prepare_latents # because we have our own noise function diff --git a/ldm/invoke/generator/inpaint.py b/ldm/invoke/generator/inpaint.py index 02bac7c999b..8a16e1e86fe 100644 --- a/ldm/invoke/generator/inpaint.py +++ b/ldm/invoke/generator/inpaint.py @@ -1,22 +1,21 @@ ''' ldm.invoke.generator.inpaint descends from ldm.invoke.generator ''' +from __future__ import annotations import math import PIL -import cv2 as cv +import cv2 import numpy as np import torch from PIL import Image, ImageFilter, ImageOps, ImageChops from einops import repeat -from ldm.invoke.devices import choose_autocast from ldm.invoke.generator.base import downsampling +from ldm.invoke.generator.diffusers_pipeline import image_resized_to_grid_as_tensor, StableDiffusionGeneratorPipeline from ldm.invoke.generator.img2img import Img2Img from ldm.invoke.globals import Globals -from ldm.models.diffusion.ddim import DDIMSampler -from ldm.models.diffusion.ksampler import KSampler from ldm.util import debug_image infill_methods: list[str] = list() @@ -36,6 +35,9 @@ class Inpaint(Img2Img): def __init__(self, model, precision): + self.inpaint_height = 0 + self.inpaint_width = 0 + self.enable_image_debugging = False self.init_latent = None self.pil_image = None self.pil_mask = None @@ -123,13 +125,13 @@ def mask_edge(self, mask: Image, edge_size: int, edge_blur: int) -> Image: npgradient = np.uint8(255 * (1.0 - np.floor(np.abs(0.5 - np.float32(npimg) / 255.0) * 2.0))) # Detect hard edges - npedge = cv.Canny(npimg, threshold1=100, threshold2=200) + npedge = cv2.Canny(npimg, threshold1=100, threshold2=200) # Combine npmask = npgradient + npedge # Expand - npmask = cv.dilate(npmask, np.ones((3,3), np.uint8), iterations = int(edge_size / 2)) + npmask = cv2.dilate(npmask, np.ones((3,3), np.uint8), iterations = int(edge_size / 2)) new_mask = Image.fromarray(npmask) @@ -139,15 +141,8 @@ def mask_edge(self, mask: Image, edge_size: int, edge_blur: int) -> Image: return ImageOps.invert(new_mask) - def seam_paint(self, - im: Image.Image, - seam_size: int, - seam_blur: int, - prompt,sampler,steps,cfg_scale,ddim_eta, - conditioning,strength, - noise, - step_callback - ) -> Image.Image: + def seam_paint(self, im: Image.Image, seam_size: int, seam_blur: int, prompt, sampler, steps, cfg_scale, ddim_eta, + conditioning, strength, noise, infill_method, step_callback) -> Image.Image: hard_mask = self.pil_image.split()[-1].copy() mask = self.mask_edge(hard_mask, seam_size, seam_blur) @@ -165,7 +160,8 @@ def seam_paint(self, seam_size = 0, step_callback = step_callback, inpaint_width = im.width, - inpaint_height = im.height + inpaint_height = im.height, + infill_method = infill_method ) seam_noise = self.get_noise(im.width, im.height) @@ -177,7 
+173,10 @@ def seam_paint(self, @torch.no_grad() def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, - conditioning,init_image,mask_image,strength, + conditioning, + init_image: PIL.Image.Image | torch.FloatTensor, + mask_image: PIL.Image.Image | torch.FloatTensor, + strength: float, mask_blur_radius: int = 8, # Seam settings - when 0, doesn't fill seam seam_size: int = 0, @@ -223,7 +222,7 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, debug_image(init_filled, "init_filled", debug_status=self.enable_image_debugging) # Create init tensor - init_image = self._image_to_tensor(init_filled.convert('RGB')) + init_image = image_resized_to_grid_as_tensor(init_filled.convert('RGB')) if isinstance(mask_image, PIL.Image.Image): self.pil_mask = mask_image.copy() @@ -244,85 +243,68 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, ), resample=Image.Resampling.NEAREST ) - mask_image = self._image_to_tensor(mask_image,normalize=False) + mask_image = image_resized_to_grid_as_tensor(mask_image, normalize=False) self.mask_blur_radius = mask_blur_radius # klms samplers not supported yet, so ignore previous sampler - if isinstance(sampler,KSampler): - print( - ">> Using recommended DDIM sampler for inpainting." - ) - sampler = DDIMSampler(self.model, device=self.model.device) - - sampler.make_schedule( - ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False - ) + # if isinstance(sampler,KSampler): + # print( + # ">> Using recommended DDIM sampler for inpainting." + # ) + # sampler = DDIMSampler(self.model, device=self.model.device) mask_image = mask_image[0][0].unsqueeze(0).repeat(4,1,1).unsqueeze(0) mask_image = repeat(mask_image, '1 ... -> b ...', b=1) - scope = choose_autocast(self.precision) - with scope(self.model.device.type): - self.init_latent = self.model.get_first_stage_encoding( - self.model.encode_first_stage(init_image) - ) # move to latent space - t_enc = int(strength * steps) # todo: support cross-attention control uc, c, _ = conditioning print(f">> target t_enc is {t_enc} steps") - @torch.no_grad() + # noinspection PyTypeChecker + pipeline: StableDiffusionGeneratorPipeline = self.model + pipeline.scheduler = sampler + def make_image(x_T): + # FIXME: some of this z_enc and inpaint_replace stuff was probably important # encode (scaled latent) - z_enc = sampler.stochastic_encode( - self.init_latent, - torch.tensor([t_enc]).to(self.model.device), - noise=x_T - ) - - # to replace masked area with latent noise, weighted by inpaint_replace strength - if inpaint_replace > 0.0: - print(f'>> inpaint will replace what was under the mask with a strength of {inpaint_replace}') - l_noise = self.get_noise(kwargs['width'],kwargs['height']) - inverted_mask = 1.0-mask_image # there will be 1s where the mask is - masked_region = (1.0-inpaint_replace) * inverted_mask * z_enc + inpaint_replace * inverted_mask * l_noise - z_enc = z_enc * mask_image + masked_region - - # decode it - samples = sampler.decode( - z_enc, - c, - t_enc, - img_callback = step_callback, - unconditional_guidance_scale = cfg_scale, - unconditional_conditioning = uc, - mask = mask_image, - init_latent = self.init_latent + # z_enc = sampler.stochastic_encode( + # self.init_latent, + # torch.tensor([t_enc]).to(self.model.device), + # noise=x_T + # ) + # + # # to replace masked area with latent noise, weighted by inpaint_replace strength + # if inpaint_replace > 0.0: + # print(f'>> inpaint will replace what was under the mask with a strength of {inpaint_replace}') + # l_noise = 
self.get_noise(kwargs['width'],kwargs['height']) + # inverted_mask = 1.0-mask_image # there will be 1s where the mask is + # masked_region = (1.0-inpaint_replace) * inverted_mask * z_enc + inpaint_replace * inverted_mask * l_noise + # z_enc = z_enc * mask_image + masked_region + + pipeline_output = pipeline.inpaint_from_embeddings( + init_image=init_image, + mask_image=mask_image, + strength=strength, + num_inference_steps=steps, + text_embeddings=c, + unconditioned_embeddings=uc, + guidance_scale=cfg_scale, + noise_func=self.get_noise_like, + callback=step_callback, ) - result = self.sample_to_image(samples) + result = pipeline.numpy_to_pil(pipeline_output.images)[0] # Seam paint if this is our first pass (seam_size set to 0 during seam painting) if seam_size > 0: old_image = self.pil_image or init_image old_mask = self.pil_mask or mask_image - result = self.seam_paint( - result, - seam_size, - seam_blur, - prompt, - sampler, - seam_steps, - cfg_scale, - ddim_eta, - conditioning, - seam_strength, - x_T, - step_callback) + result = self.seam_paint(result, seam_size, seam_blur, prompt, sampler, seam_steps, cfg_scale, ddim_eta, + conditioning, seam_strength, x_T, infill_method, step_callback) # Restore original settings self.get_make_image(prompt,sampler,steps,cfg_scale,ddim_eta, From 9ea10ece9502ec39f058edde538bca6005305116 Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Sat, 3 Dec 2022 10:10:37 +0100 Subject: [PATCH 042/199] monkey-patch diffusers.attention and use Invoke lowvram code --- ldm/invoke/generator/diffusers_cross_attention_enhancements.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 ldm/invoke/generator/diffusers_cross_attention_enhancements.py diff --git a/ldm/invoke/generator/diffusers_cross_attention_enhancements.py b/ldm/invoke/generator/diffusers_cross_attention_enhancements.py new file mode 100644 index 00000000000..e69de29bb2d From f48706efeef3359bf0edb423173262c9d3ebc21f Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Sun, 4 Dec 2022 15:15:39 +0100 Subject: [PATCH 043/199] add always_use_cpu arg to bypass MPS --- ldm/invoke/CLI.py | 45 ++++++++++--------- ldm/invoke/args.py | 16 ++++--- ldm/invoke/devices.py | 3 ++ .../diffusers_cross_attention_enhancements.py | 0 ldm/invoke/globals.py | 4 ++ 5 files changed, 41 insertions(+), 27 deletions(-) delete mode 100644 ldm/invoke/generator/diffusers_cross_attention_enhancements.py diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index f05ccbb1ca1..15468891aad 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -27,7 +27,7 @@ def main(): """Initialize command-line parsers and the diffusion model""" global infile print('* Initializing, be patient...') - + opt = Args() args = opt.parse_args() if not args: @@ -47,7 +47,8 @@ def main(): # alert - setting globals here Globals.root = os.path.expanduser(args.root_dir or os.environ.get('INVOKEAI_ROOT') or os.path.abspath('.')) Globals.try_patchmatch = args.patchmatch - + Globals.always_use_cpu = args.always_use_cpu + print(f'>> InvokeAI runtime directory is "{Globals.root}"') # loading here to avoid long delays on startup @@ -339,8 +340,8 @@ def image_writer(image, seed, upscaled=False, first_seed=None, use_prefix=None): filename, tool, formatted_dream_prompt, - ) - + ) + if (not postprocessed) or opt.save_original: # only append to results if we didn't overwrite an earlier output results.append([path, formatted_dream_prompt]) @@ -430,7 +431,7 @@ def do_command(command:str, gen, opt:Args, completer) -> tuple: add_embedding_terms(gen, completer) 
completer.add_history(command) operation = None - + elif command.startswith('!models'): gen.model_cache.print_models() completer.add_history(command) @@ -531,7 +532,7 @@ def add_weights_to_config(model_path:str, gen, opt, completer): completer.complete_extensions(('.yaml','.yml')) completer.linebuffer = 'configs/stable-diffusion/v1-inference.yaml' - + done = False while not done: new_config['config'] = input('Configuration file for this model: ') @@ -562,7 +563,7 @@ def add_weights_to_config(model_path:str, gen, opt, completer): print('** Please enter a valid integer between 64 and 2048') make_default = input('Make this the default model? [n] ') in ('y','Y') - + if write_config_file(opt.conf, gen, model_name, new_config, make_default=make_default): completer.add_model(model_name) @@ -575,14 +576,14 @@ def del_config(model_name:str, gen, opt, completer): gen.model_cache.commit(opt.conf) print(f'** {model_name} deleted') completer.del_model(model_name) - + def edit_config(model_name:str, gen, opt, completer): config = gen.model_cache.config - + if model_name not in config: print(f'** Unknown model {model_name}') return - + print(f'\n>> Editing model {model_name} from configuration file {opt.conf}') conf = config[model_name] @@ -595,10 +596,10 @@ def edit_config(model_name:str, gen, opt, completer): make_default = input('Make this the default model? [n] ') in ('y','Y') completer.complete_extensions(None) write_config_file(opt.conf, gen, model_name, new_config, clobber=True, make_default=make_default) - + def write_config_file(conf_path, gen, model_name, new_config, clobber=False, make_default=False): current_model = gen.model_name - + op = 'modify' if clobber else 'import' print('\n>> New configuration:') if make_default: @@ -621,7 +622,7 @@ def write_config_file(conf_path, gen, model_name, new_config, clobber=False, mak gen.model_cache.set_default_model(model_name) gen.model_cache.commit(conf_path) - + do_switch = input(f'Keep model loaded? [y]') if len(do_switch)==0 or do_switch[0] in ('y','Y'): pass @@ -651,7 +652,7 @@ def do_postprocess (gen, opt, callback): opt.prompt = opt.new_prompt else: opt.prompt = None - + if os.path.dirname(file_path) == '': #basename given file_path = os.path.join(opt.outdir,file_path) @@ -716,7 +717,7 @@ def add_postprocessing_to_metadata(opt,original_file,new_file,tool,command): ) meta['image']['postprocessing'] = pp write_metadata(new_file,meta) - + def prepare_image_metadata( opt, prefix, @@ -794,21 +795,21 @@ def invoke_ai_web_server_loop(gen, gfpgan, codeformer, esrgan): os.chdir( os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) ) - + invoke_ai_web_server = InvokeAIWebServer(generate=gen, gfpgan=gfpgan, codeformer=codeformer, esrgan=esrgan) try: invoke_ai_web_server.run() except KeyboardInterrupt: pass - + def add_embedding_terms(gen,completer): ''' Called after setting the model, updates the autocompleter with any terms loaded by the embedding manager. 
''' completer.add_embedding_terms(gen.model.embedding_manager.list_terms()) - + def split_variations(variations_string) -> list: # shotgun parsing, woo parts = [] @@ -865,7 +866,7 @@ def callback(img, step): image = gen.sample_to_image(img) image.save(filename,'PNG') return callback - + def retrieve_dream_command(opt,command,completer): ''' Given a full or partial path to a previously-generated image file, @@ -873,7 +874,7 @@ def retrieve_dream_command(opt,command,completer): and pop it into the readline buffer (linux, Mac), or print out a comment for cut-and-paste (windows) - Given a wildcard path to a folder with image png files, + Given a wildcard path to a folder with image png files, will retrieve and format the dream command used to generate the images, and save them to a file commands.txt for further processing ''' @@ -909,7 +910,7 @@ def write_commands(opt, file_path:str, outfilepath:str): except ValueError: print(f'## "{basename}": unacceptable pattern') return - + commands = [] cmd = None for path in paths: @@ -938,7 +939,7 @@ def emergency_model_reconfigure(): print(' After reconfiguration is done, please relaunch invoke.py. ') print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') print('configure_invokeai is launching....\n') - + sys.argv = ['configure_invokeai','--interactive'] import configure_invokeai configure_invokeai.main() diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index f1b5e9029d8..66be322e2ee 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -337,7 +337,7 @@ def __getattribute__(self,name): if not hasattr(cmd_switches,name) and not hasattr(arg_switches,name): raise AttributeError - + value_arg,value_cmd = (None,None) try: value_cmd = getattr(cmd_switches,name) @@ -393,7 +393,7 @@ def _create_arg_parser(self): description= """ Generate images using Stable Diffusion. - Use --web to launch the web interface. + Use --web to launch the web interface. Use --from_file to load prompts from a file path or standard input ("-"). Otherwise you will be dropped into an interactive command prompt (type -h for help.) 
Other command-line arguments are defaults that can usually be overridden @@ -455,6 +455,12 @@ def _create_arg_parser(self): action='store_true', help='Force free gpu memory before final decoding', ) + model_group.add_argument( + "--always_use_cpu", + dest="always_use_cpu", + action="store_true", + help="Force use of CPU even if GPU is available" + ) model_group.add_argument( '--precision', dest='precision', @@ -1036,7 +1042,7 @@ def metadata_dumps(opt, Given an Args object, returns a dict containing the keys and structure of the proposed stable diffusion metadata standard https://github.com/lstein/stable-diffusion/discussions/392 - This is intended to be turned into JSON and stored in the + This is intended to be turned into JSON and stored in the "sd ''' @@ -1119,7 +1125,7 @@ def args_from_png(png_file_path) -> list[Args]: meta = ldm.invoke.pngwriter.retrieve_metadata(png_file_path) except AttributeError: return [legacy_metadata_load({},png_file_path)] - + try: return metadata_loads(meta) except: @@ -1218,4 +1224,4 @@ def legacy_metadata_load(meta,pathname) -> Args: opt.prompt = '' opt.seed = 0 return opt - + diff --git a/ldm/invoke/devices.py b/ldm/invoke/devices.py index 424ae5a6d30..0fc749c4a44 100644 --- a/ldm/invoke/devices.py +++ b/ldm/invoke/devices.py @@ -1,9 +1,12 @@ import torch from torch import autocast from contextlib import nullcontext +from ldm.invoke.globals import Globals def choose_torch_device() -> str: '''Convenience routine for guessing which GPU device to run model on''' + if Globals.always_use_cpu: + return "cpu" if torch.cuda.is_available(): return 'cuda' if hasattr(torch.backends, 'mps') and torch.backends.mps.is_available(): diff --git a/ldm/invoke/generator/diffusers_cross_attention_enhancements.py b/ldm/invoke/generator/diffusers_cross_attention_enhancements.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/ldm/invoke/globals.py b/ldm/invoke/globals.py index b29e9aa35d3..a875c866b77 100644 --- a/ldm/invoke/globals.py +++ b/ldm/invoke/globals.py @@ -8,6 +8,7 @@ - root - the root directory under which "models" and "outputs" can be found - initfile - path to the initialization file - try_patchmatch - option to globally disable loading of 'patchmatch' module + - always_use_cpu - force use of CPU even if GPU is available ''' import os @@ -24,3 +25,6 @@ # Awkward workaround to disable attempted loading of pypatchmatch # which is causing CI tests to error out. Globals.try_patchmatch = True + +# Use CPU even if GPU is available (main use case is for debugging MPS issues) +Globals.always_use_cpu = False From ff42027a002a2fad81b04d8538053ba1c7fcd3c0 Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Sun, 4 Dec 2022 16:07:54 +0100 Subject: [PATCH 044/199] add cross-attention control support to diffusers (fails on MPS) For unknown reasons MPS produces garbage output with .swap(). Use --always_use_cpu arg to invoke.py for now to test this code on MPS. 
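For reference, a minimal sketch (not part of this patch) of how the flag added in the previous commit is expected to reach device selection when debugging the MPS problem; Globals.always_use_cpu and choose_torch_device come from the diffs above, and the direct assignment below merely stands in for passing --always_use_cpu on the command line.

    # illustrative only: force CPU so .swap() can be tested on an MPS machine
    from ldm.invoke.globals import Globals
    from ldm.invoke.devices import choose_torch_device

    Globals.always_use_cpu = True            # what `invoke.py --always_use_cpu` sets
    assert choose_torch_device() == "cpu"    # bypasses the cuda/mps detection entirely

    Globals.always_use_cpu = False           # default: fall back to cuda, then mps, then cpu
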
--- ldm/invoke/generator/diffusers_pipeline.py | 8 + .../diffusion/cross_attention_control.py | 184 ++++++++++++++++-- ldm/modules/attention.py | 2 + 3 files changed, 182 insertions(+), 12 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 989d43546e6..174a57fd859 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -9,6 +9,14 @@ import einops import torch import torchvision.transforms as T + +from ldm.models.diffusion.cross_attention_control import InvokeAICrossAttention + +from diffusers.models import attention +# monkeypatch diffusers CrossAttention 🙈 +# this is to make prompt2prompt and (future) attention maps work +attention.CrossAttention = InvokeAICrossAttention + from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipeline diff --git a/ldm/models/diffusion/cross_attention_control.py b/ldm/models/diffusion/cross_attention_control.py index ec7c3c215cc..ec4c3447166 100644 --- a/ldm/models/diffusion/cross_attention_control.py +++ b/ldm/models/diffusion/cross_attention_control.py @@ -1,8 +1,11 @@ import enum -import warnings -from typing import Optional +import math +from typing import Optional, Callable +import psutil import torch +import diffusers +from torch import nn # adapted from bloc97's CrossAttentionControl colab @@ -66,8 +69,12 @@ def __init__(self, arguments: Arguments, step_count: int): def register_cross_attention_modules(self, model): for name,module in get_attention_modules(model, CrossAttentionType.SELF): + if name in self.self_cross_attention_module_identifiers: + assert False, f"name {name} cannot appear more than once" self.self_cross_attention_module_identifiers.append(name) for name,module in get_attention_modules(model, CrossAttentionType.TOKENS): + if name in self.tokens_cross_attention_module_identifiers: + assert False, f"name {name} cannot appear more than once" self.tokens_cross_attention_module_identifiers.append(name) def request_save_attention_maps(self, cross_attention_type: CrossAttentionType): @@ -189,7 +196,7 @@ def setup_cross_attention_control(model, context: Context): # mask=1 means use base prompt attention, mask=0 means use edited prompt attention mask = torch.zeros(max_length) indices_target = torch.arange(max_length, dtype=torch.long) - indices = torch.zeros(max_length, dtype=torch.long) + indices = torch.arange(max_length, dtype=torch.long) for name, a0, a1, b0, b1 in context.arguments.edit_opcodes: if b0 < max_length: if name == "equal":# or (name == "replace" and a1 - a0 == b1 - b0): @@ -204,9 +211,22 @@ def setup_cross_attention_control(model, context: Context): def get_attention_modules(model, which: CrossAttentionType): + # cross_attention_class: type = ldm.modules.attention.CrossAttention + cross_attention_class: type = InvokeAICrossAttention which_attn = "attn1" if which is CrossAttentionType.SELF else "attn2" - return [(name,module) for name, module in model.named_modules() if - type(module).__name__ == "CrossAttention" and which_attn in name] + attention_module_tuples = [(name,module) for name, module in model.named_modules() if + isinstance(module, cross_attention_class) and which_attn in name] + cross_attention_modules_in_model_count = len(attention_module_tuples) + expected_count = 16 + if cross_attention_modules_in_model_count != 
expected_count: + # non-fatal error but .swap() won't work. + print(f"Error! CrossAttentionControl found an unexpected number of InvokeAICrossAttention modules in the model " + + f"(expected {expected_count}, found {cross_attention_modules_in_model_count}). Either monkey-patching failed " + + f"or some assumption has changed about the structure of the model itself. Please fix the monkey-patching, " + + f"and/or update the {expected_count} above to an appropriate number, and/or find and inform someone who knows " + + f"what it means. This error is non-fatal, but it is likely that .swap() and attention map display will not " + + f"work properly until it is fixed.") + return attention_module_tuples def inject_attention_function(unet, context: Context): @@ -246,8 +266,7 @@ def attention_slice_wrangler(module, suggested_attention_slice:torch.Tensor, dim return attention_slice - cross_attention_modules = [(name, module) for (name, module) in unet.named_modules() - if type(module).__name__ == "CrossAttention"] + cross_attention_modules = get_attention_modules(unet, CrossAttentionType.TOKENS) + get_attention_modules(unet, CrossAttentionType.SELF) for identifier, module in cross_attention_modules: module.identifier = identifier try: @@ -257,22 +276,21 @@ def attention_slice_wrangler(module, suggested_attention_slice:torch.Tensor, dim ) except AttributeError as e: if is_attribute_error_about(e, 'set_attention_slice_wrangler'): - warnings.warn(f"TODO: implement for {type(module)}") # TODO + print(f"TODO: implement set_attention_slice_wrangler for {type(module)}") # TODO else: raise def remove_attention_function(unet): - cross_attention_modules = [module for (_, module) in unet.named_modules() - if type(module).__name__ == "CrossAttention"] - for module in cross_attention_modules: + cross_attention_modules = get_attention_modules(unet, CrossAttentionType.TOKENS) + get_attention_modules(unet, CrossAttentionType.SELF) + for identifier, module in cross_attention_modules: try: # clear wrangler callback module.set_attention_slice_wrangler(None) module.set_slicing_strategy_getter(None) except AttributeError as e: if is_attribute_error_about(e, 'set_attention_slice_wrangler'): - warnings.warn(f"TODO: implement for {type(module)}") # TODO + print(f"TODO: implement set_attention_slice_wrangler for {type(module)}") else: raise @@ -282,3 +300,145 @@ def is_attribute_error_about(error: AttributeError, attribute: str): return error.name == attribute else: # Python 3.9 return attribute in str(error) + + + +def get_mem_free_total(device): + #only on cuda + if not torch.cuda.is_available(): + return None + stats = torch.cuda.memory_stats(device) + mem_active = stats['active_bytes.all.current'] + mem_reserved = stats['reserved_bytes.all.current'] + mem_free_cuda, _ = torch.cuda.mem_get_info(device) + mem_free_torch = mem_reserved - mem_active + mem_free_total = mem_free_cuda + mem_free_torch + return mem_free_total + +class InvokeAICrossAttention(diffusers.models.attention.CrossAttention): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.mem_total_gb = psutil.virtual_memory().total // (1 << 30) + + self.attention_slice_wrangler = None + self.slicing_strategy_getter = None + + def set_attention_slice_wrangler(self, wrangler: Optional[Callable[[nn.Module, torch.Tensor, int, int, int], torch.Tensor]]): + ''' + Set custom attention calculator to be called when attention is calculated + :param wrangler: Callback, with args (module, suggested_attention_slice, dim, offset, slice_size), + which returns 
either the suggested_attention_slice or an adjusted equivalent. + `module` is the current CrossAttention module for which the callback is being invoked. + `suggested_attention_slice` is the default-calculated attention slice + `dim` is -1 if the attenion map has not been sliced, or 0 or 1 for dimension-0 or dimension-1 slicing. + If `dim` is >= 0, `offset` and `slice_size` specify the slice start and length. + + Pass None to use the default attention calculation. + :return: + ''' + self.attention_slice_wrangler = wrangler + + def set_slicing_strategy_getter(self, getter: Optional[Callable[[nn.Module], tuple[int,int]]]): + self.slicing_strategy_getter = getter + + def _attention(self, query, key, value): + #default_result = super()._attention(query, key, value) + damian_result = self.get_attention_mem_efficient(query, key, value) + + hidden_states = self.reshape_batch_dim_to_heads(damian_result) + return hidden_states + + def einsum_lowest_level(self, query, key, value, dim, offset, slice_size): + # calculate attention scores + #attention_scores = torch.einsum('b i d, b j d -> b i j', q, k) * self.scale + if dim is not None: + print(f"sliced dim {dim}, offset {offset}, slice_size {slice_size}") + attention_scores = torch.baddbmm( + torch.empty(query.shape[0], query.shape[1], key.shape[1], dtype=query.dtype, device=query.device), + query, + key.transpose(-1, -2), + beta=0, + alpha=self.scale, + ) + + # calculate attention slice by taking the best scores for each latent pixel + default_attention_slice = attention_scores.softmax(dim=-1, dtype=attention_scores.dtype) + attention_slice_wrangler = self.attention_slice_wrangler + if attention_slice_wrangler is not None: + attention_slice = attention_slice_wrangler(self, default_attention_slice, dim, offset, slice_size) + else: + attention_slice = default_attention_slice + + #return torch.einsum('b i j, b j d -> b i d', attention_slice, v) + hidden_states = torch.bmm(attention_slice, value) + return hidden_states + + + def einsum_op_slice_dim0(self, q, k, v, slice_size): + r = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) + for i in range(0, q.shape[0], slice_size): + end = i + slice_size + r[i:end] = self.einsum_lowest_level(q[i:end], k[i:end], v[i:end], dim=0, offset=i, slice_size=slice_size) + return r + + def einsum_op_slice_dim1(self, q, k, v, slice_size): + r = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) + for i in range(0, q.shape[1], slice_size): + end = i + slice_size + r[:, i:end] = self.einsum_lowest_level(q[:, i:end], k, v, dim=1, offset=i, slice_size=slice_size) + return r + + def einsum_op_mps_v1(self, q, k, v): + if q.shape[1] <= 4096: # (512x512) max q.shape[1]: 4096 + return self.einsum_lowest_level(q, k, v, None, None, None) + else: + slice_size = math.floor(2**30 / (q.shape[0] * q.shape[1])) + return self.einsum_op_slice_dim1(q, k, v, slice_size) + + def einsum_op_mps_v2(self, q, k, v): + if self.mem_total_gb > 8 and q.shape[1] <= 4096: + return self.einsum_lowest_level(q, k, v, None, None, None) + else: + return self.einsum_op_slice_dim0(q, k, v, 1) + + def einsum_op_tensor_mem(self, q, k, v, max_tensor_mb): + size_mb = q.shape[0] * q.shape[1] * k.shape[1] * q.element_size() // (1 << 20) + if size_mb <= max_tensor_mb: + return self.einsum_lowest_level(q, k, v, None, None, None) + div = 1 << int((size_mb - 1) / max_tensor_mb).bit_length() + if div <= q.shape[0]: + return self.einsum_op_slice_dim0(q, k, v, q.shape[0] // div) + return self.einsum_op_slice_dim1(q, 
k, v, max(q.shape[1] // div, 1)) + + def einsum_op_cuda(self, q, k, v): + # check if we already have a slicing strategy (this should only happen during cross-attention controlled generation) + slicing_strategy_getter = self.slicing_strategy_getter + if slicing_strategy_getter is not None: + (dim, slice_size) = slicing_strategy_getter(self) + if dim is not None: + # print("using saved slicing strategy with dim", dim, "slice size", slice_size) + if dim == 0: + return self.einsum_op_slice_dim0(q, k, v, slice_size) + elif dim == 1: + return self.einsum_op_slice_dim1(q, k, v, slice_size) + + # fallback for when there is no saved strategy, or saved strategy does not slice + mem_free_total = get_mem_free_total(q.device) + # Divide factor of safety as there's copying and fragmentation + return self.einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20)) + + + def get_attention_mem_efficient(self, q, k, v): + if q.device.type == 'cuda': + #print("in get_attention_mem_efficient with q shape", q.shape, ", k shape", k.shape, ", free memory is", get_mem_free_total(q.device)) + return self.einsum_op_cuda(q, k, v) + + if q.device.type == 'mps' or q.device.type == 'cpu': + if self.mem_total_gb >= 32: + return self.einsum_op_mps_v1(q, k, v) + return self.einsum_op_mps_v2(q, k, v) + + # Smaller slices are faster due to L2/L3/SLC caches. + # Tested on i7 with 8MB L3 cache. + return self.einsum_op_tensor_mem(q, k, v, 32) diff --git a/ldm/modules/attention.py b/ldm/modules/attention.py index 94bb8a2916e..94922270a46 100644 --- a/ldm/modules/attention.py +++ b/ldm/modules/attention.py @@ -165,7 +165,9 @@ def get_mem_free_total(device): class CrossAttention(nn.Module): + def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.): + print(f"Warning! ldm.modules.attention.CrossAttention is no longer being maintained. 
Please use InvokeAICrossAttention instead.") super().__init__() inner_dim = dim_head * heads context_dim = default(context_dim, query_dim) From 875312080d01463171d12a0c7d0ae6206789edb6 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sun, 4 Dec 2022 20:12:04 -0800 Subject: [PATCH 045/199] diffusers support for the inpainting model --- ldm/invoke/generator/diffusers_pipeline.py | 78 ++++++++++++++++------ ldm/invoke/generator/inpaint.py | 47 ++++--------- 2 files changed, 70 insertions(+), 55 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 174a57fd859..430e051ca6f 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -9,10 +9,10 @@ import einops import torch import torchvision.transforms as T +from diffusers.models import attention from ldm.models.diffusion.cross_attention_control import InvokeAICrossAttention -from diffusers.models import attention # monkeypatch diffusers CrossAttention 🙈 # this is to make prompt2prompt and (future) attention maps work attention.CrossAttention = InvokeAICrossAttention @@ -23,6 +23,7 @@ from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler +from torchvision.transforms.functional import resize as tv_resize from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent @@ -49,6 +50,21 @@ class PipelineIntermediateState: ) +@dataclass +class AddsMaskLatents: + forward: Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor] + mask: torch.FloatTensor + mask_latents: torch.FloatTensor + + def __call__(self, latents: torch.FloatTensor, t: torch.Tensor, text_embeddings: torch.FloatTensor) -> torch.Tensor: + batch_size = latents.size(0) + mask = einops.repeat(self.mask, 'b c h w -> (repeat b) c h w', repeat=batch_size) + mask_latents = einops.repeat(self.mask_latents, 'b c h w -> (repeat b) c h w', repeat=batch_size) + model_input, _ = einops.pack([latents, mask, mask_latents], 'b * h w') + # model_input = torch.cat([latents, mask, mask_latents], dim=1) + return self.forward(model_input, t, text_embeddings) + + def image_resized_to_grid_as_tensor(image: PIL.Image.Image, normalize: bool=True, multiple_of=8) -> torch.FloatTensor: """ @@ -57,7 +73,7 @@ def image_resized_to_grid_as_tensor(image: PIL.Image.Image, normalize: bool=True :param multiple_of: resize the input so both dimensions are a multiple of this """ w, h = image.size - w, h = map(lambda x: x - x % 8, (w, h)) # resize to integer multiple of 8 + w, h = map(lambda x: x - x % multiple_of, (w, h)) # resize to integer multiple of 8 transformation = T.Compose([ T.Resize((h, w), T.InterpolationMode.LANCZOS), T.ToTensor(), @@ -68,6 +84,10 @@ def image_resized_to_grid_as_tensor(image: PIL.Image.Image, normalize: bool=True return tensor +def is_inpainting_model(unet: UNet2DConditionModel): + return unet.conv_in.in_channels == 9 + + class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): r""" Pipeline for text-to-image generation using Stable Diffusion. @@ -314,7 +334,7 @@ def img2img_from_embeddings(self, latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) # 6. 
Prepare latent variables - latents = self.prepare_latents_from_image(init_image, latent_timestep, latents_dtype, device, noise_func) + latents, _ = self.prepare_latents_from_image(init_image, latent_timestep, latents_dtype, device, noise_func) result = None for result in self.generate_from_embeddings( @@ -331,7 +351,7 @@ def img2img_from_embeddings(self, def inpaint_from_embeddings( self, init_image: torch.FloatTensor, - mask_image: torch.FloatTensor, + mask: torch.FloatTensor, strength: float, num_inference_steps: int, text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, @@ -349,8 +369,10 @@ def inpaint_from_embeddings( if isinstance(init_image, PIL.Image.Image): init_image = image_resized_to_grid_as_tensor(init_image.convert('RGB')) + init_image = init_image.to(device=device, dtype=latents_dtype) + if init_image.dim() == 3: - init_image = einops.rearrange(init_image, 'c h w -> 1 c h w') + init_image = init_image.unsqueeze(0) img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components) img2img_pipeline.scheduler.set_timesteps(num_inference_steps, device=device) @@ -358,22 +380,38 @@ def inpaint_from_embeddings( # 6. Prepare latent variables latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) - latents = self.prepare_latents_from_image(init_image, latent_timestep, latents_dtype, device, noise_func) + latents, init_image_latents = self.prepare_latents_from_image(init_image, latent_timestep, latents_dtype, device, noise_func) - result = None - for result in self.generate_from_embeddings( - latents, text_embeddings, unconditioned_embeddings, guidance_scale, - extra_conditioning_info=extra_conditioning_info, - timesteps=timesteps, - run_id=run_id, **extra_step_kwargs): - if callback is not None and isinstance(result, PipelineIntermediateState): - callback(result) - if result is None: - raise AssertionError("why was that an empty generator?") - return result + if is_inpainting_model(self.unet): + if mask.dim() == 3: + mask = mask.unsqueeze(0) + mask = tv_resize(mask, latents.shape[-2:], T.InterpolationMode.BILINEAR)\ + .to(device=device, dtype=latents_dtype) + + self.invokeai_diffuser.model_forward_callback = \ + AddsMaskLatents(self._unet_forward, mask, init_image_latents) + else: + # FIXME: need to add guidance that applies mask + pass + result = None - def prepare_latents_from_image(self, init_image, timestep, dtype, device, noise_func) -> torch.FloatTensor: + try: + for result in self.generate_from_embeddings( + latents, text_embeddings, unconditioned_embeddings, guidance_scale, + extra_conditioning_info=extra_conditioning_info, + timesteps=timesteps, + run_id=run_id, **extra_step_kwargs): + if callback is not None and isinstance(result, PipelineIntermediateState): + callback(result) + if result is None: + raise AssertionError("why was that an empty generator?") + return result + finally: + self.invokeai_diffuser.model_forward_callback = self._unet_forward + + + def prepare_latents_from_image(self, init_image, timestep, dtype, device, noise_func) -> (torch.FloatTensor, torch.FloatTensor): # can't quite use upstream StableDiffusionImg2ImgPipeline.prepare_latents # because we have our own noise function init_image = init_image.to(device=device, dtype=dtype) @@ -383,8 +421,8 @@ def prepare_latents_from_image(self, init_image, timestep, dtype, device, noise_ init_latents = 0.18215 * init_latents noise = noise_func(init_latents) - - return self.scheduler.add_noise(init_latents, noise, timestep) + noised_latents = 
self.scheduler.add_noise(init_latents, noise, timestep) + return noised_latents, init_latents def check_for_safety(self, output, dtype): with torch.inference_mode(): diff --git a/ldm/invoke/generator/inpaint.py b/ldm/invoke/generator/inpaint.py index 25c776de555..976121d720f 100644 --- a/ldm/invoke/generator/inpaint.py +++ b/ldm/invoke/generator/inpaint.py @@ -10,9 +10,7 @@ import numpy as np import torch from PIL import Image, ImageFilter, ImageOps, ImageChops -from einops import repeat -from ldm.invoke.generator.base import downsampling from ldm.invoke.generator.diffusers_pipeline import image_resized_to_grid_as_tensor, StableDiffusionGeneratorPipeline from ldm.invoke.generator.img2img import Img2Img from ldm.invoke.globals import Globals @@ -154,7 +152,7 @@ def seam_paint(self, im: Image.Image, seam_size: int, seam_blur: int, prompt, sa ddim_eta, conditioning, init_image = im.copy().convert('RGBA'), - mask_image = mask.convert('RGB'), # Code currently requires an RGB mask + mask_image = mask, strength = strength, mask_blur_radius = 0, seam_size = 0, @@ -228,7 +226,11 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, self.pil_mask = mask_image.copy() debug_image(mask_image, "mask_image BEFORE multiply with pil_image", debug_status=self.enable_image_debugging) - mask_image = ImageChops.multiply(mask_image, self.pil_image.split()[-1].convert('RGB')) + init_alpha = self.pil_image.getchannel("A") + if mask_image.mode != "L": + # FIXME: why do we get passed an RGB image here? We can only use single-channel. + mask_image = mask_image.convert("L") + mask_image = ImageChops.multiply(mask_image, init_alpha) self.pil_mask = mask_image # Resize if requested for inpainting @@ -236,57 +238,32 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, mask_image = mask_image.resize((inpaint_width, inpaint_height)) debug_image(mask_image, "mask_image AFTER multiply with pil_image", debug_status=self.enable_image_debugging) - mask_image = mask_image.resize( - ( - mask_image.width // downsampling, - mask_image.height // downsampling - ), - resample=Image.Resampling.NEAREST - ) - mask_image = image_resized_to_grid_as_tensor(mask_image, normalize=False) + mask: torch.FloatTensor = image_resized_to_grid_as_tensor(mask_image, normalize=False) + else: + mask: torch.FloatTensor = mask_image self.mask_blur_radius = mask_blur_radius - # klms samplers not supported yet, so ignore previous sampler - # if isinstance(sampler,KSampler): - # print( - # ">> Using recommended DDIM sampler for inpainting." - # ) - # sampler = DDIMSampler(self.model, device=self.model.device) - - mask_image = mask_image[0][0].unsqueeze(0).repeat(4,1,1).unsqueeze(0) - mask_image = repeat(mask_image, '1 ... 
-> b ...', b=1) - - t_enc = int(strength * steps) # todo: support cross-attention control uc, c, _ = conditioning - print(f">> target t_enc is {t_enc} steps") - # noinspection PyTypeChecker pipeline: StableDiffusionGeneratorPipeline = self.model pipeline.scheduler = sampler def make_image(x_T): # FIXME: some of this z_enc and inpaint_replace stuff was probably important - # encode (scaled latent) - # z_enc = sampler.stochastic_encode( - # self.init_latent, - # torch.tensor([t_enc]).to(self.model.device), - # noise=x_T - # ) - # # # to replace masked area with latent noise, weighted by inpaint_replace strength # if inpaint_replace > 0.0: # print(f'>> inpaint will replace what was under the mask with a strength of {inpaint_replace}') # l_noise = self.get_noise(kwargs['width'],kwargs['height']) - # inverted_mask = 1.0-mask_image # there will be 1s where the mask is + # inverted_mask = 1.0-mask # there will be 1s where the mask is # masked_region = (1.0-inpaint_replace) * inverted_mask * z_enc + inpaint_replace * inverted_mask * l_noise - # z_enc = z_enc * mask_image + masked_region + # z_enc = z_enc * mask + masked_region pipeline_output = pipeline.inpaint_from_embeddings( init_image=init_image, - mask_image=mask_image, + mask=1 - mask, # expects white means "paint here." strength=strength, num_inference_steps=steps, text_embeddings=c, From b2664e807ef616896f0785b8c88cd4f4352c501b Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sun, 4 Dec 2022 20:12:47 -0800 Subject: [PATCH 046/199] fix debug_image to not crash with non-RGB images. --- ldm/util.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/ldm/util.py b/ldm/util.py index ae28edb96ad..b653c8e7db4 100644 --- a/ldm/util.py +++ b/ldm/util.py @@ -1,17 +1,13 @@ import importlib - -import torch -import numpy as np import math -from collections import abc -from einops import rearrange -from functools import partial - import multiprocessing as mp -from threading import Thread +from collections import abc +from inspect import isfunction from queue import Queue +from threading import Thread -from inspect import isfunction +import numpy as np +import torch from PIL import Image, ImageDraw, ImageFont @@ -221,7 +217,7 @@ def rand_perlin_2d(shape, res, device, fade = lambda t: 6*t**5 - 15*t**4 + 10*t* grid = torch.stack(torch.meshgrid(torch.arange(0, res[0], delta[0]), torch.arange(0, res[1], delta[1]), indexing='ij'), dim = -1).to(device) % 1 rand_val = torch.rand(res[0]+1, res[1]+1) - + angles = 2*math.pi*rand_val gradients = torch.stack((torch.cos(angles), torch.sin(angles)), dim = -1).to(device) @@ -249,8 +245,8 @@ def ask_user(question: str, answers: list): def debug_image(debug_image, debug_text, debug_show=True, debug_result=False, debug_status=False ): if not debug_status: return - - image_copy = debug_image.copy() + + image_copy = debug_image.copy().convert("RGBA") ImageDraw.Draw(image_copy).text( (5, 5), debug_text, @@ -261,4 +257,4 @@ def debug_image(debug_image, debug_text, debug_show=True, debug_result=False, de image_copy.show() if debug_result: - return image_copy \ No newline at end of file + return image_copy From f3570d8344f84f8f1567c3ff63fc33a46dcfee59 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sun, 4 Dec 2022 23:36:12 -0800 Subject: [PATCH 047/199] inpainting for the normal model [WIP] This seems to be performing well until the LAST STEP, at which point it dissolves to confetti. 
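The core idea of the diff below, condensed into a sketch: for a checkpoint that is not an inpainting model, the original image is held in place by re-noising its latents to the current timestep and blending them into the working latents before every UNet call, with the mask as the blend weight. The standalone helper name here is illustrative; the patch wires this logic up as the AddsMaskGuidance callable.

    import torch

    def apply_mask_guidance(latents, t, init_latents, mask, scheduler, noise_func):
        # re-noise the original-image latents to the current timestep
        noise = noise_func(init_latents)
        noised_init = scheduler.add_noise(init_latents, noise, t)
        # mask == 1 -> keep the denoiser's working latents ("paint here")
        # mask == 0 -> pin the region to the re-noised original image
        return torch.lerp(noised_init.to(latents.dtype), latents, mask.to(latents.dtype))

Inpainting checkpoints do not need this blend: they take the mask and the masked-image latents as extra UNet input channels instead (the AddsMaskLatents path).
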
--- ldm/invoke/generator/diffusers_pipeline.py | 49 ++++++++++++++++++---- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 430e051ca6f..67a2f2fba6d 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -22,6 +22,7 @@ from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipeline from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker +from diffusers.schedulers.scheduling_utils import SchedulerMixin from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler from torchvision.transforms.functional import resize as tv_resize from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer @@ -61,10 +62,32 @@ def __call__(self, latents: torch.FloatTensor, t: torch.Tensor, text_embeddings: mask = einops.repeat(self.mask, 'b c h w -> (repeat b) c h w', repeat=batch_size) mask_latents = einops.repeat(self.mask_latents, 'b c h w -> (repeat b) c h w', repeat=batch_size) model_input, _ = einops.pack([latents, mask, mask_latents], 'b * h w') - # model_input = torch.cat([latents, mask, mask_latents], dim=1) return self.forward(model_input, t, text_embeddings) +@dataclass +class AddsMaskGuidance: + forward: Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor] + mask: torch.FloatTensor + mask_latents: torch.FloatTensor + _scheduler: SchedulerMixin + _noise_func: Callable + _debug: Optional[Callable] = None + + def __call__(self, latents: torch.FloatTensor, t: torch.Tensor, text_embeddings: torch.FloatTensor) -> torch.Tensor: + batch_size = latents.size(0) + mask = einops.repeat(self.mask, 'b c h w -> (repeat b) c h w', repeat=batch_size) + noise = self._noise_func(self.mask_latents) + mask_latents = self._scheduler.add_noise(self.mask_latents, noise, t[0]) # .to(dtype=mask_latents.dtype) + mask_latents = einops.repeat(mask_latents, 'b c h w -> (repeat b) c h w', repeat=batch_size) + # if self._debug: + # self._debug(latents, f"t={t[0]} latents") + masked_input = torch.lerp(mask_latents.to(dtype=latents.dtype), latents, mask.to(dtype=latents.dtype)) + if self._debug: + self._debug(masked_input, f"t={t[0]} lerped") + return self.forward(masked_input, t, text_embeddings) + + def image_resized_to_grid_as_tensor(image: PIL.Image.Image, normalize: bool=True, multiple_of=8) -> torch.FloatTensor: """ @@ -382,17 +405,18 @@ def inpaint_from_embeddings( latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) latents, init_image_latents = self.prepare_latents_from_image(init_image, latent_timestep, latents_dtype, device, noise_func) - if is_inpainting_model(self.unet): - if mask.dim() == 3: - mask = mask.unsqueeze(0) - mask = tv_resize(mask, latents.shape[-2:], T.InterpolationMode.BILINEAR)\ - .to(device=device, dtype=latents_dtype) + if mask.dim() == 3: + mask = mask.unsqueeze(0) + mask = tv_resize(mask, latents.shape[-2:], T.InterpolationMode.BILINEAR) \ + .to(device=device, dtype=latents_dtype) + if is_inpainting_model(self.unet): self.invokeai_diffuser.model_forward_callback = \ AddsMaskLatents(self._unet_forward, mask, init_image_latents) else: - # FIXME: need to add guidance that applies mask - pass + self.invokeai_diffuser.model_forward_callback = \ + AddsMaskGuidance(self._unet_forward, mask, 
init_image_latents, + self.scheduler, noise_func) # self.debug_latents) result = None @@ -417,7 +441,7 @@ def prepare_latents_from_image(self, init_image, timestep, dtype, device, noise_ init_image = init_image.to(device=device, dtype=dtype) with torch.inference_mode(): init_latent_dist = self.vae.encode(init_image).latent_dist - init_latents = init_latent_dist.sample() # FIXME: uses torch.randn. make reproducible! + init_latents = init_latent_dist.sample().to(dtype=dtype) # FIXME: uses torch.randn. make reproducible! init_latents = 0.18215 * init_latents noise = noise_func(init_latents) @@ -456,3 +480,10 @@ def _tokenize(self, prompt: Union[str, List[str]]): def channels(self) -> int: """Compatible with DiffusionWrapper""" return self.unet.in_channels + + def debug_latents(self, latents, msg): + with torch.inference_mode(): + from ldm.util import debug_image + decoded = self.numpy_to_pil(self.decode_latents(latents)) + for i, img in enumerate(decoded): + debug_image(img, f"latents {msg} {i+1}/{len(decoded)}", debug_status=True) From c6f31e5f3603a552d0d29a845ed2dec32e57aee6 Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Sun, 4 Dec 2022 11:41:03 +0100 Subject: [PATCH 048/199] fix off-by-one bug in cross-attention-control (#1774) prompt token sequences begin with a "beginning-of-sequence" marker and end with a repeated "end-of-sequence" marker - to make a default prompt length of <bos> + 75 prompt tokens + <eos>. the .swap() code was failing to take the column for <bos> at index 0 into account. the changes here do that, and also add extra handling for a single <eos> (which may be redundant but which is included for completeness). based on my understanding and some assumptions about how this all works, the reason .swap() nevertheless seemed to do the right thing, to some extent, is because over multiple steps the conditioning process in Stable Diffusion operates as a feedback loop. a change to token n-1 has flow-on effects to how the [1x4x64x64] latent tensor is modified by all the tokens after it, - and as the next step is processed, all the tokens before it as well. intuitively, a token's conditioning effects "echo" throughout the whole length of the prompt. so even though the token at n-1 was being edited when what the user actually wanted was to edit the token at n, it nevertheless still had some non-negligible effect, in roughly the right direction, often enough that it seemed like it was working properly.
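to make the indexing concrete, a worked example (the prompt and its tokenization are invented for illustration): for "a cat.swap(dog) playing", the attention-map columns line up as

    # column:   0       1      2          3         4 ...
    #          <bos>    a    cat/dog    playing    <eos>

so the substituted fragment lives in column 2, not column 1; without an explicit opcode for <bos> at column 0, .swap() was editing the column one to the left of the intended token. a simplified sketch of the bracketing this change adds (see the diff below for the real code):

    original_token_count = edited_token_count = 0
    edit_opcodes = [('equal', 0, 1, 0, 1)]  # <bos> occupies column 0 in both prompts
    original_token_count += 1
    edited_token_count += 1
    # ...per-fragment opcodes are appended here, advancing both counters...
    edit_opcodes.append(('equal', original_token_count, original_token_count + 1,
                         edited_token_count, edited_token_count + 1))  # trailing <eos>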
--- ldm/invoke/conditioning.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ldm/invoke/conditioning.py b/ldm/invoke/conditioning.py index 54092578a10..328167d7835 100644 --- a/ldm/invoke/conditioning.py +++ b/ldm/invoke/conditioning.py @@ -77,8 +77,13 @@ def get_uc_and_c_and_ec(prompt_string_uncleaned, model, log_tokens=False, skip_n # for name, a0, a1, b0, b1 in edit_opcodes: only name == 'equal' is currently parsed original_token_count = 0 edited_token_count = 0 - edit_opcodes = [] edit_options = [] + edit_opcodes = [] + # beginning of sequence + edit_opcodes.append(('equal', original_token_count, original_token_count+1, edited_token_count, edited_token_count+1)) + edit_options.append(None) + original_token_count += 1 + edited_token_count += 1 for fragment in flattened_prompt.children: if type(fragment) is CrossAttentionControlSubstitute: original_prompt.append(fragment.original) @@ -105,6 +110,12 @@ def get_uc_and_c_and_ec(prompt_string_uncleaned, model, log_tokens=False, skip_n edit_options.append(None) original_token_count += count edited_token_count += count + # end of sequence + edit_opcodes.append(('equal', original_token_count, original_token_count+1, edited_token_count, edited_token_count+1)) + edit_options.append(None) + original_token_count += 1 + edited_token_count += 1 + original_embeddings, original_tokens = build_embeddings_and_tokens_for_flattened_prompt(model, original_prompt, log_tokens=log_tokens, From 69d42762de89911e4319e94fa7d0d536f38fab9d Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Mon, 5 Dec 2022 20:00:18 +0100 Subject: [PATCH 049/199] refactor common CrossAttention stuff into a mixin so that the old ldm code can still work if necessary --- ldm/invoke/generator/diffusers_pipeline.py | 4 +- .../diffusion/cross_attention_control.py | 45 ++++--- ldm/modules/attention.py | 121 +----------------- 3 files changed, 37 insertions(+), 133 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 67a2f2fba6d..b0b4ac2a675 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -11,11 +11,11 @@ import torchvision.transforms as T from diffusers.models import attention -from ldm.models.diffusion.cross_attention_control import InvokeAICrossAttention +from ldm.models.diffusion.cross_attention_control import InvokeAIDiffusersCrossAttention # monkeypatch diffusers CrossAttention 🙈 # this is to make prompt2prompt and (future) attention maps work -attention.CrossAttention = InvokeAICrossAttention +attention.CrossAttention = InvokeAIDiffusersCrossAttention from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput diff --git a/ldm/models/diffusion/cross_attention_control.py b/ldm/models/diffusion/cross_attention_control.py index ec4c3447166..06867f445d7 100644 --- a/ldm/models/diffusion/cross_attention_control.py +++ b/ldm/models/diffusion/cross_attention_control.py @@ -212,7 +212,7 @@ def setup_cross_attention_control(model, context: Context): def get_attention_modules(model, which: CrossAttentionType): # cross_attention_class: type = ldm.modules.attention.CrossAttention - cross_attention_class: type = InvokeAICrossAttention + cross_attention_class: type = InvokeAIDiffusersCrossAttention which_attn = "attn1" if which is CrossAttentionType.SELF else "attn2" attention_module_tuples = [(name,module) for name, module in model.named_modules() if 
isinstance(module, cross_attention_class) and which_attn in name] @@ -315,12 +315,16 @@ def get_mem_free_total(device): mem_free_total = mem_free_cuda + mem_free_torch return mem_free_total -class InvokeAICrossAttention(diffusers.models.attention.CrossAttention): - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.mem_total_gb = psutil.virtual_memory().total // (1 << 30) +class InvokeAICrossAttentionMixin: + """ + Enable InvokeAI-flavoured CrossAttention calculation, which does aggressive low-memory slicing and calls + through both to an attention_slice_wrangler and a slicing_strategy_getter for custom attention map wrangling + and dymamic slicing strategy selection. + """ + def __init__(self): + self.mem_total_gb = psutil.virtual_memory().total // (1 << 30) self.attention_slice_wrangler = None self.slicing_strategy_getter = None @@ -342,16 +346,9 @@ def set_attention_slice_wrangler(self, wrangler: Optional[Callable[[nn.Module, t def set_slicing_strategy_getter(self, getter: Optional[Callable[[nn.Module], tuple[int,int]]]): self.slicing_strategy_getter = getter - def _attention(self, query, key, value): - #default_result = super()._attention(query, key, value) - damian_result = self.get_attention_mem_efficient(query, key, value) - - hidden_states = self.reshape_batch_dim_to_heads(damian_result) - return hidden_states - def einsum_lowest_level(self, query, key, value, dim, offset, slice_size): # calculate attention scores - #attention_scores = torch.einsum('b i d, b j d -> b i j', q, k) * self.scale + #attention_scores = torch.einsum('b i d, b j d -> b i j', q, k) if dim is not None: print(f"sliced dim {dim}, offset {offset}, slice_size {slice_size}") attention_scores = torch.baddbmm( @@ -370,11 +367,9 @@ def einsum_lowest_level(self, query, key, value, dim, offset, slice_size): else: attention_slice = default_attention_slice - #return torch.einsum('b i j, b j d -> b i d', attention_slice, v) hidden_states = torch.bmm(attention_slice, value) return hidden_states - def einsum_op_slice_dim0(self, q, k, v, slice_size): r = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) for i in range(0, q.shape[0], slice_size): @@ -424,12 +419,12 @@ def einsum_op_cuda(self, q, k, v): return self.einsum_op_slice_dim1(q, k, v, slice_size) # fallback for when there is no saved strategy, or saved strategy does not slice - mem_free_total = get_mem_free_total(q.device) + mem_free_total = self.cached_mem_free_total or get_mem_free_total(q.device) # Divide factor of safety as there's copying and fragmentation return self.einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20)) - def get_attention_mem_efficient(self, q, k, v): + def get_invokeai_attention_mem_efficient(self, q, k, v): if q.device.type == 'cuda': #print("in get_attention_mem_efficient with q shape", q.shape, ", k shape", k.shape, ", free memory is", get_mem_free_total(q.device)) return self.einsum_op_cuda(q, k, v) @@ -442,3 +437,19 @@ def get_attention_mem_efficient(self, q, k, v): # Smaller slices are faster due to L2/L3/SLC caches. # Tested on i7 with 8MB L3 cache. 
return self.einsum_op_tensor_mem(q, k, v, 32) + + +class InvokeAIDiffusersCrossAttention(diffusers.models.attention.CrossAttention, InvokeAICrossAttentionMixin): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + InvokeAICrossAttentionMixin.__init__(self) + + def _attention(self, query, key, value): + #default_result = super()._attention(query, key, value) + damian_result = self.get_invokeai_attention_mem_efficient(query, key, value) + + hidden_states = self.reshape_batch_dim_to_heads(damian_result) + return hidden_states + + diff --git a/ldm/modules/attention.py b/ldm/modules/attention.py index 94922270a46..a926f2c3ade 100644 --- a/ldm/modules/attention.py +++ b/ldm/modules/attention.py @@ -7,6 +7,7 @@ from torch import nn, einsum from einops import rearrange, repeat +from ldm.models.diffusion.cross_attention_control import InvokeAICrossAttentionMixin from ldm.modules.diffusionmodules.util import checkpoint import psutil @@ -163,8 +164,7 @@ def get_mem_free_total(device): mem_free_total = mem_free_cuda + mem_free_torch return mem_free_total - -class CrossAttention(nn.Module): +class CrossAttention(nn.Module, InvokeAICrossAttentionMixin): def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.): print(f"Warning! ldm.modules.attention.CrossAttention is no longer being maintained. Please use InvokeAICrossAttention instead.") @@ -184,117 +184,6 @@ def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0. nn.Dropout(dropout) ) - self.mem_total_gb = psutil.virtual_memory().total // (1 << 30) - - self.cached_mem_free_total = None - self.attention_slice_wrangler = None - self.slicing_strategy_getter = None - - def set_attention_slice_wrangler(self, wrangler: Optional[Callable[[nn.Module, torch.Tensor, int, int, int], torch.Tensor]]): - ''' - Set custom attention calculator to be called when attention is calculated - :param wrangler: Callback, with args (module, suggested_attention_slice, dim, offset, slice_size), - which returns either the suggested_attention_slice or an adjusted equivalent. - `module` is the current CrossAttention module for which the callback is being invoked. - `suggested_attention_slice` is the default-calculated attention slice - `dim` is -1 if the attenion map has not been sliced, or 0 or 1 for dimension-0 or dimension-1 slicing. - If `dim` is >= 0, `offset` and `slice_size` specify the slice start and length. - - Pass None to use the default attention calculation. 
- :return: - ''' - self.attention_slice_wrangler = wrangler - - def set_slicing_strategy_getter(self, getter: Optional[Callable[[nn.Module], tuple[int,int]]]): - self.slicing_strategy_getter = getter - - def cache_free_memory_count(self, device): - self.cached_mem_free_total = get_mem_free_total(device) - print("free cuda memory: ", self.cached_mem_free_total) - - def clear_cached_free_memory_count(self): - self.cached_mem_free_total = None - - def einsum_lowest_level(self, q, k, v, dim, offset, slice_size): - # calculate attention scores - attention_scores = einsum('b i d, b j d -> b i j', q, k) - # calculate attention slice by taking the best scores for each latent pixel - default_attention_slice = attention_scores.softmax(dim=-1, dtype=attention_scores.dtype) - attention_slice_wrangler = self.attention_slice_wrangler - if attention_slice_wrangler is not None: - attention_slice = attention_slice_wrangler(self, default_attention_slice, dim, offset, slice_size) - else: - attention_slice = default_attention_slice - - return einsum('b i j, b j d -> b i d', attention_slice, v) - - def einsum_op_slice_dim0(self, q, k, v, slice_size): - r = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) - for i in range(0, q.shape[0], slice_size): - end = i + slice_size - r[i:end] = self.einsum_lowest_level(q[i:end], k[i:end], v[i:end], dim=0, offset=i, slice_size=slice_size) - return r - - def einsum_op_slice_dim1(self, q, k, v, slice_size): - r = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) - for i in range(0, q.shape[1], slice_size): - end = i + slice_size - r[:, i:end] = self.einsum_lowest_level(q[:, i:end], k, v, dim=1, offset=i, slice_size=slice_size) - return r - - def einsum_op_mps_v1(self, q, k, v): - if q.shape[1] <= 4096: # (512x512) max q.shape[1]: 4096 - return self.einsum_lowest_level(q, k, v, None, None, None) - else: - slice_size = math.floor(2**30 / (q.shape[0] * q.shape[1])) - return self.einsum_op_slice_dim1(q, k, v, slice_size) - - def einsum_op_mps_v2(self, q, k, v): - if self.mem_total_gb > 8 and q.shape[1] <= 4096: - return self.einsum_lowest_level(q, k, v, None, None, None) - else: - return self.einsum_op_slice_dim0(q, k, v, 1) - - def einsum_op_tensor_mem(self, q, k, v, max_tensor_mb): - size_mb = q.shape[0] * q.shape[1] * k.shape[1] * q.element_size() // (1 << 20) - if size_mb <= max_tensor_mb: - return self.einsum_lowest_level(q, k, v, None, None, None) - div = 1 << int((size_mb - 1) / max_tensor_mb).bit_length() - if div <= q.shape[0]: - return self.einsum_op_slice_dim0(q, k, v, q.shape[0] // div) - return self.einsum_op_slice_dim1(q, k, v, max(q.shape[1] // div, 1)) - - def einsum_op_cuda(self, q, k, v): - # check if we already have a slicing strategy (this should only happen during cross-attention controlled generation) - slicing_strategy_getter = self.slicing_strategy_getter - if slicing_strategy_getter is not None: - (dim, slice_size) = slicing_strategy_getter(self) - if dim is not None: - # print("using saved slicing strategy with dim", dim, "slice size", slice_size) - if dim == 0: - return self.einsum_op_slice_dim0(q, k, v, slice_size) - elif dim == 1: - return self.einsum_op_slice_dim1(q, k, v, slice_size) - - # fallback for when there is no saved strategy, or saved strategy does not slice - mem_free_total = self.cached_mem_free_total or get_mem_free_total(q.device) - # Divide factor of safety as there's copying and fragmentation - return self.einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20)) - - 
- def get_attention_mem_efficient(self, q, k, v): - if q.device.type == 'cuda': - #print("in get_attention_mem_efficient with q shape", q.shape, ", k shape", k.shape, ", free memory is", get_mem_free_total(q.device)) - return self.einsum_op_cuda(q, k, v) - - if q.device.type == 'mps': - if self.mem_total_gb >= 32: - return self.einsum_op_mps_v1(q, k, v) - return self.einsum_op_mps_v2(q, k, v) - - # Smaller slices are faster due to L2/L3/SLC caches. - # Tested on i7 with 8MB L3 cache. - return self.einsum_op_tensor_mem(q, k, v, 32) def forward(self, x, context=None, mask=None): h = self.heads @@ -307,7 +196,11 @@ def forward(self, x, context=None, mask=None): q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h=h), (q, k, v)) - r = self.get_attention_mem_efficient(q, k, v) + # prevent scale being applied twice + cached_scale = self.scale + self.scale = 1 + r = self.get_invokeai_attention_mem_efficient(q, k, v) + self.scale = cached_scale hidden_states = rearrange(r, '(b h) n d -> b n (h d)', h=h) return self.to_out(hidden_states) From 5c7e6751e0aee1d546956e451dfb1ac188b71e24 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Mon, 5 Dec 2022 12:36:50 -0800 Subject: [PATCH 050/199] inpainting for the normal model. I think it works this time. --- ldm/invoke/generator/diffusers_pipeline.py | 95 +++++++++++++++++----- 1 file changed, 76 insertions(+), 19 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index b0b4ac2a675..95de688f578 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -22,8 +22,9 @@ from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion import StableDiffusionPipeline from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion_img2img import StableDiffusionImg2ImgPipeline from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker -from diffusers.schedulers.scheduling_utils import SchedulerMixin +from diffusers.schedulers.scheduling_utils import SchedulerMixin, SchedulerOutput from diffusers.schedulers import DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler +from diffusers.utils.outputs import BaseOutput from torchvision.transforms.functional import resize as tv_resize from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer @@ -53,39 +54,76 @@ class PipelineIntermediateState: @dataclass class AddsMaskLatents: + """Add the channels required for inpainting model input. + + The inpainting model takes the normal latent channels as input, _plus_ a one-channel mask + and the latent encoding of the base image. + + This class assumes the same mask and base image should apply to all items in the batch. 
+ """ forward: Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor] mask: torch.FloatTensor - mask_latents: torch.FloatTensor + initial_image_latents: torch.FloatTensor def __call__(self, latents: torch.FloatTensor, t: torch.Tensor, text_embeddings: torch.FloatTensor) -> torch.Tensor: + model_input = self.add_mask_channels(latents) + return self.forward(model_input, t, text_embeddings) + + def add_mask_channels(self, latents): batch_size = latents.size(0) + # duplicate mask and latents for each batch mask = einops.repeat(self.mask, 'b c h w -> (repeat b) c h w', repeat=batch_size) - mask_latents = einops.repeat(self.mask_latents, 'b c h w -> (repeat b) c h w', repeat=batch_size) - model_input, _ = einops.pack([latents, mask, mask_latents], 'b * h w') - return self.forward(model_input, t, text_embeddings) + image_latents = einops.repeat(self.initial_image_latents, 'b c h w -> (repeat b) c h w', repeat=batch_size) + # add mask and image as additional channels + model_input, _ = einops.pack([latents, mask, image_latents], 'b * h w') + return model_input + +def are_like_tensors(a: torch.Tensor, b: object) -> bool: + return ( + isinstance(b, torch.Tensor) + and (a.size() == b.size()) + ) @dataclass class AddsMaskGuidance: - forward: Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor] mask: torch.FloatTensor mask_latents: torch.FloatTensor _scheduler: SchedulerMixin _noise_func: Callable _debug: Optional[Callable] = None - def __call__(self, latents: torch.FloatTensor, t: torch.Tensor, text_embeddings: torch.FloatTensor) -> torch.Tensor: + def __call__(self, step_output: BaseOutput | SchedulerOutput, t: torch.Tensor, conditioning) -> BaseOutput: + output_class = step_output.__class__ # We'll create a new one with masked data. + + # The problem with taking SchedulerOutput instead of the model output is that we're less certain what's in it. + # It's reasonable to assume the first thing is prev_sample, but then does it have other things + # like pred_original_sample? Should we apply the mask to them too? + # But what if there's just some other random field? + prev_sample = step_output[0] + # Mask anything that has the same shape as prev_sample, return others as-is. + return output_class( + {k: (self.apply_mask(v, self._t_for_field(k, t)) + if are_like_tensors(prev_sample, v) else v) + for k, v in step_output.items()} + ) + + def _t_for_field(self, field_name:str, t): + if field_name == "pred_original_sample": + return torch.zeros_like(t, dtype=t.dtype) # it represents t=0 + return t + + def apply_mask(self, latents: torch.Tensor, t) -> torch.Tensor: batch_size = latents.size(0) mask = einops.repeat(self.mask, 'b c h w -> (repeat b) c h w', repeat=batch_size) noise = self._noise_func(self.mask_latents) - mask_latents = self._scheduler.add_noise(self.mask_latents, noise, t[0]) # .to(dtype=mask_latents.dtype) + mask_latents = self._scheduler.add_noise(self.mask_latents, noise, t) + # TODO: Do we need to also apply scheduler.scale_model_input? Or is add_noise appropriately scaled already? 
mask_latents = einops.repeat(mask_latents, 'b c h w -> (repeat b) c h w', repeat=batch_size) - # if self._debug: - # self._debug(latents, f"t={t[0]} latents") masked_input = torch.lerp(mask_latents.to(dtype=latents.dtype), latents, mask.to(dtype=latents.dtype)) if self._debug: - self._debug(masked_input, f"t={t[0]} lerped") - return self.forward(masked_input, t, text_embeddings) + self._debug(masked_input, f"t={t} lerped") + return masked_input def image_resized_to_grid_as_tensor(image: PIL.Image.Image, normalize: bool=True, multiple_of=8) -> torch.FloatTensor: @@ -263,10 +301,14 @@ def generate_from_embeddings( run_id: str = None, extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, timesteps = None, + additional_guidance: List[Callable] = None, **extra_step_kwargs): if run_id is None: run_id = secrets.token_urlsafe(self.ID_LENGTH) + if additional_guidance is None: + additional_guidance = [] + if extra_conditioning_info is not None and extra_conditioning_info.wants_cross_attention_control: self.invokeai_diffuser.setup_cross_attention_control(extra_conditioning_info, step_count=len(self.scheduler.timesteps)) @@ -289,7 +331,8 @@ def generate_from_embeddings( batched_t.fill_(t) step_output = self.step(batched_t, latents, guidance_scale, text_embeddings, unconditioned_embeddings, - i, **extra_step_kwargs) + i, additional_guidance=additional_guidance, + **extra_step_kwargs) latents = step_output.prev_sample predicted_original = getattr(step_output, 'pred_original_sample', None) yield PipelineIntermediateState(run_id=run_id, step=i, timestep=int(t), latents=latents, @@ -306,11 +349,14 @@ def generate_from_embeddings( @torch.inference_mode() def step(self, t: torch.Tensor, latents: torch.Tensor, guidance_scale: float, text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, - step_index:int | None = None, + step_index:int | None = None, additional_guidance: List[Callable] = None, **extra_step_kwargs): # invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value timestep = t[0] + if additional_guidance is None: + additional_guidance = [] + # TODO: should this scaling happen here or inside self._unet_forward? # i.e. before or after passing it to InvokeAIDiffuserComponent latent_model_input = self.scheduler.scale_model_input(latents, timestep) @@ -323,7 +369,15 @@ def step(self, t: torch.Tensor, latents: torch.Tensor, guidance_scale: float, step_index=step_index) # compute the previous noisy sample x_t -> x_t-1 - return self.scheduler.step(noise_pred, timestep, latents, **extra_step_kwargs) + step_output = self.scheduler.step(noise_pred, timestep, latents, **extra_step_kwargs) + + # TODO: this additional_guidance extension point feels redundant with InvokeAIDiffusionComponent. + # But the way things are now, scheduler runs _after_ that, so there was + # no way to use it to apply an operation that happens after the last scheduler.step. + for guidance in additional_guidance: + step_output = guidance(step_output, timestep, (unconditioned_embeddings, text_embeddings)) + + return step_output def _unet_forward(self, latents, t, text_embeddings): # predict the noise residual @@ -401,6 +455,8 @@ def inpaint_from_embeddings( img2img_pipeline.scheduler.set_timesteps(num_inference_steps, device=device) timesteps = img2img_pipeline.get_timesteps(num_inference_steps, strength, device=device) + assert img2img_pipeline.scheduler is self.scheduler + # 6. 
Prepare latent variables latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) latents, init_image_latents = self.prepare_latents_from_image(init_image, latent_timestep, latents_dtype, device, noise_func) @@ -410,13 +466,14 @@ def inpaint_from_embeddings( mask = tv_resize(mask, latents.shape[-2:], T.InterpolationMode.BILINEAR) \ .to(device=device, dtype=latents_dtype) + guidance: List[Callable] = [] + if is_inpainting_model(self.unet): + # TODO: we should probably pass this in so we don't have to try/finally around setting it. self.invokeai_diffuser.model_forward_callback = \ AddsMaskLatents(self._unet_forward, mask, init_image_latents) else: - self.invokeai_diffuser.model_forward_callback = \ - AddsMaskGuidance(self._unet_forward, mask, init_image_latents, - self.scheduler, noise_func) # self.debug_latents) + guidance.append(AddsMaskGuidance(mask, init_image_latents, self.scheduler, noise_func)) result = None @@ -425,7 +482,7 @@ def inpaint_from_embeddings( latents, text_embeddings, unconditioned_embeddings, guidance_scale, extra_conditioning_info=extra_conditioning_info, timesteps=timesteps, - run_id=run_id, **extra_step_kwargs): + run_id=run_id, additional_guidance=guidance, **extra_step_kwargs): if callback is not None and isinstance(result, PipelineIntermediateState): callback(result) if result is None: From bf6376417a734264dd8bc1095532cf6e273421ce Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Tue, 6 Dec 2022 15:52:49 -0800 Subject: [PATCH 051/199] diffusers: reset num_vectors_per_token sync with 44a00555718f1df173c60da0ed646cf700e29537 --- ldm/invoke/generator/diffusers_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 95de688f578..22fee100194 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -47,7 +47,7 @@ class PipelineIntermediateState: placeholder_strings=["*"], initializer_wods=["sculpture"], per_image_tokens=False, - num_vectors_per_token=8, + num_vectors_per_token=1, progressive_words=False ) From 04a5bc938e606c8f807dc449596a831836cb3c59 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Tue, 6 Dec 2022 19:16:28 -0800 Subject: [PATCH 052/199] diffusers: txt2img2img (hires_fix) with so much slicing and dicing of pipeline methods to stitch them together --- ldm/invoke/generator/diffusers_pipeline.py | 146 ++++++++++++++------ ldm/invoke/generator/txt2img.py | 8 -- ldm/invoke/generator/txt2img2img.py | 147 +++++++-------------- 3 files changed, 149 insertions(+), 152 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 22fee100194..35db1db3837 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -3,7 +3,7 @@ import secrets import warnings from dataclasses import dataclass -from typing import List, Optional, Union, Callable +from typing import List, Optional, Union, Callable, Type, TypeVar, Generic, Any, ParamSpec import PIL.Image import einops @@ -11,11 +11,11 @@ import torchvision.transforms as T from diffusers.models import attention -from ldm.models.diffusion.cross_attention_control import InvokeAIDiffusersCrossAttention +from ...models.diffusion import cross_attention_control # monkeypatch diffusers CrossAttention 🙈 # this is to make prompt2prompt and (future) attention maps 
work -attention.CrossAttention = InvokeAIDiffusersCrossAttention +attention.CrossAttention = cross_attention_control.InvokeAIDiffusersCrossAttention from diffusers.models import AutoencoderKL, UNet2DConditionModel from diffusers.pipelines.stable_diffusion import StableDiffusionPipelineOutput @@ -126,6 +126,10 @@ def apply_mask(self, latents: torch.Tensor, t) -> torch.Tensor: return masked_input +def trim_to_multiple_of(*args, multiple_of=8): + return tuple((x - x % multiple_of) for x in args) + + def image_resized_to_grid_as_tensor(image: PIL.Image.Image, normalize: bool=True, multiple_of=8) -> torch.FloatTensor: """ @@ -133,8 +137,7 @@ def image_resized_to_grid_as_tensor(image: PIL.Image.Image, normalize: bool=True :param normalize: scale the range to [-1, 1] instead of [0, 1] :param multiple_of: resize the input so both dimensions are a multiple of this """ - w, h = image.size - w, h = map(lambda x: x - x % multiple_of, (w, h)) # resize to integer multiple of 8 + w, h = trim_to_multiple_of(*image.size) transformation = T.Compose([ T.Resize((h, w), T.InterpolationMode.LANCZOS), T.ToTensor(), @@ -148,6 +151,26 @@ def image_resized_to_grid_as_tensor(image: PIL.Image.Image, normalize: bool=True def is_inpainting_model(unet: UNet2DConditionModel): return unet.conv_in.in_channels == 9 +CallbackType = TypeVar('CallbackType') +ReturnType = TypeVar('ReturnType') +ParamType = ParamSpec('ParamType') + +@dataclass(frozen=True) +class GeneratorToCallbackinator(Generic[ParamType, ReturnType, CallbackType]): + generator_method: Callable[ParamType, ReturnType] + callback_arg_type: Type[CallbackType] + + def __call__(self, *args: ParamType.args, + callback:Callable[[CallbackType], Any]=None, + **kwargs: ParamType.kwargs) -> ReturnType: + result = None + for result in self.generator_method(*args, **kwargs): + if callback is not None and isinstance(result, self.callback_arg_type): + callback(result) + if result is None: + raise AssertionError("why was that an empty generator?") + return result + class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): r""" @@ -250,6 +273,21 @@ def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, raise AssertionError("why was that an empty generator?") return result + def latents_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, + text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, + guidance_scale: float, + *, callback: Callable[[PipelineIntermediateState], None]=None, + extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo=None, + run_id=None, + **extra_step_kwargs) -> PipelineIntermediateState: + self.scheduler.set_timesteps(num_inference_steps, device=self.unet.device) + f = GeneratorToCallbackinator(self.generate_latents_from_embeddings, PipelineIntermediateState) + return f(latents, text_embeddings, unconditioned_embeddings, guidance_scale, + extra_conditioning_info=extra_conditioning_info, + run_id=run_id, + callback=callback, + **extra_step_kwargs) + def generate( self, prompt: Union[str, List[str]], @@ -303,19 +341,42 @@ def generate_from_embeddings( timesteps = None, additional_guidance: List[Callable] = None, **extra_step_kwargs): + latents = yield from self.generate_latents_from_embeddings(latents, text_embeddings, unconditioned_embeddings, + guidance_scale, run_id=run_id, extra_conditioning_info=extra_conditioning_info, + timesteps=timesteps, additional_guidance=additional_guidance, **extra_step_kwargs) + + # 
https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 + torch.cuda.empty_cache() + + with torch.inference_mode(): + image = self.decode_latents(latents) + output = StableDiffusionPipelineOutput(images=image, nsfw_content_detected=[]) + yield self.check_for_safety(output, dtype=text_embeddings.dtype) + + def generate_latents_from_embeddings( + self, + latents: torch.Tensor, + text_embeddings: torch.Tensor, + unconditioned_embeddings: torch.Tensor, + guidance_scale: float, + *, + run_id: str = None, + extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, + timesteps = None, + additional_guidance: List[Callable] = None, + **extra_step_kwargs + ): if run_id is None: run_id = secrets.token_urlsafe(self.ID_LENGTH) - if additional_guidance is None: additional_guidance = [] - if extra_conditioning_info is not None and extra_conditioning_info.wants_cross_attention_control: self.invokeai_diffuser.setup_cross_attention_control(extra_conditioning_info, step_count=len(self.scheduler.timesteps)) else: self.invokeai_diffuser.remove_cross_attention_control() - if timesteps is None: + # NOTE: Depends on scheduler being already initialized! timesteps = self.scheduler.timesteps # scale the initial noise by the standard deviation required by the scheduler @@ -326,7 +387,7 @@ def generate_from_embeddings( batch_size = latents.shape[0] batched_t = torch.full((batch_size,), timesteps[0], dtype=timesteps.dtype, device=self.unet.device) - # NOTE: Depends on scheduler being already initialized! + for i, t in enumerate(self.progress_bar(timesteps)): batched_t.fill_(t) step_output = self.step(batched_t, latents, guidance_scale, @@ -337,14 +398,7 @@ def generate_from_embeddings( predicted_original = getattr(step_output, 'pred_original_sample', None) yield PipelineIntermediateState(run_id=run_id, step=i, timestep=int(t), latents=latents, predicted_original=predicted_original) - - # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 - torch.cuda.empty_cache() - - with torch.inference_mode(): - image = self.decode_latents(latents) - output = StableDiffusionPipelineOutput(images=image, nsfw_content_detected=[]) - yield self.check_for_safety(output, dtype=text_embeddings.dtype) + return latents @torch.inference_mode() def step(self, t: torch.Tensor, latents: torch.Tensor, guidance_scale: float, @@ -396,34 +450,38 @@ def img2img_from_embeddings(self, **extra_step_kwargs) -> StableDiffusionPipelineOutput: device = self.unet.device latents_dtype = self.unet.dtype - batch_size = 1 - num_images_per_prompt = 1 - if isinstance(init_image, PIL.Image.Image): init_image = image_resized_to_grid_as_tensor(init_image.convert('RGB')) if init_image.dim() == 3: init_image = einops.rearrange(init_image, 'c h w -> 1 c h w') + # 6. 
Prepare latent variables + initial_latents = self.non_noised_latents_from_image(init_image, device=device, dtype=latents_dtype) + + result = self.img2img_from_latents_and_embeddings(initial_latents, num_inference_steps, text_embeddings, + unconditioned_embeddings, guidance_scale, strength, + extra_conditioning_info, noise_func, run_id, callback, + **extra_step_kwargs) + return result + + def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_steps, text_embeddings, + unconditioned_embeddings, guidance_scale, strength, extra_conditioning_info, + noise_func, run_id=None, callback=None, **extra_step_kwargs): + device = self.unet.device + batch_size = initial_latents.size(0) img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components) img2img_pipeline.scheduler.set_timesteps(num_inference_steps, device=device) timesteps = img2img_pipeline.get_timesteps(num_inference_steps, strength, device=device) - latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) + latent_timestep = timesteps[:1].repeat(batch_size) + latents = self.noise_latents_for_time(initial_latents, latent_timestep, noise_func=noise_func) - # 6. Prepare latent variables - latents, _ = self.prepare_latents_from_image(init_image, latent_timestep, latents_dtype, device, noise_func) - - result = None - for result in self.generate_from_embeddings( - latents, text_embeddings, unconditioned_embeddings, guidance_scale, - extra_conditioning_info=extra_conditioning_info, - timesteps=timesteps, - run_id=run_id, **extra_step_kwargs): - if callback is not None and isinstance(result, PipelineIntermediateState): - callback(result) - if result is None: - raise AssertionError("why was that an empty generator?") - return result + f = GeneratorToCallbackinator(self.generate_from_embeddings, PipelineIntermediateState) + return f(latents, text_embeddings, unconditioned_embeddings, guidance_scale, + extra_conditioning_info=extra_conditioning_info, + timesteps=timesteps, + callback=callback, + run_id=run_id, **extra_step_kwargs) def inpaint_from_embeddings( self, @@ -459,7 +517,10 @@ def inpaint_from_embeddings( # 6. Prepare latent variables latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) - latents, init_image_latents = self.prepare_latents_from_image(init_image, latent_timestep, latents_dtype, device, noise_func) + # can't quite use upstream StableDiffusionImg2ImgPipeline.prepare_latents + # because we have our own noise function + init_image_latents = self.non_noised_latents_from_image(init_image, device=device, dtype=latents_dtype) + latents = self.noise_latents_for_time(init_image_latents, latent_timestep, noise_func=noise_func) if mask.dim() == 3: mask = mask.unsqueeze(0) @@ -491,19 +552,18 @@ def inpaint_from_embeddings( finally: self.invokeai_diffuser.model_forward_callback = self._unet_forward - - def prepare_latents_from_image(self, init_image, timestep, dtype, device, noise_func) -> (torch.FloatTensor, torch.FloatTensor): - # can't quite use upstream StableDiffusionImg2ImgPipeline.prepare_latents - # because we have our own noise function + def non_noised_latents_from_image(self, init_image, *, device, dtype): init_image = init_image.to(device=device, dtype=dtype) with torch.inference_mode(): init_latent_dist = self.vae.encode(init_image).latent_dist init_latents = init_latent_dist.sample().to(dtype=dtype) # FIXME: uses torch.randn. make reproducible! 
init_latents = 0.18215 * init_latents + return init_latents - noise = noise_func(init_latents) - noised_latents = self.scheduler.add_noise(init_latents, noise, timestep) - return noised_latents, init_latents + def noise_latents_for_time(self, latents, timestep, *, noise_func): + noise = noise_func(latents) + noised_latents = self.scheduler.add_noise(latents, noise, timestep) + return noised_latents def check_for_safety(self, output, dtype): with torch.inference_mode(): diff --git a/ldm/invoke/generator/txt2img.py b/ldm/invoke/generator/txt2img.py index f9af1ac3ed7..36f5219b28e 100644 --- a/ldm/invoke/generator/txt2img.py +++ b/ldm/invoke/generator/txt2img.py @@ -29,10 +29,6 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, pipeline.scheduler = sampler def make_image(x_T) -> PIL.Image.Image: - # FIXME: restore free_gpu_mem functionality - # if self.free_gpu_mem and self.model.model.device != self.model.device: - # self.model.model.to(self.model.device) - pipeline_output = pipeline.image_from_embeddings( latents=x_T, num_inference_steps=steps, @@ -45,10 +41,6 @@ def make_image(x_T) -> PIL.Image.Image: # TODO: threshold = threshold, ) - # FIXME: restore free_gpu_mem functionality - # if self.free_gpu_mem: - # self.model.model.to("cpu") - return pipeline.numpy_to_pil(pipeline_output.images)[0] return make_image diff --git a/ldm/invoke/generator/txt2img2img.py b/ldm/invoke/generator/txt2img2img.py index 3da42ebb8af..35c6a39ca29 100644 --- a/ldm/invoke/generator/txt2img2img.py +++ b/ldm/invoke/generator/txt2img2img.py @@ -3,13 +3,12 @@ ''' import math +from typing import Callable, Optional import torch -from PIL import Image from ldm.invoke.generator.base import Generator -from ldm.invoke.generator.omnibus import Omnibus -from ldm.models.diffusion.ddim import DDIMSampler +from ldm.invoke.generator.diffusers_pipeline import trim_to_multiple_of, StableDiffusionGeneratorPipeline class Txt2Img2Img(Generator): @@ -17,9 +16,9 @@ def __init__(self, model, precision): super().__init__(model, precision) self.init_latent = None # for get_noise() - @torch.no_grad() - def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, - conditioning,width,height,strength,step_callback=None,**kwargs): + def get_make_image(self, prompt:str, sampler, steps:int, cfg_scale:float, ddim_eta, + conditioning, width:int, height:int, strength:float, + step_callback:Optional[Callable]=None, **kwargs): """ Returns a function returning an image derived from the prompt and the initial image Return value depends on the seed at the time you call it @@ -29,125 +28,72 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, scale_dim = min(width, height) scale = 512 / scale_dim - init_width = math.ceil(scale * width / 64) * 64 - init_height = math.ceil(scale * height / 64) * 64 + init_width, init_height = trim_to_multiple_of(scale * width, scale * height) - @torch.no_grad() - def make_image(x_T): + # noinspection PyTypeChecker + pipeline: StableDiffusionGeneratorPipeline = self.model + pipeline.scheduler = sampler - shape = [ - self.latent_channels, - init_height // self.downsampling_factor, - init_width // self.downsampling_factor, - ] + def make_image(x_T): - sampler.make_schedule( - ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False + pipeline_output = pipeline.latents_from_embeddings( + latents=x_T, + num_inference_steps=steps, + text_embeddings=c, + unconditioned_embeddings=uc, + guidance_scale=cfg_scale, + callback=step_callback, + extra_conditioning_info=extra_conditioning_info, + # TODO: 
eta = ddim_eta, + # TODO: threshold = threshold, ) - #x = self.get_noise(init_width, init_height) - x = x_T - - if self.free_gpu_mem and self.model.model.device != self.model.device: - self.model.model.to(self.model.device) - - samples, _ = sampler.sample( - batch_size = 1, - S = steps, - x_T = x, - conditioning = c, - shape = shape, - verbose = False, - unconditional_guidance_scale = cfg_scale, - unconditional_conditioning = uc, - eta = ddim_eta, - img_callback = step_callback, - extra_conditioning_info = extra_conditioning_info - ) + first_pass_latent_output = pipeline_output.latents print( f"\n>> Interpolating from {init_width}x{init_height} to {width}x{height} using DDIM sampling" ) # resizing - samples = torch.nn.functional.interpolate( - samples, + resized_latents = torch.nn.functional.interpolate( + first_pass_latent_output, size=(height // self.downsampling_factor, width // self.downsampling_factor), mode="bilinear" ) - t_enc = int(strength * steps) - ddim_sampler = DDIMSampler(self.model, device=self.model.device) - ddim_sampler.make_schedule( - ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False - ) - - z_enc = ddim_sampler.stochastic_encode( - samples, - torch.tensor([t_enc]).to(self.model.device), - noise=self.get_noise(width,height,False) - ) - - # decode it - samples = ddim_sampler.decode( - z_enc, - c, - t_enc, - img_callback = step_callback, - unconditional_guidance_scale=cfg_scale, - unconditional_conditioning=uc, + pipeline_output = pipeline.img2img_from_latents_and_embeddings( + resized_latents, + num_inference_steps=steps, + text_embeddings=c, + unconditioned_embeddings=uc, + guidance_scale=cfg_scale, strength=strength, extra_conditioning_info=extra_conditioning_info, - all_timesteps_count=steps - ) + noise_func=self.get_noise_like, + callback=step_callback) - if self.free_gpu_mem: - self.model.model.to("cpu") + return pipeline.numpy_to_pil(pipeline_output.images)[0] - return self.sample_to_image(samples) + + # FIXME: do we really need something entirely different for the inpainting model? # in the case of the inpainting model being loaded, the trick of # providing an interpolated latent doesn't work, so we transiently # create a 512x512 PIL image, upscale it, and run the inpainting # over it in img2img mode. 
Because the inpaing model is so conservative # it doesn't change the image (much) - def inpaint_make_image(x_T): - omnibus = Omnibus(self.model,self.precision) - result = omnibus.generate( - prompt, - sampler=sampler, - width=init_width, - height=init_height, - step_callback=step_callback, - steps = steps, - cfg_scale = cfg_scale, - ddim_eta = ddim_eta, - conditioning = conditioning, - **kwargs - ) - assert result is not None and len(result)>0,'** txt2img failed **' - image = result[0][0] - interpolated_image = image.resize((width,height),resample=Image.Resampling.LANCZOS) - print(kwargs.pop('init_image',None)) - result = omnibus.generate( - prompt, - sampler=sampler, - init_image=interpolated_image, - width=width, - height=height, - seed=result[0][1], - step_callback=step_callback, - steps = steps, - cfg_scale = cfg_scale, - ddim_eta = ddim_eta, - conditioning = conditioning, - **kwargs - ) - return result[0][0] - - if sampler.uses_inpainting_model(): - return inpaint_make_image + + return make_image + + def get_noise_like(self, like: torch.Tensor): + device = like.device + if device.type == 'mps': + x = torch.randn_like(like, device='cpu').to(device) else: - return make_image + x = torch.randn_like(like, device=device) + if self.perlin > 0.0: + shape = like.shape + x = (1-self.perlin)*x + self.perlin*self.get_perlin_noise(shape[3], shape[2]) + return x # returns a tensor filled with random numbers from a normal distribution def get_noise(self,width,height,scale = True): @@ -175,4 +121,3 @@ def get_noise(self,width,height,scale = True): scaled_height // self.downsampling_factor, scaled_width // self.downsampling_factor], device=device) - From 821c7df2401deb3101654304d963642d22256f53 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 7 Dec 2022 18:20:56 -0800 Subject: [PATCH 053/199] refactor(diffusers): reduce some code duplication amongst the different tasks --- ldm/invoke/generator/diffusers_pipeline.py | 176 +++++++-------------- 1 file changed, 61 insertions(+), 115 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 35db1db3837..4dbd576c510 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -157,6 +157,8 @@ def is_inpainting_model(unet: UNet2DConditionModel): @dataclass(frozen=True) class GeneratorToCallbackinator(Generic[ParamType, ReturnType, CallbackType]): + """Convert a generator to a function with a callback and a return value.""" + generator_method: Callable[ParamType, ReturnType] callback_arg_type: Type[CallbackType] @@ -261,111 +263,48 @@ def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, :param run_id: :param extra_step_kwargs: """ - self.scheduler.set_timesteps(num_inference_steps, device=self.unet.device) - result = None - for result in self.generate_from_embeddings( - latents, text_embeddings, unconditioned_embeddings, guidance_scale, - extra_conditioning_info=extra_conditioning_info, - run_id=run_id, **extra_step_kwargs): - if callback is not None and isinstance(result, PipelineIntermediateState): - callback(result) - if result is None: - raise AssertionError("why was that an empty generator?") - return result - - def latents_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, - text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, - guidance_scale: float, - *, callback: Callable[[PipelineIntermediateState], None]=None, - 
extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo=None, - run_id=None, - **extra_step_kwargs) -> PipelineIntermediateState: - self.scheduler.set_timesteps(num_inference_steps, device=self.unet.device) - f = GeneratorToCallbackinator(self.generate_latents_from_embeddings, PipelineIntermediateState) - return f(latents, text_embeddings, unconditioned_embeddings, guidance_scale, - extra_conditioning_info=extra_conditioning_info, - run_id=run_id, - callback=callback, - **extra_step_kwargs) - - def generate( - self, - prompt: Union[str, List[str]], - *, - opposing_prompt: Union[str, List[str]] = None, - height: Optional[int] = 512, - width: Optional[int] = 512, - num_inference_steps: Optional[int] = 50, - guidance_scale: Optional[float] = 7.5, - generator: Optional[torch.Generator] = None, - latents: Optional[torch.FloatTensor] = None, - run_id: str = None, - **extra_step_kwargs, - ): - if isinstance(prompt, str): - batch_size = 1 - else: - batch_size = len(prompt) - - if height % 8 != 0 or width % 8 != 0: - raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.") - - # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2) - # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1` - # corresponds to doing no classifier free guidance. - do_classifier_free_guidance = guidance_scale > 1.0 - - combined_embeddings = self._encode_prompt(prompt, device=self._execution_device, num_images_per_prompt=1, - do_classifier_free_guidance=do_classifier_free_guidance, - negative_prompt=opposing_prompt) - text_embeddings, unconditioned_embeddings = combined_embeddings.chunk(2) - self.scheduler.set_timesteps(num_inference_steps) - latents = self.prepare_latents(batch_size=batch_size, num_channels_latents=self.unet.in_channels, - height=height, width=width, - dtype=self.unet.dtype, device=self._execution_device, - generator=generator, - latents=latents) - - yield from self.generate_from_embeddings(latents, text_embeddings, unconditioned_embeddings, - guidance_scale, run_id=run_id, **extra_step_kwargs) - - def generate_from_embeddings( - self, - latents: torch.Tensor, - text_embeddings: torch.Tensor, - unconditioned_embeddings: torch.Tensor, - guidance_scale: float, - *, - run_id: str = None, - extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, - timesteps = None, - additional_guidance: List[Callable] = None, - **extra_step_kwargs): - latents = yield from self.generate_latents_from_embeddings(latents, text_embeddings, unconditioned_embeddings, - guidance_scale, run_id=run_id, extra_conditioning_info=extra_conditioning_info, - timesteps=timesteps, additional_guidance=additional_guidance, **extra_step_kwargs) - + result_latents = self.latents_from_embeddings( + latents, num_inference_steps, text_embeddings, unconditioned_embeddings, guidance_scale, + extra_conditioning_info=extra_conditioning_info, + run_id=run_id, callback=callback, **extra_step_kwargs + ) # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 torch.cuda.empty_cache() with torch.inference_mode(): - image = self.decode_latents(latents) + image = self.decode_latents(result_latents) output = StableDiffusionPipelineOutput(images=image, nsfw_content_detected=[]) - yield self.check_for_safety(output, dtype=text_embeddings.dtype) + return self.check_for_safety(output, dtype=text_embeddings.dtype) - def generate_latents_from_embeddings( - self, - latents: torch.Tensor, - text_embeddings: 
torch.Tensor, - unconditioned_embeddings: torch.Tensor, + def latents_from_embeddings( + self, latents: torch.Tensor, num_inference_steps: int, + text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, guidance_scale: float, *, - run_id: str = None, - extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, timesteps = None, + extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, additional_guidance: List[Callable] = None, + run_id=None, + callback: Callable[[PipelineIntermediateState], None]=None, **extra_step_kwargs - ): + ) -> torch.Tensor: + if timesteps is None: + self.scheduler.set_timesteps(num_inference_steps, device=self.unet.device) + timesteps = self.scheduler.timesteps + infer_latents_from_embeddings = GeneratorToCallbackinator(self.generate_latents_from_embeddings, PipelineIntermediateState) + return infer_latents_from_embeddings( + latents, timesteps, text_embeddings, unconditioned_embeddings, guidance_scale, + extra_conditioning_info=extra_conditioning_info, + additional_guidance=additional_guidance, + run_id=run_id, + callback=callback, + **extra_step_kwargs).latents + + def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, text_embeddings: torch.Tensor, + unconditioned_embeddings: torch.Tensor, guidance_scale: float, *, + run_id: str = None, + extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, + additional_guidance: List[Callable] = None, **extra_step_kwargs): if run_id is None: run_id = secrets.token_urlsafe(self.ID_LENGTH) if additional_guidance is None: @@ -375,9 +314,6 @@ def generate_latents_from_embeddings( step_count=len(self.scheduler.timesteps)) else: self.invokeai_diffuser.remove_cross_attention_control() - if timesteps is None: - # NOTE: Depends on scheduler being already initialized! - timesteps = self.scheduler.timesteps # scale the initial noise by the standard deviation required by the scheduler latents *= self.scheduler.init_noise_sigma @@ -448,8 +384,6 @@ def img2img_from_embeddings(self, run_id=None, noise_func=None, **extra_step_kwargs) -> StableDiffusionPipelineOutput: - device = self.unet.device - latents_dtype = self.unet.dtype if isinstance(init_image, PIL.Image.Image): init_image = image_resized_to_grid_as_tensor(init_image.convert('RGB')) @@ -457,13 +391,14 @@ def img2img_from_embeddings(self, init_image = einops.rearrange(init_image, 'c h w -> 1 c h w') # 6. 
Prepare latent variables + device = self.unet.device + latents_dtype = self.unet.dtype initial_latents = self.non_noised_latents_from_image(init_image, device=device, dtype=latents_dtype) - result = self.img2img_from_latents_and_embeddings(initial_latents, num_inference_steps, text_embeddings, + return self.img2img_from_latents_and_embeddings(initial_latents, num_inference_steps, text_embeddings, unconditioned_embeddings, guidance_scale, strength, extra_conditioning_info, noise_func, run_id, callback, **extra_step_kwargs) - return result def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_steps, text_embeddings, unconditioned_embeddings, guidance_scale, strength, extra_conditioning_info, @@ -476,13 +411,21 @@ def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_ste latent_timestep = timesteps[:1].repeat(batch_size) latents = self.noise_latents_for_time(initial_latents, latent_timestep, noise_func=noise_func) - f = GeneratorToCallbackinator(self.generate_from_embeddings, PipelineIntermediateState) - return f(latents, text_embeddings, unconditioned_embeddings, guidance_scale, + result_latents = self.latents_from_embeddings( + latents, num_inference_steps, text_embeddings, unconditioned_embeddings, guidance_scale, extra_conditioning_info=extra_conditioning_info, timesteps=timesteps, callback=callback, run_id=run_id, **extra_step_kwargs) + # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 + torch.cuda.empty_cache() + + with torch.inference_mode(): + image = self.decode_latents(result_latents) + output = StableDiffusionPipelineOutput(images=image, nsfw_content_detected=[]) + return self.check_for_safety(output, dtype=text_embeddings.dtype) + def inpaint_from_embeddings( self, init_image: torch.FloatTensor, @@ -536,22 +479,25 @@ def inpaint_from_embeddings( else: guidance.append(AddsMaskGuidance(mask, init_image_latents, self.scheduler, noise_func)) - result = None - try: - for result in self.generate_from_embeddings( - latents, text_embeddings, unconditioned_embeddings, guidance_scale, - extra_conditioning_info=extra_conditioning_info, - timesteps=timesteps, - run_id=run_id, additional_guidance=guidance, **extra_step_kwargs): - if callback is not None and isinstance(result, PipelineIntermediateState): - callback(result) - if result is None: - raise AssertionError("why was that an empty generator?") - return result + result_latents = self.latents_from_embeddings( + latents, num_inference_steps, text_embeddings, unconditioned_embeddings, guidance_scale, + extra_conditioning_info=extra_conditioning_info, + timesteps=timesteps, + run_id=run_id, additional_guidance=guidance, + callback=callback, + **extra_step_kwargs) finally: self.invokeai_diffuser.model_forward_callback = self._unet_forward + # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 + torch.cuda.empty_cache() + + with torch.inference_mode(): + image = self.decode_latents(result_latents) + output = StableDiffusionPipelineOutput(images=image, nsfw_content_detected=[]) + return self.check_for_safety(output, dtype=text_embeddings.dtype) + def non_noised_latents_from_image(self, init_image, *, device, dtype): init_image = init_image.to(device=device, dtype=dtype) with torch.inference_mode(): From 9bcb3b1bf75620b31f45da94bfb38766bfa6902b Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 7 Dec 2022 18:26:08 -0800 Subject: [PATCH 054/199] fixup! 
refactor(diffusers): reduce some code duplication amongst the different tasks --- ldm/invoke/generator/txt2img2img.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ldm/invoke/generator/txt2img2img.py b/ldm/invoke/generator/txt2img2img.py index 35c6a39ca29..29a7106246f 100644 --- a/ldm/invoke/generator/txt2img2img.py +++ b/ldm/invoke/generator/txt2img2img.py @@ -36,7 +36,7 @@ def get_make_image(self, prompt:str, sampler, steps:int, cfg_scale:float, ddim_e def make_image(x_T): - pipeline_output = pipeline.latents_from_embeddings( + first_pass_latent_output = pipeline.latents_from_embeddings( latents=x_T, num_inference_steps=steps, text_embeddings=c, @@ -48,8 +48,6 @@ def make_image(x_T): # TODO: threshold = threshold, ) - first_pass_latent_output = pipeline_output.latents - print( f"\n>> Interpolating from {init_width}x{init_height} to {width}x{height} using DDIM sampling" ) From 30a8d4c2b38eb2de6b2d51400c430656d012d6d0 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 7 Dec 2022 19:00:23 -0800 Subject: [PATCH 055/199] diffusers: enable DPMSolver++ scheduler --- ldm/generate.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index 375b72b362a..aefe58c8b74 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -19,6 +19,7 @@ from PIL import Image, ImageOps from diffusers.pipeline_utils import DiffusionPipeline from diffusers.schedulers.scheduling_ddim import DDIMScheduler +from diffusers.schedulers.scheduling_dpmsolver_multistep import DPMSolverMultistepScheduler from diffusers.schedulers.scheduling_euler_ancestral_discrete import EulerAncestralDiscreteScheduler from diffusers.schedulers.scheduling_euler_discrete import EulerDiscreteScheduler from diffusers.schedulers.scheduling_ipndm import IPNDMScheduler @@ -1005,19 +1006,14 @@ def _set_sampler_legacy(self): def _set_scheduler(self): default = self.model.scheduler - higher_order_samplers = [ - 'k_dpm_2', - 'k_dpm_2_a', - 'k_heun', - 'plms', # Its first step is like Heun - ] scheduler_map = dict( ddim=DDIMScheduler, ipndm=IPNDMScheduler, k_euler=EulerDiscreteScheduler, k_euler_a=EulerAncestralDiscreteScheduler, k_lms=LMSDiscreteScheduler, - pndm=PNDMScheduler, + plms=PNDMScheduler, + k_dpmpp_2=DPMSolverMultistepScheduler, ) if self.sampler_name in scheduler_map: @@ -1027,11 +1023,6 @@ def _set_scheduler(self): self.model_cache.model_name_or_path(self.model_name), subfolder="scheduler" ) - elif self.sampler_name in higher_order_samplers: - msg = (f'>> Unsupported Sampler: {self.sampler_name} ' - f'— diffusers does not yet support higher-order samplers, ' - f'Defaulting to {default}') - self.sampler = default else: msg = (f'>> Unsupported Sampler: {self.sampler_name} ' f'Defaulting to {default}') From 9199d698f8200783c577bbbd83acf6d14fef029a Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Thu, 8 Dec 2022 13:02:47 -0800 Subject: [PATCH 056/199] diffusers: upgrade to diffusers 0.10, add Heun scheduler --- binary_installer/requirements.in | 2 +- ldm/generate.py | 8 +++++++- ldm/invoke/generator/diffusers_pipeline.py | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/binary_installer/requirements.in b/binary_installer/requirements.in index cada6ec51ea..95ef3f5185a 100644 --- a/binary_installer/requirements.in +++ b/binary_installer/requirements.in @@ -4,7 +4,7 @@ --trusted-host https://download.pytorch.org accelerate~=0.14 albumentations 
-diffusers[torch]~=0.9 +diffusers[torch]~=0.10 einops eventlet flask_cors diff --git a/ldm/generate.py b/ldm/generate.py index aefe58c8b74..82fe52f4aef 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -17,6 +17,7 @@ import torch import transformers from PIL import Image, ImageOps +from diffusers import HeunDiscreteScheduler from diffusers.pipeline_utils import DiffusionPipeline from diffusers.schedulers.scheduling_ddim import DDIMScheduler from diffusers.schedulers.scheduling_dpmsolver_multistep import DPMSolverMultistepScheduler @@ -1008,12 +1009,17 @@ def _set_scheduler(self): scheduler_map = dict( ddim=DDIMScheduler, + dpmpp_2=DPMSolverMultistepScheduler, ipndm=IPNDMScheduler, + # DPMSolverMultistepScheduler is technically not `k_` anything, as it is neither + # the k-diffusers implementation nor included in EDM (Karras 2022), but we can + # provide an alias for compatibility. + k_dpmpp_2=DPMSolverMultistepScheduler, k_euler=EulerDiscreteScheduler, k_euler_a=EulerAncestralDiscreteScheduler, + k_heun=HeunDiscreteScheduler, k_lms=LMSDiscreteScheduler, plms=PNDMScheduler, - k_dpmpp_2=DPMSolverMultistepScheduler, ) if self.sampler_name in scheduler_map: diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 4dbd576c510..8f2b4901b87 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -407,7 +407,7 @@ def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_ste batch_size = initial_latents.size(0) img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components) img2img_pipeline.scheduler.set_timesteps(num_inference_steps, device=device) - timesteps = img2img_pipeline.get_timesteps(num_inference_steps, strength, device=device) + timesteps, _ = img2img_pipeline.get_timesteps(num_inference_steps, strength, device=device) latent_timestep = timesteps[:1].repeat(batch_size) latents = self.noise_latents_for_time(initial_latents, latent_timestep, noise_func=noise_func) @@ -454,7 +454,7 @@ def inpaint_from_embeddings( img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components) img2img_pipeline.scheduler.set_timesteps(num_inference_steps, device=device) - timesteps = img2img_pipeline.get_timesteps(num_inference_steps, strength, device=device) + timesteps, _ = img2img_pipeline.get_timesteps(num_inference_steps, strength, device=device) assert img2img_pipeline.scheduler is self.scheduler From c28f56daec5b2f8b48756b7be58ab4e658cfae5d Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Fri, 9 Dec 2022 10:23:39 -0800 Subject: [PATCH 057/199] diffusers(ModelCache): stopgap to make from_cpu compatible with diffusers --- ldm/invoke/model_cache.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 7b7b840b021..ee32ba12e5d 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -464,9 +464,12 @@ def _model_to_cpu(self,model): def _model_from_cpu(self,model): if self.device != 'cpu': model.to(self.device) - model.first_stage_model.to(self.device) - model.cond_stage_model.to(self.device) - model.cond_stage_model.device = self.device + try: + model.first_stage_model.to(self.device) + model.cond_stage_model.to(self.device) + model.cond_stage_model.device = self.device + except AttributeError as e: + warnings.warn(f"TODO: clean up legacy model-management: {e}") return model def _pop_oldest_model(self): From 
8e2da9a97efee37058e85caac5be60a8b92bbc11 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Fri, 9 Dec 2022 10:37:55 -0800 Subject: [PATCH 058/199] CI: default to diffusers-1.5 now that runwayml token requirement is gone --- .github/workflows/test-invoke-conda.yml | 2 +- .github/workflows/test-invoke-pip.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml index d53635d0cc7..383729ae249 100644 --- a/.github/workflows/test-invoke-conda.yml +++ b/.github/workflows/test-invoke-conda.yml @@ -11,7 +11,7 @@ jobs: strategy: matrix: stable-diffusion-model: - - diffusers-1.4 + - diffusers-1.5 environment-yaml: - environment-lin-amd.yml - environment-lin-cuda.yml diff --git a/.github/workflows/test-invoke-pip.yml b/.github/workflows/test-invoke-pip.yml index 67907eaf6df..f00ff0328b6 100644 --- a/.github/workflows/test-invoke-pip.yml +++ b/.github/workflows/test-invoke-pip.yml @@ -11,7 +11,7 @@ jobs: strategy: matrix: stable-diffusion-model: - - diffusers-1.4 + - diffusers-1.5 requirements-file: - requirements-lin-cuda.txt - requirements-lin-amd.txt From 811dc23efb00c06df117b7f02a04afce0e4415bb Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Fri, 9 Dec 2022 10:45:16 -0800 Subject: [PATCH 059/199] diffusers: update to 0.10 (and transformers to 4.25) --- binary_installer/requirements.in | 2 +- environments-and-requirements/environment-lin-aarch64.yml | 2 +- environments-and-requirements/environment-lin-amd.yml | 4 ++-- environments-and-requirements/environment-lin-cuda.yml | 4 ++-- environments-and-requirements/environment-mac.yml | 4 ++-- environments-and-requirements/environment-win-cuda.yml | 4 ++-- environments-and-requirements/requirements-base.txt | 4 ++-- 7 files changed, 12 insertions(+), 12 deletions(-) diff --git a/binary_installer/requirements.in b/binary_installer/requirements.in index 95ef3f5185a..affaa813650 100644 --- a/binary_installer/requirements.in +++ b/binary_installer/requirements.in @@ -2,7 +2,7 @@ --extra-index-url https://download.pytorch.org/whl/torch_stable.html --extra-index-url https://download.pytorch.org/whl/cu116 --trusted-host https://download.pytorch.org -accelerate~=0.14 +accelerate~=0.15 albumentations diffusers[torch]~=0.10 einops diff --git a/environments-and-requirements/environment-lin-aarch64.yml b/environments-and-requirements/environment-lin-aarch64.yml index 5f71828453a..ae798d777c1 100644 --- a/environments-and-requirements/environment-lin-aarch64.yml +++ b/environments-and-requirements/environment-lin-aarch64.yml @@ -28,7 +28,7 @@ dependencies: - torch-fidelity=0.3.0 - torchmetrics=0.7.0 - torchvision - - transformers=4.21.3 + - transformers~=4.25 - pip: - dependency_injector==4.40.0 - getpass_asterisk diff --git a/environments-and-requirements/environment-lin-amd.yml b/environments-and-requirements/environment-lin-amd.yml index af251956a07..2eb4b1b1480 100644 --- a/environments-and-requirements/environment-lin-amd.yml +++ b/environments-and-requirements/environment-lin-amd.yml @@ -11,7 +11,7 @@ dependencies: - --extra-index-url https://download.pytorch.org/whl/rocm5.2/ - albumentations==0.4.3 - dependency_injector==4.40.0 - - diffusers~=0.9 + - diffusers~=0.10 - einops==0.3.0 - eventlet - flask==2.1.3 @@ -39,7 +39,7 @@ dependencies: - torchaudio - torchmetrics==0.7.0 - torchvision - - transformers==4.21.3 + - transformers~=4.25 - 
git+https://github.com/openai/CLIP.git@main#egg=clip - git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k-diffusion - git+https://github.com/invoke-ai/clipseg.git@relaxed-python-requirement#egg=clipseg diff --git a/environments-and-requirements/environment-lin-cuda.yml b/environments-and-requirements/environment-lin-cuda.yml index 43dc46b5c63..0d51656cb0b 100644 --- a/environments-and-requirements/environment-lin-cuda.yml +++ b/environments-and-requirements/environment-lin-cuda.yml @@ -15,7 +15,7 @@ dependencies: - accelerate~=0.13 - albumentations==0.4.3 - dependency_injector==4.40.0 - - diffusers~=0.9 + - diffusers~=0.10 - einops==0.3.0 - eventlet - flask==2.1.3 @@ -39,7 +39,7 @@ dependencies: - test-tube>=0.7.5 - torch-fidelity==0.3.0 - torchmetrics==0.7.0 - - transformers==4.21.3 + - transformers=~4.25 - git+https://github.com/openai/CLIP.git@main#egg=clip - git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k-diffusion - git+https://github.com/invoke-ai/clipseg.git@relaxed-python-requirement#egg=clipseg diff --git a/environments-and-requirements/environment-mac.yml b/environments-and-requirements/environment-mac.yml index cdf7c2ebf76..dbb6c6717c2 100644 --- a/environments-and-requirements/environment-mac.yml +++ b/environments-and-requirements/environment-mac.yml @@ -22,7 +22,7 @@ dependencies: - albumentations=1.2 - coloredlogs=15.0 - - diffusers~=0.9 + - diffusers~=0.10 - einops=0.3 - eventlet - grpcio=1.46 @@ -49,7 +49,7 @@ dependencies: - sympy=1.10 - send2trash=1.8 - tensorboard=2.10 - - transformers=4.23 + - transformers=~4.25 - pip: - getpass_asterisk - picklescan diff --git a/environments-and-requirements/environment-win-cuda.yml b/environments-and-requirements/environment-win-cuda.yml index 3409ff20bf8..8b733796cd3 100644 --- a/environments-and-requirements/environment-win-cuda.yml +++ b/environments-and-requirements/environment-win-cuda.yml @@ -15,7 +15,7 @@ dependencies: - albumentations==0.4.3 - basicsr==1.4.1 - dependency_injector==4.40.0 - - diffusers~=0.9 + - diffusers~=0.10 - einops==0.3.0 - eventlet - flask==2.1.3 @@ -39,7 +39,7 @@ dependencies: - test-tube>=0.7.5 - torch-fidelity==0.3.0 - torchmetrics==0.7.0 - - transformers==4.21.3 + - transformers~=4.25 - git+https://github.com/openai/CLIP.git@main#egg=clip - git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k_diffusion - git+https://github.com/invoke-ai/clipseg.git@relaxed-python-requirement#egg=clipseg diff --git a/environments-and-requirements/requirements-base.txt b/environments-and-requirements/requirements-base.txt index 370eb2ded15..44848e5500d 100644 --- a/environments-and-requirements/requirements-base.txt +++ b/environments-and-requirements/requirements-base.txt @@ -1,7 +1,7 @@ # pip will resolve the version which matches torch albumentations dependency_injector==4.40.0 -diffusers[torch]~=0.9 +diffusers[torch]~=0.10 einops eventlet facexlib @@ -31,7 +31,7 @@ taming-transformers-rom1504 test-tube>=0.7.5 torch-fidelity torchmetrics -transformers~=4.24 +transformers~=4.25 picklescan # git+https://github.com/invoke-ai/GFPGAN@basicsr-1.4.1#egg=gfpgan ; platform_system == 'Windows' git+https://github.com/invoke-ai/GFPGAN@basicsr-1.4.2#egg=gfpgan ; platform_system != 'Windows' From 1a678360124c82327fae26034d0f6376dac00457 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 10 Dec 2022 08:26:24 -0800 Subject: [PATCH 060/199] diffusers: use xformers when available diffusers no longer auto-enables this as of 0.10.2. 
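For reference, the opt-in now lives in the pipeline constructor rather than being left
to diffusers. A minimal sketch of the same pattern on a stock diffusers pipeline follows;
the repo id is only an illustrative placeholder, and a working CUDA build of xformers is
assumed:

    from diffusers import StableDiffusionPipeline
    from diffusers.utils.import_utils import is_xformers_available

    # load any Stable Diffusion pipeline; the repo id here is just an example
    pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5")

    if is_xformers_available():
        # swap the UNet's attention blocks for xformers' memory-efficient kernels;
        # as of diffusers 0.10.2 this is no longer enabled automatically
        pipe.enable_xformers_memory_efficient_attention()

StableDiffusionGeneratorPipeline makes the equivalent call on itself in __init__, as the
hunk below shows.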
--- ldm/invoke/generator/diffusers_pipeline.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 8f2b4901b87..e1a4b120c79 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -10,6 +10,7 @@ import torch import torchvision.transforms as T from diffusers.models import attention +from diffusers.utils.import_utils import is_xformers_available from ...models.diffusion import cross_attention_control @@ -239,6 +240,9 @@ def __init__( self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward) self.embedding_manager = EmbeddingManager(self.clip_embedder, **_default_personalization_config_params) + if is_xformers_available(): + self.enable_xformers_memory_efficient_attention() + def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, guidance_scale: float, From 50c48cffc7c0ebba90f544ab08400d83f60b99dd Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 10 Dec 2022 08:27:46 -0800 Subject: [PATCH 061/199] diffusers: make masked img2img behave better with multi-step schedulers re-randomizing the noise each step was confusing them. --- ldm/invoke/generator/diffusers_pipeline.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index e1a4b120c79..e3bb47cec4a 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -91,7 +91,7 @@ class AddsMaskGuidance: mask: torch.FloatTensor mask_latents: torch.FloatTensor _scheduler: SchedulerMixin - _noise_func: Callable + noise: torch.Tensor _debug: Optional[Callable] = None def __call__(self, step_output: BaseOutput | SchedulerOutput, t: torch.Tensor, conditioning) -> BaseOutput: @@ -117,8 +117,9 @@ def _t_for_field(self, field_name:str, t): def apply_mask(self, latents: torch.Tensor, t) -> torch.Tensor: batch_size = latents.size(0) mask = einops.repeat(self.mask, 'b c h w -> (repeat b) c h w', repeat=batch_size) - noise = self._noise_func(self.mask_latents) - mask_latents = self._scheduler.add_noise(self.mask_latents, noise, t) + # Noise shouldn't be re-randomized between steps here. The multistep schedulers + # get very confused about what is happening from step to step when we do that. + mask_latents = self._scheduler.add_noise(self.mask_latents, self.noise, t) # TODO: Do we need to also apply scheduler.scale_model_input? Or is add_noise appropriately scaled already? 
mask_latents = einops.repeat(mask_latents, 'b c h w -> (repeat b) c h w', repeat=batch_size) masked_input = torch.lerp(mask_latents.to(dtype=latents.dtype), latents, mask.to(dtype=latents.dtype)) @@ -413,7 +414,9 @@ def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_ste img2img_pipeline.scheduler.set_timesteps(num_inference_steps, device=device) timesteps, _ = img2img_pipeline.get_timesteps(num_inference_steps, strength, device=device) latent_timestep = timesteps[:1].repeat(batch_size) - latents = self.noise_latents_for_time(initial_latents, latent_timestep, noise_func=noise_func) + noise = noise_func(initial_latents) + noised_latents = self.scheduler.add_noise(initial_latents, noise, latent_timestep) + latents = noised_latents result_latents = self.latents_from_embeddings( latents, num_inference_steps, text_embeddings, unconditioned_embeddings, guidance_scale, @@ -467,7 +470,8 @@ def inpaint_from_embeddings( # can't quite use upstream StableDiffusionImg2ImgPipeline.prepare_latents # because we have our own noise function init_image_latents = self.non_noised_latents_from_image(init_image, device=device, dtype=latents_dtype) - latents = self.noise_latents_for_time(init_image_latents, latent_timestep, noise_func=noise_func) + noise = noise_func(init_image_latents) + latents = self.scheduler.add_noise(init_image_latents, noise, latent_timestep) if mask.dim() == 3: mask = mask.unsqueeze(0) @@ -481,7 +485,7 @@ def inpaint_from_embeddings( self.invokeai_diffuser.model_forward_callback = \ AddsMaskLatents(self._unet_forward, mask, init_image_latents) else: - guidance.append(AddsMaskGuidance(mask, init_image_latents, self.scheduler, noise_func)) + guidance.append(AddsMaskGuidance(mask, init_image_latents, self.scheduler, noise)) try: result_latents = self.latents_from_embeddings( @@ -510,11 +514,6 @@ def non_noised_latents_from_image(self, init_image, *, device, dtype): init_latents = 0.18215 * init_latents return init_latents - def noise_latents_for_time(self, latents, timestep, *, noise_func): - noise = noise_func(latents) - noised_latents = self.scheduler.add_noise(latents, noise, timestep) - return noised_latents - def check_for_safety(self, output, dtype): with torch.inference_mode(): screened_images, has_nsfw_concept = self.run_safety_checker( From 66d32b79b736ec8686523ea7b9aacba924bf0673 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 10 Dec 2022 08:29:12 -0800 Subject: [PATCH 062/199] diffusers: work more better with more models. fixed relative path problem with local models. fixed models on hub not always having a `fp16` branch. --- ldm/invoke/model_cache.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index ee32ba12e5d..f57dc6de198 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -21,6 +21,8 @@ import torch import transformers +from huggingface_hub import hf_hub_download +from huggingface_hub.utils import RevisionNotFoundError from omegaconf import OmegaConf from omegaconf.errors import ConfigAttributeError from picklescan.scanner import scan_file_path @@ -323,6 +325,8 @@ def _load_diffusers_model(self, mconfig): # model_hash = huggingface_hub.get_hf_file_metadata(url).commit_hash elif 'path' in mconfig: name_or_path = Path(mconfig['path']) + if not name_or_path.is_absolute(): + name_or_path = Path(Globals.root, name_or_path).resolve() # FIXME: What should the model_hash be? 
A hash of the unet weights? Of all files of all # the submodels hashed together? The commit ID from the repo? model_hash = "FIXME TOO" @@ -335,7 +339,16 @@ def _load_diffusers_model(self, mconfig): if self.precision == 'float16': print(' | Using faster float16 precision') - pipeline_args.update(revision="fp16", torch_dtype=torch.float16) + + if not isinstance(name_or_path, Path): + try: + hf_hub_download(name_or_path, "model_index.json", revision="fp16") + except RevisionNotFoundError as e: + pass + else: + pipeline_args.update(revision="fp16") + + pipeline_args.update(torch_dtype=torch.float16) else: # TODO: more accurately, "using the model's default precision." # How do we find out what that is? @@ -363,7 +376,10 @@ def model_name_or_path(self, model_name:str) -> str | Path: if 'repo_name' in mconfig: return mconfig['repo_name'] elif 'path' in mconfig: - return Path(mconfig['path']) + path = Path(mconfig['path']) + if not path.is_absolute(): + path = Path(Globals.root, path).resolve() + return path else: raise ValueError("Model config must specify either repo_name or path.") From cd358c40ad89d93106f951e62b32d1d546c71526 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 10 Dec 2022 19:48:15 -0800 Subject: [PATCH 063/199] diffusers: stopgap fix for attention_maps_callback crash after recent merge --- ldm/invoke/generator/txt2img.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ldm/invoke/generator/txt2img.py b/ldm/invoke/generator/txt2img.py index b03c4b99fc0..ef3d35dbca7 100644 --- a/ldm/invoke/generator/txt2img.py +++ b/ldm/invoke/generator/txt2img.py @@ -40,7 +40,9 @@ def make_image(x_T) -> PIL.Image.Image: extra_conditioning_info=extra_conditioning_info, # TODO: eta = ddim_eta, # TODO: threshold = threshold, - attention_maps_callback = attention_maps_callback, + # FIXME: Attention Maps Callback merged from main, but not hooked up + # in diffusers branch yet. 
- keturn + # attention_maps_callback = attention_maps_callback, ) return pipeline.numpy_to_pil(pipeline_output.images)[0] From a31ac57901a94e16acd112f1fe6b5f225e8bd050 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 10 Dec 2022 21:12:42 -0800 Subject: [PATCH 064/199] fixup import merge conflicts correction for 061c5369a2247c6c92cd69606bcf54c4f1962a0b --- backend/invoke_ai_web_server.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/invoke_ai_web_server.py b/backend/invoke_ai_web_server.py index 426ea0000be..2f3c547a9fc 100644 --- a/backend/invoke_ai_web_server.py +++ b/backend/invoke_ai_web_server.py @@ -23,10 +23,11 @@ from backend.modules.parameters import parameters_to_command from ldm.generate import Generate from ldm.invoke.args import Args, APP_ID, APP_VERSION, calculate_init_img_hash +from ldm.invoke.conditioning import get_tokens_for_prompt, get_prompt_structure from ldm.invoke.generator.diffusers_pipeline import PipelineIntermediateState from ldm.invoke.generator.inpaint import infill_methods from ldm.invoke.pngwriter import PngWriter, retrieve_metadata -from ldm.invoke.prompt_parser import split_weighted_subprompts +from ldm.invoke.prompt_parser import split_weighted_subprompts, Blend # Loading Arguments opt = Args() From 4fa26e82e83ddf40b0fa150f7410e3df9badca0d Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 10 Dec 2022 21:14:10 -0800 Subject: [PATCH 065/199] test: add tests/inpainting inputs for masked img2img --- tests/inpainting/coyote-inpainting.prompt | 14 ++++++++++ tests/inpainting/coyote-input.webp | Bin 0 -> 36320 bytes tests/inpainting/coyote-mask.webp | Bin 0 -> 1576 bytes tests/inpainting/original.json | 30 ++++++++++++++++++++++ 4 files changed, 44 insertions(+) create mode 100644 tests/inpainting/coyote-inpainting.prompt create mode 100644 tests/inpainting/coyote-input.webp create mode 100644 tests/inpainting/coyote-mask.webp create mode 100644 tests/inpainting/original.json diff --git a/tests/inpainting/coyote-inpainting.prompt b/tests/inpainting/coyote-inpainting.prompt new file mode 100644 index 00000000000..7f979c55e7c --- /dev/null +++ b/tests/inpainting/coyote-inpainting.prompt @@ -0,0 +1,14 @@ +# 🌻 🌻 🌻 sunflowers 🌻 🌻 🌻 +a coyote, deep palette knife oil painting, sunflowers, plants, desert landscape, award winning -s 50 -S 1234554321 -W 512 -H 512 -C 7.5 -I tests/inpainting/coyote-input.webp -A k_lms -M tests/inpainting/coyote-mask.webp -f 0.2 +a coyote, deep palette knife oil painting, sunflowers, plants, desert landscape, award winning -s 50 -S 1234554321 -W 512 -H 512 -C 7.5 -I tests/inpainting/coyote-input.webp -A k_lms -M tests/inpainting/coyote-mask.webp -f 0.4 +a coyote, deep palette knife oil painting, sunflowers, plants, desert landscape, award winning -s 50 -S 1234554321 -W 512 -H 512 -C 7.5 -I tests/inpainting/coyote-input.webp -A k_lms -M tests/inpainting/coyote-mask.webp -f 0.6 +a coyote, deep palette knife oil painting, sunflowers, plants, desert landscape, award winning -s 50 -S 1234554321 -W 512 -H 512 -C 7.5 -I tests/inpainting/coyote-input.webp -A k_lms -M tests/inpainting/coyote-mask.webp -f 0.8 +a coyote, deep palette knife oil painting, sunflowers, plants, desert landscape, award winning -s 50 -S 1234554321 -W 512 -H 512 -C 7.5 -I tests/inpainting/coyote-input.webp -A k_lms -M tests/inpainting/coyote-mask.webp -f 0.99 + +# 🌹 🌹 🌹 roses 🌹 🌹 🌹 +a coyote, deep palette knife oil painting, red roses, plants, desert 
landscape, award winning -s 50 -S 1234554321 -W 512 -H 512 -C 7.5 -I tests/inpainting/coyote-input.webp -A k_lms -M tests/inpainting/coyote-mask.webp -f 0.2 +a coyote, deep palette knife oil painting, red roses, plants, desert landscape, award winning -s 50 -S 1234554321 -W 512 -H 512 -C 7.5 -I tests/inpainting/coyote-input.webp -A k_lms -M tests/inpainting/coyote-mask.webp -f 0.4 +a coyote, deep palette knife oil painting, red roses, plants, desert landscape, award winning -s 50 -S 1234554321 -W 512 -H 512 -C 7.5 -I tests/inpainting/coyote-input.webp -A k_lms -M tests/inpainting/coyote-mask.webp -f 0.6 +a coyote, deep palette knife oil painting, red roses, plants, desert landscape, award winning -s 50 -S 1234554321 -W 512 -H 512 -C 7.5 -I tests/inpainting/coyote-input.webp -A k_lms -M tests/inpainting/coyote-mask.webp -f 0.8 +a coyote, deep palette knife oil painting, red roses, plants, desert landscape, award winning -s 50 -S 1234554321 -W 512 -H 512 -C 7.5 -I tests/inpainting/coyote-input.webp -A k_lms -M tests/inpainting/coyote-mask.webp -f 0.99 + diff --git a/tests/inpainting/coyote-input.webp b/tests/inpainting/coyote-input.webp new file mode 100644 index 0000000000000000000000000000000000000000..488caceae5a38b5e76271a9b23b990e0d566e8be GIT binary patch literal 36320 zcmV(rK<>X%Nk&H4jQ{{wMM6+kP&gpWjQ{}fNdlb#DgXii0zO?Rkw}Q8`#1}MaT$zM zl<4mm=KXn}N%Cr$6HrEUBNZ7n9W9cup{G&2|I9xX;7nBtp47E}#m&43R83%-u4`Eb$1xTP z3%~!|8TrXoh&3YS#tuU9Fh<)*2U*>EVa{S8&=YLJEa@G^!UZV!`=^Sw_zsAw0E-CCfhVkgR((cNWzSpqIOR%yX zB_sAIZ(=0c={@egfu3)ZDaWGxHG2kxzmfB<-{upR-}Rxuy(l81!-N(j36&{db+Y#K z1hG1Vc$Clnv)*=Vgtn~=hEHDh8`5_U;loi;8C2fhUmm#S6{h=?rJ9dk>%6&;!%#pP zB~}ZYRRmNF91$9D4k{U{sK-kf*Mh%mp?RsXJY*^EHsbiDDQGe->uF$s+^k14m&o>q&$u99)oLnLNh9%YrQ^y+SPGkt@3M1p znR7t0%ib_R@yAsq;fkSi%yZ4F*5;rElL|)O#)BWxz-hT0-ZIj&MJ_9q2jh*|nfR%1 zqH)O%ompV$<3T5Rgn=K*)lUmpW|jFRLOL-Vnm(~T11AI0$VU-s#D(BY4|=CxY3R12 zQCJVpS1okG{!TK8dNXSzFqBj$yxr`us*Sp|)m&pcF>Rr0F6Ys+yF3c5$5^3Guk^8Ehb=*%muIh}7S{UMP)3w2Cp~6V5wZ`!x@sl-Y{liXN0LVTG~OXQHx|wtH8Tc{l8EF9dVwThslP>!K_*R6&{|GmQuj+lR8B4- zX0?Rx1Bm8f7L2lUSJ-KpQLwgNxa^*YF8-qzU!EMcLF$`j{}6On@+h%cxN6R&|BXTj zisdOSL>Tye3JC9f)3euuI1;ZCMKvOx9*ZRy^(_J^sZxBJL_hW*bJVqYo0smyhu1k) z3fUn4Pnt$pkanRAy`sjU3fMO*yNilG3YaAeO;>Q6Fhm8FP}1>1`um5KP-Hu@Qxu&} zft)&<*4;d3I+Sa65-Qjxh)mu>3-K459A1`bA8rLzkit@Z4%`VE9e)A(zma#5+?lLM zK_5_Uey_6%IebeJdyq~`sJW)hsw|HQ>->W#iz1}A8mu#jT0Ct(1|l?k>BIlY0l6O7 z@^hcA#vR0XutOAn++02nZ*SKWVZHjV>FULl-;l=Fkjvr;nMmH>`gmQ!?2r*KV}2p4Fq8r)AmR{-t_wP9lyHi81Xitdj`wdU>(9^)>!uAy_(#gRBBH8X#t{+X}F5`Tm8HP4a zE2augw>@(AOhz{h?*COo!KwDw21&&bAhRG27$*)W?($gl<&>JAcV<>!qJ~2nw?JJN z+gWj<5q(Xm)C1{f0keXoa_=b-+f_ z;JrxZjRJdu%c=$9mn|{5mqxn60WV}v(8-Xj!!COYEFE&P`f$JJ81R&RAzBZ}p_tjT z;TNA$mv&#IX>|7G5|hJROiKAtJBBb;PwHF3H&aPSIL#R$;kYpU=JsImgz!t8X3mmA zeBt}auSj8f9cvZjDItTKK5FB%{Aqe%X%FwqfYaYPd5%*$Q@ zd%kQex{=XhJn{&hjT7*34R@@MShizLIugFEiXMc>KEc9)tG(|W7Doj$3@=6toOkeo&uDr4R&Ap&zacAa~(eBF)#=4QLv<=kp-NrqU#>!+TI=g56$$#x;Lwjg=6sF^;4Tv^Z+`*VHSc&TIr z9arQ17U~k(O;7hKN@pI%$uRttxrQC^-=o_80b|`AbD`hSgtbTUv4DMx)g`PF*gWGS?tp z2N(I@>XdZ&n?^b%{@YtZ(^Ez|@YsC7;X$|N9FH!ZXmdi%Sb(a%K@ToEK1`+XE}^Ew z+Ww1lLGfs&;5kkrEoqHrahP+mCEoZ@Z8-(zol$t?bLV&K+M}$8Y8bAy{)^rlBM|{c5#oND| zcF^asP@6TMtFcH&qsZ4M#nDMb31@#<#&uYQL2$UkyM}!!{Mxsb=4!=D2)Mr2d_)Iv z-j^0G4{B0XxQfBqLr=#2GWoaq`fAjM3bTLeLB>E&o#=RR{Mqcc?`(7qn*oXmQ$l?M zKV}a&{XhuTWiRoP7+H~lji-9Q8lleL_bu{1j3(569x;kX^}$*4UUBQ2gO=Uig9O0) z_(>31W38BI!hg!Mc3Shuj`3MQqZZz}8-DpIO^$_LettSNug~)g&&Ov+McBXz#f~y( zl}?9*=dOi6b|3RjLE+TY>*ztJ1=a&o5b{=3OS_)p6)ezvWnECxvI&cCWSREVy9mCKFIeZ>CM z&z+URn}Evb`fes9=eTA;c~#zO_<8AG%|xYUx@+wc$B1>_h! 
zLm${?sD^tnuwEt#@Hp=0W`_d7(WckCOu&Fo5@k>LZ1JMlp}_tA_Joot)>!e!-v_fcRH zaocd^2Kk|V$GbNscpkx-G`w25@7Yv+~dVje3P+#!^d zzm#Xzw>i`cS7+pA!3~a7eyXkXJ*h`FGec3LaE~QxUih3A_vW{6B_%;7`txjQtom66 zFZz_^R4fn|6*_8iEVq5KUb&h6F_m|;1O2pVBu%a4T|;juUPkQ*rV1N z?0t{z(5_i~TOyOGId0lDt?~=$Ra+ll8jsjZ;(L(B_Tw|(_DOTnDR`UNxU(TQOUyDT zugv@{TFkhfFaDqr{}`Fy6gt9}6KQpilO~B%d3GLSO|aTV3|}4T!Q_MrtPpcV3}-^z z{YTLF8*Zi)n@wxG+4jx2%aX;sgaE}QJo;0r>12u_`VOK;y!4!JydSz+C34^tvbG&#T#*-N zIR!TUc-qik+m9cM;o3)zo$cFi!SQWop|#J37Y#b9!vZP1hf?f(GL*D5}Li>ScSkh%bEYmo;TD|-P- zp#w2!RSY4uXy3XT&;N}a45!JO=vu$jpS=}LCR%qgkztAr*OH9Xe7M8ZbmP6eS;KjT zyi2hdxO%#*!$Co)*{z$Z6;}@6WxeL8V~NO9pD-m%@~6diUWMV=io57+#cLP8i3LJ5 zIDIZ>o2164+TQlzy+$X%dOlS`Wdar&jJo)3pn9ONmj>>uut_!u%>@Wt7HYa2#xdPSYBk#!ef} z#SPZ6R}}6Zo>pAqqf_|zg5nE5#TKa)TVRUJW0^_oK#L79;IuD_0e#$j+hG6|gg$tt z25Aj*!sZXu9V4iLEeaMlsMk_OI;3-=hE)aZ7b#ug;F9j}2mN_roOcNhq-wx`eWgr1PlU&R#64Iz|J$ zJLGY$=1Xqu?xyS21d}LFC~>klPt~@0)q2heauPV>^hqxgZ=A6`!8W-8!rp%t@d?0q zRN5~~tvW^K>vfJf_P><-!!=@C`q&QigViOcgJysvKvedEN<(-v_o!vN^i5jD!9~7-RBwd-jxa-Eo*sh}Np0 z`;0i)K-SCC@Shvjo@o02X+F_TO*)m1qV0qHEiMUv;2v7eEvbx-c|dvZ4C8IT@Z=KHQS&hQC1wk~u&(F;{(tT@{MoH1 z+uvpE&;QH)e}+8#UJ~W{oT5n{HD>p4*)H}z2~#+F|1v3`WB~PCS@1DcA~Y^hsr%02 zmIFAh1Z;ba;;|CK6dH?YuH)SnVPC1f@o_hk9@YTO7vc50PWUHTQVE?t1Kn8~+>h-& zpEW(}qdM`Easc%^$egfbE2l(mwljJ^sUr~#RZz!j^cM!3Z1(r-O6%fHBy0{3jm!n#S1u->vXb7-W$c}W zyq?^Bz9^X5E-PGmAzV(ubLkN- z?5w=x8Y>rJc);&^)<`IS5<~CD_ zkNF|_#P=jWp*iF@i`6B!iWJS5Gvn*}7Qnl=`>37wtEnEgS&Rz^E$wYe5}Qh!hc*+J z#$J7db9FfaC;gBjyTE0J!4Ji{E#d6Tkj;2HLc%spN>|_=bun;R@AdbrPu4Rid8bA@ z&(_}N##n8#?keXtQ7v%jx0YfB5WAy-Y4WJ^o_+>Z=AaWx?wDmJt&`>s{}bN089^De zfc8-~`?iNH2<+1quI__kIQ|MoI1-9EE02Xry zI|`DlSk^aEExGj*qvrv=xg&Y?mvM=!Uas|glO$QH7BG2|wm_`=zQIEmJhb^;jK$#q zY@0%r16c412KO|@s4qk`7lO)+c%QD`!;N~F}7~3|g zEbmiqG%xSxy!+y@5*gXFcm%RV*KlY4^b%oltWo%u-K@=9$qW+=E1|kJ5uTEVxI;~# zI#D3ikvcICpq^2nImciLlpb5eg>O9F2wpsw2H{AcmNoPy0y)Br_hrV$K+gw_vgzX%+-uDp`o?LcS-DPm|DwF zh5@j|&u536o#aFC`m6i&$dx&U4mr`L3S*Q@sS=cNeupZWnu zTk+FKkYkTDyR9mMKoyAp0GyJGz^Bq*1GJ|4qQGLYiI3}OFxnwnUxWGbawl(?VQv^R8$a&$1U#N| z?-^*wlJ`FCSssN|z3rbv6ZQs^izdfbxPpZS#0y_2%ca}P3F=-ZaOo>uk2c+J0?E=T z*oy;ad}F9=n}6a17dn(41IcSv-+ zGwP{1L(>=VkMQlpc^gcNqhoaM>@-Pd8&Q=%w<@^pHQSY1CA z%}5D!K?s-B_GW%mbhi)=rPf1aU0QJbc~V+#W%5nSi=`b^jnl;HO$x)yRBvQzDy`qp z>mc+ntLh8;&Wb4fpJYcw0(#2nE5esA!WbJPp_K4Ye64oc`Y$K3zo9FXr`3$ARM{ zWY<4+hR6f7J9xaD&m0jCuMHG?{wKfyXy3IDso*woW+LXY{DC(*M;(GflSuZ~#yyQc z-yYvY@2G9F?WWLH>+2-szdl3V!#0E)a}#OwLJS9zzaII3!_I7oJ0HYFN-1=*DAIW@ zO}xHQ4I>xdEGRk{&kWL{Y(6=_??TEG&nO$#_i6;~GlcU!n!urg)=HO1@RtV>#c|bk z&bX_Hm3M`RkF)XWk`Y3gB_Mq(Z4)jCN>V;=->Eo+_{p%E#TiF>{Ep+6PmZ-nbem-DTdXVND0*P#?2^{x2?wt#*TC;N*h zh=)p7ttqK#l7`(5>=*h5!hU$Z?;^IgR&wyJ0AV5h5zPuuxLMRzQM=;9q3L2V(G*CI zj$jP~j$~^ytpufCrs~rp1M9}4zA0BdbJ9Ua2`tYZ@c*YO zjm-v>oimGHy-EoOXV}g7k)@jWtwMsXSMVJe7}BEQN{d0X4LorUd*kFB2ur;5Xn$8m zhu(nJpGC$b%D5MV0^prwcmi3-(|OU}{UYX+oE5{$=Z+fj2?t1?z{Kg5hdx%%rQizG ztJh^KkPwFD;%S`nQasP1Goq5{nT@}5pa_mLs-L)P_=QUaR!-bqdi&2VB!;9g+>Ag^ zblK3xZs=$6qswjt>u^Z~vlHY4OdmdCs)dv1+jj#)am7@f*kW9Nl3M%VaQ>X*OV~`| zOhD=Wxf3%(MS^keb|W4hREPH~zoFtZ0qY1f%Av!uxONa6tRynqeACy=6!vH+A*2N^w$5 z5}!2VRe=IScGZ(?a`G1UQI1op*OIux?s31OOQ{Ah5sZACG+>|DCZ^*2@#02!pyVjv z_3m@FL4nH<=C4-r)fQ>C`eh;ml;Q)*HBy9`Q}T)XucqYf5$6oxRz4J0cuzC*d=jWI z7=S!c@H9P*KI39_IDcDxwb^|Tx;~i=1si7^1l2w?Zq12buOePdr4YR=@AYp1C_OBB zICS&@E2mm zjF;(I2k%vi4qQu|5#wo@=3w_Xu}q(H#NFd0EOb!I;it3W)FU697)XXDwgt~AW-K{q z3JVzk9hu?7t#1xdGm*N^x}8NCEAR%kVx~6$&~L~rmwa8#L|A1qexAd58-ip0ZAB!3 zZDT|JjBOS+*Bys(sf=u;eXr#o@Vr z^5O+-0q82I1VBh(aKMF2A%9(1s-bfq%$hlKpPpd_vlW87$hGw8IJ`ggr87*kqRqL4 
zUIhh{sl1qAL^l~1D61PVRr0uDC<$HEy#nm&=f9Y$FK`gUMH<*lHaz20Wz&FxM_oEf+>0Cff>6l~NVQ#Qqk3>a za8!?I)*SdZ%Q{w7SaGhC+xf~2qDg3*TUES^Pl2*Gf84xC>|KG}94RG7~Ud|AR^S&-Wn6|{IHQ7UrxS=S*>>By3_{;o7+vxJ0n24b$n zVOxo|k)l%|dpHIsD0#w7_hy92H8y&x4RS&c@%Yqv-oEGMYE#X#RvT>N8N?1%k0mZr zcGi01oGL9C#uuJPC%+BIi@foPwSI-IFfS&w z)OxLf_oC0P)%jV63n{RR#dbL&e)zYe^cx9lJ1EZDQLqA>#I3BAwNgM!`2QsFpYm&Y zE3MX$Y)t(6esIo=>A>EUV{`bd?UFT`BFs_cN%R%tT> zjm+J`CI!T{aX5f0Edx>1M?n$BC<5F^W&D(Ch)~)gNP=4c?Zi=NV5*NQ^soATXy=LX zXFaQsNgG@vHhLIv0(}jHu^Z5;?h8rSYKlGTy3Ugvn!O#h7(I-|wD>yVs9?5%5WR7$ z6DHocq5dh~^fgRKd7@-XAjpD-buX~NAUZocIWH)dRPOIMY?`HrsZ^Y^t1f4oxf1;d z72c7vw9yV#A_Dj7OEP1xl^jpFK8Lu&J#JTXA0 zK~2>}1o((nqI*R{(UhB0jA<%FKuy(5g51B9RxDQ@#p&Q7y zY|yVgT=LVurmq76mFOomf<4@pT`5tQ2~AJh0jZ|zdrKf8^hlS!ptu)$(UgiS_0uW( zC-Re|#@j{g#~8KiD>4giA*$$^aV6S1b=pqsbrgZ<_&l!=(EIDd#xJ^yrvb+Q*iMl; zN}f9#Tw-8YaAH?Gc@nu7n0B;T!&rApkiD#|S?Cz{z;*S>mDnUtLVpuF@UYc<5$BHv z^00rn0MRp&5#+pY6Z1?au4FAp@!mPGBQ(0m1o6G`c=akGLJ#&y-v()2`CS2f=Q*F6 zt@#F6!4rKmcH$4Ao`PGMV=>1iN6JWEGd z#!Pm&!2(6{o{_1kYwG!CFd};-`lvcXH@g)2(c;(k*`;&+9cbOT(yVd(%}2N)7C!Si1q{|Ui>r2q*650JF2~<*gJH1& zP_uhQ-5d6A_Dg#@^jB^vq~Mr}AlmDsWagd4nRKSB#*^h5+-X=(TJ-xhhs>FmMRKh$ z4a<3(F7tg}Jb(&~f{FlyH8n+0)^UFP-DjJ$6EFNKdxv#^{-p9+AnF}Xn*!jB{g*nW zg`gEi-PeC0#M3Xsz^Md~M%?p={Xb<NIWFpAKHUhQZ$8=K&0bo*iPC2M$d2m97Gec{l67S0wvaRoO{zN$H`W#dXxOyH@v5 zr>aex<$vvP^|FZ32L@P^?v^e+2K0~4Z1|4t5ohI_jdQ@7sAjQ-mUMHilJJMhS`+Dg zTZ%cp^>^ELq4zl9;(+=2Y&8DTlPvp1molLyVnVC*QsAjtL2m@IM4sh{d2w+$FBa=N zV5s}a6pD%1r67h`8UJvJ(W0$w?G?+}pnWVXf4_|jD6Iu#Y0JXHe>&Bn1G1Ho-~H{K zs=o26{}134+^DKbgg_VK7kul#$&<>a%#3YE#n)(s9wkT;kcDL;qpwxwPfJE_mvD`t zqaUH@G{`ZSooQopz3{U}A$4F-6dKeE`gKq(D@(pc`%yP{_T6I_28yxUZc47Ux%K`r ztbc$^+@`aKo+T=)D+6y^VGUoudBQ$5N!a_iQZljO2pL1}WcA(?lksGRxCN*{Xp?ra zo+$K?XSWjvz(2?&*gUKLGU1IEAIo(Xp&pV=wA+BU)KNWPl4Sg(v)HXcXycw@- ze~iI%H9~Yxod-kGPj6*9XF#(Ny339`IXt(pul3|i2#y->NUQr&U&s_UqA}KA0KPPT zhH;AQKTtko0B;E(e1xE;CL{z>G{tW4^rYrFZ!0FQe&bNj8)`q*d&BD#n-{F}ZMY^q z6{&swzKo*YV>Y8fI_I>v}CqPXaSS9SeCZzr1N(c=wAnE=GpdxlDhzvxSSXa zY!X)05XnQHd6GN>g0cPnl-+^WXkSXXdm&wIj$L3KrsgE{2-hHfzCu<Do#WhrHDMGu=&kDctSrr(6?C zq|)d%ei{gBl70;YZxNxEPN|qoh2@QM`^d77p^ZNDa~(OC9=NlpqF8KZZpAjn3VsSW zsKA|S^WMfGFSUndXEX1mQcu+a`}K-z{HuEDrAB(28soFw+Tq3M0uyr!{d~@Ow3pcO z4LZrB7s!XVlWp3O<#3-S7V!~AAJEeq68lRYd!OCf3q!~2GkQD6@tTcCCmN<@E`c)o z6`dN8Bs9Ip%C4}zdYd`sQf5{(FI==<5^zxJW82IIdA$k+kv911$lm~`hk~5S+p9!6>RUapXSsU50;iq*hdqKI0VlmF)3^vvk*BY%~k@2 z&k#tWLqHEys8aXUV-{!#AQZfJMX2zaPiF;z+%0peXMb=dgQPamFVu5&pg&B`1I0Ke z&Azt+Z`V<`IKi@K7HOqCyq?MXN}PMy2soWc`m_P&(=I6obzjxBR260T|TW}5Jtr5HRujryr#Vq0$(AW;CAD5CtfE(GL$ zfauc=3uB*Ts+){AhlwG*L>_o&!8rb+7qb3&3qD|L={*PrTYJ;{2G=D!@0DTD<}fAZ zvHWjuGH}W59@2%8IF_nZBLn(T%EQK`R6HdDq{9BXh8meMyE@Hd4*lij z$$wZu#BLFMNtzcxp+bd1B>{(SOAmXT$*X;?>966SP%w{#7$9Z!I(2D;3*kl>1GyveYu(;b^nRU$v z0TKO$aAR1-TkJzl8lk;5AzUk$w|c#8L4n@<7KdEBpSyI5&}y(S#+?Q?d+BI7>X(}4 zcHp{~m^Z6!q=A&LQZ=D#romAZVK0j9e7B&gt z_fj>>=w$Q?7CkmmA(y5QDWM<6n!3ysehkgk=%40#j;_jY$RuPtohPP#*W(dU+x zy%(b!!XDIcE;9lcybnI7Hle!-J6Xwajg`g%wTT?+Pc^_U8ON9; zynLLM7u=(fQ5}#Ch%*L?cVX#z?In#Hs{ofi{751{a6Eph zL8vxO>hFvh${1j#Q@0jKs8Vjas40O6P^gp1kLFy#1oysl=u9|2Kxf6mvbJh_JW=6` zY-okH3jJVqocEBg-q8yQMLi35Bi%WB*JgXx$U>^p>Z1!tZ{`TX{C(jrc% zx)bG#7CWMTKOT_i1kGq>m@|oPo*J^f9*n5%B;?U2HRy*tX(}sL)~xfsur~-hdjmcF zp=%bf23AB~TUk}vRN4{gQJ%=T*CzVS`Kl?a5B&YZj6nMvxk&Q#QjtZ$%4nUCe| z*HPtul~Cd%BNH*;E;DGqg+RC-kr{#JCzF`LXx4C#1ARCKn~^Gu{dsC!qo=-$ubP42HR^LgTv^08?LS0S+%#&g==gDw zHn15oPz&jaa!#1pJwLj$PQ}BSel#8Tc5KY88YFQiQ)pj)S6U!JhNN13Ad!CwRJyC8 zK`uUKK|3s=;x>ysETKUBq`P*Fr<^h;02k&v*{ zbx*+VV<;HW-PN^+dn{)J6S*!o8f5VrsfBQ=!q@X7jl 
zX^4$a!+>27#-?T7O?g6dQGnNk6nJ5l!7L%k)S=Udt;6PtT!JN@o)gBF6q0?j)>ZyG=K#E>X0mxu>He$N(M zCb7qD$Kd59@gMagSh4ZsS`_a|wQE^2vaxb`8JH(%e5HP54W=WeU^#@f-o%;6G>}4F zdX0im((m8_qFs_Rp*+|IQdt{xBS^}Y#gx)>hbL*42;%i1HR@MxK_nbBn5S)-Bnxm9 zMBTKVE0Z=p_1M$j96EKY9>^~h_|_G|EUFM^o;&=7L6IfBute!qHH z$i2a(R(l^U8v$NQCqH{$G0t$o>;>Ru@8B!g3L z-nn%)e|&|5v~KZbfMfS=ZDFC~fWI2FBHd>ZYI%hn{T}|8Q{6tZ%PMmTtW<$+efvT? znA@jt&+`Mn&R}+^pjcqKL4p6?CaG2M@Z)W>Ur-*O!Vp=WWkg`DkNH?dffP{AL0T9< z)MFUMxmQdf_{y2WmV$*_RqebrgW$K)C|57dwrJ+myI`pQ0+;dCg(4Rydo=|Pt$HF~ zw0t2?xT1+=w!Z7(%x&+7NkP4=v%e0KrV_zOr}hX~>3WCF(84meGTY39t2*M8)`JpM z`#wo(-)X3i{nog#^17fFMJSCkGP=0%UhbFH)qGH8lSu~u6D~oIoD{G1N_7m9clMvR zAG>Q>1WgLZj;oZA`C4&KFs0=jk{6DTzsD>4(uDcexN6W08%ZV-a_phK;D%FeZKQK< zQszE6k&I(RU|9AXT;0sto!Hl5tmO?EOxisSm?Y^UHdM(yot}1L$lTaPhVGv6>8M)d zCYY_Eaq4KO&#-$C@(cg{WZgC-;q$I`q$&wsJ&Tr33!ES~Ladi9SA$ce;1c8INlhE< z!QNHA(Pv#P=b88(r*H7ZQOLLsdCNqgj8r{+^w*b@Q=5tOn##nO8Bs_w-8w}MEKfWW zNEN(Z7ZZX8=baaTjco*{snm%k9XmFM_*2L@E6y%AD9cnBE|C_J^pM=b--LwpY!h8- z>soBqZjiptrVd#sJWUAlDi8wrZ@k zxrKltPH+&aruJVsd`G~1iwWWVZz41-)ddz0gBs+~2Mr%$NZgz3&Ij{%lTxNbv0voA zo$;PpX&|iiz@o5wY9Wn?1f9IZHHAX^G9 z7{=t+6Fc5j=s-udgm=K5%MZ0Fn6@0#dqR-#CHd|bPeC}5NnJoPLJBWjC{#qQ<^f3A zY5kbad9Hs+>$R|+rL=S1pgT)?#I5H1-FsaTTvJTY?>u~~CaedPWQrMyqCaoQ~8l(JmACFf0%SO6zltF-z=^zqgy0) zQ$tOU;gu})y=NvM;;>wn)a;;UY7N9NG^|pRZEnHd4|S0KJwBw09yr8Vc1KTvZ~s-U z*rf%9_XdGMRa;W&7_sB8?l=a{*{8Qrlxl?O8Rp%{tCZY1$1l~GmKmuS&Eq~kpDD|t zJWt9BJl{AO7qDwZp z7C`Ibt;`#*{XWF!m$-_$fVt0A3-36rcN*|MasAOq#Ssnj-t_aDNd-LM_9KUeDj3R; zh|R4yH)d7kLmdSyv{~{pa1G6k@{Q=7hj2o($Pj+rf=~gP<@x)x&)>H{kB$<$JVBH? z300tzZ#o1o!5R-89Yn!5O?%-k5`wr@TOYW1DGLeHX56ox9fi0HqP7Btx>=xs6b9i; zAcdsvKp(Pf;VD)_1>uu7VPTkg)y=%6giLsBV99P0Oy6XTN&(m=Zq|Dv5o(s1%twL; zl!vT=H{_+E(pv&9;+2qQZ(YtroW+)j=_|1hq=5Sfqc)K_de$LK`pE%C%GwY4x|~W& zHBrPyv);#=eV@aof!>Y?S7H2o5h}Eb!MAT7?m)g^7!hFnkIdLY*q2g}Qyo{nr=NX1 zWADVu5^y2yf8U7*6`j{{Iw(n+*Rm7!2!7<@)OO+@As{iQ0G^>vorzKvt^=LBdO#?S z`c-agd+TK#RCmg%xcyQ(!^zYcPqds}cQx;Hb>e+Xi!N?j__>i~)gUO-Z?iIqx1;RG z#_qI@g^Rk~TR{JuPpmsqbMAQcD|r!&*DK@O7u=7N#}BKJ)ZxNjR8vEmQQTgSnuF&M zCDQuyt~{UW9755O5j|{^c!S{WBgW~>)oX2e{3euTpE0Q_>#~K{An%0c~_VCSXMNMeXMhKiAyDMaz+LXMpsw%}ge3iTmPBjc>zB3Ro`|8r$QBCQ8PA?6W5O zJHVVe{X#?)Az6-O^UAEFaIWH1@T56ar*uv^sv+a2E)iKSE#7@e z9UfUzt`&e)rv#q29ShoGt}m2aA{>F7etNaoBc4*IN|6b6hZ#km=Q*v!XdccNUxgvXR0mibFm$6w60;0-0uwyzS%0J^W`BGOCggLa0vX$v;yN`b&|uPY@oz=eN;_RMY){c>KwV*Mkkz zC0j6E2D+)~)JTXHFC>Ab^NDtB9~Bry+hMxX+Duc08j6gvE=mF2jx9+={{(E;WpcI4 zwSsL%Ko3@y2j}NgO@*PWDB&anp9T*x>nve@=JBFWd@eZE+7YaWQ+YKNDopc`_3;}2 zu(TcQs0JW2wAJ;sF8xVF5_eW~R=AW~_32RtB)6EC8sVdv`&(J`s7E;i;2$gxuyjdWDRV0|s*k zTAiu!DH4DSwV^tieNxGRWDgr$GgC01C5Ir#@5@fW^~uSS@FQ%)UXEiSt4=$3I&tg+f#UC&$ zb0{6y>19o-5N^jw|v0LusiYb1@mD33i&T z#gE~~P~o=9cwn6E4Th5SZo844SM*o$I@)>uYzmx}Uf(TdIE8Fq<@BpCz*d|lT0Xjt zN`4!?i)|frTG4#Xus9d&Epcrx^V66yV zS@Wu1rv3`SOA-|d%(wTQ??g5!7Db`CXMQRu01a-G$$mR`Z;X$;@x|pSM*XJ$PK?<= zcvOZ3*3{;zU=D<$TE=7{%Lhm>8ygS08clehF~g^z~E^ z46judO<_0snXUW$SF*)@><{7K|qTlcGRUW!|*KS_cJ^P=vp=C?PWw3B{zY;h6x*0ewH zaXWE&q^(-eZEbis*!k@xy$PMC540tlg01U#;!Afl5ki`k1ZzjUU3LRm5dk4h5Z{CM zR2zMFgVI;P;qXtiKvSNe#sueQ<2?C4<5y}^5D*8FZ7^A#`hWYRYmr*8Xm-gf%=#u% zr4f3dknEvuQ09sLCbQJc8lk`J5br*-w{C=ngJ~v4)(j9au+lP}l-Wp z@3H$MT>7XCPaE3U`PRf-fzq&i2qZUQ_)w%dbJ{FslSV^gH*(DE=)YJH_mI5?;lXF& z^tRX@w|7(A%xYS9c~ZMQJBos#f$+zpAGL#oIwE!6f{4{K;UBH@B$d2Z>>gm$$fHvX zdGu_-bI#;-1QtKCR?NLLkvGbel^4P>2n|_^d`r>c*n3%23*fKWZDskQG`&)pwWW&%t-#&SSk|4geFbKwtJL{PS> z5qdx6Up5>w=JT@AQa8GcqCOg@QWt@3Q|?p-MRw#W2dC=`lQ%UScDzIpr>qd<7j*OG zsxV;^#@1z_`;;MC*XfwESIe7{dWI{#r7UY}kW{B5uPNSxxH ztcG>(agHr7GD#Thpic{tR4~OzRl1jE=)aW`@qD_~`ql1)Dx}$dLecPb-gR|3MiwU9 
z_HF@0jbRxlT(BT76dtdST2^^pnUnU9FZ|cW6MV&8m=A7P3-2aec0cc127Y_K^De|+ zg$Y%}(MG#x@(e~NiEm7oRVeD}1eh4CDzjSS{6cE{{7gh%`HK-G!PDZ%P744q_d53A z2dRA6PC)8k^%jBs07?L+F_Bk>isAW(Qvzzr^TM5*ZUohFFWQtm<5LVt{8ntIL^v(@ z-q#T_1X2R0UXh)Ql4bFF&L~Xzkw1JjvJ=&Y{r; zM3^!i9)amN1nWs|ld&!0S^r*=O^_rTF93c8{WlKN&7_nz!MRy8PuU!svZ1m*Bj4n; zq(oyU=LSs*vu(ZiB_1*)+DJRsrSEUt#=?kl?RV*F;uE%Hwzwh2+^5 z3Pz&J2Gn@=h5EOqTO;-)XttA_?riZ{xkmLsh!T6@fP93`*OGcUdiX9z(}KI!ad+c( zPmYdzNT>)S_?i203wSgve%{zlwA{uT(KvHVHi{Pvd%1#99KxuH#;IX#RCcpo1ZJUb zY>3`54JJtghY-Tjd=ZlaNIxI*$?yImf@h>;3nC8|YFkiw4XO-?1d-us8~R6N$`kjQ z1K%78uqt!f)DO|&V!u1)@?Ab*BJUZ}Azx^9mV0$abnpKW2s6`+B(6!u$XKB|TASq0 z!Lws7ESeK~ngYk+g;Z9KmGs%8BFz1j3Erzb^yFqBz2-(n%9ndJGzrSU9q%%GnK=>2 z$Tjcu?y(=F8&(yOOgS*4qzE*zN5${$7`{Y2JWU8RB+Xfr_7Jc!D&P9Yr9k0R8V&&2 z{^9lLzQwirLPCq4q!d#&8`);ovc{WGQN%Sg;Aa=?Yyh(l_Q7*tv{JiR zBV?eR&s3Tdk=LLwYbD+;#yMMdR8=atHpG^r|2j{IB2?DR|Jt~9ImXIBims#Vn#|~{ z>A(5J`seqHDdPQ<9KP5-+nx&ulz9K1ImrR0wqVu1np~N*v2?w&#uzy4Q6x7nC;y68 zrhakOG)jLdNOR*9j4XHRR4(rX4?5vPIvI=^B~DON!cqMK^Q82BMXZzOU0-B@oW*CD zq$*EHNIzEY^dfyEe1zQsj&eSP|g3StrtZ-AP6y+h;c| zqNeHc7Bx{x6+@I1;|nuR7~kRb^x?)E{UtM$YpqmdYu1WB6_y*K6xjAsxs+fVZfY{A ztYN5sRLM~O-eiKQ7Rsa*22jH{q{0-%!dRa6_-LSr1x*A z)mKQnfeeV|;a~YnNN_OJE8%0cENkhw{D4MiBTvj0M0d}Oq*(wCX#_54N+AZ-;l~ow z4`>0S<>zDRY4Y-J*A`v`pW*uF^qP>b3!xFlCtvn>4P9s9$KD1l8PbSP&zEpr3H`r# zj6I}1#2~phE*CtA+{C7mx1p|iS>BtZq2obg1#vnRO;|!B%x=dl93yI1_S;lG{4%qL zl(kTfiUmAv$F{4%yY?zb0(_`f0hp+ti4|kkc^T`sawFLM;8=D)IzBY#K0kO>W6^B!ip@gkTl`R!@WDB2~CATQP^g$%=GaQ z7W-Wc#rM22T=0sX=c-)qUILX|h5v#AF+1Fj z*iE!OTcqOTTyZlf4jPE$hXsq{&ppoq62Lsg=i!iTl!OGMOoh?#7m94@Eu#7Ra^S*i zn1))d%EsJy_W0fE$!GBR59m^W#?txl=bTGJb3E8%UJ3i3nMwg}+c61NV+sN+T7hG|bfyw|X=&gO9+hZh170mqf zR#vcfK3Z78H5(x%M~?`ryIODNErlZ$+I1TYhsAcufQc(wGTpI){%BrAZke`4m5g2}c3)pzDWzr$ zNOZW!1B+9u*nc4s5Ang3F(!;HFI3==PPkps@2xHvEjsXR9p$l&`$1+mvYaK^{&>O3 z4r`K{A$4Q^Zpk?U~qRRQYpa``rUn~R(PwM=B}9{zjs z9Z$+KMcybT6}0^Z3m>4mbza{0++xqojc43b4>!5w#n!o==#RR{66>St67@!M3xsuv znOEB8Lwo@hNtbZ19pV=%Ql%qhiE5|i3Je9PsjytHKtC)$X$V!1>Q{;$R}+*urVC(R zZQGxx?D4|>1)h*j0sxV_wAL3&(pQ7@LbmqXy=UaN>M$d8A>Gi=HS{HGGX52zG2PnD zCPp3)EZjJd?krM$>nP?z;_B|0Co-s7H6k|6hVfplg!*G9n*5|km3)qQSy5Ny8F?na zBMvHcrgCA&c{Dda1JJ<(gQfcyb6qj8JMzQD^_?R&xY6dPeXuDf)p)LEr!@uF1b=B! 
z?Da88)H2v(X)F@hDiMi2G=1jda<<=Xu?%gM^QF)5_8|JdEJzNwSIy^Mk zq@j@{LDr*Yi}wS}&%GYa2Z{DV(_Z(e0#!s4;vq5waIs^6!0&o7aHv7Qagd|)CwHt% zUR>&ND>mUa1u;M;G2nhjWr0=Rx(XQ5MRmL1N7BHz0w6(UP-2L5fY&03iy7R)>J{I;Ze18CM&CuRsJ}m z6cq}0DwYQbkc^%^kG$YNc}V!}=s8kD8Djjx#pITw4s-AI=O}y@>?bHAdxsv!(&k1R z&}5_zHSS#)EOvU36lBUd>(wvd`2(ri-xReVnSTa_L!3LZRnW)13GtMy{0}cef%-Gz z<{_OFUfTy;Ef+e6*i?sT_%LcMGCgEIn{7>h`?zVYdF503_#-}8(1{CrS>#he$;;Ce z6WH3W;K^r=PsCI5YlrKAga>vI9`Msu<~sK)$SuWswl(;HoZQZ+h>}kTyF=VO|I8l_ zUqoR1<@*)x6@eR?l|Pbkh^H*oRN}DV4kx)QTMus2Ke4d;bOEtI2k@Sf;eO8V7U&^^ z*uY8ke8)pOX_vo$^%B+6g&T;5|Mjm_J_jJx&Bt@0D;nTcsmAI$HjVLBf@#IKHiI3kq1r0PmU z+})n6VbPe;pMaNT1Ltev0W?q5rTKW;EVADwzJzz(ISw@l^`8{q$+U)Bpw&2r%N)j# z+xP)<@1}sFnM_4OL2_M8FyU+u0j=m_Jqs=IO*vca&D&!k@VBD6==SBlo%BSD+-==^ zJ!S_oqK8VcRy)z==fm?FY$9p*wEDOo<_3w_o-5^Bd}Yskz0UNtsT!ni z46R5U1fM22qMWY0?K%wi+){@?#kRR}V{(Wc*fgUZP%ZC*@4DcBo(9RSsMz)&!YO9b zS^1K_m-;pUmy+9tJL$U||k0 zlk>YNi3K2f@6w2jHX(7@v3sfZ5wLBrS9P>o+7riY-7zH{vKS*+2w$4*meP5$-HNzk z_98eC08?F=15v@k)ZzmPkA<^`?%@~$o<3+v+u9lyl)LB~)^=dBk7ux}Gu`~bl0KMg z&<0xH-I8Np_=ql2yYw;)G-Pn(n14t0e?g3K;>SduAu5s%L~TG^wQ+(%alpxXkt7v; zo4sPed|R-)%v&ta_j#}8!0hktpQtX*zhvEO@hg%eWGSMt9|B1AmiEVR9Yq=QeX8C* zADjO#bHa-BaKf2uwUoBI=dZAB2ROw+1$j41uK-%>vNS6D>IJ+RL~ZqlrtFUs%>bAq5fkb(M5P?M0TG z$oGpNdUQml?+gGM6O<5#+T-9gJClK_nur#A5ewku}8J1#;A&`ur;1xuHGf#T4rbBYx z>@d5kU!-9Z-*h&y-g+ISJ!n7tO;I3JH`|S*u4jD`+K`}L)e#`0 zS;AUi6|PK6UY0H(_ZqQ|vIz=LqosqSrvUnwb+jbcwFnMB83>_UDNDWrOVxcL@IVfu z6lTf<7=)6aXF7ks7W~Vo4s0QBWp=7l{b}^>-3wp%^Or{qW+Qi{N4v`VhK7%~EEEF0F=Yo-d+7ofCIOQAsoO?QX z=2a+2j>x*?iHa^`0hCHyQlKlM-$^)?aQ!!pM@*_fg~X)a-ce{U==`6y+C={f)w+D* zbzj?M`GNN{0_8O0DpjISjlIvjW~I2J@gm^VMW#}b%is6N9kcM4)NFP0bhF;870DH% zbHv^pil-*zEQw9*0}h0wWDZla5fH5qt*3FPcKIgNKu4c3ZpM2&6e6HM3mJNmj&xLh z(GAZpi_MIP>P86>_Wpoy2V^g*IQ*AM2%$|7LGa0bP1#}6S$Svnd@Ra5SOBIok+VR4 zD)T1b@awRa&Z(Dqugm|$IqYEWvhl4#R7Wth{9mr-GSouqC{U}*7f(WdCL7FX`c}GI zIXbaiF6c;-kW?Q-Lq zwn3|M7!vzCs!z_71P#6cUS`Dgd%+U9k6*$vbH}J)D{e~r!*SD8RSYQZP9vt*T*ZOl z9@Co>4>vwe<#Zm;hgkl*Aqh4<|2F~%BLOFH)-8cVSNUA7Vr_`ZBV6wP}mKS;}5(g@}HDMgr0%vQ--_T61`!f!@6B?O}ZxqSQuVcjnd zwGXxv!~6>w4JuGwKl5THm%Eke&)k-A7|Fk=fSe&}22rHO`DA(Y=?+AYCVG~YFlJqu zFQf3{;sG&?`fgDkh4OZsk-=L}7uegsE?d@=(K->Z4-z)uX~tI0k7oX?>gok9O5SCqx9vfr-ouucaDU+43pU-oph z%N|0mK(mCEq~^~5H5gF+ddpKTJMnhEcSLyW(74*%Z%J=Uxg?yE{HX#`d4^C$)lB=3 zpO*Vy`)!RqNv-hV1Du6Pdm}>_(JCyV*KZVWm8a(z6}X79|J+~F%FlxX`~EZ27=ksH z!1lf2cdThpzQZ7x170n(4x8MO?ZPU^&z0uy2%Kt1OBUCP@|*V^SXrIm!@P&_!B>lh zd=Wro4Dj;(Prc2hh9L62emaQ}6X4na1Rt>@N)+Y%GHP-b~GGeLrh-vt|u z#^ezB;ye`J`lK4_T|qazNwdgx)fbK+E7EnVH1~DHP8S zosBL#hGDW)3qE+3?5T1uRES;@39xKEYkNUAr{-&L>!s2BDq0uHrERVu$5BWk|D5UD z(&U7*I+0STQ&9G&(0GGgf3Nm_U(Z(?JVG_9Tg&UYDWm++Nc&V3M*~EfvYX@y&+^OC zLi3N=9Ql{eNFdk!@m#Y`Ro2OGlCU@BfHjF^V89s%eY@z!fi`r;wtNi_>8f3eVNFr{ zq0}HXP9x~5xDHA-jV{szMCJj3Sp&42K<1Ho=L5{956kH^p;O9WUq;Ah?rCEMk05<} zw67_5b4Fsd2)rMCEzm}Ih6o?sVuQ?2}fG@zm{W_>Vl7Iw#pWvRrBw z36Jx%{@JEWAGwHBy%{3f%NO2>feVRqtK7Ea-gz@#?w2q=6wYh- z=wv%rQqUmKId}rzW9O^atv|H-_clWRro!#ERg}=JMUeMvVx$$=$BMBVZMB&if3H_< zSa{>V1pX*>2$Mle=4pEOrQ44|5)_-&eFcRE_oe&3*N8I z-^5ZEOYXmlFQR3l#q(q%T}AK_K7g(g=1BN)ySL#ErE-JTDhu@1%;bf2i%J;^i3}Oi zEB5tgk7ji;BcCS1!l~TY=&yr}yaqWCdoXl`m@Zj=@Rmu#M#LbMh37=8au~%=uo^eN z%Z1#sfM_N-7G1-R)h~>x^(kDEcH{M~NzRbO8n}a%5J_|_sv9pLaCfREj>m`c{v5VT zIxtjkg5g=AmrSW}mmtU^n=;JukVq8WWq)sFV#fvnOaQ_qEu8Lbn*(jx@0`L5DrCk-$I~p0QiJG8# zZ5_`lp&$Zk0#9!A$LanlFGqIytAsPMWPA?}N- zaw8E7Q=E*;(^;BcY0Y>TwpZU~PgfJ-FB{#z!qd^`+qEB2;Q_{V90|Hg_7~^|v!?~V z>xfLVX&(^!Xj>K`#@Lx1V6KTxg=B9#NC&Am=sb(r~mfIGAV|k)4nqeWGZfLUUJydez*u8t(GUg!)AJs-lSqfZ}@>vX?1E6UaIBE z?0^)^L9VKzc4<;P#4EM6EDG60G@LNLB)?xuU#m$!7wr+uwJv+abN@v@)`%UBD5$3A 
zdyRpLu>RzGBe(xq)yvK8v~1`wRs9%=nbPx_C{Gc${M16*P#1pHR+_u2A>@m5$SfCr z>X)okH~tLI)fDANr%0%pEjsgn&y+kXDU^^PO^l)Dgef&Oh+;b+F-iqgRX`?6C6Asd>~&CV!RI%^DOBcpU_Xe?r&0nl824l z7dfoTwfzb;ua()2eGV@+hv&%)@I6tlTSsHqHOmDe$N9|qu;@2_ki!mdpE}Z;*~FU7 z)KY#g`GysiVeWxDh%eRwwA>~K3o*^$Fu1nK0JT@pp57-}UVIUEf69qM;_mOWC%tu{ ztnaSk=*v1n^T|xZ{B+Umg+trzs^O#)}zzfdYi%*e|zcS>PQ#V54{qs!r2eNdIH9 zH#%bxrW+X=utUcSw@VB&NSO-x&e6J?wak_)eGXvk9Sngz;K-aE+b{siUqoJdY_ByY zi@Gk#=1m*PcLl@SiE{*T)~ADj-!B&g#O1ytM$-eduGy%{u6TUjAD4J>y?>x9&dJub z6NW=GZRN6d(Hj6(zqxEUKu_uG>Im{KWto4~x20HmkPWJ6(`K~DtPp5(!=)oTk)hu* z9|^!*CHk932bvDkaFIb-?4lsvXP!s;((IesyU6a#N0!$8l1YfcF5^+JSzKCfQx*id zKE+-wS!1)xbArsTLj|iYfJcgy%hIbWDTP55^u}b_uRP8~TQbMJ1;%T2^y%lolpTy1kjpTz$tz6LKKrh>@csw2H)(EfL;2-9W;*ZcQpGCHz9rg~9FTVs^(kO#sgy0I3Gx9}CvB;n= z#(uV6I4y4h(Cdabc|0#d64$+Q(!Nkb&Wh(CtP$9I?haj!&>d1hw9Sn!v%iNWFWaYP zrzZ+supmAgc)`k4FXi3BO?lOXy!A*`2ad(`cdub<{6S*Ri?xzgHscqvVkBk9l%hYTHf3i3jK5ShXJbm5mO&hyIPEr z!kCsn^VDPAmUPR&uasknFSzMN>!#Xp!E}oRldGitAj$4aM zZ;{Fa8wO#8j~cl3s0Xe#z~xJ}y(aC|ucBh##z9L|tP4ZRY3Bq+{h1*W-j|dgYn~qX zc;W!dVprbL)nP?gacQH2;lx&KSTo5mD?<`_0Hx1h)_HF##ELZ9!Z#Vb;3=`Jz1OMl zV+)DKEX4S=&ZV2qOr1;Bm_;Ig&cdm(tN3D% zUL^O_h3OQvNpJVQTDSeGMK1@j z2&;Cw$oleY#lyY>41TqAK17m?%GFT!^J}PlNSZ<$iwPr6$}l|wyN_HsS^<@%eD+7_Hmq7$tPr|#RLN1n05hu$nE<+j7Jl;klBrrPZ%?607 zfocZ9Z;jdePIEh54(SavziF3n_!VabjAjgir$t~B<`sL!f9@RD0Jq{?`sg(P!Y^hl zwC7}f%svK?K7*F6#XqpiW)-rTvKQmM4O0TdX4VMPlx}t?^4LiechBsZLpGKMvg@g! z!pQthEfnoyQ@;=zO&ox2YMwxiy<^`Yay4zWGn6Ob!Tz>BlVXs3(HpYdRX~FWMt)UA zqetcOI#}b@vUfLjjUhr!SH-(|+OQTRLABB50*>2S<*$^~b!N>#64k%@VADK7z}+|9 z+I*#~fT|P|%=^xiiX^FP@zgNVg+q&$&|hKmr?s@C%Rwp?AgzC;2gg}f6z_K*K)o7G z1V6c1Q8o+qQuPT(vE@!OY=QIvXAVuMyoW5*By|uVyvU$80sR#m*sdI9(Wn$jtj|Ms zZ`8*&O{kZ85}d80<8;;|CSf`5(aPoO0`hr)hI#`kofy{>y9$y_CjuJf^}QDB)1%cS zY0Ro?@Ht` zVS!nQ?mecOoX%aTjUBTkzo>%8p?;$}vivaDAi5^+&+G|zL{n_GK=mdbsJ2b%ZWAo} zrk_p7<9F zaXK+H7U-JHnPgH{m{)EY3PjEkQ4^#%pmuSf5*r2tEWMQJr*mcgH^6w31!YaGYXXg> zf{YLPe%0?&S7c{b9OQj-A9QFHG6{!g~CwQ*4g(r%mpI z1yB){yTD13!&we7pkb(??r2{GS&ev(h(|$R0I2V*XDYQVO?3*#=+F;gj7WC}NmdwN zcrJ6oKx+ZW+}Za@XVs})A@7Z0Etx@t)|wp?#vdO9z-`I&lI9{kYeW2q9;AsmQS6SA6Pnc!k{<2~=gvuz+(!I%e z7v3-c2BZ4<$V!*-xAmbOlNxjB1ZQF=wJRinOhEX0HW5@-W?wqA;Uz+N;`R_R~zM+-P z@RlkgTz|Pb!lKow*X&gv6#x;AnLW!HwTxfliqL_4y2|VcE6!tS4;O+768%ptwrwa( zFSSM?)^RWFM1OUBZx6=wo*-58`jnV$rIgt$E>9sMQ@2QeaORC~2Lj5z(GP;rKaA1G2|x&40$STk96I6u2E*Dptj2 zgmLeiI!(bX9}e!#lNDBh@z8e-%&PzJX-5uO$a#K)>`q%)46Z5Bn#WU8&Q&c%mD1O( zAHAR4XWs)^O^{bJSri3S)XgtuLd6I0<+jPj`WwlQD`>)+P*AWD1{T@sfWaN#=m;j)ZNP zrV6!*z0@++v$RUu4}qJS6vpZ$U!lzMf?Hq*c6b&7Ipd`~9HNxG=42f>ASw4p@Zskp z!>MVshm3kHz>x%A?n%|aSu4;c)bTw_C+X-_!_qUh9-fx1>O=y_|C^TR(^4A%_z1K1 zaJi~Y+;Va(3CN;shXLWDj#H#D2s=Xl8Uan0Ov72pG^(i}I$Paay7X?gHJ&wV`C`#DsIme-aeejQ(!tz;JC_z$HwwyB^n<6gMlo)vQGz@Rx}N%7G_%Dw6fS|4Gj#61O##QowF;Jq#8aa`bH5!Dhm_vj)B)^2d62`*v&*4j-Ca+hdbuinT- zT-pjk&pGo6`|c{|K{8e5S%m`kOw?JF{kwB=*n|>`kZ@XQ#&9Q6mL@SK)VlMR(4tC; z+tmSxoPx1WL6Lns2@;y<`#!+pnS=qjXrItOLDE+$*nVbTaF?Br1k8->OvMz(#IEipX zF>WleAj)PKOi=9b(;+qHkLEkAu<4*M%~zt2xB)H;0taVV-;?B@YE9UF3Il4DF2w;P z8^mTpr}2F!HdYQRW2v^dwn=k~!R*xUzwWZlCy?{v0;ytz+>GtxOb7#Hz~|FYQMFQq zANGsVH3aF7caT$uY-5D{uo?mTJtm!-pby{>ZF2!LuQH^m2iCTPM@HdpCg~{qCfRs4 z;T>CGVc#bjsL5G{-CU$C8ol{>As+|N^Vvz^#eZZMl2CKsp%~7~5YK8a+7BO$*3*9lhH^))srxV7%EcPkNiE6&%E#Gfo#nGQBat0SVsSVd^pUB zOG|Nt_j~>=Vj`KBv6^wd;`LZfUoLeF6(Re7z@^nL=$aV%qPWWuX9e!{kz#NxFFgBu z@~w#xFRiC&FrI-~7q$)F(;lMgjsPYD zN9bem*gl1ZgWnmmc;iYsv?F66>ujJ9jPe!QU)W&R%6q&oaI6PJK;*aKmwUjIUp$DE zX(oKFEkTjUvyOf&M)30oRHUoS-h3;th-iIN!VGsJDXN*5B0!AYzQ(zz@{66pEMn^m zv&SNdkU|L2JzbI;Eu~Qt8Z$@Y^&8B$UhUzq@Y{R@I@RYZyPS_a;Sbwukg{6Vi+eAd 
z{n-7uA`;Tw=*k|9xiUbUwn~aJccE4u37#ZgOKg|jAmFn4%jD^honD%aoDhS&bz*pD zwN=(MVUiH?Ar#p^s8c(igmNpPh@kmb?xzHXgaz3x7lwodP2I^Zdl|`h|8iVcQ%S)! zZ`_BZ<955o3X_qj_S#;-KoD`0<3^;~k7QuaQN!4IxDFqf7mzXEaJkw}JPkj@rmd0z z*YT!TYLG#j4zWD9inDb^dawZ!`ohC;>64r07mfGH8;GLu3HKcK!=WE0qa(dbTbbKq zTbgHbDt!$&0Oc=?j!D5SvMj74^gq z9RVYCt|fRW{M>#YV1Lj)XMLOROZ95Rh#rIB)mvDB=$t=fP~O$;O{>Sf)tWRn)o0R$ zd`j%mJb+?N@n@UdOaasOObkkrEL8*T*25j-gO0^5Euw=_QQ-8)N9HiVV{q6mtS|aI zea*gN3v1%{IOrx1>)TW!fedjr4w10Y6O|vRuN`Ea#UaABr%Ny&PyFP<7>mLBV0@Q5 z%b=m8!|{@UzmdK3Vm0A22Nexfkkx`dD6%pSUez3pd^(O8L!6C>3;*{>H7QtAl^1=Q zC|IQ#$@J8ini?STDon!a03t47YxQ_R^Dsub6$RDv${CrNyy5c2AfVR$a_ZGe@OG$8 zG_o0wxQ+3SFO*P&+hUUJCwu4Pv7Z24J6BX2vn+UQeB z&P`m-~dBOJKan+AmYSd*K z%*`47Mf#V?a~%9bGdyu1==-4?sj|&zO8Sv(!R03f%be3oaeQ)bbr5QVeRknwL;U=; zpjU3EL;HkGvpPU_1)~fg5O{WfRv08zz@1#`h!KN!sql;<&L*X$x}gepJDjZ`Eb&ivha-elWL@sy)tqGk3xH2u79`q|18NZ6xZ%?}|d-4Qce4OR8 zAoth}6NuivwHK?zdlHg)OsLCSlQ4hX4Ml^Qwbm!RAWNcJ(dmnqtUvs-^@jF$d9D>MC4L z$OF_Zc&E!vJ9b(@Vp7wlJqCpI3XD&|fA@Uv8uM{I-wv zHtxQryZ&x@oBNij5PyVPIlN*6qN+%Kwd;kdS$p-@_a=A9HqmQ;8Zbkw)^zU8Q*WZp zKVTNxM36A~lws-9>?GcQ$4i*P?~(x6B)m*oQePhUav2KNez7PLpSTROz^E2O(}#9S zr5Sd`rF5wae-*`oaVw0wY(OYO`PJQR*o|K7i3MANO9uY3O`rcCoSJoW#NKx-p$coL zAq_TYqyRMz%8B^Ghe4v2^cG|iY*go?i{1)|)3YO{cO_Fp+FeO_7$ zMpXx9b}TNO-wf>$<+?#KqVNpy@ZHH;Jv`GjhuOn}i2C&B(B*f+01!_d9iLA4#1=a4%RrPG$6MgOaZZ24~ zw{7JHz+h*wXl9TQ6{d6(R6ko9UHsUTf;d~s-bW=$DnYjrmh!e-eNN;X3(I5`xrG`* z*z@)s{H89!K$x8+96yK9fh~NY3`w}%mV>CkF@MXIVmS0!_u0-u+sBW0Et@p=C*WF= zx3Q{Neuex-&Eu*AjB(YMF&I?ihH*w}5f@9(Jsy7X8h-{VC~-#ANDFn=UA?0%gkTPU zfYi~-A35Hi6TcC?<`mv9MI5m=7Me0VJ^KbPSmPg@q}~li%zx5^%YeD-B7If z>|W-Xl%4`L-`<;&$?6y}Ab&E+8YyfV{G!^Lf;;t^ZCYjU_jVS?K7Xd?I(!MaSzVxP zV5O*sVz50PQ7W!{uB{1xJ9rF(c(HskCB`em;U+u`#Ed@jjrjZOqq~xJ4p2r&iz@0e zkUtIX^!rM3LgHYi!Xesex8M*K7fhuksKstpx322P(cwz~^46PEO6VpBdRj`Sln{Fz zhYM*T^&5#$My$z67hB;-TlFC-4lylvq;)e0b#oqUtq@PVN?UTe_6Jnm^nc7x8jA z)2q#)ZfXJb^|J02>)A6}nZV zXVj8^-nSfI#7DQs6CR6lp{%X}>Ikq%Xs+tkhY`pqk^`9x(-T%KjTy7E0s3>2>MR|8 zZ44JmdD@ipX6oVe<$3bDO#1V# zs`)iodl`_ST0X>}N_{cq6aHFtI|TQib!*A@1?CHGEBy&%p2ciRpRCyda1;_%m-W^4 z1W!tpHuRjCoqhcj*9mzIOL_xj+c<3=YBqc)ZwkllA4`RlAoQp2D&kca#y5yL!d;l8b~S9%yd^{B(8wivv`OaO&CinZy?BZ|Sn0n^Y9!?@IZ_y2iR2 zGQ9cw08mZP{|ofS_Bi9i*t}kjJPT2jCba_3kkdGi1ucn)T7Zx6#{AzGYtT?;mZChDSPDHRv8M9}Xxp10u}XDHdW?USEL zCMzP7>2?0%GBKC}^6+$+Dv8)vUI(q_=gGv!`TzmySDMTc86M}FjFB@CXM4B2?Ar=_ z=~^n*EV)??6olhcvM((`(W1=j>@pYT#ABdIcS{k7hh5)R%?mBd{1Ooc*k2Owao%1E zTJYBLTXyK*I+GiagX;ZUVIGOIe3VIK{Mu2Cf6ymfU1~=X-gxAQ`Dk{y9>I6Js6h0| zg{SiviylG+u?MIXy5+y01bxnN%Chs0yI(BsIMBU5(mujeGf`H}Oz=GHu@qY%(qgRrvLvYILmf5 zDynaTSzQQ-HXz;-Tm`9%a;LIu|4S%N5`T(A+qoK{pES0<+i7%g?E54%kvqz?`veX* zE2v5E()n9$54T_;iI-L8U`zJk%81z_^4Nu(=rcsT<)^4D{8Ybd2}Ydj58-rHwm4JVA@zRx2P$pNyGYJbHDpfd@FRM z9pZ2JZT)x>^cZfTq2ww3@n%7G?UlsRDyv{b$oB3?T=c&r3VY-)cjqx5xIVP%h6;!$e32Q3BxUyszZon1jiPjRRjjxVI?_{EN-_~XI>7L^V z3s+pjkPBcPLyRC*g#H^TA)K_10>Kp?rWTRoxZ`?Ryb($aSS61VXj7I0*%+Dsrc`C( zoGXOJKPv7+8{2A`AQEVep$N)9ahn&xlW1X*2??WG&+TrADW-N!)ch}VP>2$qJ`ttz zF@O0utmLRiNf7m^?|Rvxc*9CLU~!%sR+g1m!`#Yq;r}8XCSVS+N8kA_X3)lVAnY z+LKyAP?~6161(a&pW#EJ66>ZKE=JGs77xA}zREe4I)hC*ort56v^ZCskMc5Vb+A|i zp?i4u{6IPvXCmA?qB@Br>(b)zZ>eds8`zl_Dqn`UOhWa)!AK3w|2xdF%2Xt9x1kKj zZQHR^Mj}4)?+M6mkpvo)fSF6nJWiC`1W2_z33T z;m>Uz6Bs0J(K2j-2du(f&XDJ+wvgPX%-S(6nr-y#xF~r;f~@Rks9hC~X*NMq@B}FT zYk^BaJSTCBpaDj`6H(ez%FH}W@ogu}sfZCD!{}bYuZkU_eC1_1hR&j>niqSMYx)G3 zvhD$5m$=n6xPp!pPz4t4db2x9DeFf*L@|VezUEP@Qa$N0CZqDd8nV5jOPpS;i@fe# zI_&TptB|Z|vA$22w<4Ygf{VYbK!q**X*S|&2xnsW5%mFa65J&Q#AuN-i#rzL3iqZ-{S%%0Sr4?!&X|TbGzSltju&;ZUEoi2 zU|Y?J6TD6FR6F@oS>lrn)91O(nLzB_L0Nsg`b|bzM_hQ7my8xqDseOr2Y>_Frjg%= 
za^DZ=tVnb<0|*FTGreTRH%Dmub74L&rs$NpO9VNbenkK>$8%G6Fpxz#m9pgq&`Ns| zM&(aq2p2)HJyt_}_cis93yYtiHi3J`Iu`LfWA~l-kEUgM4Z3|{dF>1ZgUkSTvT5s~ zOZ>_@Pzg*^KE$YC4=<2L$pOtsa9H$5#<;{&5le@GPmlUx!u;#JP5o4O5Afm8QYv_x zgK=(!&YPH?F?})7-n-6d`N=h{nSw-E^GHP|f>*S1F}Ac|yJXd$P4)YG+5=821keNi zCo673h!WD07R^p0u&R;2m-H3R7J%11RwIJ-$C%A^JxC6e+B#na#) zL;%V|ZXH!_smk1T_C{^G{{KglAf%l(iu-H)n$5 zIW0wF+v6Qkh4B0)RM8yhEzHac3v&q=UNTQO;7AiGd*c7Rs$vgW`M^JuC~?>#}hw zKO7g;%ae>JxdT;&E@U{V(9?sN?tC|}njwHx6ToK5DL9l=q|)bajq8qNmiVaL!Ubv~ z#d<_DWKeHFj16=@FSEfyE~kjsUwR0cHBRzcx92g2kjZ@?~C@aZ`W>v?|}i z&A-Y=gcU2E4gz~E%Iv0UaunsVePCvCI%MN}`frL3i?dlG9{R5t8iiRc0`be|`gntVvOo0RNtw*qiAJY8nFAstY!ay{OlBNS6a zC5*rt@fl1Yv;R+~Ofi*M@t-(kj?0;>P*Lm@tD!Tyobh;yxN>by?+JIEJJXU^awrU` z?Ix~i?qY~3By1ROlS1!?^~JyQTBJ+fqR7tNS1-1+t&}d--fG5OAutG?lo>W@na|sj zHR|TmuEa_)Wr9qZkOLVRiD{*aTmPu4G=k6fVn3lPta=j|Ucns@qs@x07$JvD)t<7ja!n74v+LiTl!oH7v$B+>C<0>o$(JR3_AuM6j|f|3eszwa%}x0ZA6(t6%kpqONiPXl_^#E zY>Ja}^S(jV1|LJ*EMe!Fvd&CrHH~<*H8}VdtIpn`9Mb)MB{IBxQe6Yin1Sn#)l`L; zl+F0IUo@e?Ct#@97nONB7R)EXD1>aX>Z`YXv4Q!U-c<~NVOrLZ@74a2*qdK|?_wbJ zQyg5&Cq&lDyQBlHXjD(#vgs|FG%}E>9lnC5oFxE3gO8h6cb1r+rN04|4QcX75B8!w zw<-jj1DY^uBA(?Kf!^*rsBG%^><@JA+`HZzfl^WJUmi{cxQP7TF^lkE8tR_R6KMH_ z^W5X?Ezzqef&nC5%q7|BHe(lLR2U&@S~#O;%0a7(fvtnUS|3HrHYehAQUz= zH|XY9M`DQ+o;S&o#^hvP6m;$g;J?^go^K5>#BDGd3&!>D#HOI+BwGs9`#2IA?Ekwu9KYf^*-}31I95*4E$dHT4MT%dkfEtFe`ZHo+l%Ow z>yYOTT;88hqb(Q{(?)ZT#}DLgk;q~cY(Ju~2Cc85wx3 z1zSZm#Ci9*^`>6ykpKvdzwXV$hsLiPh`!|MDpw&2)iTXV001+MNILGC^Q81h3%6jh zqUbx??6+}B)_K|-KQN3QmvGO&Y=rGG)+u`qd6&Qnqjz^CVU{JR`6L}JT~;Fe@trrF zOTIm(%jkR0QqZC0ei1{gRs!xJw|!;ytM|N$a~n5B+MPe$>z1cl@-Fia-`Tv9kXG(I zUvuOo8l*h>Gm{v?02`@ITG1iUTiNkndW2PV?@%giAD_5ka``;Q3WvGeo``n6ns9qaLD>JO*j+V+VbAuh- zWB?g+7uv#%oK9K0Va{t^C>Vf3?tQj1+>)t}MBC{b@&%Ke=rGlc?WMFVI}jrx|NQ!w zX$9Drf*-6qXGVRf-b;Mi93iJR!kL%Wnpc*c3o;QeJU%5oW?<53t=4c%g^WyFTU|IN zlutz7QS}X;pu|}I$gV_QlSMSx*^~w*d*+Ex=RKZ|wop#XFZzmM1A-C=yeA~Jj(CxG zw35j;JY29X8xmQRFL}%(#II89RvM#ZozA;Ow1Q!XC`=bd23CQwF8IrTfA|xcbO$!F z5`g#^vfUZ#d9@mQU*(cZi*ii?9Xd*v4My)W{^53_HJ@?#j9{oNn*Zm|-=G3Tu0+5BI3+M6;UfM*_P$IDqBVXuX!DYbv`?S>dqn=w_W(a8tte72~( zbgP*%Vm80@Lu*K5Y(#**QsA)$RhCHSO4`w7LW{h5OiL829?Jkel*jg~U-P=tNQj?S z7^>fXdv8m+N~ISV6w^RY@uu^k`9(3;)v122Jv9r;!2z>-ts|D4`Qh)e3UE`Dq& zb?f=Kx=bE|u_R%HQI3%dHr7}GkJlousS;C=Dr^a4%zYjGj&))`FSaCXJB?x0hVT<(denP#3-K6}iDBUaA!{Hln!g zGU_xWD`4Z+0MU{!-HC~Sl%*HpBch7Ul=}_sq;I({|7Q0v3n8k|&Zm3l1DKBfLP9W9 zyb;(_ESz2gS$8j(o2znDP3zpP;Mx~>F4H2!7gPvu5j+VhJT?{PEe}?l*|kcmQQ$CG z8<(q%)EVTmrS^aHK&c-!(f|dzHw_Sr!k!k9A{$L_q0N=R?g4sukbl37@bV!1Zy@1~ z0s=QV;6x$y!kLPQ;94xWbGG4ecMIkF?IEf+xp1LDn2IKx^5?XH4iBkd7sIa_)UeDF zEhW-9N(x&NXvD%oW<4?GJZZs(_e3#6>dAvYw^DF38@nR=&>Fw-r=?LZi$N42JMhm6 zNfYQc2da6N)Xn$l;FAQRrl+JbvwKqh{1T<;asyA0jQ^^GTF1#b@>!n7+bN z6C1`e;-erqh#yURi%#jQ{E@zd)oV{zo+X3piOLuCWmRW4>@jID zh0c%qv`@DmSH5DRMQICjx_+%B77-SwJoylP7)RFRRVt#&O8f#qM!G!A1q_xzUGwbz zU&Zs(A5*~m`?nDSDJ&E4+z(TTs9ErnP$}G-mp8Nm3_Cphq5v#Bcw#CdZ*5wjX`nJt z7K7kgo}Ta!h-e&1s6QX#h1@|k$9%ZolXj8sd?V)R_RYjgfMXh@AzFGfZa#X6?+94M z@+~}V;qDMrWA0yfTHAFDaJC<$f5TDg9&;MjlC*g%kY!{J=C{nm>DXHDccLiNp&@S5RgqPj?| zgLYSZdohw=)E)Za-yk7!OPe$DWo# zYv;{r>*h;!EC(i-KJ?fHdcFBqzF{-Ps;qu+`&C}01Nt1D30VLeqVNjzG}j(Q-?)Fe z2Dg0-1Cjg?L9o&Ux`#Dnj4H&2F6CQv?&(kyxJFUXukL7uu5rUB1p4i6A%jYI)95eOHw7#(XXHqacOf0~@iud#?r+v=X*bB+dEBdrI(_M-s#tux~ikVE38!s%l$A+Kc)YP#|u zhNTEp7K83MJh*jLvz~$%xjcon&@bij)3=4Mv{{SL7fCGVgv7yRMg4Dc^oHzEVJZrz zP+C5cZ`^;zBNPAm>i49wNiyy390-Z~$PI`X*;PcTEUA}tK&58!28;5GQ2G6+F~`fS zEd*_7wuL`~TFxq;?I!haKxu3Gf*m|2LN!YksSImsY3gYZ=u)=Ao)nS~5I+9It6Slw zvKh>JGYbeKW#^g{rU7Y=H$-p;2$lC4S}qavYJ%S_3P2udDo-tex+&oHO?b*?6^wfw 
zES;)zIl^LwJzirzf|KDUi%v@F4nXXWi`Rl4ygkt|)#`k`v*)mdxH5xDDYR`~pM$b+ z(NA->ER@bWtjC-8O0?Cdg2R8qrc|C$`iG=J^gJ^CU@z|y?VugMqySL>oA>OpS4nnWxTPru&9 zSH5k|-`nYML>9XKp?K(a(&Lc8a>b>0K_}fy(&Xk zW;tv?#X+P>xAywm3wGPaegYWy0s-MlHmS0!L+!EHP7O4#VdDBM>tYC5YT0|D)1gIV z)VN0q2d5CuVdE_jG9GtcIY1XSlV-v%RG=Z{xqcFhSxYCWF*;NxO9P3rNyc{aV}DJZ z3$Mmhu5c-E_uL3A%>KhSNJ-I(0)89GsvHvE{OIk~@3|3|8`fOkJ#6Dh9g9xqWo$Fr<-zW?ab~VB<&~sQY){4dh zCu{1x)_3WZaZ#$mn>IBf{uu1*Q(+lb12=&EHpag=#XBpPwnMIVHmE!a%wQzi%wk&F zGxu+LCA)Q#_kXtG+TWkjcepSmA@&KM^;Hm<|0tt&fuQI3>PsXM zy8Xkj;J-|>A~BjSAS(-DbG}0&8#9Md^{q&fRq{`_Y(>Z5mgT+_(*?iv64GTjMTDsE7r1ARr(|c)1>tU+Of+`! z63{YLm7$Je3Aq0ba@&ZdH&0pZ>`BAG0#Qgn#w=Bg!tjll8swZNi#TZ|uMmv!s0$9o sk5GuHp$n%-Z=j$9F+$`vxCD(za^;ABf(Z`_oxVsA4n(2f50fAO06a0(XaE2J literal 0 HcmV?d00001 diff --git a/tests/inpainting/coyote-mask.webp b/tests/inpainting/coyote-mask.webp new file mode 100644 index 0000000000000000000000000000000000000000..1716bd5f187160d8d33e35356a0f437b464e15fa GIT binary patch literal 1576 zcmV+@2G{vgNk&E>1^@t8MM6+kP&iB!1^@sr|G|F%)%cA7$&w`7wuXX5!=hl((4-70 zifBAo9zw&4FSN>BAZ{=V3BfoI4Q@;T- zO`e_#ut;88|Za(WC69Oob%Ho;*QYI+x?5 zra0KbOO7Vt@*NJ>(4yl}wB&daEgUddYD9$cLc@KYsQX@blFOd29F)eFndRpTn==$7erV zkCL`p50y4u50|!FkCe7vkCrxmK0;wL_!aydehoi9`vv@TNlerdehojO&)^sEQ~3Fs zgrxPyB$G6LOxl8O@C*1A{2YD>zlI+_Cbgg${Ay8B(0ox`(0Wl+(731tzmPLql`ZE5 zKZjp0N(&knHR0#*D>;i*2|3eMF*(as*>c|S;|HaNU&@)SipyE8O39h8O2}ESipd#Q zbwAA*{0e>!zh0G=Gp=gF&&AA^rNk_j<=YZ7U6w883BQIPKPol+T+D1)RLo*oLd)wwNdUdR6*)hRd3n?pnaRvC|8CZ>FM;WET3!R`ijr=p(zN$(D;u__4n(QHm8K z6@E-XszqbUk**qBjC|SHQk3h)h96UgdgZ9_V@VL#z72g$__5&E@T0?z_;dIXe-1z5 zui)44BmNYAEjji+ApS~l;+>F!Q||-{j>6C13n4f8UU2xe-1K{a&l`L<7W^E34L?5r z1^gU-4L?5r1^gU-oQ_36?;m$xAj!mnO~v?jL&2w#KcFLlGYQ@Zy1wOmkWlk5-Wo{g zc_=D458iB0h*<q36)IH5fPt-tK!xeN8~k4wNNHW}u*E z5CJ$(yszN@3CKI3%8&SDEv$+c_>Qb_X#J>NS=;G3^u;mVF04 z5!w}&Jhe@L^17ahUXeUG&nRip>o^nIdCJrEgpiiF1Di>mX1vgjnY7qi_-T5DiqBq) zKCQ24JG8~6mx5;nFAeeeT5Dk^`86&+doALmzNY0TuIfFNI*Av{&#sGfE_#k#W%9F4 zuV|Z#3*Fw^^sKf$mwt%<%h29&O6wl;6+9=d3MsN98D@Lf7}-O=#7_AsWcz?lD^4*f z>)Uc#DtUezV^%47c7Qf1O5W}R4Y)W!o0Li3_5(4gI^(CD?Ip-FLeS#2J*k$x9k=+b zc!o*Zw*EoxY1!ko?Cc#)vUSg^#Bc9yd-NAM0>*z)-`~$ei5}jceK>IV#KS3k^w`gP aEBNHGxVL~00rm&(?Ju*d{%(i<_74z>pBK{r literal 0 HcmV?d00001 diff --git a/tests/inpainting/original.json b/tests/inpainting/original.json new file mode 100644 index 00000000000..f057f6d0dc5 --- /dev/null +++ b/tests/inpainting/original.json @@ -0,0 +1,30 @@ +{ + "model": "stable diffusion", + "model_id": null, + "model_hash": "cc6cb27103417325ff94f52b7a5d2dde45a7515b25c255d8e396c90014281516", + "app_id": "invoke-ai/InvokeAI", + "app_version": "v2.2.3", + "image": { + "height": 512, + "steps": 50, + "facetool": "gfpgan", + "facetool_strength": 0, + "seed": 1948097268, + "perlin": 0, + "init_mask": null, + "width": 512, + "upscale": null, + "cfg_scale": 7.5, + "prompt": [ + { + "prompt": "a coyote, deep palette knife oil painting, red aloe, plants, desert landscape, award winning", + "weight": 1 + } + ], + "threshold": 0, + "postprocessing": null, + "sampler": "k_lms", + "variations": [], + "type": "txt2img" + } +} From 520c17ab86442c4042e8bfd5a2e834429a0fd9a6 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 10 Dec 2022 21:19:32 -0800 Subject: [PATCH 066/199] diffusers(AddsMaskedGuidance): partial fix for k-schedulers Prevents them from crashing, but results are still hot garbage. 
--- ldm/invoke/generator/diffusers_pipeline.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index e3bb47cec4a..85689f09277 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -90,7 +90,7 @@ def are_like_tensors(a: torch.Tensor, b: object) -> bool: class AddsMaskGuidance: mask: torch.FloatTensor mask_latents: torch.FloatTensor - _scheduler: SchedulerMixin + scheduler: SchedulerMixin noise: torch.Tensor _debug: Optional[Callable] = None @@ -117,10 +117,15 @@ def _t_for_field(self, field_name:str, t): def apply_mask(self, latents: torch.Tensor, t) -> torch.Tensor: batch_size = latents.size(0) mask = einops.repeat(self.mask, 'b c h w -> (repeat b) c h w', repeat=batch_size) + if t.dim() == 0: + # some schedulers expect t to be one-dimensional. + # TODO: file diffusers bug about inconsistency? + t = einops.repeat(t, '-> batch', batch=batch_size) # Noise shouldn't be re-randomized between steps here. The multistep schedulers # get very confused about what is happening from step to step when we do that. - mask_latents = self._scheduler.add_noise(self.mask_latents, self.noise, t) + mask_latents = self.scheduler.add_noise(self.mask_latents, self.noise, t) # TODO: Do we need to also apply scheduler.scale_model_input? Or is add_noise appropriately scaled already? + # mask_latents = self.scheduler.scale_model_input(mask_latents, t) mask_latents = einops.repeat(mask_latents, 'b c h w -> (repeat b) c h w', repeat=batch_size) masked_input = torch.lerp(mask_latents.to(dtype=latents.dtype), latents, mask.to(dtype=latents.dtype)) if self._debug: From 0b48f2e801f6b6bfee7c9f107b26116523882502 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Mon, 12 Dec 2022 16:17:10 -0800 Subject: [PATCH 067/199] fix --safety_checker arg parsing and add note to diffusers loader about where safety checker gets called --- ldm/invoke/args.py | 18 +++++++++--------- ldm/invoke/model_cache.py | 7 ++++++- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index aa9b94ffaf5..c18a5d35344 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -81,18 +81,18 @@ """ import argparse -from argparse import Namespace, RawTextHelpFormatter -import pydoc -import json +import base64 +import copy +import functools import hashlib +import json import os +import pydoc import re -import sys import shlex -import copy -import base64 -import functools -import warnings +import sys +from argparse import Namespace + import ldm.invoke.pngwriter from ldm.invoke.globals import Globals from ldm.invoke.prompt_parser import split_weighted_subprompts @@ -477,7 +477,7 @@ def _create_arg_parser(self): default='auto', ) model_group.add_argument( - '--nsfw_checker' + '--nsfw_checker', '--safety_checker', action=argparse.BooleanOptionalAction, dest='safety_checker', diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index f57dc6de198..0bfbfe32be7 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -356,7 +356,12 @@ def _load_diffusers_model(self, mconfig): pipeline = StableDiffusionGeneratorPipeline.from_pretrained( name_or_path, - safety_checker=None, # TODO + # TODO: Safety checker is currently handled at a different stage in the code: + # ldm.invoke.generator.base.Generator.safety_check + # We might want to move that here for consistency with diffusers API, 
or we might + # want to leave it as a separate processing node. It ends up using the same diffusers + # code either way, so we can table it for now. + safety_checker=None, # TODO: alternate VAE # TODO: local_files_only=True **pipeline_args From 31fd285c82ea0292740edcbb5a2c917f8cab1d86 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Mon, 12 Dec 2022 16:17:46 -0800 Subject: [PATCH 068/199] generate: fix import error --- ldm/generate.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index 79a543247ea..2d60a479cdd 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -16,6 +16,7 @@ import skimage import torch import transformers +from PIL import Image, ImageOps from diffusers import HeunDiscreteScheduler from diffusers.pipeline_utils import DiffusionPipeline from diffusers.schedulers.scheduling_ddim import DDIMScheduler @@ -26,9 +27,9 @@ from diffusers.schedulers.scheduling_lms_discrete import LMSDiscreteScheduler from diffusers.schedulers.scheduling_pndm import PNDMScheduler from omegaconf import OmegaConf -from PIL import Image, ImageOps from pytorch_lightning import seed_everything, logging +import ldm.invoke.conditioning from ldm.invoke.args import metadata_from_png from ldm.invoke.concepts_lib import Concepts from ldm.invoke.conditioning import get_uc_and_c_and_ec @@ -616,9 +617,9 @@ def apply_postprocessor( # used by multiple postfixers # todo: cross-attention control uc, c, extra_conditioning_info = get_uc_and_c_and_ec( - prompt, model =self.model, + prompt, model=self.model, skip_normalize_legacy_blend=opt.skip_normalize, - log_tokens =ldm.invoke.conditioning.log_tokenization + log_tokens=ldm.invoke.conditioning.log_tokenization ) if tool in ('gfpgan','codeformer','upscale'): From 26eac9d94ba2f3a794316ef14ea9dfd9413a8dee Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Tue, 13 Dec 2022 16:13:44 -0800 Subject: [PATCH 069/199] CI: don't try to read the old init location --- .github/workflows/test-invoke-pip.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/test-invoke-pip.yml b/.github/workflows/test-invoke-pip.yml index d6cd0ac891d..14658be02b2 100644 --- a/.github/workflows/test-invoke-pip.yml +++ b/.github/workflows/test-invoke-pip.yml @@ -113,10 +113,6 @@ jobs: --no-interactive --yes \ --full-precision # can't use fp16 weights without a GPU - - name: cat ~/.invokeai - id: cat-invokeai - run: cat ~/.invokeai - - name: Run the tests id: run-tests if: matrix.os != 'windows-2022' From 3607042c9da617b7c6dd7bc224a72cf63040cf7e Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 14 Dec 2022 09:05:45 -0800 Subject: [PATCH 070/199] diffusers: support loading an alternate VAE --- configs/models.yaml.example | 16 +++++++++--- ldm/invoke/model_cache.py | 49 ++++++++++++++++++++++++++++++++++--- 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/configs/models.yaml.example b/configs/models.yaml.example index 1eb96638969..5187d29b3ea 100644 --- a/configs/models.yaml.example +++ b/configs/models.yaml.example @@ -6,14 +6,24 @@ # and the width and height of the images it # was trained on. 
diffusers-1.4: - description: Diffusers version of Stable Diffusion version 1.4 + description: 🤗🧨 Stable Diffusion v1.4 format: diffusers repo_name: CompVis/stable-diffusion-v1-4 - default: true diffusers-1.5: - description: Diffusers version of Stable Diffusion version 1.5 + description: 🤗🧨 Stable Diffusion v1.5 format: diffusers repo_name: runwayml/stable-diffusion-v1-5 + default: true +diffusers-1.5+mse: + description: 🤗🧨 Stable Diffusion v1.5 + MSE-finetuned VAE + format: diffusers + repo_name: runwayml/stable-diffusion-v1-5 + vae: + repo_name: stabilityai/sd-vae-ft-mse +diffusers-inpainting-1.5: + description: 🤗🧨 inpainting for Stable Diffusion v1.5 + format: diffusers + repo_name: runwayml/stable-diffusion-inpainting stable-diffusion-1.5: description: The newest Stable Diffusion version 1.5 weight file (4.27 GB) weights: models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 0bfbfe32be7..e6f1c518f32 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -21,6 +21,7 @@ import torch import transformers +from diffusers import AutoencoderKL from huggingface_hub import hf_hub_download from huggingface_hub.utils import RevisionNotFoundError from omegaconf import OmegaConf @@ -337,14 +338,20 @@ def _load_diffusers_model(self, mconfig): # TODO: scan weights maybe? + if 'vae' in mconfig: + vae = self._load_vae(mconfig['vae']) + pipeline_args.update(vae=vae) + if self.precision == 'float16': print(' | Using faster float16 precision') if not isinstance(name_or_path, Path): + # hub has no explicit API for different data types, but the main Stable Diffusion + # releases set a precedent for putting float16 weights in a fp16 branch. try: hf_hub_download(name_or_path, "model_index.json", revision="fp16") - except RevisionNotFoundError as e: - pass + except RevisionNotFoundError: + pass # no such branch, assume we should use the default. else: pipeline_args.update(revision="fp16") @@ -362,7 +369,6 @@ def _load_diffusers_model(self, mconfig): # want to leave it as a separate processing node. It ends up using the same diffusers # code either way, so we can table it for now. safety_checker=None, - # TODO: alternate VAE # TODO: local_files_only=True **pipeline_args ) @@ -535,3 +541,40 @@ def _cached_sha256(self,path,data) -> Union[str, bytes]: with open(hashpath,'w') as f: f.write(hash) return hash + + def _load_vae(self, vae_config): + vae_args = {} + + if 'repo_name' in vae_config: + name_or_path = vae_config['repo_name'] + elif 'path' in vae_config: + name_or_path = Path(vae_config['path']) + if not name_or_path.is_absolute(): + name_or_path = Path(Globals.root, name_or_path).resolve() + else: + raise ValueError("VAE config must specify either repo_name or path.") + + print(f'>> Loading diffusers VAE from {name_or_path}') + if self.precision == 'float16': + print(' | Using faster float16 precision') + + if not isinstance(name_or_path, Path): + try: + hf_hub_download(name_or_path, "model_index.json", revision="fp16") + except RevisionNotFoundError: + pass + else: + vae_args.update(revision="fp16") + + vae_args.update(torch_dtype=torch.float16) + else: + # TODO: more accurately, "using the model's default precision." + # How do we find out what that is? + print(' | Using more accurate float32 precision') + + if 'subfolder' in vae_config: + vae_args['subfolder'] = vae_config['subfolder'] + + # At some point we might need to be able to use different classes here? 
But for now I think + # all Stable Diffusion VAE are AutoencoderKL. + return AutoencoderKL.from_pretrained(name_or_path, **vae_args) From 0605cf46097c801b791f6eff144a184d0ebee885 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 14 Dec 2022 09:14:25 -0800 Subject: [PATCH 071/199] CI: remove sh-syntax if-statement so it doesn't crash powershell --- .github/workflows/test-invoke-conda.yml | 6 ++---- .github/workflows/test-invoke-pip.yml | 6 ++---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml index e3f1523162b..792f1e9d626 100644 --- a/.github/workflows/test-invoke-conda.yml +++ b/.github/workflows/test-invoke-conda.yml @@ -99,11 +99,9 @@ jobs: - name: run configure_invokeai.py id: run-preload-models + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }} run: | - if [ "${HAVE_SECRETS}" == true ] ; then - mkdir -p ~/.huggingface - echo -n '${{ secrets.HUGGINGFACE_TOKEN }}' > ~/.huggingface/token - fi python scripts/configure_invokeai.py \ --no-interactive --yes \ --full-precision # can't use fp16 weights without a GPU diff --git a/.github/workflows/test-invoke-pip.yml b/.github/workflows/test-invoke-pip.yml index 14658be02b2..9f0a69b1a42 100644 --- a/.github/workflows/test-invoke-pip.yml +++ b/.github/workflows/test-invoke-pip.yml @@ -104,11 +104,9 @@ jobs: - name: run configure_invokeai.py id: run-preload-models + env: + HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }} run: | - if [ "${HAVE_SECRETS}" == true ] ; then - mkdir -p ~/.huggingface - echo -n '${{ secrets.HUGGINGFACE_TOKEN }}' > ~/.huggingface/token - fi python3 scripts/configure_invokeai.py \ --no-interactive --yes \ --full-precision # can't use fp16 weights without a GPU From 2c6db2e77cdb93180d7109abf21819d4e8c1d88e Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 14 Dec 2022 10:43:16 -0800 Subject: [PATCH 072/199] CI: fold strings in yaml because backslash is not line-continuation in powershell --- .github/workflows/test-invoke-conda.yml | 24 ++++++++++++------------ .github/workflows/test-invoke-pip.yml | 24 ++++++++++++------------ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml index 792f1e9d626..0f623416a68 100644 --- a/.github/workflows/test-invoke-conda.yml +++ b/.github/workflows/test-invoke-conda.yml @@ -101,10 +101,10 @@ jobs: id: run-preload-models env: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }} - run: | - python scripts/configure_invokeai.py \ - --no-interactive --yes \ - --full-precision # can't use fp16 weights without a GPU + run: > + python scripts/configure_invokeai.py + --no-interactive --yes + --full-precision # can't use fp16 weights without a GPU - name: cat invokeai.init id: cat-invokeai @@ -118,14 +118,14 @@ jobs: HF_HUB_OFFLINE: 1 HF_DATASETS_OFFLINE: 1 TRANSFORMERS_OFFLINE: 1 - run: | - time python scripts/invoke.py \ - --no-patchmatch \ - --no-nsfw_checker \ - --model ${{ matrix.stable-diffusion-model }} \ - --from_file ${{ env.TEST_PROMPTS }} \ - --root="${{ env.INVOKEAI_ROOT }}" \ - --outdir="${{ env.INVOKEAI_ROOT }}/outputs" + run: > + python scripts/invoke.py + --no-patchmatch + --no-nsfw_checker + --model ${{ matrix.stable-diffusion-model }} + --from_file ${{ env.TEST_PROMPTS }} + --root="${{ env.INVOKEAI_ROOT }}" + --outdir="${{ env.INVOKEAI_ROOT }}/outputs" - name: export conda env 
id: export-conda-env diff --git a/.github/workflows/test-invoke-pip.yml b/.github/workflows/test-invoke-pip.yml index 9f0a69b1a42..fb4ff13a15a 100644 --- a/.github/workflows/test-invoke-pip.yml +++ b/.github/workflows/test-invoke-pip.yml @@ -106,10 +106,10 @@ jobs: id: run-preload-models env: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }} - run: | - python3 scripts/configure_invokeai.py \ - --no-interactive --yes \ - --full-precision # can't use fp16 weights without a GPU + run: > + python3 scripts/configure_invokeai.py + --no-interactive --yes + --full-precision # can't use fp16 weights without a GPU - name: Run the tests id: run-tests @@ -119,14 +119,14 @@ jobs: HF_HUB_OFFLINE: 1 HF_DATASETS_OFFLINE: 1 TRANSFORMERS_OFFLINE: 1 - run: | - time python3 scripts/invoke.py \ - --no-patchmatch \ - --no-nsfw_checker \ - --model ${{ matrix.stable-diffusion-model }} \ - --from_file ${{ env.TEST_PROMPTS }} \ - --root="${{ env.INVOKEAI_ROOT }}" \ - --outdir="${{ env.INVOKEAI_OUTDIR }}" + run: > + python3 scripts/invoke.py + --no-patchmatch + --no-nsfw_checker + --model ${{ matrix.stable-diffusion-model }} + --from_file ${{ env.TEST_PROMPTS }} + --root="${{ env.INVOKEAI_ROOT }}" + --outdir="${{ env.INVOKEAI_OUTDIR }}" - name: Archive results id: archive-results From 23eb80b40421b2bb8f4b6d3dd30490d11c447b36 Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Wed, 14 Dec 2022 21:04:55 +0100 Subject: [PATCH 073/199] attention maps callback stuff for diffusers --- ldm/invoke/generator/base.py | 1 - ldm/invoke/generator/diffusers_pipeline.py | 62 ++++++--- ldm/invoke/generator/img2img.py | 7 +- ldm/invoke/generator/inpaint.py | 2 + ldm/invoke/generator/txt2img.py | 8 +- ldm/invoke/generator/txt2img2img.py | 2 +- .../diffusion/cross_attention_control.py | 122 ------------------ ldm/models/diffusion/ksampler.py | 10 +- 8 files changed, 63 insertions(+), 151 deletions(-) diff --git a/ldm/invoke/generator/base.py b/ldm/invoke/generator/base.py index f207b3dc244..68c5ccdeff5 100644 --- a/ldm/invoke/generator/base.py +++ b/ldm/invoke/generator/base.py @@ -63,7 +63,6 @@ def set_variation(self, seed, variation_amount, with_variations): def generate(self,prompt,init_image,width,height,sampler, iterations=1,seed=None, image_callback=None, step_callback=None, threshold=0.0, perlin=0.0, safety_checker:dict=None, - attention_maps_callback = None, **kwargs): scope = choose_autocast(self.precision) self.safety_checker = safety_checker diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 85689f09277..410761cd794 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -7,12 +7,14 @@ import PIL.Image import einops +import numpy as np import torch import torchvision.transforms as T from diffusers.models import attention from diffusers.utils.import_utils import is_xformers_available from ...models.diffusion import cross_attention_control +from ...models.diffusion.cross_attention_map_saving import AttentionMapSaver # monkeypatch diffusers CrossAttention 🙈 # this is to make prompt2prompt and (future) attention maps work @@ -41,6 +43,7 @@ class PipelineIntermediateState: timestep: int latents: torch.Tensor predicted_original: Optional[torch.Tensor] = None + attention_map_saver: Optional[AttentionMapSaver] = None # copied from configs/stable-diffusion/v1-inference.yaml @@ -180,6 +183,17 @@ def __call__(self, *args: ParamType.args, raise AssertionError("why was that an empty generator?") return result +@dataclass 
+class InvokeAIStableDiffusionPipelineOutput(StableDiffusionPipelineOutput): + r""" + Output class for InvokeAI's Stable Diffusion pipeline. + + Args: + attention_map_saver (`AttentionMapSaver`): Object containing attention maps that can be displayed to the user + after generation completes. Optional. + """ + attention_map_saver: Optional[AttentionMapSaver] + class StableDiffusionGeneratorPipeline(StableDiffusionPipeline): r""" @@ -255,7 +269,7 @@ def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, *, callback: Callable[[PipelineIntermediateState], None]=None, extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo=None, run_id=None, - **extra_step_kwargs) -> StableDiffusionPipelineOutput: + **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: r""" Function invoked when calling the pipeline for generation. @@ -273,7 +287,7 @@ def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, :param run_id: :param extra_step_kwargs: """ - result_latents = self.latents_from_embeddings( + result_latents, result_attention_map_saver = self.latents_from_embeddings( latents, num_inference_steps, text_embeddings, unconditioned_embeddings, guidance_scale, extra_conditioning_info=extra_conditioning_info, run_id=run_id, callback=callback, **extra_step_kwargs @@ -283,7 +297,7 @@ def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, with torch.inference_mode(): image = self.decode_latents(result_latents) - output = StableDiffusionPipelineOutput(images=image, nsfw_content_detected=[]) + output = InvokeAIStableDiffusionPipelineOutput(images=image, nsfw_content_detected=[], attention_map_saver=result_attention_map_saver) return self.check_for_safety(output, dtype=text_embeddings.dtype) def latents_from_embeddings( @@ -302,13 +316,14 @@ def latents_from_embeddings( self.scheduler.set_timesteps(num_inference_steps, device=self.unet.device) timesteps = self.scheduler.timesteps infer_latents_from_embeddings = GeneratorToCallbackinator(self.generate_latents_from_embeddings, PipelineIntermediateState) - return infer_latents_from_embeddings( + result: PipelineIntermediateState = infer_latents_from_embeddings( latents, timesteps, text_embeddings, unconditioned_embeddings, guidance_scale, extra_conditioning_info=extra_conditioning_info, additional_guidance=additional_guidance, run_id=run_id, callback=callback, - **extra_step_kwargs).latents + **extra_step_kwargs) + return result.latents, result.attention_map_saver def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, guidance_scale: float, *, @@ -334,6 +349,8 @@ def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, tex batched_t = torch.full((batch_size,), timesteps[0], dtype=timesteps.dtype, device=self.unet.device) + attention_map_saver: AttentionMapSaver = None + self.invokeai_diffuser.remove_attention_map_saving() for i, t in enumerate(self.progress_bar(timesteps)): batched_t.fill_(t) step_output = self.step(batched_t, latents, guidance_scale, @@ -342,9 +359,18 @@ def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, tex **extra_step_kwargs) latents = step_output.prev_sample predicted_original = getattr(step_output, 'pred_original_sample', None) + + if i == len(timesteps)-1 and extra_conditioning_info is not None: + eos_token_index = extra_conditioning_info.tokens_count_including_eos_bos - 1 + attention_map_token_ids = range(1, 
eos_token_index) + attention_map_saver = AttentionMapSaver(token_ids=attention_map_token_ids, latents_shape=latents.shape[-2:]) + self.invokeai_diffuser.setup_attention_map_saving(attention_map_saver) + yield PipelineIntermediateState(run_id=run_id, step=i, timestep=int(t), latents=latents, - predicted_original=predicted_original) - return latents + predicted_original=predicted_original, attention_map_saver=attention_map_saver) + + self.invokeai_diffuser.remove_attention_map_saving() + return latents, attention_map_saver @torch.inference_mode() def step(self, t: torch.Tensor, latents: torch.Tensor, guidance_scale: float, @@ -393,7 +419,7 @@ def img2img_from_embeddings(self, extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, run_id=None, noise_func=None, - **extra_step_kwargs) -> StableDiffusionPipelineOutput: + **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: if isinstance(init_image, PIL.Image.Image): init_image = image_resized_to_grid_as_tensor(init_image.convert('RGB')) @@ -412,7 +438,7 @@ def img2img_from_embeddings(self, def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_steps, text_embeddings, unconditioned_embeddings, guidance_scale, strength, extra_conditioning_info, - noise_func, run_id=None, callback=None, **extra_step_kwargs): + noise_func, run_id=None, callback=None, **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: device = self.unet.device batch_size = initial_latents.size(0) img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components) @@ -423,7 +449,7 @@ def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_ste noised_latents = self.scheduler.add_noise(initial_latents, noise, latent_timestep) latents = noised_latents - result_latents = self.latents_from_embeddings( + result_latents, result_attention_maps = self.latents_from_embeddings( latents, num_inference_steps, text_embeddings, unconditioned_embeddings, guidance_scale, extra_conditioning_info=extra_conditioning_info, timesteps=timesteps, @@ -435,7 +461,7 @@ def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_ste with torch.inference_mode(): image = self.decode_latents(result_latents) - output = StableDiffusionPipelineOutput(images=image, nsfw_content_detected=[]) + output = InvokeAIStableDiffusionPipelineOutput(images=image, nsfw_content_detected=[], attention_map_saver=result_attention_maps) return self.check_for_safety(output, dtype=text_embeddings.dtype) def inpaint_from_embeddings( @@ -450,7 +476,7 @@ def inpaint_from_embeddings( extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, run_id=None, noise_func=None, - **extra_step_kwargs) -> StableDiffusionPipelineOutput: + **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: device = self.unet.device latents_dtype = self.unet.dtype batch_size = 1 @@ -493,7 +519,7 @@ def inpaint_from_embeddings( guidance.append(AddsMaskGuidance(mask, init_image_latents, self.scheduler, noise)) try: - result_latents = self.latents_from_embeddings( + result_latents, result_attention_maps = self.latents_from_embeddings( latents, num_inference_steps, text_embeddings, unconditioned_embeddings, guidance_scale, extra_conditioning_info=extra_conditioning_info, timesteps=timesteps, @@ -508,7 +534,7 @@ def inpaint_from_embeddings( with torch.inference_mode(): image = self.decode_latents(result_latents) - output = StableDiffusionPipelineOutput(images=image, nsfw_content_detected=[]) + output = 
InvokeAIStableDiffusionPipelineOutput(images=image, nsfw_content_detected=[], attention_map_saver=result_attention_maps) return self.check_for_safety(output, dtype=text_embeddings.dtype) def non_noised_latents_from_image(self, init_image, *, device, dtype): @@ -523,7 +549,13 @@ def check_for_safety(self, output, dtype): with torch.inference_mode(): screened_images, has_nsfw_concept = self.run_safety_checker( output.images, device=self._execution_device, dtype=dtype) - return StableDiffusionPipelineOutput(screened_images, has_nsfw_concept) + screened_attention_map_saver = None + if has_nsfw_concept is None or not has_nsfw_concept: + screened_attention_map_saver = output.attention_map_saver + return InvokeAIStableDiffusionPipelineOutput(screened_images, + has_nsfw_concept, + # block the attention maps if NSFW content is detected + attention_map_saver=screened_attention_map_saver) @torch.inference_mode() def get_learned_conditioning(self, c: List[List[str]], *, return_tokens=True, fragment_weights=None): diff --git a/ldm/invoke/generator/img2img.py b/ldm/invoke/generator/img2img.py index 6ea41fda33c..1a470d1ebf1 100644 --- a/ldm/invoke/generator/img2img.py +++ b/ldm/invoke/generator/img2img.py @@ -14,7 +14,9 @@ def __init__(self, model, precision): self.init_latent = None # by get_noise() def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, - conditioning,init_image,strength,step_callback=None,threshold=0.0,perlin=0.0,**kwargs): + conditioning,init_image,strength,step_callback=None,threshold=0.0,perlin=0.0, + attention_maps_callback=None, + **kwargs): """ Returns a function returning an image derived from the prompt and the initial image Return value depends on the seed at the time you call it. @@ -35,7 +37,8 @@ def make_image(x_T): noise_func=self.get_noise_like, callback=step_callback ) - + if pipeline_output.attention_map_saver is not None and attention_maps_callback is not None: + attention_maps_callback(pipeline_output.attention_map_saver) return pipeline.numpy_to_pil(pipeline_output.images)[0] return make_image diff --git a/ldm/invoke/generator/inpaint.py b/ldm/invoke/generator/inpaint.py index 976121d720f..79fbd542c17 100644 --- a/ldm/invoke/generator/inpaint.py +++ b/ldm/invoke/generator/inpaint.py @@ -273,6 +273,8 @@ def make_image(x_T): callback=step_callback, ) + if pipeline_output.attention_map_saver is not None and attention_maps_callback is not None: + attention_maps_callback(pipeline_output.attention_map_saver) result = pipeline.numpy_to_pil(pipeline_output.images)[0] # Seam paint if this is our first pass (seam_size set to 0 during seam painting) diff --git a/ldm/invoke/generator/txt2img.py b/ldm/invoke/generator/txt2img.py index ef3d35dbca7..6c4c7a3f13a 100644 --- a/ldm/invoke/generator/txt2img.py +++ b/ldm/invoke/generator/txt2img.py @@ -37,14 +37,12 @@ def make_image(x_T) -> PIL.Image.Image: unconditioned_embeddings=uc, guidance_scale=cfg_scale, callback=step_callback, - extra_conditioning_info=extra_conditioning_info, + extra_conditioning_info=extra_conditioning_info # TODO: eta = ddim_eta, # TODO: threshold = threshold, - # FIXME: Attention Maps Callback merged from main, but not hooked up - # in diffusers branch yet. 
- keturn - # attention_maps_callback = attention_maps_callback, ) - + if pipeline_output.attention_map_saver is not None and attention_maps_callback is not None: + attention_maps_callback(pipeline_output.attention_map_saver) return pipeline.numpy_to_pil(pipeline_output.images)[0] return make_image diff --git a/ldm/invoke/generator/txt2img2img.py b/ldm/invoke/generator/txt2img2img.py index 29a7106246f..56ebcc5bf43 100644 --- a/ldm/invoke/generator/txt2img2img.py +++ b/ldm/invoke/generator/txt2img2img.py @@ -36,7 +36,7 @@ def get_make_image(self, prompt:str, sampler, steps:int, cfg_scale:float, ddim_e def make_image(x_T): - first_pass_latent_output = pipeline.latents_from_embeddings( + first_pass_latent_output, _ = pipeline.latents_from_embeddings( latents=x_T, num_inference_steps=steps, text_embeddings=c, diff --git a/ldm/models/diffusion/cross_attention_control.py b/ldm/models/diffusion/cross_attention_control.py index b32ccecae7e..66c5567ebdb 100644 --- a/ldm/models/diffusion/cross_attention_control.py +++ b/ldm/models/diffusion/cross_attention_control.py @@ -442,128 +442,6 @@ def get_mem_free_total(device): return mem_free_total -class InvokeAICrossAttentionMixin: - """ - Enable InvokeAI-flavoured CrossAttention calculation, which does aggressive low-memory slicing and calls - through both to an attention_slice_wrangler and a slicing_strategy_getter for custom attention map wrangling - and dymamic slicing strategy selection. - """ - def __init__(self): - self.mem_total_gb = psutil.virtual_memory().total // (1 << 30) - self.attention_slice_wrangler = None - self.slicing_strategy_getter = None - - def set_attention_slice_wrangler(self, wrangler: Optional[Callable[[nn.Module, torch.Tensor, int, int, int], torch.Tensor]]): - ''' - Set custom attention calculator to be called when attention is calculated - :param wrangler: Callback, with args (module, suggested_attention_slice, dim, offset, slice_size), - which returns either the suggested_attention_slice or an adjusted equivalent. - `module` is the current CrossAttention module for which the callback is being invoked. - `suggested_attention_slice` is the default-calculated attention slice - `dim` is -1 if the attenion map has not been sliced, or 0 or 1 for dimension-0 or dimension-1 slicing. - If `dim` is >= 0, `offset` and `slice_size` specify the slice start and length. - - Pass None to use the default attention calculation. 
- :return: - ''' - self.attention_slice_wrangler = wrangler - - def set_slicing_strategy_getter(self, getter: Optional[Callable[[nn.Module], tuple[int,int]]]): - self.slicing_strategy_getter = getter - - def einsum_lowest_level(self, query, key, value, dim, offset, slice_size): - # calculate attention scores - #attention_scores = torch.einsum('b i d, b j d -> b i j', q, k) - if dim is not None: - print(f"sliced dim {dim}, offset {offset}, slice_size {slice_size}") - attention_scores = torch.baddbmm( - torch.empty(query.shape[0], query.shape[1], key.shape[1], dtype=query.dtype, device=query.device), - query, - key.transpose(-1, -2), - beta=0, - alpha=self.scale, - ) - - # calculate attention slice by taking the best scores for each latent pixel - default_attention_slice = attention_scores.softmax(dim=-1, dtype=attention_scores.dtype) - attention_slice_wrangler = self.attention_slice_wrangler - if attention_slice_wrangler is not None: - attention_slice = attention_slice_wrangler(self, default_attention_slice, dim, offset, slice_size) - else: - attention_slice = default_attention_slice - - hidden_states = torch.bmm(attention_slice, value) - return hidden_states - - def einsum_op_slice_dim0(self, q, k, v, slice_size): - r = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) - for i in range(0, q.shape[0], slice_size): - end = i + slice_size - r[i:end] = self.einsum_lowest_level(q[i:end], k[i:end], v[i:end], dim=0, offset=i, slice_size=slice_size) - return r - - def einsum_op_slice_dim1(self, q, k, v, slice_size): - r = torch.zeros(q.shape[0], q.shape[1], v.shape[2], device=q.device, dtype=q.dtype) - for i in range(0, q.shape[1], slice_size): - end = i + slice_size - r[:, i:end] = self.einsum_lowest_level(q[:, i:end], k, v, dim=1, offset=i, slice_size=slice_size) - return r - - def einsum_op_mps_v1(self, q, k, v): - if q.shape[1] <= 4096: # (512x512) max q.shape[1]: 4096 - return self.einsum_lowest_level(q, k, v, None, None, None) - else: - slice_size = math.floor(2**30 / (q.shape[0] * q.shape[1])) - return self.einsum_op_slice_dim1(q, k, v, slice_size) - - def einsum_op_mps_v2(self, q, k, v): - if self.mem_total_gb > 8 and q.shape[1] <= 4096: - return self.einsum_lowest_level(q, k, v, None, None, None) - else: - return self.einsum_op_slice_dim0(q, k, v, 1) - - def einsum_op_tensor_mem(self, q, k, v, max_tensor_mb): - size_mb = q.shape[0] * q.shape[1] * k.shape[1] * q.element_size() // (1 << 20) - if size_mb <= max_tensor_mb: - return self.einsum_lowest_level(q, k, v, None, None, None) - div = 1 << int((size_mb - 1) / max_tensor_mb).bit_length() - if div <= q.shape[0]: - return self.einsum_op_slice_dim0(q, k, v, q.shape[0] // div) - return self.einsum_op_slice_dim1(q, k, v, max(q.shape[1] // div, 1)) - - def einsum_op_cuda(self, q, k, v): - # check if we already have a slicing strategy (this should only happen during cross-attention controlled generation) - slicing_strategy_getter = self.slicing_strategy_getter - if slicing_strategy_getter is not None: - (dim, slice_size) = slicing_strategy_getter(self) - if dim is not None: - # print("using saved slicing strategy with dim", dim, "slice size", slice_size) - if dim == 0: - return self.einsum_op_slice_dim0(q, k, v, slice_size) - elif dim == 1: - return self.einsum_op_slice_dim1(q, k, v, slice_size) - - # fallback for when there is no saved strategy, or saved strategy does not slice - mem_free_total = self.cached_mem_free_total or get_mem_free_total(q.device) - # Divide factor of safety as there's copying and 
fragmentation - return self.einsum_op_tensor_mem(q, k, v, mem_free_total / 3.3 / (1 << 20)) - - - def get_invokeai_attention_mem_efficient(self, q, k, v): - if q.device.type == 'cuda': - #print("in get_attention_mem_efficient with q shape", q.shape, ", k shape", k.shape, ", free memory is", get_mem_free_total(q.device)) - return self.einsum_op_cuda(q, k, v) - - if q.device.type == 'mps' or q.device.type == 'cpu': - if self.mem_total_gb >= 32: - return self.einsum_op_mps_v1(q, k, v) - return self.einsum_op_mps_v2(q, k, v) - - # Smaller slices are faster due to L2/L3/SLC caches. - # Tested on i7 with 8MB L3 cache. - return self.einsum_op_tensor_mem(q, k, v, 32) - - class InvokeAIDiffusersCrossAttention(diffusers.models.attention.CrossAttention, InvokeAICrossAttentionMixin): def __init__(self, **kwargs): diff --git a/ldm/models/diffusion/ksampler.py b/ldm/models/diffusion/ksampler.py index 336ce1d7a08..0038c481e8b 100644 --- a/ldm/models/diffusion/ksampler.py +++ b/ldm/models/diffusion/ksampler.py @@ -209,12 +209,12 @@ def route_callback(k_callback_values): model_wrap_cfg.prepare_to_sample(S, extra_conditioning_info=extra_conditioning_info) # setup attention maps saving. checks for None are because there are multiple code paths to get here. - attention_maps_saver = None + attention_map_saver = None if attention_maps_callback is not None and extra_conditioning_info is not None: eos_token_index = extra_conditioning_info.tokens_count_including_eos_bos - 1 attention_map_token_ids = range(1, eos_token_index) - attention_maps_saver = AttentionMapSaver(token_ids = attention_map_token_ids, latents_shape=x.shape[-2:]) - model_wrap_cfg.invokeai_diffuser.setup_attention_map_saving(attention_maps_saver) + attention_map_saver = AttentionMapSaver(token_ids = attention_map_token_ids, latents_shape=x.shape[-2:]) + model_wrap_cfg.invokeai_diffuser.setup_attention_map_saving(attention_map_saver) extra_args = { 'cond': conditioning, @@ -229,8 +229,8 @@ def route_callback(k_callback_values): ), None, ) - if attention_maps_saver is not None: - attention_maps_callback(attention_maps_saver) + if attention_map_saver is not None: + attention_maps_callback(attention_map_saver) return sampling_result # this code will support inpainting if and when ksampler API modified or From 12a88c8410b2c608c78e58b1557d5ab50a5c770e Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 14 Dec 2022 12:53:04 -0800 Subject: [PATCH 074/199] build: fix syntax error in environment-mac --- environments-and-requirements/environment-mac.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments-and-requirements/environment-mac.yml b/environments-and-requirements/environment-mac.yml index dbb6c6717c2..f6d9816843b 100644 --- a/environments-and-requirements/environment-mac.yml +++ b/environments-and-requirements/environment-mac.yml @@ -49,7 +49,7 @@ dependencies: - sympy=1.10 - send2trash=1.8 - tensorboard=2.10 - - transformers=~4.25 + - transformers~=4.25 - pip: - getpass_asterisk - picklescan From 6e4dad60c866209001dda3dc55826e6b5a093991 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 14 Dec 2022 12:55:31 -0800 Subject: [PATCH 075/199] diffusers: add INITIAL_MODELS with diffusers-compatible repos --- configs/INITIAL_MODELS.diffusers.yaml | 56 +++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 configs/INITIAL_MODELS.diffusers.yaml diff --git a/configs/INITIAL_MODELS.diffusers.yaml 
b/configs/INITIAL_MODELS.diffusers.yaml new file mode 100644 index 00000000000..73a8d77c578 --- /dev/null +++ b/configs/INITIAL_MODELS.diffusers.yaml @@ -0,0 +1,56 @@ +stable-diffusion-1.5: + description: The newest Stable Diffusion version 1.5 weight file (4.27 GB) + repo_id: runwayml/stable-diffusion-v1-5 + format: diffusers + recommended: true + width: 512 + height: 512 +inpainting-1.5: + description: RunwayML SD 1.5 model optimized for inpainting (4.27 GB) + repo_id: runwayml/stable-diffusion-inpainting + format: diffusers + recommended: True + width: 512 + height: 512 +ft-mse-improved-autoencoder-840000: + description: StabilityAI improved autoencoder fine-tuned for human faces (recommended; 335 MB) + repo_id: stabilityai/sd-vae-ft-mse + format: diffusers + recommended: True + width: 512 + height: 512 +stable-diffusion-1.4: + description: The original Stable Diffusion version 1.4 weight file (4.27 GB) + repo_id: CompVis/stable-diffusion-v1-4 + format: diffusers + recommended: False + width: 512 + height: 512 +waifu-diffusion-1.3: + description: Stable Diffusion 1.4 fine tuned on anime-styled images (4.27) + repo_id: hakurei/waifu-diffusion + format: diffusers + recommended: False + width: 512 + height: 512 +trinart-2.0: + description: An SD model finetuned with ~40,000 assorted high resolution manga/anime-style pictures (2.13 GB) + repo_id: naclbit/trinart_stable_diffusion_v2 + format: diffusers + recommended: False + width: 512 + height: 512 +papercut-1.0: + description: SD 1.5 fine-tuned for papercut art (use "PaperCut" in your prompts) (2.13 GB) + repo_id: Fictiverse/Stable_Diffusion_PaperCut_Model + format: diffusers + recommended: False + width: 512 + height: 512 +voxel_art-1.0: + description: Stable Diffusion trained on voxel art (use "VoxelArt" in your prompts) (4.27 GB) + repo_id: Fictiverse/Stable_Diffusion_VoxelArt_Model + format: diffusers + recommended: False + width: 512 + height: 512 From 87b60582bd5fee1ffadb01ca24c572effcdb1601 Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Fri, 16 Dec 2022 14:10:16 +0100 Subject: [PATCH 076/199] re-enable the embedding manager; closes #1778 --- ldm/invoke/generator/diffusers_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 410761cd794..2e96903074a 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -562,7 +562,7 @@ def get_learned_conditioning(self, c: List[List[str]], *, return_tokens=True, fr """ Compatibility function for ldm.models.diffusion.ddpm.LatentDiffusion. 
""" - return self.clip_embedder.encode(c, return_tokens=return_tokens, fragment_weights=fragment_weights) + return self.clip_embedder.encode(c, return_tokens=return_tokens, fragment_weights=fragment_weights, embedding_manager=self.embedding_manager) @property def cond_stage_model(self): From 8bcca8cc2dd3900577a3e4225a37647a84bb087d Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Sun, 18 Dec 2022 16:06:33 +0100 Subject: [PATCH 077/199] Squashed commit of the following: commit e4a956abc37fcb5cf188388b76b617bc5c8fda7d Author: Damian Stewart Date: Sun Dec 18 15:43:07 2022 +0100 import new load handling from EmbeddingManager and cleanup commit c4abe91a5ba0d415b45bf734068385668b7a66e6 Merge: 032e856e 1efc6397 Author: Damian Stewart Date: Sun Dec 18 15:09:53 2022 +0100 Merge branch 'feature_textual_inversion_mgr' into dev/diffusers_with_textual_inversion_manager commit 032e856eefb3bbc39534f5daafd25764bcfcef8b Merge: 8b4f0fe9 bc515e24 Author: Damian Stewart Date: Sun Dec 18 15:08:01 2022 +0100 Merge remote-tracking branch 'upstream/dev/diffusers' into dev/diffusers_with_textual_inversion_manager commit 1efc6397fc6e61c1aff4b0258b93089d61de5955 Author: Damian Stewart Date: Sun Dec 18 15:04:28 2022 +0100 cleanup and add performance notes commit e400f804ac471a0ca2ba432fd658778b20c7bdab Author: Damian Stewart Date: Sun Dec 18 14:45:07 2022 +0100 fix bug and update unit tests commit deb9ae0ae1016750e93ce8275734061f7285a231 Author: Damian Stewart Date: Sun Dec 18 14:28:29 2022 +0100 textual inversion manager seems to work commit 162e02505dec777e91a983c4d0fb52e950d25ff0 Merge: cbad4583 12769b3d Author: Damian Stewart Date: Sun Dec 18 11:58:03 2022 +0100 Merge branch 'main' into feature_textual_inversion_mgr commit cbad45836c6aace6871a90f2621a953f49433131 Author: Damian Stewart Date: Sun Dec 18 11:54:10 2022 +0100 use position embeddings commit 070344c69b0e0db340a183857d0a787b348681d3 Author: Damian Stewart Date: Sun Dec 18 11:53:47 2022 +0100 Don't crash CLI on exceptions commit b035ac8c6772dfd9ba41b8eeb9103181cda028f8 Author: Damian Stewart Date: Sun Dec 18 11:11:55 2022 +0100 add missing position_embeddings commit 12769b3d3562ef71e0f54946b532ad077e10043c Author: Damian Stewart Date: Fri Dec 16 13:33:25 2022 +0100 debugging why it don't work commit bafb7215eabe1515ca5e8388fd3bb2f3ac5362cf Author: Damian Stewart Date: Fri Dec 16 13:21:33 2022 +0100 debugging why it don't work commit 664a6e9e146b42d96703f0cc8baf8f5efec04ee1 Author: Damian Stewart Date: Fri Dec 16 12:48:38 2022 +0100 use TextualInversionManager in place of embeddings (wip, doesn't work) commit 8b4f0fe9d6e4e2643b36dfa27864294785d7ba4e Author: Damian Stewart Date: Fri Dec 16 12:48:38 2022 +0100 use TextualInversionManager in place of embeddings (wip, doesn't work) commit ffbe1ab11163ba712e353d89404e301d0e0c6cdf Merge: 6e4dad60 023df37e Author: Damian Stewart Date: Fri Dec 16 02:37:31 2022 +0100 Merge branch 'feature_textual_inversion_mgr' into dev/diffusers commit 023df37efffa67434f77def7fc3c9dfb29f699fd Author: Damian Stewart Date: Fri Dec 16 02:36:54 2022 +0100 cleanup commit 05fac594eaf79d0058e3c48deee93df603f136c2 Author: Damian Stewart Date: Fri Dec 16 02:07:49 2022 +0100 tweak error checking commit 009f32ed39a7280997c3ffab112adadee0b44279 Author: damian Date: Thu Dec 15 21:29:47 2022 +0100 unit tests passing for embeddings with vector length >1 commit beb1b08d9a98112ed2fe073580568e1a18698da3 Author: Damian Stewart Date: Thu Dec 15 13:39:09 2022 +0100 more explicit equality tests when overwriting commit 
44d8a5a7c85cdabc9ce3a54fd0769a10597b3ca9 Author: Damian Stewart Date: Thu Dec 15 13:30:13 2022 +0100 wip textual inversion manager (unit tests passing for 1v embedding overwriting) commit 417c2b57d90924a839616bfb66804faab8039e4c Author: Damian Stewart Date: Thu Dec 15 12:30:55 2022 +0100 wip textual inversion manager (unit tests passing for base stuff + padding) commit 2e80872e3b6f7fd7d8eb8928822bd824b63cb2ff Author: Damian Stewart Date: Thu Dec 15 10:57:57 2022 +0100 wip new TextualInversionManager --- ldm/generate.py | 28 ++- ldm/invoke/CLI.py | 11 +- ldm/invoke/concepts_lib.py | 8 +- ldm/invoke/conditioning.py | 2 +- ldm/invoke/generator/diffusers_pipeline.py | 11 +- ldm/invoke/readline.py | 4 +- ldm/models/diffusion/ddpm.py | 4 + ldm/modules/embedding_manager.py | 103 +++------ ldm/modules/encoders/modules.py | 111 +++++---- ldm/modules/textual_inversion_manager.py | 241 ++++++++++++++++++++ tests/test_textual_inversion.py | 253 +++++++++++++++++++++ 11 files changed, 646 insertions(+), 130 deletions(-) create mode 100644 ldm/modules/textual_inversion_manager.py create mode 100644 tests/test_textual_inversion.py diff --git a/ldm/generate.py b/ldm/generate.py index 2d60a479cdd..61de50579b3 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -31,7 +31,7 @@ import ldm.invoke.conditioning from ldm.invoke.args import metadata_from_png -from ldm.invoke.concepts_lib import Concepts +from ldm.invoke.concepts_lib import HuggingFaceConceptsLibrary from ldm.invoke.conditioning import get_uc_and_c_and_ec from ldm.invoke.devices import choose_torch_device, choose_precision from ldm.invoke.globals import Globals @@ -443,7 +443,7 @@ def process_image(image,seed): self._set_sampler() # apply the concepts library to the prompt - prompt = self.concept_lib().replace_concepts_with_triggers(prompt, lambda concepts: self.load_concepts(concepts)) + prompt = self.huggingface_concepts_library.replace_concepts_with_triggers(prompt, lambda concepts: self.load_huggingface_concepts(concepts)) # bit of a hack to change the cached sampler's karras threshold to # whatever the user asked for @@ -550,7 +550,7 @@ def process_image(image,seed): print('**Interrupted** Partial results will be returned.') else: raise KeyboardInterrupt - except RuntimeError as e: + except (RuntimeError, Exception) as e: print(traceback.format_exc(), file=sys.stderr) print('>> Could not generate image.') @@ -867,19 +867,27 @@ def set_model(self,model_name): seed_everything(random.randrange(0, np.iinfo(np.uint32).max)) if self.embedding_path is not None: - self.model.embedding_manager.load( - self.embedding_path, self.precision == 'float32' or self.precision == 'autocast' - ) + for root, _, files in os.walk(self.embedding_path): + # loading textual inversions is slow + # see note in TextualInversionManager._get_or_create_token_id_and_assign_embedding() + verbose = len(files)>4 + for name in files: + if verbose: + print(f'>> Loading textual inversion from {name}') + ti_path = os.path.join(root, name) + self.model.textual_inversion_manager.load_textual_inversion(ti_path) + print(f'>> Textual inversions available: {", ".join(self.model.textual_inversion_manager.get_all_trigger_strings())}') self.model_name = model_name self._set_sampler() # requires self.model_name to be set first return self.model - def load_concepts(self,concepts:list[str]): - self.model.embedding_manager.load_concepts(concepts, self.precision=='float32' or self.precision=='autocast') + def load_huggingface_concepts(self, concepts:list[str]): + 
self.model.textual_inversion_manager.load_huggingface_concepts(concepts) - def concept_lib(self)->Concepts: - return self.model.embedding_manager.concepts_library + @property + def huggingface_concepts_library(self) -> HuggingFaceConceptsLibrary: + return self.model.textual_inversion_manager.hf_concepts_library def correct_colors(self, image_list, diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index d6804ed61a3..45687703f6a 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -16,7 +16,7 @@ from ldm.invoke.pngwriter import PngWriter, retrieve_metadata, write_metadata from ldm.invoke.image_util import make_grid from ldm.invoke.log import write_log -from ldm.invoke.concepts_lib import Concepts +from ldm.invoke.concepts_lib import HuggingFaceConceptsLibrary from omegaconf import OmegaConf from pathlib import Path import pyparsing @@ -141,6 +141,10 @@ def main(): main_loop(gen, opt) except KeyboardInterrupt: print("\ngoodbye!") + except Exception: + print(">> An error occurred:") + traceback.print_exc() + # TODO: main_loop() has gotten busy. Needs to be refactored. def main_loop(gen, opt): @@ -318,7 +322,7 @@ def image_writer(image, seed, upscaled=False, first_seed=None, use_prefix=None, if use_prefix is not None: prefix = use_prefix postprocessed = upscaled if upscaled else operation=='postprocess' - opt.prompt = gen.concept_lib().replace_triggers_with_concepts(opt.prompt or prompt_in) # to avoid the problem of non-unique concept triggers + opt.prompt = gen.huggingface_concepts_library.replace_triggers_with_concepts(opt.prompt or prompt_in) # to avoid the problem of non-unique concept triggers filename, formatted_dream_prompt = prepare_image_metadata( opt, prefix, @@ -817,7 +821,8 @@ def add_embedding_terms(gen,completer): Called after setting the model, updates the autocompleter with any terms loaded by the embedding manager. ''' - completer.add_embedding_terms(gen.model.embedding_manager.list_terms()) + trigger_strings = gen.model.textual_inversion_manager.get_all_trigger_strings() + completer.add_embedding_terms(trigger_strings) def split_variations(variations_string) -> list: # shotgun parsing, woo diff --git a/ldm/invoke/concepts_lib.py b/ldm/invoke/concepts_lib.py index e40eda814d3..92bf67ca800 100644 --- a/ldm/invoke/concepts_lib.py +++ b/ldm/invoke/concepts_lib.py @@ -12,7 +12,7 @@ from huggingface_hub import HfFolder, hf_hub_url, ModelSearchArguments, ModelFilter, HfApi from ldm.invoke.globals import Globals -class Concepts(object): +class HuggingFaceConceptsLibrary(object): def __init__(self, root=None): ''' Initialize the Concepts object. May optionally pass a root directory. 
@@ -119,11 +119,11 @@ def get_concept_file(self, concept_name:str, file_name:str='learned_embeds.bin' self.download_concept(concept_name) path = os.path.join(self._concept_path(concept_name), file_name) return path if os.path.exists(path) else None - + def concept_is_downloaded(self, concept_name)->bool: concept_directory = self._concept_path(concept_name) return os.path.exists(concept_directory) - + def download_concept(self,concept_name)->bool: repo_id = self._concept_id(concept_name) dest = self._concept_path(concept_name) @@ -136,7 +136,7 @@ def download_concept(self,concept_name)->bool: os.makedirs(dest, exist_ok=True) succeeded = True - + bytes = 0 def tally_download_size(chunk, size, total): nonlocal bytes diff --git a/ldm/invoke/conditioning.py b/ldm/invoke/conditioning.py index cf6e84ec608..c24e69b27fa 100644 --- a/ldm/invoke/conditioning.py +++ b/ldm/invoke/conditioning.py @@ -238,7 +238,7 @@ def _get_embeddings_and_tokens_for_prompt(model, flattened_prompt: FlattenedProm def _get_tokens_length(model, fragments: list[Fragment]): fragment_texts = [x.text for x in fragments] - tokens = model.cond_stage_model.get_tokens(fragment_texts, include_start_and_end_markers=False) + tokens = model.cond_stage_model.get_token_ids(fragment_texts, include_start_and_end_markers=False) return sum([len(x) for x in tokens]) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 2e96903074a..c4be2b82304 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -32,7 +32,7 @@ from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent -from ldm.modules.embedding_manager import EmbeddingManager +from ldm.modules.textual_inversion_manager import TextualInversionManager from ldm.modules.encoders.modules import WeightedFrozenCLIPEmbedder @@ -238,7 +238,8 @@ def __init__( scheduler: Union[DDIMScheduler, PNDMScheduler, LMSDiscreteScheduler], safety_checker: Optional[StableDiffusionSafetyChecker], feature_extractor: Optional[CLIPFeatureExtractor], - requires_safety_checker: bool = False + requires_safety_checker: bool = False, + precision: str = 'full', ): super().__init__(vae, text_encoder, tokenizer, unet, scheduler, safety_checker, feature_extractor, requires_safety_checker) @@ -258,7 +259,9 @@ def __init__( transformer=self.text_encoder ) self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward) - self.embedding_manager = EmbeddingManager(self.clip_embedder, **_default_personalization_config_params) + use_full_precision = (precision == 'float32' or precision == 'autocast') + self.textual_inversion_manager = TextualInversionManager(self.clip_embedder, use_full_precision) + self.clip_embedder.set_textual_inversion_manager(self.textual_inversion_manager) if is_xformers_available(): self.enable_xformers_memory_efficient_attention() @@ -562,7 +565,7 @@ def get_learned_conditioning(self, c: List[List[str]], *, return_tokens=True, fr """ Compatibility function for ldm.models.diffusion.ddpm.LatentDiffusion. 
""" - return self.clip_embedder.encode(c, return_tokens=return_tokens, fragment_weights=fragment_weights, embedding_manager=self.embedding_manager) + return self.clip_embedder.encode(c, return_tokens=return_tokens, fragment_weights=fragment_weights) @property def cond_stage_model(self): diff --git a/ldm/invoke/readline.py b/ldm/invoke/readline.py index 9c21180dea5..376e0092960 100644 --- a/ldm/invoke/readline.py +++ b/ldm/invoke/readline.py @@ -12,7 +12,7 @@ import re import atexit from ldm.invoke.args import Args -from ldm.invoke.concepts_lib import Concepts +from ldm.invoke.concepts_lib import HuggingFaceConceptsLibrary from ldm.invoke.globals import Globals # ---------------readline utilities--------------------- @@ -276,7 +276,7 @@ def add_embedding_terms(self, terms:list[str]): def _concept_completions(self, text, state): if self.concepts is None: - self.concepts = set(Concepts().list_concepts()) + self.concepts = set(HuggingFaceConceptsLibrary().list_concepts()) self.embedding_terms.update(self.concepts) partial = text[1:] # this removes the leading '<' diff --git a/ldm/models/diffusion/ddpm.py b/ldm/models/diffusion/ddpm.py index e2e38459ff7..d9fa762f0b9 100644 --- a/ldm/models/diffusion/ddpm.py +++ b/ldm/models/diffusion/ddpm.py @@ -22,6 +22,7 @@ from omegaconf import ListConfig import urllib +from ldm.modules.textual_inversion_manager import TextualInversionManager from ldm.util import ( log_txt_as_img, exists, @@ -678,6 +679,9 @@ def __init__( self.embedding_manager = self.instantiate_embedding_manager( personalization_config, self.cond_stage_model ) + self.textual_inversion_manager = TextualInversionManager(self.cond_stage_model, full_precision=True) + # this circular component dependency is gross and bad, needs to be rethought + self.cond_stage_model.set_textual_inversion_manager(self.textual_inversion_manager) self.emb_ckpt_counter = 0 diff --git a/ldm/modules/embedding_manager.py b/ldm/modules/embedding_manager.py index af9383bbd6c..36704eb64b9 100644 --- a/ldm/modules/embedding_manager.py +++ b/ldm/modules/embedding_manager.py @@ -1,11 +1,12 @@ import os.path from cmath import log import torch +from attr import dataclass from torch import nn import sys -from ldm.invoke.concepts_lib import Concepts +from ldm.invoke.concepts_lib import HuggingFaceConceptsLibrary from ldm.data.personalized import per_img_token_list from transformers import CLIPTokenizer from functools import partial @@ -14,35 +15,22 @@ PROGRESSIVE_SCALE = 2000 -def get_clip_token_for_string(tokenizer, string): - batch_encoding = tokenizer( - string, - truncation=True, - max_length=77, - return_length=True, - return_overflowing_tokens=False, - padding='max_length', - return_tensors='pt', - ) - tokens = batch_encoding['input_ids'] - """ assert ( - torch.count_nonzero(tokens - 49407) == 2 - ), f"String '{string}' maps to more than a single token. Please use another string" """ +def get_clip_token_id_for_string(tokenizer: CLIPTokenizer, token_str: str) -> int: + token_id = tokenizer.convert_tokens_to_ids(token_str) + return token_id - return tokens[0, 1] - - -def get_bert_token_for_string(tokenizer, string): +def get_bert_token_id_for_string(tokenizer, string) -> int: token = tokenizer(string) # assert torch.count_nonzero(token) == 3, f"String '{string}' maps to more than a single token. 
Please use another string" - token = token[0, 1] + return token.item() - return token +def get_embedding_for_clip_token_id(embedder, token_id): + if type(token_id) is not torch.Tensor: + token_id = torch.tensor(token_id, dtype=torch.int) + return embedder(token_id.unsqueeze(0))[0, 0] -def get_embedding_for_clip_token(embedder, token): - return embedder(token.unsqueeze(0))[0, 0] class EmbeddingManager(nn.Module): def __init__( @@ -58,8 +46,7 @@ def __init__( super().__init__() self.embedder = embedder - self.concepts_library=Concepts() - self.concepts_loaded = dict() + self.concepts_library=HuggingFaceConceptsLibrary() self.string_to_token_dict = {} self.string_to_param_dict = nn.ParameterDict() @@ -77,11 +64,11 @@ def __init__( embedder, 'tokenizer' ): # using Stable Diffusion's CLIP encoder self.is_clip = True - get_token_for_string = partial( - get_clip_token_for_string, embedder.tokenizer + get_token_id_for_string = partial( + get_clip_token_id_for_string, embedder.tokenizer ) - get_embedding_for_tkn = partial( - get_embedding_for_clip_token, + get_embedding_for_tkn_id = partial( + get_embedding_for_clip_token_id, embedder.transformer.text_model.embeddings, ) # per bug report #572 @@ -89,10 +76,10 @@ def __init__( token_dim = 768 else: # using LDM's BERT encoder self.is_clip = False - get_token_for_string = partial( - get_bert_token_for_string, embedder.tknz_fn + get_token_id_for_string = partial( + get_bert_token_id_for_string, embedder.tknz_fn ) - get_embedding_for_tkn = embedder.transformer.token_emb + get_embedding_for_tkn_id = embedder.transformer.token_emb token_dim = 1280 if per_image_tokens: @@ -100,15 +87,13 @@ def __init__( for idx, placeholder_string in enumerate(placeholder_strings): - token = get_token_for_string(placeholder_string) + token_id = get_token_id_for_string(placeholder_string) if initializer_words and idx < len(initializer_words): - init_word_token = get_token_for_string(initializer_words[idx]) + init_word_token_id = get_token_id_for_string(initializer_words[idx]) with torch.no_grad(): - init_word_embedding = get_embedding_for_tkn( - init_word_token.cpu() - ) + init_word_embedding = get_embedding_for_tkn_id(init_word_token_id) token_params = torch.nn.Parameter( init_word_embedding.unsqueeze(0).repeat( @@ -132,7 +117,7 @@ def __init__( ) ) - self.string_to_token_dict[placeholder_string] = token + self.string_to_token_dict[placeholder_string] = token_id self.string_to_param_dict[placeholder_string] = token_params def forward( @@ -140,6 +125,8 @@ def forward( tokenized_text, embedded_text, ): + # torch.save(embedded_text, '/tmp/embedding-manager-uglysonic-pre-rewrite.pt') + b, n, device = *tokenized_text.shape, tokenized_text.device for ( @@ -164,7 +151,7 @@ def forward( ) placeholder_rows, placeholder_cols = torch.where( - tokenized_text == placeholder_token.to(tokenized_text.device) + tokenized_text == placeholder_token ) if placeholder_rows.nelement() == 0: @@ -182,9 +169,7 @@ def forward( new_token_row = torch.cat( [ tokenized_text[row][:col], - placeholder_token.repeat(num_vectors_for_token).to( - device - ), + torch.tensor([placeholder_token] * num_vectors_for_token, device=device), tokenized_text[row][col + 1 :], ], axis=0, @@ -212,22 +197,6 @@ def save(self, ckpt_path): ckpt_path, ) - def load_concepts(self, concepts:list[str], full=True): - bin_files = list() - for concept_name in concepts: - if concept_name in self.concepts_loaded: - continue - else: - bin_file = self.concepts_library.get_concept_model_path(concept_name) - if not bin_file: - continue - 
bin_files.append(bin_file) - self.concepts_loaded[concept_name]=True - self.load(bin_files, full) - - def list_terms(self) -> list[str]: - return self.concepts_loaded.keys() - def load(self, ckpt_paths, full=True): if len(ckpt_paths) == 0: return @@ -241,7 +210,7 @@ def load(self, ckpt_paths, full=True): # both will be stored in this dictionary for term in self.string_to_param_dict.keys(): term = term.strip('<').strip('>') - self.concepts_loaded[term] = True + self.concepts_loaded[term] = True print(f'>> Current embedding manager terms: {", ".join(self.string_to_param_dict.keys())}') def _expand_directories(self, paths:list[str]): @@ -282,14 +251,16 @@ def add_embedding(self, token_str, embedding, full): if len(embedding.shape) == 1: embedding = embedding.unsqueeze(0) - num_tokens_added = self.embedder.tokenizer.add_tokens(token_str) - current_embeddings = self.embedder.transformer.resize_token_embeddings(None) - current_token_count = current_embeddings.num_embeddings - new_token_count = current_token_count + num_tokens_added - self.embedder.transformer.resize_token_embeddings(new_token_count) + existing_token_id = get_clip_token_id_for_string(self.embedder.tokenizer, token_str) + if existing_token_id == self.embedder.tokenizer.unk_token_id: + num_tokens_added = self.embedder.tokenizer.add_tokens(token_str) + current_embeddings = self.embedder.transformer.resize_token_embeddings(None) + current_token_count = current_embeddings.num_embeddings + new_token_count = current_token_count + num_tokens_added + self.embedder.transformer.resize_token_embeddings(new_token_count) - token = get_clip_token_for_string(self.embedder.tokenizer, token_str) - self.string_to_token_dict[token_str] = token + token_id = get_clip_token_id_for_string(self.embedder.tokenizer, token_str) + self.string_to_token_dict[token_str] = token_id self.string_to_param_dict[token_str] = torch.nn.Parameter(embedding) def parse_embedding(self, embedding_file: str): diff --git a/ldm/modules/encoders/modules.py b/ldm/modules/encoders/modules.py index cf460519333..e119bdef5ad 100644 --- a/ldm/modules/encoders/modules.py +++ b/ldm/modules/encoders/modules.py @@ -11,6 +11,7 @@ import kornia from ldm.invoke.devices import choose_torch_device from ldm.invoke.globals import Globals +#from ldm.modules.textual_inversion_manager import TextualInversionManager from ldm.modules.x_transformer import ( Encoder, @@ -477,7 +478,12 @@ class WeightedFrozenCLIPEmbedder(FrozenCLIPEmbedder): fragment_weights_key = "fragment_weights" return_tokens_key = "return_tokens" + def set_textual_inversion_manager(self, manager): #TextualInversionManager): + # TODO all of the weighting and expanding stuff needs be moved out of this class + self.textual_inversion_manager = manager + def forward(self, text: list, **kwargs): + # TODO all of the weighting and expanding stuff needs be moved out of this class ''' :param text: A batch of prompt strings, or, a batch of lists of fragments of prompt strings to which different @@ -572,19 +578,42 @@ def forward(self, text: list, **kwargs): else: return batch_z - def get_tokens(self, fragments: list[str], include_start_and_end_markers: bool = True) -> list[list[int]]: - tokens = self.tokenizer( + def get_token_ids(self, fragments: list[str], include_start_and_end_markers: bool = True) -> list[list[int]]: + """ + Convert a list of strings like `["a cat", "sitting", "on a mat"]` into a list of lists of token ids like + `[[bos, 0, 1, eos], [bos, 2, eos], [bos, 3, 0, 4, eos]]`. 
bos/eos markers are skipped if + `include_start_and_end_markers` is `False`. Each list will be restricted to the maximum permitted length + (typically 75 tokens + eos/bos markers). + + :param fragments: The strings to convert. + :param include_start_and_end_markers: + :return: + """ + # for args documentation see ENCODE_KWARGS_DOCSTRING in tokenization_utils_base.py (in `transformers` lib) + token_ids_list = self.tokenizer( fragments, truncation=True, max_length=self.max_length, return_overflowing_tokens=False, padding='do_not_pad', - return_tensors=None, # just give me a list of ints + return_tensors=None, # just give me lists of ints )['input_ids'] - if include_start_and_end_markers: - return tokens - else: - return [x[1:-1] for x in tokens] + + result = [] + for token_ids in token_ids_list: + # trim eos/bos + token_ids = token_ids[1:-1] + # pad for textual inversions with vector length >1 + token_ids = self.textual_inversion_manager.expand_textual_inversion_token_ids(token_ids) + # restrict length to max_length-2 (leaving room for bos/eos) + token_ids = token_ids[0:self.max_length - 2] + # add back eos/bos if requested + if include_start_and_end_markers: + token_ids = [self.tokenizer.bos_token_id] + token_ids + [self.tokenizer.eos_token_id] + + result.append(token_ids) + + return result @classmethod @@ -609,56 +638,58 @@ def get_tokens_and_weights(self, fragments: list[str], weights: list[float]) -> if len(fragments) == 0 and len(weights) == 0: fragments = [''] weights = [1] - item_encodings = self.tokenizer( - fragments, - truncation=True, - max_length=self.max_length, - return_overflowing_tokens=True, - padding='do_not_pad', - return_tensors=None, # just give me a list of ints - )['input_ids'] - all_tokens = [] + per_fragment_token_ids = self.get_token_ids(fragments, include_start_and_end_markers=False) + all_token_ids = [] per_token_weights = [] #print("all fragments:", fragments, weights) - for index, fragment in enumerate(item_encodings): - weight = weights[index] + for index, fragment in enumerate(per_fragment_token_ids): + weight = float(weights[index]) #print("processing fragment", fragment, weight) - fragment_tokens = item_encodings[index] - #print("fragment", fragment, "processed to", fragment_tokens) - # trim bos and eos markers before appending - all_tokens.extend(fragment_tokens[1:-1]) - per_token_weights.extend([weight] * (len(fragment_tokens) - 2)) - - if (len(all_tokens) + 2) > self.max_length: - excess_token_count = (len(all_tokens) + 2) - self.max_length + this_fragment_token_ids = per_fragment_token_ids[index] + #print("fragment", fragment, "processed to", this_fragment_token_ids) + # append + all_token_ids += this_fragment_token_ids + # fill out weights tensor with one float per token + per_token_weights += [weight] * len(this_fragment_token_ids) + + # leave room for bos/eos + if len(all_token_ids) > self.max_length - 2: + excess_token_count = len(all_token_ids) - self.max_length - 2 + # TODO build nice description string of how the truncation was applied + # this should be done by calling self.tokenizer.convert_ids_to_tokens() then passing the result to + # self.tokenizer.convert_tokens_to_string() for the token_ids on each side of the truncation limit. 
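The comment above sketches a possible follow-up; one minimal shape for that helper, assuming a Hugging Face CLIPTokenizer (the function name and message format here are illustrative only):

    def describe_truncation(tokenizer, all_token_ids: list[int], max_length: int) -> str:
        # split the flat token id list at the truncation limit (leaving room for bos/eos)
        kept_ids = all_token_ids[:max_length - 2]
        dropped_ids = all_token_ids[max_length - 2:]
        kept = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(kept_ids))
        dropped = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(dropped_ids))
        return f"kept: '{kept}' | dropped: '{dropped}'"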
print(f">> Prompt is {excess_token_count} token(s) too long and has been truncated") - all_tokens = all_tokens[:self.max_length - 2] - per_token_weights = per_token_weights[:self.max_length - 2] + all_token_ids = all_token_ids[0:self.max_length] + per_token_weights = per_token_weights[0:self.max_length] # pad out to a 77-entry array: [eos_token, , eos_token, ..., eos_token] # (77 = self.max_length) - pad_length = self.max_length - 1 - len(all_tokens) - all_tokens.insert(0, self.tokenizer.bos_token_id) - all_tokens.extend([self.tokenizer.eos_token_id] * pad_length) - per_token_weights.insert(0, 1) - per_token_weights.extend([1] * pad_length) + all_token_ids = [self.tokenizer.bos_token_id] + all_token_ids + [self.tokenizer.eos_token_id] + per_token_weights = [1.0] + per_token_weights + [1.0] + pad_length = self.max_length - len(all_token_ids) + all_token_ids += [self.tokenizer.eos_token_id] * pad_length + per_token_weights += [1.0] * pad_length - all_tokens_tensor = torch.tensor(all_tokens, dtype=torch.long).to(self.device) + all_token_ids_tensor = torch.tensor(all_token_ids, dtype=torch.long).to(self.device) per_token_weights_tensor = torch.tensor(per_token_weights, dtype=torch.float32).to(self.device) - #print(f"assembled all_tokens_tensor with shape {all_tokens_tensor.shape}") - return all_tokens_tensor, per_token_weights_tensor + #print(f"assembled all_token_ids_tensor with shape {all_token_ids_tensor.shape}") + return all_token_ids_tensor, per_token_weights_tensor - def build_weighted_embedding_tensor(self, tokens: torch.Tensor, per_token_weights: torch.Tensor, weight_delta_from_empty=True, **kwargs) -> torch.Tensor: + def build_weighted_embedding_tensor(self, token_ids: torch.Tensor, per_token_weights: torch.Tensor, weight_delta_from_empty=True, **kwargs) -> torch.Tensor: ''' Build a tensor representing the passed-in tokens, each of which has a weight. - :param tokens: A tensor of shape (77) containing token ids (integers) + :param token_ids: A tensor of shape (77) containing token ids (integers) :param per_token_weights: A tensor of shape (77) containing weights (floats) :param method: Whether to multiply the whole feature vector for each token or just its distance from an "empty" feature vector :param kwargs: passed on to self.transformer() :return: A tensor of shape (1, 77, 768) representing the requested weighted embeddings. 
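The weighting applied here is a lerp between the embedding of the real prompt and the embedding of an empty prompt; a standalone sketch of just that arithmetic, with random tensors standing in for real CLIP outputs:

    import torch

    z = torch.randn(1, 77, 768)        # embedding of the real prompt
    empty_z = torch.randn(1, 77, 768)  # embedding of an empty prompt
    per_token_weights = torch.full((77,), 0.5)
    weights = per_token_weights.reshape(77, 1).expand(77, 768).unsqueeze(0)
    # weight 1.0 keeps z unchanged, weight 0.0 falls back to the empty-prompt embedding
    weighted_z = empty_z + (z - empty_z) * weights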
''' #print(f"building weighted embedding tensor for {tokens} with weights {per_token_weights}") - z = self.transformer(input_ids=tokens.unsqueeze(0), **kwargs) + if token_ids.shape != torch.Size([self.max_length]): + raise ValueError(f"token_ids has shape {token_ids.shape} - expected [{self.max_length}]") + + z = self.transformer(input_ids=token_ids.unsqueeze(0), **kwargs) + batch_weights_expanded = per_token_weights.reshape(per_token_weights.shape + (1,)).expand(z.shape) if weight_delta_from_empty: @@ -672,7 +703,7 @@ def build_weighted_embedding_tensor(self, tokens: torch.Tensor, per_token_weight z_delta_from_empty = z - empty_z weighted_z = empty_z + (z_delta_from_empty * batch_weights_expanded) - weighted_z_delta_from_empty = (weighted_z-empty_z) + #weighted_z_delta_from_empty = (weighted_z-empty_z) #print("weighted z has delta from empty with sum", weighted_z_delta_from_empty.sum().item(), "mean", weighted_z_delta_from_empty.mean().item() ) #print("using empty-delta method, first 5 rows:") diff --git a/ldm/modules/textual_inversion_manager.py b/ldm/modules/textual_inversion_manager.py new file mode 100644 index 00000000000..dc19037387f --- /dev/null +++ b/ldm/modules/textual_inversion_manager.py @@ -0,0 +1,241 @@ +import os +import traceback +from typing import Union + +import torch +from attr import dataclass +from picklescan.scanner import scan_file_path + +from ldm.invoke.concepts_lib import HuggingFaceConceptsLibrary +from ldm.modules.embedding_manager import get_clip_token_id_for_string +from ldm.modules.encoders.modules import FrozenCLIPEmbedder + + +@dataclass +class TextualInversion: + trigger_string: str + trigger_token_id: int + pad_token_ids: list[int] + embedding: torch.Tensor + + @property + def embedding_vector_length(self) -> int: + return self.embedding.shape[0] + +class TextualInversionManager(): + def __init__(self, clip_embedder: FrozenCLIPEmbedder, full_precision: bool=True): + self.clip_embedder = clip_embedder + self.full_precision = full_precision + self.hf_concepts_library = HuggingFaceConceptsLibrary() + default_textual_inversions: list[TextualInversion] = [] + self.textual_inversions = default_textual_inversions + + def load_huggingface_concepts(self, concepts: list[str]): + for concept_name in concepts: + if concept_name in self.hf_concepts_library.concepts_loaded: + continue + bin_file = self.hf_concepts_library.get_concept_model_path(concept_name) + if not bin_file: + continue + self.load_textual_inversion(bin_file) + self.hf_concepts_library.concepts_loaded[concept_name]=True + + def get_all_trigger_strings(self) -> list[str]: + return [ti.trigger_string for ti in self.textual_inversions] + + def load_textual_inversion(self, ckpt_path): + try: + scan_result = scan_file_path(ckpt_path) + if scan_result.infected_files == 1: + print(f'\n### Security Issues Found in Model: {scan_result.issues_count}') + print('### For your safety, InvokeAI will not load this embed.') + return + except Exception: + print(f"### WARNING::: Invalid or corrupt embeddings found. Ignoring: {ckpt_path}") + return + + embedding_info = self._parse_embedding(ckpt_path) + if embedding_info: + self._add_textual_inversion(embedding_info['name'], embedding_info['embedding']) + else: + print(f'>> Failed to load embedding located at {ckpt_path}. Unsupported file.') + + def _add_textual_inversion(self, trigger_str, embedding) -> int: + """ + Add a textual inversion to be recognised. + :param trigger_str: The trigger text in the prompt that activates this textual inversion. 
If unknown to the embedder's tokenizer, will be added. + :param embedding: The actual embedding data that will be inserted into the conditioning at the point where the token_str appears. + :return: The token id for the added embedding, either existing or newly-added. + """ + if trigger_str in [ti.trigger_string for ti in self.textual_inversions]: + print(f">> TextualInversionManager refusing to overwrite already-loaded token '{trigger_str}'") + return + if not self.full_precision: + embedding = embedding.half() + if len(embedding.shape) == 1: + embedding = embedding.unsqueeze(0) + elif len(embedding.shape) > 2: + raise ValueError(f"TextualInversionManager cannot add {trigger_str} because the embedding shape {embedding.shape} is incorrect. The embedding must have shape [token_dim] or [V, token_dim] where V is vector length and token_dim is 768 for SD1 or 1280 for SD2.") + + # for embeddings with vector length > 1 + pad_token_strings = [trigger_str + "-!pad-" + str(pad_index) for pad_index in range(1, embedding.shape[0])] + + try: + trigger_token_id = self._get_or_create_token_id_and_assign_embedding(trigger_str, embedding[0]) + # todo: batched UI for faster loading when vector length >2 + pad_token_ids = [self._get_or_create_token_id_and_assign_embedding(pad_token_str, embedding[1 + i]) \ + for (i, pad_token_str) in enumerate(pad_token_strings)] + self.textual_inversions.append(TextualInversion( + trigger_string=trigger_str, + trigger_token_id=trigger_token_id, + pad_token_ids=pad_token_ids, + embedding=embedding + )) + return trigger_token_id + + except ValueError: + traceback.print_exc() + print(f">> TextualInversionManager was unable to add a textual inversion with trigger string {trigger_str}.") + raise + + + def has_textual_inversion_for_trigger_string(self, trigger_string: str) -> bool: + try: + ti = self.get_textual_inversion_for_trigger_string(trigger_string) + return ti is not None + except StopIteration: + return False + + def get_textual_inversion_for_trigger_string(self, trigger_string: str) -> TextualInversion: + return next(ti for ti in self.textual_inversions if ti.trigger_string == trigger_string) + + + def get_textual_inversion_for_token_id(self, token_id: int) -> TextualInversion: + return next(ti for ti in self.textual_inversions if ti.trigger_token_id == token_id) + + def expand_textual_inversion_token_ids(self, prompt_token_ids: list[int]) -> list[int]: + """ + Insert padding tokens as necessary into the passed-in list of token ids to match any textual inversions it includes. + + :param prompt_token_ids: The prompt as a list of token ids (`int`s). Should not include bos and eos markers. + :return: The prompt token ids with any necessary padding to account for textual inversions inserted. May be too + long - caller is responsible for prepending/appending eos and bos token ids, and truncating if necessary. 
+ """ + if len(prompt_token_ids) == 0: + return prompt_token_ids + + if prompt_token_ids[0] == self.clip_embedder.tokenizer.bos_token_id: + raise ValueError("prompt_token_ids must not start with bos_token_id") + if prompt_token_ids[-1] == self.clip_embedder.tokenizer.eos_token_id: + raise ValueError("prompt_token_ids must not end with eos_token_id") + textual_inversion_trigger_token_ids = [ti.trigger_token_id for ti in self.textual_inversions] + prompt_token_ids = prompt_token_ids.copy() + for i, token_id in reversed(list(enumerate(prompt_token_ids))): + if token_id in textual_inversion_trigger_token_ids: + textual_inversion = next(ti for ti in self.textual_inversions if ti.trigger_token_id == token_id) + for pad_idx in range(0, textual_inversion.embedding_vector_length-1): + prompt_token_ids.insert(i+pad_idx+1, textual_inversion.pad_token_ids[pad_idx]) + + return prompt_token_ids + + def _get_or_create_token_id_and_assign_embedding(self, token_str: str, embedding: torch.Tensor): + if len(embedding.shape) != 1: + raise ValueError("Embedding has incorrect shape - must be [token_dim] where token_dim is 768 for SD1 or 1280 for SD2") + existing_token_id = get_clip_token_id_for_string(self.clip_embedder.tokenizer, token_str) + if existing_token_id == self.clip_embedder.tokenizer.unk_token_id: + num_tokens_added = self.clip_embedder.tokenizer.add_tokens(token_str) + current_embeddings = self.clip_embedder.transformer.resize_token_embeddings(None) + current_token_count = current_embeddings.num_embeddings + new_token_count = current_token_count + num_tokens_added + # the following call is slow - todo make batched for better performance with vector length >1 + self.clip_embedder.transformer.resize_token_embeddings(new_token_count) + + token_id = get_clip_token_id_for_string(self.clip_embedder.tokenizer, token_str) + self.clip_embedder.transformer.get_input_embeddings().weight.data[token_id] = embedding + + return token_id + def _parse_embedding(self, embedding_file: str): + file_type = embedding_file.split('.')[-1] + if file_type == 'pt': + return self._parse_embedding_pt(embedding_file) + elif file_type == 'bin': + return self._parse_embedding_bin(embedding_file) + else: + print(f'>> Not a recognized embedding file: {embedding_file}') + + def _parse_embedding_pt(self, embedding_file): + embedding_ckpt = torch.load(embedding_file, map_location='cpu') + embedding_info = {} + + # Check if valid embedding file + if 'string_to_token' and 'string_to_param' in embedding_ckpt: + + # Catch variants that do not have the expected keys or values. + try: + embedding_info['name'] = embedding_ckpt['name'] or os.path.basename(os.path.splitext(embedding_file)[0]) + + # Check num of embeddings and warn user only the first will be used + embedding_info['num_of_embeddings'] = len(embedding_ckpt["string_to_token"]) + if embedding_info['num_of_embeddings'] > 1: + print('>> More than 1 embedding found. 
Will use the first one') + + embedding = list(embedding_ckpt['string_to_param'].values())[0] + except (AttributeError,KeyError): + return self._handle_broken_pt_variants(embedding_ckpt, embedding_file) + + embedding_info['embedding'] = embedding + embedding_info['num_vectors_per_token'] = embedding.size()[0] + embedding_info['token_dim'] = embedding.size()[1] + + try: + embedding_info['trained_steps'] = embedding_ckpt['step'] + embedding_info['trained_model_name'] = embedding_ckpt['sd_checkpoint_name'] + embedding_info['trained_model_checksum'] = embedding_ckpt['sd_checkpoint'] + except AttributeError: + print(">> No Training Details Found. Passing ...") + + # .pt files found at https://cyberes.github.io/stable-diffusion-textual-inversion-models/ + # They are actually .bin files + elif len(embedding_ckpt.keys())==1: + print('>> Detected .bin file masquerading as .pt file') + embedding_info = self._parse_embedding_bin(embedding_file) + + else: + print('>> Invalid embedding format') + embedding_info = None + + return embedding_info + + def _parse_embedding_bin(self, embedding_file): + embedding_ckpt = torch.load(embedding_file, map_location='cpu') + embedding_info = {} + + if list(embedding_ckpt.keys()) == 0: + print(">> Invalid concepts file") + embedding_info = None + else: + for token in list(embedding_ckpt.keys()): + embedding_info['name'] = token or os.path.basename(os.path.splitext(embedding_file)[0]) + embedding_info['embedding'] = embedding_ckpt[token] + embedding_info['num_vectors_per_token'] = 1 # All Concepts seem to default to 1 + embedding_info['token_dim'] = embedding_info['embedding'].size()[0] + + return embedding_info + + def _handle_broken_pt_variants(self, embedding_ckpt:dict, embedding_file:str)->dict: + ''' + This handles the broken .pt file variants. We only know of one at present. 
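For reference alongside the parsers above, the simplest supported layout is the Hugging Face concepts-style .bin file: a single-key dict mapping the trigger token to its embedding tensor. A tiny stand-in (the token string and path are made up):

    import torch

    torch.save({"<my-concept>": torch.randn(768)}, "/tmp/my-concept.bin")
    # _parse_embedding_bin() would report name='<my-concept>', num_vectors_per_token=1, token_dim=768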
+ ''' + embedding_info = {} + if isinstance(list(embedding_ckpt['string_to_token'].values())[0],torch.Tensor): + print('>> Detected .pt file variant 1') # example at https://github.com/invoke-ai/InvokeAI/issues/1829 + for token in list(embedding_ckpt['string_to_token'].keys()): + embedding_info['name'] = token if token != '*' else os.path.basename(os.path.splitext(embedding_file)[0]) + embedding_info['embedding'] = embedding_ckpt['string_to_param'].state_dict()[token] + embedding_info['num_vectors_per_token'] = embedding_info['embedding'].shape[0] + embedding_info['token_dim'] = embedding_info['embedding'].size()[0] + else: + print('>> Invalid embedding format') + embedding_info = None + + return embedding_info diff --git a/tests/test_textual_inversion.py b/tests/test_textual_inversion.py new file mode 100644 index 00000000000..01d15a6319f --- /dev/null +++ b/tests/test_textual_inversion.py @@ -0,0 +1,253 @@ + +import unittest +from typing import Union + +import torch + +from ldm.modules.textual_inversion_manager import TextualInversionManager + + +KNOWN_WORDS = ['a', 'b', 'c'] +KNOWN_WORDS_TOKEN_IDS = [0, 1, 2] +UNKNOWN_WORDS = ['d', 'e', 'f'] + +class DummyEmbeddingsList(list): + def __getattr__(self, name): + if name == 'num_embeddings': + return len(self) + elif name == 'weight': + return self + elif name == 'data': + return self + +class DummyTransformer: + def __init__(self): + self.embeddings = DummyEmbeddingsList([0] * len(KNOWN_WORDS)) + + def resize_token_embeddings(self, new_size=None): + if new_size is None: + return self.embeddings + else: + while len(self.embeddings) > new_size: + self.embeddings.pop(-1) + while len(self.embeddings) < new_size: + self.embeddings.append(0) + + def get_input_embeddings(self): + return self.embeddings + +class DummyTokenizer(): + def __init__(self): + self.tokens = KNOWN_WORDS.copy() + self.bos_token_id = 49406 # these are what the real CLIPTokenizer has + self.eos_token_id = 49407 + self.pad_token_id = 49407 + self.unk_token_id = 49407 + + def convert_tokens_to_ids(self, token_str): + try: + return self.tokens.index(token_str) + except ValueError: + return self.unk_token_id + + def add_tokens(self, token_str): + self.tokens.append(token_str) + return 1 + + +class DummyClipEmbedder: + def __init__(self): + self.max_length = 77 + self.transformer = DummyTransformer() + self.tokenizer = DummyTokenizer() + self.position_embeddings_tensor = torch.randn([77,768], dtype=torch.float32) + + def position_embedding(self, indices: Union[list,torch.Tensor]): + if type(indices) is list: + indices = torch.tensor(indices, dtype=int) + return torch.index_select(self.position_embeddings_tensor, 0, indices) + + +def was_embedding_overwritten_correctly(tim: TextualInversionManager, overwritten_embedding: torch.Tensor, ti_indices: list, ti_embedding: torch.Tensor) -> bool: + return torch.allclose(overwritten_embedding[ti_indices], ti_embedding + tim.clip_embedder.position_embedding(ti_indices)) + +class TextualInversionManagerTestCase(unittest.TestCase): + + + def test_construction(self): + tim = TextualInversionManager(DummyClipEmbedder()) + + def test_add_embedding_for_known_token(self): + tim = TextualInversionManager(DummyClipEmbedder()) + test_embedding = torch.randn([1, 768]) + test_embedding_name = KNOWN_WORDS[0] + self.assertFalse(tim.has_textual_inversion_for_trigger_string(test_embedding_name)) + + pre_embeddings_count = len(tim.clip_embedder.transformer.resize_token_embeddings(None)) + + token_id = tim._add_textual_inversion(test_embedding_name, 
test_embedding) + self.assertEqual(token_id, 0) + + + # check adding 'test' did not create a new word + embeddings_count = len(tim.clip_embedder.transformer.resize_token_embeddings(None)) + self.assertEqual(pre_embeddings_count, embeddings_count) + + # check it was added + self.assertTrue(tim.has_textual_inversion_for_trigger_string(test_embedding_name)) + textual_inversion = tim.get_textual_inversion_for_trigger_string(test_embedding_name) + self.assertIsNotNone(textual_inversion) + self.assertTrue(torch.equal(textual_inversion.embedding, test_embedding)) + self.assertEqual(textual_inversion.trigger_string, test_embedding_name) + self.assertEqual(textual_inversion.trigger_token_id, token_id) + + def test_add_embedding_for_unknown_token(self): + tim = TextualInversionManager(DummyClipEmbedder()) + test_embedding_1 = torch.randn([1, 768]) + test_embedding_name_1 = UNKNOWN_WORDS[0] + + pre_embeddings_count = len(tim.clip_embedder.transformer.resize_token_embeddings(None)) + + added_token_id_1 = tim._add_textual_inversion(test_embedding_name_1, test_embedding_1) + # new token id should get added on the end + self.assertEqual(added_token_id_1, len(KNOWN_WORDS)) + + # check adding did create a new word + embeddings_count = len(tim.clip_embedder.transformer.resize_token_embeddings(None)) + self.assertEqual(pre_embeddings_count+1, embeddings_count) + + # check it was added + self.assertTrue(tim.has_textual_inversion_for_trigger_string(test_embedding_name_1)) + textual_inversion = next(ti for ti in tim.textual_inversions if ti.trigger_token_id == added_token_id_1) + self.assertIsNotNone(textual_inversion) + self.assertTrue(torch.equal(textual_inversion.embedding, test_embedding_1)) + self.assertEqual(textual_inversion.trigger_string, test_embedding_name_1) + self.assertEqual(textual_inversion.trigger_token_id, added_token_id_1) + + # add another one + test_embedding_2 = torch.randn([1, 768]) + test_embedding_name_2 = UNKNOWN_WORDS[1] + + pre_embeddings_count = len(tim.clip_embedder.transformer.resize_token_embeddings(None)) + + added_token_id_2 = tim._add_textual_inversion(test_embedding_name_2, test_embedding_2) + self.assertEqual(added_token_id_2, len(KNOWN_WORDS)+1) + + # check adding did create a new word + embeddings_count = len(tim.clip_embedder.transformer.resize_token_embeddings(None)) + self.assertEqual(pre_embeddings_count+1, embeddings_count) + + # check it was added + self.assertTrue(tim.has_textual_inversion_for_trigger_string(test_embedding_name_2)) + textual_inversion = next(ti for ti in tim.textual_inversions if ti.trigger_token_id == added_token_id_2) + self.assertIsNotNone(textual_inversion) + self.assertTrue(torch.equal(textual_inversion.embedding, test_embedding_2)) + self.assertEqual(textual_inversion.trigger_string, test_embedding_name_2) + self.assertEqual(textual_inversion.trigger_token_id, added_token_id_2) + + # check the old one is still there + self.assertTrue(tim.has_textual_inversion_for_trigger_string(test_embedding_name_1)) + textual_inversion = next(ti for ti in tim.textual_inversions if ti.trigger_token_id == added_token_id_1) + self.assertIsNotNone(textual_inversion) + self.assertTrue(torch.equal(textual_inversion.embedding, test_embedding_1)) + self.assertEqual(textual_inversion.trigger_string, test_embedding_name_1) + self.assertEqual(textual_inversion.trigger_token_id, added_token_id_1) + + + def test_pad_raises_on_eos_bos(self): + tim = TextualInversionManager(DummyClipEmbedder()) + prompt_token_ids_with_eos_bos = [tim.clip_embedder.tokenizer.bos_token_id] + 
\ + [KNOWN_WORDS_TOKEN_IDS] + \ + [tim.clip_embedder.tokenizer.eos_token_id] + with self.assertRaises(ValueError): + expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids_with_eos_bos) + + def test_pad_tokens_list_vector_length_1(self): + tim = TextualInversionManager(DummyClipEmbedder()) + prompt_token_ids = KNOWN_WORDS_TOKEN_IDS.copy() + + expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids) + self.assertEqual(prompt_token_ids, expanded_prompt_token_ids) + + test_embedding_1v = torch.randn([1, 768]) + test_embedding_1v_token = "" + test_embedding_1v_token_id = tim._add_textual_inversion(test_embedding_1v_token, test_embedding_1v) + self.assertEqual(test_embedding_1v_token_id, len(KNOWN_WORDS)) + + # at the end + prompt_token_ids_1v_append = prompt_token_ids + [test_embedding_1v_token_id] + expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids_1v_append) + self.assertEqual(prompt_token_ids_1v_append, expanded_prompt_token_ids) + + # at the start + prompt_token_ids_1v_prepend = [test_embedding_1v_token_id] + prompt_token_ids + expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids_1v_prepend) + self.assertEqual(prompt_token_ids_1v_prepend, expanded_prompt_token_ids) + + # in the middle + prompt_token_ids_1v_insert = prompt_token_ids[0:2] + [test_embedding_1v_token_id] + prompt_token_ids[2:3] + expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids_1v_insert) + self.assertEqual(prompt_token_ids_1v_insert, expanded_prompt_token_ids) + + def test_pad_tokens_list_vector_length_2(self): + tim = TextualInversionManager(DummyClipEmbedder()) + prompt_token_ids = KNOWN_WORDS_TOKEN_IDS.copy() + + expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids) + self.assertEqual(prompt_token_ids, expanded_prompt_token_ids) + + test_embedding_2v = torch.randn([2, 768]) + test_embedding_2v_token = "" + test_embedding_2v_token_id = tim._add_textual_inversion(test_embedding_2v_token, test_embedding_2v) + test_embedding_2v_pad_token_ids = tim.get_textual_inversion_for_token_id(test_embedding_2v_token_id).pad_token_ids + self.assertEqual(test_embedding_2v_token_id, len(KNOWN_WORDS)) + + # at the end + prompt_token_ids_2v_append = prompt_token_ids + [test_embedding_2v_token_id] + expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids_2v_append) + self.assertNotEqual(prompt_token_ids_2v_append, expanded_prompt_token_ids) + self.assertEqual(prompt_token_ids + [test_embedding_2v_token_id] + test_embedding_2v_pad_token_ids, expanded_prompt_token_ids) + + # at the start + prompt_token_ids_2v_prepend = [test_embedding_2v_token_id] + prompt_token_ids + expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids_2v_prepend) + self.assertNotEqual(prompt_token_ids_2v_prepend, expanded_prompt_token_ids) + self.assertEqual([test_embedding_2v_token_id] + test_embedding_2v_pad_token_ids + prompt_token_ids, expanded_prompt_token_ids) + + # in the middle + prompt_token_ids_2v_insert = prompt_token_ids[0:2] + [test_embedding_2v_token_id] + prompt_token_ids[2:3] + expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids_2v_insert) + self.assertNotEqual(prompt_token_ids_2v_insert, expanded_prompt_token_ids) + 
self.assertEqual(prompt_token_ids[0:2] + [test_embedding_2v_token_id] + test_embedding_2v_pad_token_ids + prompt_token_ids[2:3], expanded_prompt_token_ids) + + def test_pad_tokens_list_vector_length_8(self): + tim = TextualInversionManager(DummyClipEmbedder()) + prompt_token_ids = KNOWN_WORDS_TOKEN_IDS.copy() + + expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids) + self.assertEqual(prompt_token_ids, expanded_prompt_token_ids) + + test_embedding_8v = torch.randn([8, 768]) + test_embedding_8v_token = "" + test_embedding_8v_token_id = tim._add_textual_inversion(test_embedding_8v_token, test_embedding_8v) + test_embedding_8v_pad_token_ids = tim.get_textual_inversion_for_token_id(test_embedding_8v_token_id).pad_token_ids + self.assertEqual(test_embedding_8v_token_id, len(KNOWN_WORDS)) + + # at the end + prompt_token_ids_8v_append = prompt_token_ids + [test_embedding_8v_token_id] + expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids_8v_append) + self.assertNotEqual(prompt_token_ids_8v_append, expanded_prompt_token_ids) + self.assertEqual(prompt_token_ids + [test_embedding_8v_token_id] + test_embedding_8v_pad_token_ids, expanded_prompt_token_ids) + + # at the start + prompt_token_ids_8v_prepend = [test_embedding_8v_token_id] + prompt_token_ids + expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids_8v_prepend) + self.assertNotEqual(prompt_token_ids_8v_prepend, expanded_prompt_token_ids) + self.assertEqual([test_embedding_8v_token_id] + test_embedding_8v_pad_token_ids + prompt_token_ids, expanded_prompt_token_ids) + + # in the middle + prompt_token_ids_8v_insert = prompt_token_ids[0:2] + [test_embedding_8v_token_id] + prompt_token_ids[2:3] + expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids_8v_insert) + self.assertNotEqual(prompt_token_ids_8v_insert, expanded_prompt_token_ids) + self.assertEqual(prompt_token_ids[0:2] + [test_embedding_8v_token_id] + test_embedding_8v_pad_token_ids + prompt_token_ids[2:3], expanded_prompt_token_ids) + From c7585003ffdbbd20974c8236d61630265f5e43f9 Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Sun, 18 Dec 2022 18:43:54 +0100 Subject: [PATCH 078/199] stop using WeightedFrozenCLIPEmbedder --- ldm/invoke/conditioning.py | 3 +- ldm/invoke/generator/diffusers_pipeline.py | 25 +- ldm/modules/embedding_manager.py | 9 +- ldm/modules/prompt_to_embeddings_converter.py | 235 ++++++++++++++++++ ldm/modules/textual_inversion_manager.py | 32 ++- tests/test_textual_inversion.py | 41 +-- 6 files changed, 298 insertions(+), 47 deletions(-) create mode 100644 ldm/modules/prompt_to_embeddings_converter.py diff --git a/ldm/invoke/conditioning.py b/ldm/invoke/conditioning.py index c24e69b27fa..4930d3c25bf 100644 --- a/ldm/invoke/conditioning.py +++ b/ldm/invoke/conditioning.py @@ -16,6 +16,7 @@ from ..models.diffusion import cross_attention_control from ..models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent from ..modules.encoders.modules import WeightedFrozenCLIPEmbedder +from ..modules.prompt_to_embeddings_converter import WeightedPromptFragmentsToEmbeddingsConverter def get_uc_and_c_and_ec(prompt_string, model, log_tokens=False, skip_normalize_legacy_blend=False): @@ -216,7 +217,7 @@ def _get_conditioning_for_blend(model, blend: Blend, log_tokens: bool = False): log_display_label=f"(blend part {i + 1}, weight={blend.weights[i]})") embeddings_to_blend = 
this_embedding if embeddings_to_blend is None else torch.cat( (embeddings_to_blend, this_embedding)) - conditioning = WeightedFrozenCLIPEmbedder.apply_embedding_weights(embeddings_to_blend.unsqueeze(0), + conditioning = WeightedPromptFragmentsToEmbeddingsConverter.apply_embedding_weights(embeddings_to_blend.unsqueeze(0), blend.weights, normalize=blend.normalize_weights) return conditioning diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index c4be2b82304..5091e6c7611 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -15,6 +15,7 @@ from ...models.diffusion import cross_attention_control from ...models.diffusion.cross_attention_map_saving import AttentionMapSaver +from ...modules.prompt_to_embeddings_converter import WeightedPromptFragmentsToEmbeddingsConverter # monkeypatch diffusers CrossAttention 🙈 # this is to make prompt2prompt and (future) attention maps work @@ -239,7 +240,7 @@ def __init__( safety_checker: Optional[StableDiffusionSafetyChecker], feature_extractor: Optional[CLIPFeatureExtractor], requires_safety_checker: bool = False, - precision: str = 'full', + precision: str = 'float32', ): super().__init__(vae, text_encoder, tokenizer, unet, scheduler, safety_checker, feature_extractor, requires_safety_checker) @@ -253,15 +254,17 @@ def __init__( safety_checker=safety_checker, feature_extractor=feature_extractor, ) + self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward) + use_full_precision = (precision == 'float32' or precision == 'autocast') + self.textual_inversion_manager = TextualInversionManager(tokenizer=self.tokenizer, + text_encoder=self.text_encoder, + full_precision=use_full_precision) # InvokeAI's interface for text embeddings and whatnot - self.clip_embedder = WeightedFrozenCLIPEmbedder( + self.prompt_fragments_to_embeddings_converter = WeightedPromptFragmentsToEmbeddingsConverter( tokenizer=self.tokenizer, - transformer=self.text_encoder + text_encoder=self.text_encoder, + textual_inversion_manager=self.textual_inversion_manager ) - self.invokeai_diffuser = InvokeAIDiffuserComponent(self.unet, self._unet_forward) - use_full_precision = (precision == 'float32' or precision == 'autocast') - self.textual_inversion_manager = TextualInversionManager(self.clip_embedder, use_full_precision) - self.clip_embedder.set_textual_inversion_manager(self.textual_inversion_manager) if is_xformers_available(): self.enable_xformers_memory_efficient_attention() @@ -565,12 +568,16 @@ def get_learned_conditioning(self, c: List[List[str]], *, return_tokens=True, fr """ Compatibility function for ldm.models.diffusion.ddpm.LatentDiffusion. 
""" - return self.clip_embedder.encode(c, return_tokens=return_tokens, fragment_weights=fragment_weights) + return self.prompt_fragments_to_embeddings_converter.get_embeddings_for_weighted_prompt_fragments( + text=c, + fragment_weights=fragment_weights, + should_return_tokens=return_tokens, + device=self.device) @property def cond_stage_model(self): warnings.warn("legacy compatibility layer", DeprecationWarning) - return self.clip_embedder + return self.prompt_fragments_to_embeddings_converter @torch.inference_mode() def _tokenize(self, prompt: Union[str, List[str]]): diff --git a/ldm/modules/embedding_manager.py b/ldm/modules/embedding_manager.py index 36704eb64b9..8bbdcd91a4d 100644 --- a/ldm/modules/embedding_manager.py +++ b/ldm/modules/embedding_manager.py @@ -19,13 +19,6 @@ def get_clip_token_id_for_string(tokenizer: CLIPTokenizer, token_str: str) -> in token_id = tokenizer.convert_tokens_to_ids(token_str) return token_id -def get_bert_token_id_for_string(tokenizer, string) -> int: - token = tokenizer(string) - # assert torch.count_nonzero(token) == 3, f"String '{string}' maps to more than a single token. Please use another string" - token = token[0, 1] - return token.item() - - def get_embedding_for_clip_token_id(embedder, token_id): if type(token_id) is not torch.Tensor: token_id = torch.tensor(token_id, dtype=torch.int) @@ -289,7 +282,7 @@ def parse_embedding_pt(self, embedding_file): print('>> More than 1 embedding found. Will use the first one') embedding = list(embedding_ckpt['string_to_param'].values())[0] - except (AttributeError,KeyError): + except (AttributeError,KeyError): return self.handle_broken_pt_variants(embedding_ckpt, embedding_file) embedding_info['embedding'] = embedding diff --git a/ldm/modules/prompt_to_embeddings_converter.py b/ldm/modules/prompt_to_embeddings_converter.py new file mode 100644 index 00000000000..a024a1bc006 --- /dev/null +++ b/ldm/modules/prompt_to_embeddings_converter.py @@ -0,0 +1,235 @@ +import math + +import torch +from transformers import CLIPTokenizer, CLIPTextModel + +from ldm.modules.textual_inversion_manager import TextualInversionManager + + +class WeightedPromptFragmentsToEmbeddingsConverter(): + + def __init__(self, + tokenizer: CLIPTokenizer, # converts strings to lists of int token ids + text_encoder: CLIPTextModel, # convert a list of int token ids to a tensor of embeddings + textual_inversion_manager: TextualInversionManager = None + ): + self.tokenizer = tokenizer + self.text_encoder = text_encoder + self.textual_inversion_manager = textual_inversion_manager + + @property + def max_length(self): + return self.tokenizer.model_max_length + + def get_embeddings_for_weighted_prompt_fragments(self, + text: list[str], + fragment_weights: list[float], + should_return_tokens: bool = False, + device='cpu' + ) -> torch.Tensor: + ''' + + :param text: A list of fragments of text to which different weights are to be applied. + :param fragment_weights: A batch of lists of weights, one for each entry in `fragments`. 
+ :return: A tensor of shape `[1, 77, token_dim]` containing weighted embeddings where token_dim is 768 for SD1 + and 1280 for SD2 + ''' + if len(text) != len(fragment_weights): + raise ValueError(f"lengths of text and fragment_weights lists are not the same ({len(text)} != {len(fragment_weights)})") + + batch_z = None + batch_tokens = None + for fragments, weights in zip(text, fragment_weights): + + # First, weight tokens in individual fragments by scaling the feature vectors as requested (effectively + # applying a multiplier to the CFG scale on a per-token basis). + # For tokens weighted<1, intuitively we want SD to become not merely *less* interested in the concept + # captured by the fragment but actually *dis*interested in it (a 0.01 interest in "red" is still an active + # interest, however small, in redness; what the user probably intends when they attach the number 0.01 to + # "red" is to tell SD that it should almost completely *ignore* redness). + # To do this, the embedding is lerped away from base_embedding in the direction of an embedding for a prompt + # string from which the low-weighted fragment has been simply removed. The closer the weight is to zero, the + # closer the resulting embedding is to an embedding for a prompt that simply lacks this fragment. + + # handle weights >=1 + tokens, per_token_weights = self.get_token_ids_and_expand_weights(fragments, weights, device=device) + base_embedding = self.build_weighted_embedding_tensor(tokens, per_token_weights) + + # this is our starting point + embeddings = base_embedding.unsqueeze(0) + per_embedding_weights = [1.0] + + # now handle weights <1 + # Do this by building extra embeddings tensors that lack the words being <1 weighted. These will be lerped + # with the embeddings tensors that have the words, such that if the weight of a word is 0.5, the resulting + # embedding will be exactly half-way between the unweighted prompt and the prompt with the <1 weighted words + # removed. + # eg for "mountain:1 man:0.5", intuitively the "man" should be "half-gone". therefore, append an embedding + # for "mountain" (i.e. without "man") to the already-produced embedding for "mountain man", and weight it + # such that the resulting lerped embedding is exactly half-way between "mountain man" and "mountain". 
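To make the mapping above concrete, a quick standalone check of the tan() curve described in the comments (plain math, independent of any model):

    import math

    for w in (1.0, 0.75, 0.5, 0.25, 0.01):
        lerp_weight = math.tan((1.0 - max(1e-9, w)) * math.pi / 2)
        print(f"fragment weight {w:<4} -> lerp weight {lerp_weight:.3f}")
    # 1.0 -> 0.000, 0.75 -> 0.414, 0.5 -> 1.000, 0.25 -> 2.414, 0.01 -> 63.657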
+ for index, fragment_weight in enumerate(weights): + if fragment_weight < 1: + fragments_without_this = fragments[:index] + fragments[index+1:] + weights_without_this = weights[:index] + weights[index+1:] + tokens, per_token_weights = self.get_token_ids_and_expand_weights(fragments_without_this, weights_without_this, device=device) + embedding_without_this = self.build_weighted_embedding_tensor(tokens, per_token_weights) + + embeddings = torch.cat((embeddings, embedding_without_this.unsqueeze(0)), dim=1) + # weight of the embedding *without* this fragment gets *stronger* as its weight approaches 0 + # if fragment_weight = 0, basically we want embedding_without_this to completely overwhelm base_embedding + # therefore: + # fragment_weight = 1: we are at base_z => lerp weight 0 + # fragment_weight = 0.5: we are halfway between base_z and here => lerp weight 1 + # fragment_weight = 0: we're now entirely overriding base_z ==> lerp weight inf + # so let's use tan(), because: + # tan is 0.0 at 0, + # 1.0 at PI/4, and + # inf at PI/2 + # -> tan((1-weight)*PI/2) should give us ideal lerp weights + epsilon = 1e-9 + fragment_weight = max(epsilon, fragment_weight) # inf is bad + embedding_lerp_weight = math.tan((1.0 - fragment_weight) * math.pi / 2) + # todo handle negative weight? + + per_embedding_weights.append(embedding_lerp_weight) + + lerped_embeddings = self.apply_embedding_weights(embeddings, per_embedding_weights, normalize=True).squeeze(0) + + #print(f"assembled tokens for '{fragments}' into tensor of shape {lerped_embeddings.shape}") + + # append to batch + batch_z = lerped_embeddings.unsqueeze(0) if batch_z is None else torch.cat([batch_z, lerped_embeddings.unsqueeze(0)], dim=1) + batch_tokens = tokens.unsqueeze(0) if batch_tokens is None else torch.cat([batch_tokens, tokens.unsqueeze(0)], dim=1) + + # should have shape (B, 77, 768) + #print(f"assembled all tokens into tensor of shape {batch_z.shape}") + + if should_return_tokens: + return batch_z, batch_tokens + else: + return batch_z + + def get_token_ids(self, fragments: list[str], include_start_and_end_markers: bool = True) -> list[list[int]]: + """ + Convert a list of strings like `["a cat", "sitting", "on a mat"]` into a list of lists of token ids like + `[[bos, 0, 1, eos], [bos, 2, eos], [bos, 3, 0, 4, eos]]`. bos/eos markers are skipped if + `include_start_and_end_markers` is `False`. Each list will be restricted to the maximum permitted length + (typically 75 tokens + eos/bos markers). + + :param fragments: The strings to convert. 
+ :param include_start_and_end_markers: + :return: + """ + # for args documentation see ENCODE_KWARGS_DOCSTRING in tokenization_utils_base.py (in `transformers` lib) + token_ids_list = self.tokenizer( + fragments, + truncation=True, + max_length=self.max_length, + return_overflowing_tokens=False, + padding='do_not_pad', + return_tensors=None, # just give me lists of ints + )['input_ids'] + + result = [] + for token_ids in token_ids_list: + # trim eos/bos + token_ids = token_ids[1:-1] + # pad for textual inversions with vector length >1 + token_ids = self.textual_inversion_manager.expand_textual_inversion_token_ids(token_ids) + # restrict length to max_length-2 (leaving room for bos/eos) + token_ids = token_ids[0:self.max_length - 2] + # add back eos/bos if requested + if include_start_and_end_markers: + token_ids = [self.tokenizer.bos_token_id] + token_ids + [self.tokenizer.eos_token_id] + + result.append(token_ids) + + return result + + + @classmethod + def apply_embedding_weights(self, embeddings: torch.Tensor, per_embedding_weights: list[float], normalize:bool) -> torch.Tensor: + per_embedding_weights = torch.tensor(per_embedding_weights, dtype=embeddings.dtype, device=embeddings.device) + if normalize: + per_embedding_weights = per_embedding_weights / torch.sum(per_embedding_weights) + reshaped_weights = per_embedding_weights.reshape(per_embedding_weights.shape + (1, 1,)) + #reshaped_weights = per_embedding_weights.reshape(per_embedding_weights.shape + (1,1,)).expand(embeddings.shape) + return torch.sum(embeddings * reshaped_weights, dim=1) + # lerped embeddings has shape (77, 768) + + + def get_token_ids_and_expand_weights(self, fragments: list[str], weights: list[float], device: str) -> (torch.Tensor, torch.Tensor): + ''' + Given a list of text fragments and corresponding weights: tokenize each fragment, append the token sequences + together and return a padded token sequence starting with the bos marker, ending with the eos marker, and padded + or truncated as appropriate to `self.max_length`. Also return a list of weights expanded from the passed-in + weights to match each token. + + :param fragments: Text fragments to tokenize and concatenate. May be empty. + :param weights: Per-fragment weights (i.e. quasi-CFG scaling). Values from 0 to inf are permitted. In practise with SD1.5 + values >1.6 tend to produce garbage output. Must have same length as `fragment`. + :return: A tuple of tensors `(token_ids, weights)`. `token_ids` is ints, `weights` is floats, both have shape `[self.max_length]`. 
+ ''' + if len(fragments) != len(weights): + raise ValueError(f"lengths of text and fragment_weights lists are not the same ({len(fragments)} != {len(weights)})") + + # empty is meaningful + if len(fragments) == 0: + fragments = [''] + weights = [1.0] + per_fragment_token_ids = self.get_token_ids(fragments, include_start_and_end_markers=False) + all_token_ids = [] + per_token_weights = [] + #print("all fragments:", fragments, weights) + for this_fragment_token_ids, weight in zip(per_fragment_token_ids, weights): + # append + all_token_ids += this_fragment_token_ids + # fill out weights tensor with one float per token + per_token_weights += [float(weight)] * len(this_fragment_token_ids) + + # leave room for bos/eos + if len(all_token_ids) > self.max_length - 2: + excess_token_count = len(all_token_ids) - self.max_length - 2 + # TODO build nice description string of how the truncation was applied + # this should be done by calling self.tokenizer.convert_ids_to_tokens() then passing the result to + # self.tokenizer.convert_tokens_to_string() for the token_ids on each side of the truncation limit. + print(f">> Prompt is {excess_token_count} token(s) too long and has been truncated") + all_token_ids = all_token_ids[0:self.max_length] + per_token_weights = per_token_weights[0:self.max_length] + + # pad out to a self.max_length-entry array: [eos_token, , eos_token, ..., eos_token] + # (typically self.max_length == 77) + all_token_ids = [self.tokenizer.bos_token_id] + all_token_ids + [self.tokenizer.eos_token_id] + per_token_weights = [1.0] + per_token_weights + [1.0] + pad_length = self.max_length - len(all_token_ids) + all_token_ids += [self.tokenizer.eos_token_id] * pad_length + per_token_weights += [1.0] * pad_length + + all_token_ids_tensor = torch.tensor(all_token_ids, dtype=torch.long, device=device) + per_token_weights_tensor = torch.tensor(per_token_weights, dtype=torch.float32, device=device) + #print(f"assembled all_token_ids_tensor with shape {all_token_ids_tensor.shape}") + return all_token_ids_tensor, per_token_weights_tensor + + def build_weighted_embedding_tensor(self, token_ids: torch.Tensor, per_token_weights: torch.Tensor) -> torch.Tensor: + ''' + Build a tensor that embeds the passed-in token IDs and applyies the given per_token weights + :param token_ids: A tensor of shape `[self.max_length]` containing token IDs (ints) + :param per_token_weights: A tensor of shape `[self.max_length]` containing weights (floats) + :return: A tensor of shape `[1, self.max_length, token_dim]` representing the requested weighted embeddings + where `token_dim` is 768 for SD1 and 1280 for SD2. 
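A usage sketch for the converter as a whole, assuming the standard SD1 CLIP checkpoint id and a TextualInversionManager built from the same tokenizer/encoder pair; the prompt fragments and weights are illustrative:

    from transformers import CLIPTokenizer, CLIPTextModel
    from ldm.modules.textual_inversion_manager import TextualInversionManager
    from ldm.modules.prompt_to_embeddings_converter import WeightedPromptFragmentsToEmbeddingsConverter

    tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
    text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
    tim = TextualInversionManager(tokenizer=tokenizer, text_encoder=text_encoder)
    converter = WeightedPromptFragmentsToEmbeddingsConverter(tokenizer, text_encoder, textual_inversion_manager=tim)

    # one prompt in the batch, split into two fragments; "a man" is down-weighted to 0.5
    embeddings = converter.get_embeddings_for_weighted_prompt_fragments(
        text=[["a mountain", "a man"]],
        fragment_weights=[[1.0, 0.5]],
        device="cpu",
    )  # -> tensor of shape [1, 77, 768]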
+ ''' + #print(f"building weighted embedding tensor for {tokens} with weights {per_token_weights}") + if token_ids.shape != torch.Size([self.max_length]): + raise ValueError(f"token_ids has shape {token_ids.shape} - expected [{self.max_length}]") + + z = self.text_encoder.forward(input_ids=token_ids.unsqueeze(0), + return_dict=False)[0] + empty_token_ids = torch.tensor([self.tokenizer.bos_token_id] + + [self.tokenizer.pad_token_id] * (self.max_length-2) + + [self.tokenizer.eos_token_id], dtype=torch.int, device=token_ids.device).unsqueeze(0) + empty_z = self.text_encoder(input_ids=empty_token_ids).last_hidden_state + batch_weights_expanded = per_token_weights.reshape(per_token_weights.shape + (1,)).expand(z.shape) + z_delta_from_empty = z - empty_z + weighted_z = empty_z + (z_delta_from_empty * batch_weights_expanded) + + return weighted_z diff --git a/ldm/modules/textual_inversion_manager.py b/ldm/modules/textual_inversion_manager.py index dc19037387f..543fa52111e 100644 --- a/ldm/modules/textual_inversion_manager.py +++ b/ldm/modules/textual_inversion_manager.py @@ -5,10 +5,9 @@ import torch from attr import dataclass from picklescan.scanner import scan_file_path +from transformers import CLIPTokenizer, CLIPTextModel from ldm.invoke.concepts_lib import HuggingFaceConceptsLibrary -from ldm.modules.embedding_manager import get_clip_token_id_for_string -from ldm.modules.encoders.modules import FrozenCLIPEmbedder @dataclass @@ -23,8 +22,12 @@ def embedding_vector_length(self) -> int: return self.embedding.shape[0] class TextualInversionManager(): - def __init__(self, clip_embedder: FrozenCLIPEmbedder, full_precision: bool=True): - self.clip_embedder = clip_embedder + def __init__(self, + tokenizer: CLIPTokenizer, + text_encoder: CLIPTextModel, + full_precision: bool=True): + self.tokenizer = tokenizer + self.text_encoder = text_encoder self.full_precision = full_precision self.hf_concepts_library = HuggingFaceConceptsLibrary() default_textual_inversions: list[TextualInversion] = [] @@ -124,9 +127,9 @@ def expand_textual_inversion_token_ids(self, prompt_token_ids: list[int]) -> lis if len(prompt_token_ids) == 0: return prompt_token_ids - if prompt_token_ids[0] == self.clip_embedder.tokenizer.bos_token_id: + if prompt_token_ids[0] == self.tokenizer.bos_token_id: raise ValueError("prompt_token_ids must not start with bos_token_id") - if prompt_token_ids[-1] == self.clip_embedder.tokenizer.eos_token_id: + if prompt_token_ids[-1] == self.tokenizer.eos_token_id: raise ValueError("prompt_token_ids must not end with eos_token_id") textual_inversion_trigger_token_ids = [ti.trigger_token_id for ti in self.textual_inversions] prompt_token_ids = prompt_token_ids.copy() @@ -141,19 +144,22 @@ def expand_textual_inversion_token_ids(self, prompt_token_ids: list[int]) -> lis def _get_or_create_token_id_and_assign_embedding(self, token_str: str, embedding: torch.Tensor): if len(embedding.shape) != 1: raise ValueError("Embedding has incorrect shape - must be [token_dim] where token_dim is 768 for SD1 or 1280 for SD2") - existing_token_id = get_clip_token_id_for_string(self.clip_embedder.tokenizer, token_str) - if existing_token_id == self.clip_embedder.tokenizer.unk_token_id: - num_tokens_added = self.clip_embedder.tokenizer.add_tokens(token_str) - current_embeddings = self.clip_embedder.transformer.resize_token_embeddings(None) + existing_token_id = self.tokenizer.convert_tokens_to_ids(token_str) + if existing_token_id == self.tokenizer.unk_token_id: + num_tokens_added = self.tokenizer.add_tokens(token_str) + 
current_embeddings = self.text_encoder.resize_token_embeddings(None) current_token_count = current_embeddings.num_embeddings new_token_count = current_token_count + num_tokens_added # the following call is slow - todo make batched for better performance with vector length >1 - self.clip_embedder.transformer.resize_token_embeddings(new_token_count) + self.text_encoder.resize_token_embeddings(new_token_count) - token_id = get_clip_token_id_for_string(self.clip_embedder.tokenizer, token_str) - self.clip_embedder.transformer.get_input_embeddings().weight.data[token_id] = embedding + token_id = self.tokenizer.convert_tokens_to_ids(token_str) + if token_id == self.tokenizer.unk_token_id: + raise RuntimeError(f"Unable to find token id for token '{token_str}'") + self.text_encoder.get_input_embeddings().weight.data[token_id] = embedding return token_id + def _parse_embedding(self, embedding_file: str): file_type = embedding_file.split('.')[-1] if file_type == 'pt': diff --git a/tests/test_textual_inversion.py b/tests/test_textual_inversion.py index 01d15a6319f..6e150f1d21d 100644 --- a/tests/test_textual_inversion.py +++ b/tests/test_textual_inversion.py @@ -51,6 +51,8 @@ def convert_tokens_to_ids(self, token_str): return self.unk_token_id def add_tokens(self, token_str): + if token_str in self.tokens: + return 0 self.tokens.append(token_str) return 1 @@ -71,26 +73,33 @@ def position_embedding(self, indices: Union[list,torch.Tensor]): def was_embedding_overwritten_correctly(tim: TextualInversionManager, overwritten_embedding: torch.Tensor, ti_indices: list, ti_embedding: torch.Tensor) -> bool: return torch.allclose(overwritten_embedding[ti_indices], ti_embedding + tim.clip_embedder.position_embedding(ti_indices)) + +def make_dummy_textual_inversion_manager(): + return TextualInversionManager( + tokenizer=DummyTokenizer(), + text_encoder=DummyTransformer() + ) + class TextualInversionManagerTestCase(unittest.TestCase): def test_construction(self): - tim = TextualInversionManager(DummyClipEmbedder()) + tim = make_dummy_textual_inversion_manager() def test_add_embedding_for_known_token(self): - tim = TextualInversionManager(DummyClipEmbedder()) + tim = make_dummy_textual_inversion_manager() test_embedding = torch.randn([1, 768]) test_embedding_name = KNOWN_WORDS[0] self.assertFalse(tim.has_textual_inversion_for_trigger_string(test_embedding_name)) - pre_embeddings_count = len(tim.clip_embedder.transformer.resize_token_embeddings(None)) + pre_embeddings_count = len(tim.text_encoder.resize_token_embeddings(None)) token_id = tim._add_textual_inversion(test_embedding_name, test_embedding) self.assertEqual(token_id, 0) # check adding 'test' did not create a new word - embeddings_count = len(tim.clip_embedder.transformer.resize_token_embeddings(None)) + embeddings_count = len(tim.text_encoder.resize_token_embeddings(None)) self.assertEqual(pre_embeddings_count, embeddings_count) # check it was added @@ -102,18 +111,18 @@ def test_add_embedding_for_known_token(self): self.assertEqual(textual_inversion.trigger_token_id, token_id) def test_add_embedding_for_unknown_token(self): - tim = TextualInversionManager(DummyClipEmbedder()) + tim = make_dummy_textual_inversion_manager() test_embedding_1 = torch.randn([1, 768]) test_embedding_name_1 = UNKNOWN_WORDS[0] - pre_embeddings_count = len(tim.clip_embedder.transformer.resize_token_embeddings(None)) + pre_embeddings_count = len(tim.text_encoder.resize_token_embeddings(None)) added_token_id_1 = tim._add_textual_inversion(test_embedding_name_1, test_embedding_1) # 
new token id should get added on the end self.assertEqual(added_token_id_1, len(KNOWN_WORDS)) # check adding did create a new word - embeddings_count = len(tim.clip_embedder.transformer.resize_token_embeddings(None)) + embeddings_count = len(tim.text_encoder.resize_token_embeddings(None)) self.assertEqual(pre_embeddings_count+1, embeddings_count) # check it was added @@ -128,13 +137,13 @@ def test_add_embedding_for_unknown_token(self): test_embedding_2 = torch.randn([1, 768]) test_embedding_name_2 = UNKNOWN_WORDS[1] - pre_embeddings_count = len(tim.clip_embedder.transformer.resize_token_embeddings(None)) + pre_embeddings_count = len(tim.text_encoder.resize_token_embeddings(None)) added_token_id_2 = tim._add_textual_inversion(test_embedding_name_2, test_embedding_2) self.assertEqual(added_token_id_2, len(KNOWN_WORDS)+1) # check adding did create a new word - embeddings_count = len(tim.clip_embedder.transformer.resize_token_embeddings(None)) + embeddings_count = len(tim.text_encoder.resize_token_embeddings(None)) self.assertEqual(pre_embeddings_count+1, embeddings_count) # check it was added @@ -155,15 +164,15 @@ def test_add_embedding_for_unknown_token(self): def test_pad_raises_on_eos_bos(self): - tim = TextualInversionManager(DummyClipEmbedder()) - prompt_token_ids_with_eos_bos = [tim.clip_embedder.tokenizer.bos_token_id] + \ + tim = make_dummy_textual_inversion_manager() + prompt_token_ids_with_eos_bos = [tim.tokenizer.bos_token_id] + \ [KNOWN_WORDS_TOKEN_IDS] + \ - [tim.clip_embedder.tokenizer.eos_token_id] + [tim.tokenizer.eos_token_id] with self.assertRaises(ValueError): - expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids_with_eos_bos) + tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids_with_eos_bos) def test_pad_tokens_list_vector_length_1(self): - tim = TextualInversionManager(DummyClipEmbedder()) + tim = make_dummy_textual_inversion_manager() prompt_token_ids = KNOWN_WORDS_TOKEN_IDS.copy() expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids) @@ -190,7 +199,7 @@ def test_pad_tokens_list_vector_length_1(self): self.assertEqual(prompt_token_ids_1v_insert, expanded_prompt_token_ids) def test_pad_tokens_list_vector_length_2(self): - tim = TextualInversionManager(DummyClipEmbedder()) + tim = make_dummy_textual_inversion_manager() prompt_token_ids = KNOWN_WORDS_TOKEN_IDS.copy() expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids) @@ -221,7 +230,7 @@ def test_pad_tokens_list_vector_length_2(self): self.assertEqual(prompt_token_ids[0:2] + [test_embedding_2v_token_id] + test_embedding_2v_pad_token_ids + prompt_token_ids[2:3], expanded_prompt_token_ids) def test_pad_tokens_list_vector_length_8(self): - tim = TextualInversionManager(DummyClipEmbedder()) + tim = make_dummy_textual_inversion_manager() prompt_token_ids = KNOWN_WORDS_TOKEN_IDS.copy() expanded_prompt_token_ids = tim.expand_textual_inversion_token_ids(prompt_token_ids=prompt_token_ids) From ca95445a0d3cc726c7667569f3f4c8ac9e652dcf Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Wed, 21 Dec 2022 01:56:27 +0000 Subject: [PATCH 079/199] store diffusion models locally - configure_invokeai.py reconfigured to store diffusion models rather than CompVis models - hugging face caching model is used, but cache is set to ~/invokeai/models/repo_id - models.yaml does **NOT** use path, just repo_id - "repo_name" changed to "repo_id" to following hugging face conventions - 
Models are loaded with full precision pending further work. --- configs/INITIAL_MODELS.yaml | 32 ++-- ldm/invoke/generator/diffusers_pipeline.py | 7 +- ldm/invoke/model_cache.py | 81 +++++---- scripts/configure_invokeai.py | 190 ++++----------------- 4 files changed, 98 insertions(+), 212 deletions(-) diff --git a/configs/INITIAL_MODELS.yaml b/configs/INITIAL_MODELS.yaml index 6cfb863df8d..4139c67d58b 100644 --- a/configs/INITIAL_MODELS.yaml +++ b/configs/INITIAL_MODELS.yaml @@ -1,72 +1,63 @@ stable-diffusion-1.5: description: The newest Stable Diffusion version 1.5 weight file (4.27 GB) repo_id: runwayml/stable-diffusion-v1-5 - config: v1-inference.yaml - file: v1-5-pruned-emaonly.ckpt - recommended: true + format: diffusers + recommended: True width: 512 height: 512 inpainting-1.5: description: RunwayML SD 1.5 model optimized for inpainting (4.27 GB) repo_id: runwayml/stable-diffusion-inpainting - config: v1-inpainting-inference.yaml - file: sd-v1-5-inpainting.ckpt + format: diffusers recommended: True width: 512 height: 512 ft-mse-improved-autoencoder-840000: description: StabilityAI improved autoencoder fine-tuned for human faces (recommended; 335 MB) - repo_id: stabilityai/sd-vae-ft-mse-original - config: VAE/default - file: vae-ft-mse-840000-ema-pruned.ckpt + repo_id: stabilityai/sd-vae-ft-mse recommended: True + format: vae width: 512 height: 512 stable-diffusion-1.4: description: The original Stable Diffusion version 1.4 weight file (4.27 GB) repo_id: CompVis/stable-diffusion-v-1-4-original - config: v1-inference.yaml - file: sd-v1-4.ckpt recommended: False + format: diffusers width: 512 height: 512 waifu-diffusion-1.3: description: Stable Diffusion 1.4 fine tuned on anime-styled images (4.27 GB) repo_id: hakurei/waifu-diffusion-v1-3 - config: v1-inference.yaml - file: model-epoch09-float32.ckpt + format: diffusers recommended: False width: 512 height: 512 trinart-2.0: description: An SD model finetuned with ~40,000 assorted high resolution manga/anime-style pictures (2.13 GB) repo_id: naclbit/trinart_stable_diffusion_v2 - config: v1-inference.yaml - file: trinart2_step95000.ckpt + format: diffusers recommended: False width: 512 height: 512 trinart_characters-1.0: description: An SD model finetuned with 19.2M anime/manga style images (2.13 GB) repo_id: naclbit/trinart_characters_19.2m_stable_diffusion_v1 - config: v1-inference.yaml - file: trinart_characters_it4_v1.ckpt + format: diffusers recommended: False width: 512 height: 512 trinart_vae: description: Custom autoencoder for trinart_characters repo_id: naclbit/trinart_characters_19.2m_stable_diffusion_v1 - config: VAE/trinart - file: autoencoder_fix_kl-f8-trinart_characters.ckpt + format: vae recommended: False width: 512 height: 512 papercut-1.0: description: SD 1.5 fine-tuned for papercut art (use "PaperCut" in your prompts) (2.13 GB) repo_id: Fictiverse/Stable_Diffusion_PaperCut_Model - config: v1-inference.yaml - file: PaperCut_v1.ckpt + format: diffusers recommended: False width: 512 height: 512 @@ -75,6 +66,7 @@ voxel_art-1.0: repo_id: Fictiverse/Stable_Diffusion_VoxelArt_Model config: v1-inference.yaml file: VoxelArt_v1.ckpt + format: diffusers recommended: False width: 512 height: 512 diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 5091e6c7611..771184c4824 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -3,7 +3,12 @@ import secrets import warnings from dataclasses import dataclass -from typing import 
List, Optional, Union, Callable, Type, TypeVar, Generic, Any, ParamSpec +import sys +from typing import List, Optional, Union, Callable, Type, TypeVar, Generic, Any +if sys.version_info < (3, 10): + from typing_extensions import ParamSpec +else: + from typing import ParamSpec import PIL.Image import einops diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index e6f1c518f32..1c057ac9058 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -320,21 +320,19 @@ def _load_ckpt_model(self, model_name, mconfig): def _load_diffusers_model(self, mconfig): pipeline_args = {} - if 'repo_name' in mconfig: - name_or_path = mconfig['repo_name'] + # look for local files first, then use the hugging face cache mechanism + if 'repo_id' in mconfig: # "repo_name" => "repo_id" in order to be consistent with Hugging Face convention + name = mconfig['repo_id'] model_hash = "FIXME" # model_hash = huggingface_hub.get_hf_file_metadata(url).commit_hash - elif 'path' in mconfig: - name_or_path = Path(mconfig['path']) - if not name_or_path.is_absolute(): - name_or_path = Path(Globals.root, name_or_path).resolve() - # FIXME: What should the model_hash be? A hash of the unet weights? Of all files of all - # the submodels hashed together? The commit ID from the repo? - model_hash = "FIXME TOO" + elif 'repo_name' in mconfig: # to avoid breakage + print('** "repo_name" is deprecated in models.yaml. Please use "repo_id"') + name = mconfig['repo_id'] + model_hash = "FIXME" else: - raise ValueError("Model config must specify either repo_name or path.") + raise ValueError("Model config must specify repo_id.") - print(f'>> Loading diffusers model from {name_or_path}') + print(f'>> Loading diffusers model from {name}') # TODO: scan weights maybe? @@ -342,35 +340,38 @@ def _load_diffusers_model(self, mconfig): vae = self._load_vae(mconfig['vae']) pipeline_args.update(vae=vae) - if self.precision == 'float16': - print(' | Using faster float16 precision') - - if not isinstance(name_or_path, Path): - # hub has no explicit API for different data types, but the main Stable Diffusion - # releases set a precedent for putting float16 weights in a fp16 branch. - try: - hf_hub_download(name_or_path, "model_index.json", revision="fp16") - except RevisionNotFoundError: - pass # no such branch, assume we should use the default. - else: - pipeline_args.update(revision="fp16") - - pipeline_args.update(torch_dtype=torch.float16) - else: - # TODO: more accurately, "using the model's default precision." - # How do we find out what that is? - print(' | Using more accurate float32 precision') + cache_dir = os.path.join(Globals.root,'models',name) + # FIX: this commented block is causing the cache read to fail in from_pretrained() + # if self.precision == 'float16': + # print(' | Using faster float16 precision') + + # if not isinstance(name, Path): + # # hub has no explicit API for different data types, but the main Stable Diffusion + # # releases set a precedent for putting float16 weights in a fp16 branch. + # try: + # hf_hub_download(name, "model_index.json", revision="fp16", cache_dir=cache_dir) + # except RevisionNotFoundError: + # pass # no such branch, assume we should use the default. + # else: + # pipeline_args.update(revision="fp16") + + # pipeline_args.update(torch_dtype=torch.float16) + # else: + # # TODO: more accurately, "using the model's default precision." + # # How do we find out what that is? 
+ # print(' | Using more accurate float32 precision') pipeline = StableDiffusionGeneratorPipeline.from_pretrained( - name_or_path, + name, # TODO: Safety checker is currently handled at a different stage in the code: # ldm.invoke.generator.base.Generator.safety_check # We might want to move that here for consistency with diffusers API, or we might # want to leave it as a separate processing node. It ends up using the same diffusers # code either way, so we can table it for now. safety_checker=None, - # TODO: local_files_only=True - **pipeline_args + cache_dir=cache_dir, + local_files_only=True, + **pipeline_args, ) pipeline.to(self.device) @@ -384,9 +385,12 @@ def model_name_or_path(self, model_name:str) -> str | Path: raise ValueError(f'"{model_name}" is not a known model name. Please check your models.yaml file') mconfig = self.config[model_name] - if 'repo_name' in mconfig: + if 'repo_id' in mconfig: + return mconfig['repo_id'] + elif 'repo_name' in mconfig: return mconfig['repo_name'] elif 'path' in mconfig: + assert f'there should be no paths in {mconfig}' path = Path(mconfig['path']) if not path.is_absolute(): path = Path(Globals.root, path).resolve() @@ -545,14 +549,17 @@ def _cached_sha256(self,path,data) -> Union[str, bytes]: def _load_vae(self, vae_config): vae_args = {} - if 'repo_name' in vae_config: - name_or_path = vae_config['repo_name'] - elif 'path' in vae_config: + if 'path' in vae_config: name_or_path = Path(vae_config['path']) if not name_or_path.is_absolute(): name_or_path = Path(Globals.root, name_or_path).resolve() + elif 'repo_id' in vae_config: + name_or_path = vae_config['repo_id'] + elif 'repo_name' in vae_config: + print('** "repo_name" is deprecated in models.yaml. Use "repo_id" instead.') + name_or_path = vae_config['repo_name'] else: - raise ValueError("VAE config must specify either repo_name or path.") + raise ValueError("VAE config must specify either repo_id or path.") print(f'>> Loading diffusers VAE from {name_or_path}') if self.precision == 'float16': diff --git a/scripts/configure_invokeai.py b/scripts/configure_invokeai.py index 39b31c8afd9..78995e55e70 100755 --- a/scripts/configure_invokeai.py +++ b/scripts/configure_invokeai.py @@ -22,6 +22,7 @@ import requests import transformers from diffusers import StableDiffusionPipeline, AutoencoderKL +from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline from getpass_asterisk import getpass_asterisk from huggingface_hub import HfFolder, hf_hub_url, login as hf_hub_login, whoami as hf_whoami from omegaconf import OmegaConf @@ -247,6 +248,7 @@ def authenticate(yes_to_all=False): print("=" * os.get_terminal_size()[0]) print('Authenticating to Huggingface') hf_envvars = [ "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACE_TOKEN" ] + token_found = False if not (access_token := HfFolder.get_token()): print(f"Huggingface token not found in cache.") @@ -264,13 +266,16 @@ def authenticate(yes_to_all=False): print(f"Huggingface token found in cache.") try: HfLogin(access_token) + token_found = True except ValueError: print(f"Login failed due to invalid token found in cache") - if not yes_to_all: - print(''' -You may optionally enter your Huggingface token now. InvokeAI *will* work without it, but some functionality may be limited. -See https://invoke-ai.github.io/InvokeAI/features/CONCEPTS/#using-a-hugging-face-concept for more information. + if not (yes_to_all or token_found): + print(''' You may optionally enter your Huggingface token now. 
InvokeAI +*will* work without it but you will not be able to automatically +download some of the Hugging Face style concepts. See +https://invoke-ai.github.io/InvokeAI/features/CONCEPTS/#using-a-hugging-face-concept +for more information. Visit https://huggingface.co/settings/tokens to generate a token. (Sign up for an account if needed). @@ -325,27 +330,14 @@ def download_weight_datasets(models:dict, access_token:str): successful = dict() for mod in models.keys(): repo_id = Datasets[mod]['repo_id'] - filename = Datasets[mod]['file'] - dest = os.path.join(Globals.root,Model_dir,Weights_dir) - success = hf_download_with_resume( - repo_id=repo_id, - model_dir=dest, - model_name=filename, - access_token=access_token + model_class = StableDiffusionGeneratorPipeline if Datasets[mod]['format']=='diffusers' else AutoencoderKL + path = download_from_hf( + model_class, + repo_id, + safety_checker=None, ) - if success: - successful[mod] = True - if len(successful) < len(models): - print(f'\n\n** There were errors downloading one or more files. **') - print('Please double-check your license agreements, and your access token.') - HfFolder.delete_token() - print('Press any key to try again. Type ^C to quit.\n') - input() - return None - - HfFolder.save_token(access_token) - keys = ', '.join(successful.keys()) - print(f'Successfully installed {keys}') + if path: + successful[mod] = path return successful #--------------------------------------------- @@ -361,56 +353,6 @@ def is_huggingface_authenticated(): pass return False -#--------------------------------------------- -def hf_download_with_resume(repo_id:str, model_dir:str, model_name:str, access_token:str=None)->bool: - model_dest = os.path.join(model_dir, model_name) - os.makedirs(model_dir, exist_ok=True) - - url = hf_hub_url(repo_id, model_name) - - header = {"Authorization": f'Bearer {access_token}'} if access_token else {} - open_mode = 'wb' - exist_size = 0 - - if os.path.exists(model_dest): - exist_size = os.path.getsize(model_dest) - header['Range'] = f'bytes={exist_size}-' - open_mode = 'ab' - - resp = requests.get(url, headers=header, stream=True) - total = int(resp.headers.get('content-length', 0)) - - if resp.status_code==416: # "range not satisfiable", which means nothing to return - print(f'* {model_name}: complete file found. Skipping.') - return True - elif resp.status_code != 200: - print(f'** An error occurred during downloading {model_name}: {resp.reason}') - elif exist_size > 0: - print(f'* {model_name}: partial file found. 
Resuming...') - else: - print(f'* {model_name}: Downloading...') - - try: - if total < 2000: - print(f'*** ERROR DOWNLOADING {model_name}: {resp.text}') - return False - - with open(model_dest, open_mode) as file, tqdm( - desc=model_name, - initial=exist_size, - total=total+exist_size, - unit='iB', - unit_scale=True, - unit_divisor=1000, - ) as bar: - for data in resp.iter_content(chunk_size=1024): - size = file.write(data) - bar.update(size) - except Exception as e: - print(f'An error occurred while downloading {model_name}: {str(e)}') - return False - return True - #--------------------------------------------- def download_with_progress_bar(model_url:str, model_dest:str, label:str='the'): try: @@ -427,59 +369,6 @@ def download_with_progress_bar(model_url:str, model_dest:str, label:str='the'): print(f'Error downloading {label} model') print(traceback.format_exc()) - -#--------------------------------------------- -def download_diffusers(models: Dict, full_precision: bool): - # This is a minimal implementation until https://github.com/invoke-ai/InvokeAI/pull/1490 lands, - # which moves a bunch of stuff. - # We can be more complete after we know it won't be all merge conflicts. - diffusers_repos = { - 'CompVis/stable-diffusion-v1-4-original': 'CompVis/stable-diffusion-v1-4', - 'runwayml/stable-diffusion-v1-5': 'runwayml/stable-diffusion-v1-5', - 'runwayml/stable-diffusion-inpainting': 'runwayml/stable-diffusion-inpainting', - 'hakurei/waifu-diffusion-v1-3': 'hakurei/waifu-diffusion' - } - vae_repos = { - 'stabilityai/sd-vae-ft-mse-original': 'stabilityai/sd-vae-ft-mse', - } - precision_args = {} - if not full_precision: - precision_args.update(revision='fp16') - - for model_name, model in models.items(): - repo_id = model['repo_id'] - if repo_id in vae_repos: - print(f" * Downloading diffusers VAE {model_name}...") - # TODO: can we autodetect when a repo has no fp16 revision? - AutoencoderKL.from_pretrained(repo_id) - elif repo_id not in diffusers_repos: - print(f" * Downloading diffusers {model_name}...") - StableDiffusionPipeline.from_pretrained(repo_id, **precision_args) - else: - warnings.warn(f" ⚠ FIXME: add diffusers repo for {repo_id}") - continue - - -def download_diffusers_in_config(config_path: Path, full_precision: bool): - # This is a minimal implementation until https://github.com/invoke-ai/InvokeAI/pull/1490 lands, - # which moves a bunch of stuff. - # We can be more complete after we know it won't be all merge conflicts. - if not is_huggingface_authenticated(): - print("*⚠ No Hugging Face access token; some downloads may be blocked.") - - precision = 'full' if full_precision else 'float16' - cache = ModelCache(OmegaConf.load(config_path), precision=precision, - device_type='cpu', max_loaded_models=1) - for model_name in cache.list_models(): - # TODO: download model without loading it. 
- # https://github.com/huggingface/diffusers/issues/1301 - model_config = cache.config[model_name] - if model_config.get('format') == 'diffusers': - print(f" * Downloading diffusers {model_name}...") - cache.get_model(model_name) - cache.offload_model(model_name) - - #--------------------------------------------- def update_config_file(successfully_downloaded:dict,opt:dict): config_file = opt.config_file or Default_config_file @@ -516,29 +405,24 @@ def new_config_file_contents(successfully_downloaded:dict, config_file:str)->str default_selected = False for model in successfully_downloaded: - a = Datasets[model]['config'].split('/') - if a[0] != 'VAE': - continue - vae_target = a[1] if len(a)>1 else 'default' - vaes[vae_target] = Datasets[model]['file'] - - for model in successfully_downloaded: - if Datasets[model]['config'].startswith('VAE'): # skip VAE entries + # TODO: fix VAEs + if Datasets[model]['format']=='vae': continue stanza = conf[model] if model in conf else { } - stanza['description'] = Datasets[model]['description'] - stanza['weights'] = os.path.join(Model_dir,Weights_dir,Datasets[model]['file']) - stanza['config'] = os.path.normpath(os.path.join(SD_Configs, Datasets[model]['config'])) + stanza['repo_id'] = Datasets[model]['repo_id'] + stanza['format'] = 'diffusers' stanza['width'] = Datasets[model]['width'] stanza['height'] = Datasets[model]['height'] stanza.pop('default',None) # this will be set later - if vaes: - for target in vaes: - if re.search(target, model, flags=re.IGNORECASE): - stanza['vae'] = os.path.normpath(os.path.join(Model_dir,Weights_dir,vaes[target])) - else: - stanza['vae'] = os.path.normpath(os.path.join(Model_dir,Weights_dir,vaes['default'])) + + # FIX: handle the VAE + # if vaes: + # for target in vaes: + # if re.search(target, model, flags=re.IGNORECASE): + # stanza['vae'] = os.path.normpath(os.path.join(Model_dir,Weights_dir,vaes[target])) + # else: + # stanza['vae'] = os.path.normpath(os.path.join(Model_dir,Weights_dir,vaes['default'])) # BUG - the first stanza is always the default. User should select. if not default_selected: stanza['default'] = True @@ -557,12 +441,15 @@ def download_bert(): print('...success',file=sys.stderr) #--------------------------------------------- -def download_from_hf(model_class:object, model_name:str): +def download_from_hf(model_class:object, model_name:str, **kwargs): print('',file=sys.stderr) # to prevent tqdm from overwriting - return model_class.from_pretrained(model_name, - cache_dir=os.path.join(Globals.root,Model_dir,model_name), - resume_download=True + path = os.path.join(Globals.root,Model_dir,model_name) + model = model_class.from_pretrained(model_name, + cache_dir=path, + resume_download=True, + **kwargs, ) + return path if model else None #--------------------------------------------- def download_clip(): @@ -682,8 +569,8 @@ def download_weights(opt:dict) -> Union[str, None]: else: # 'skip' return - access_token = authenticate() + HfFolder.save_token(access_token) print('\n** DOWNLOADING WEIGHTS **') successfully_downloaded = download_weight_datasets(models, access_token) @@ -859,11 +746,6 @@ def main(): else: print('** DOWNLOADING DIFFUSION WEIGHTS **') errors.add(download_weights(opt)) - config_path = Path(Globals.root, opt.config_file or Default_config_file) - if config_path.exists(): - download_diffusers_in_config(config_path, full_precision=opt.full_precision) - else: - print(f"*⚠ No config file found; downloading no weights. 
Looked in {config_path}") print('\n** DOWNLOADING SUPPORT MODELS **') download_bert() download_clip() From 296bd013612a4aace159ebda18740f41f842f9d9 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Wed, 21 Dec 2022 02:10:03 +0000 Subject: [PATCH 080/199] allow non-local files during development --- ldm/invoke/model_cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 1c057ac9058..7cfb6f034ec 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -370,7 +370,7 @@ def _load_diffusers_model(self, mconfig): # code either way, so we can table it for now. safety_checker=None, cache_dir=cache_dir, - local_files_only=True, +# local_files_only=True, **pipeline_args, ) pipeline.to(self.device) From b29dcc7ab682c6d9474e873351db49ff2be73ad1 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Wed, 21 Dec 2022 04:36:30 +0000 Subject: [PATCH 081/199] path takes priority over repo_id --- ldm/generate.py | 1 - ldm/invoke/model_cache.py | 42 +++++++++++++++++++++------------------ 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index a0de4a97837..cea0ee05c33 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -40,7 +40,6 @@ from ldm.invoke.pngwriter import PngWriter from ldm.invoke.seamless import configure_model_padding from ldm.invoke.txt2mask import Txt2Mask, SegmentedGrayscale -from ldm.invoke.concepts_lib import Concepts from ldm.invoke.generator.inpaint import infill_methods from ldm.models.diffusion.ddim import DDIMSampler diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 7cfb6f034ec..dcca49eea60 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -319,20 +319,24 @@ def _load_ckpt_model(self, model_name, mconfig): def _load_diffusers_model(self, mconfig): pipeline_args = {} - - # look for local files first, then use the hugging face cache mechanism - if 'repo_id' in mconfig: # "repo_name" => "repo_id" in order to be consistent with Hugging Face convention - name = mconfig['repo_id'] + if 'path' in mconfig: + name_or_path = Path(mconfig['path']) + if not name_or_path.is_absolute(): + name_or_path = Path(Globals.root, name_or_path).resolve() + # FIXME: What should the model_hash be? A hash of the unet weights? Of all files of all + # the submodels hashed together? The commit ID from the repo? + model_hash = "FIXME TOO" + elif 'repo_id' in mconfig: + name_or_path = mconfig['repo_id'] model_hash = "FIXME" # model_hash = huggingface_hub.get_hf_file_metadata(url).commit_hash - elif 'repo_name' in mconfig: # to avoid breakage - print('** "repo_name" is deprecated in models.yaml. Please use "repo_id"') - name = mconfig['repo_id'] + elif 'repo_name' in mconfig: + name_or_path = mconfig['repo_name'] model_hash = "FIXME" + # model_hash = huggingface_hub.get_hf_file_metadata(url).commit_hash else: - raise ValueError("Model config must specify repo_id.") - - print(f'>> Loading diffusers model from {name}') + raise ValueError("Model config must specify either repo_name or path.") + print(f'>> Loading diffusers model from {name_or_path}') # TODO: scan weights maybe? 
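The resolution order this commit introduces (a local `path` wins over `repo_id`, and a bare repo id falls through to the Hugging Face cache under the InvokeAI root) can be illustrated with a minimal sketch. The stanza names and helper below are hypothetical and not part of the patch; only the priority rule and the `<root>/models/<repo_id>` cache location follow the hunk above.

```
from pathlib import Path
from omegaconf import OmegaConf

# Hypothetical models.yaml stanzas in the two supported styles.
EXAMPLE_STANZAS = OmegaConf.create("""
local-diffusers-model:
  path: models/optimized-ckpts/sd-v1-4   # relative paths resolve against the InvokeAI root
  format: diffusers
hub-diffusers-model:
  repo_id: runwayml/stable-diffusion-v1-5
  format: diffusers
""")

def resolve_name_or_path(mconfig, root: str = "~/invokeai"):
    # 'path' takes priority over 'repo_id'; a bare repo id string is later handed
    # to from_pretrained(), which caches it under <root>/models/<repo_id>.
    if "path" in mconfig:
        p = Path(mconfig["path"])
        return p if p.is_absolute() else (Path(root).expanduser() / p).resolve()
    if "repo_id" in mconfig:
        return mconfig["repo_id"]
    raise ValueError("Model config must specify either repo_id or path.")

# e.g. resolve_name_or_path(EXAMPLE_STANZAS["hub-diffusers-model"])
```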
@@ -340,7 +344,8 @@ def _load_diffusers_model(self, mconfig): vae = self._load_vae(mconfig['vae']) pipeline_args.update(vae=vae) - cache_dir = os.path.join(Globals.root,'models',name) + # FIX: if name is a path, this is not right + cache_dir = os.path.join(Globals.root,'models',name_or_path) if type(name_or_path) == str else None # FIX: this commented block is causing the cache read to fail in from_pretrained() # if self.precision == 'float16': # print(' | Using faster float16 precision') @@ -362,7 +367,7 @@ def _load_diffusers_model(self, mconfig): # print(' | Using more accurate float32 precision') pipeline = StableDiffusionGeneratorPipeline.from_pretrained( - name, + name_or_path, # TODO: Safety checker is currently handled at a different stage in the code: # ldm.invoke.generator.base.Generator.safety_check # We might want to move that here for consistency with diffusers API, or we might @@ -385,18 +390,17 @@ def model_name_or_path(self, model_name:str) -> str | Path: raise ValueError(f'"{model_name}" is not a known model name. Please check your models.yaml file') mconfig = self.config[model_name] - if 'repo_id' in mconfig: - return mconfig['repo_id'] - elif 'repo_name' in mconfig: - return mconfig['repo_name'] - elif 'path' in mconfig: - assert f'there should be no paths in {mconfig}' + if 'path' in mconfig: path = Path(mconfig['path']) if not path.is_absolute(): path = Path(Globals.root, path).resolve() return path + elif 'repo_id' in mconfig: + return mconfig['repo_id'] + elif 'repo_name' in mconfig: + return mconfig['repo_name'] else: - raise ValueError("Model config must specify either repo_name or path.") + raise ValueError("Model config must specify either repo_id or path.") def offload_model(self, model_name:str) -> None: ''' From 0e8072b46f7a1582ed1e7e3351701c591c2f9975 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Wed, 21 Dec 2022 19:19:18 +0000 Subject: [PATCH 082/199] MVP for model_cache and configure_invokeai - Feature complete (almost) - configure_invokeai.py downloads both .ckpt and diffuser models, along with their VAEs. Both types of download are controlled by a unified INITIAL_MODELS.yaml file. - model_cache can load both type of model and switches back and forth in CPU. No memory leaks detected TO DO: 1. I have not yet turned on the LocalOnly flag for diffuser models, so the code will check the Hugging Face repo for updates before using the locally cached models. This will break firewalled systems. I am thinking of putting in a global check for internet connectivity at startup time and setting the LocalOnly flag based on this. It would be good to check updates if there is connectivity. 2. I have not gone completely through INITIAL_MODELS.yaml to check which models are available as diffusers and which are not. So models like PaperCut and VoxelArt may not load properly. The runway and stability models are checked, as well as the Trinart models. 3. Add stanzas for SD 2.0 and 2.1 in INITIAL_MODELS.yaml REMAINING PROBLEMS NOT DIRECTLY RELATED TO MODEL_CACHE: 1. When loading a .ckpt file there are lots of messages like this: Warning! ldm.modules.attention.CrossAttention is no longer being maintained. Please use InvokeAICrossAttention instead. I'm not sure how to address this. 2. The ckpt models ***don't actually run*** due to the lack of special-case support for them in the generator objects. 
For example, here's the hard crash you get when you run txt2img against the legacy waifu-diffusion-1.3 model: ``` >> An error occurred: Traceback (most recent call last): File "/data/lstein/InvokeAI/ldm/invoke/CLI.py", line 140, in main main_loop(gen, opt) File "/data/lstein/InvokeAI/ldm/invoke/CLI.py", line 371, in main_loop gen.prompt2image( File "/data/lstein/InvokeAI/ldm/generate.py", line 496, in prompt2image results = generator.generate( File "/data/lstein/InvokeAI/ldm/invoke/generator/base.py", line 108, in generate image = make_image(x_T) File "/data/lstein/InvokeAI/ldm/invoke/generator/txt2img.py", line 33, in make_image pipeline_output = pipeline.image_from_embeddings( File "/home/lstein/invokeai/.venv/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1265, in __getattr__ raise AttributeError("'{}' object has no attribute '{}'".format( AttributeError: 'LatentDiffusion' object has no attribute 'image_from_embeddings' ``` 3. The inpainting diffusion model isn't working. Here's the output of "banana sushi" when inpainting-1.5 is loaded: ``` Traceback (most recent call last): File "/data/lstein/InvokeAI/ldm/generate.py", line 496, in prompt2image results = generator.generate( File "/data/lstein/InvokeAI/ldm/invoke/generator/base.py", line 108, in generate image = make_image(x_T) File "/data/lstein/InvokeAI/ldm/invoke/generator/txt2img.py", line 33, in make_image pipeline_output = pipeline.image_from_embeddings( File "/data/lstein/InvokeAI/ldm/invoke/generator/diffusers_pipeline.py", line 301, in image_from_embeddings result_latents, result_attention_map_saver = self.latents_from_embeddings( File "/data/lstein/InvokeAI/ldm/invoke/generator/diffusers_pipeline.py", line 330, in latents_from_embeddings result: PipelineIntermediateState = infer_latents_from_embeddings( File "/data/lstein/InvokeAI/ldm/invoke/generator/diffusers_pipeline.py", line 185, in __call__ for result in self.generator_method(*args, **kwargs): File "/data/lstein/InvokeAI/ldm/invoke/generator/diffusers_pipeline.py", line 367, in generate_latents_from_embeddings step_output = self.step(batched_t, latents, guidance_scale, File "/home/lstein/invokeai/.venv/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context return func(*args, **kwargs) File "/data/lstein/InvokeAI/ldm/invoke/generator/diffusers_pipeline.py", line 409, in step step_output = self.scheduler.step(noise_pred, timestep, latents, **extra_step_kwargs) File "/home/lstein/invokeai/.venv/lib/python3.9/site-packages/diffusers/schedulers/scheduling_lms_discrete.py", line 223, in step pred_original_sample = sample - sigma * model_output RuntimeError: The size of tensor a (9) must match the size of tensor b (4) at non-singleton dimension 1 ``` --- configs/INITIAL_MODELS.yaml | 47 +++++---- ldm/invoke/model_cache.py | 178 ++++++++++++++++------------------ ldm/models/diffusion/ddpm.py | 6 +- scripts/configure_invokeai.py | 132 ++++++++++++++++++++----- 4 files changed, 221 insertions(+), 142 deletions(-) diff --git a/configs/INITIAL_MODELS.yaml b/configs/INITIAL_MODELS.yaml index 4139c67d58b..68be4ca6801 100644 --- a/configs/INITIAL_MODELS.yaml +++ b/configs/INITIAL_MODELS.yaml @@ -3,6 +3,8 @@ stable-diffusion-1.5: repo_id: runwayml/stable-diffusion-v1-5 format: diffusers recommended: True + vae: + repo_id: stabilityai/sd-vae-ft-mse width: 512 height: 512 inpainting-1.5: @@ -12,25 +14,24 @@ inpainting-1.5: recommended: True width: 512 height: 512 -ft-mse-improved-autoencoder-840000: - description: StabilityAI improved 
autoencoder fine-tuned for human faces (recommended; 335 MB) - repo_id: stabilityai/sd-vae-ft-mse - recommended: True - format: vae - width: 512 - height: 512 stable-diffusion-1.4: description: The original Stable Diffusion version 1.4 weight file (4.27 GB) repo_id: CompVis/stable-diffusion-v-1-4-original recommended: False format: diffusers + vae: + repo_id: stabilityai/sd-vae-ft-mse width: 512 height: 512 waifu-diffusion-1.3: description: Stable Diffusion 1.4 fine tuned on anime-styled images (4.27 GB) repo_id: hakurei/waifu-diffusion-v1-3 - format: diffusers - recommended: False + config: v1-inference.yaml + file: model-epoch09-float32.ckpt + format: ckpt + vae: + repo_id: stabilityai/sd-vae-ft-mse-original + file: vae-ft-mse-840000-ema-pruned.ckpt width: 512 height: 512 trinart-2.0: @@ -38,19 +39,19 @@ trinart-2.0: repo_id: naclbit/trinart_stable_diffusion_v2 format: diffusers recommended: False + vae: + repo_id: stabilityai/sd-vae-ft-mse width: 512 height: 512 -trinart_characters-1.0: - description: An SD model finetuned with 19.2M anime/manga style images (2.13 GB) - repo_id: naclbit/trinart_characters_19.2m_stable_diffusion_v1 - format: diffusers - recommended: False - width: 512 - height: 512 -trinart_vae: - description: Custom autoencoder for trinart_characters - repo_id: naclbit/trinart_characters_19.2m_stable_diffusion_v1 - format: vae +trinart_characters-2.0: + description: An SD model finetuned with 19.2M anime/manga style images (4.27 GB) + repo_id: naclbit/trinart_derrida_characters_v2_stable_diffusion + config: v1-inference.yaml + file: derrida_final.ckpt + format: ckpt + vae: + repo_id: naclbit/trinart_derrida_characters_v2_stable_diffusion + file: autoencoder_fix_kl-f8-trinart_characters.ckpt recommended: False width: 512 height: 512 @@ -58,6 +59,8 @@ papercut-1.0: description: SD 1.5 fine-tuned for papercut art (use "PaperCut" in your prompts) (2.13 GB) repo_id: Fictiverse/Stable_Diffusion_PaperCut_Model format: diffusers + vae: + repo_id: stabilityai/sd-vae-ft-mse recommended: False width: 512 height: 512 @@ -66,7 +69,9 @@ voxel_art-1.0: repo_id: Fictiverse/Stable_Diffusion_VoxelArt_Model config: v1-inference.yaml file: VoxelArt_v1.ckpt - format: diffusers + format: ckpt + vae: + repo_id: stabilityai/sd-vae-ft-mse recommended: False width: 512 height: 512 diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index dcca49eea60..867ce576caf 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -25,6 +25,7 @@ from huggingface_hub import hf_hub_download from huggingface_hub.utils import RevisionNotFoundError from omegaconf import OmegaConf +from omegaconf.dictconfig import DictConfig from omegaconf.errors import ConfigAttributeError from picklescan.scanner import scan_file_path @@ -319,77 +320,60 @@ def _load_ckpt_model(self, model_name, mconfig): def _load_diffusers_model(self, mconfig): pipeline_args = {} - if 'path' in mconfig: - name_or_path = Path(mconfig['path']) - if not name_or_path.is_absolute(): - name_or_path = Path(Globals.root, name_or_path).resolve() - # FIXME: What should the model_hash be? A hash of the unet weights? Of all files of all - # the submodels hashed together? The commit ID from the repo? 
- model_hash = "FIXME TOO" - elif 'repo_id' in mconfig: - name_or_path = mconfig['repo_id'] - model_hash = "FIXME" - # model_hash = huggingface_hub.get_hf_file_metadata(url).commit_hash - elif 'repo_name' in mconfig: - name_or_path = mconfig['repo_name'] - model_hash = "FIXME" - # model_hash = huggingface_hub.get_hf_file_metadata(url).commit_hash - else: - raise ValueError("Model config must specify either repo_name or path.") + name_or_path = self.model_name_or_path(mconfig) + model_hash = 'FIXME' + using_fp16 = False print(f'>> Loading diffusers model from {name_or_path}') # TODO: scan weights maybe? - if 'vae' in mconfig: vae = self._load_vae(mconfig['vae']) pipeline_args.update(vae=vae) - # FIX: if name is a path, this is not right - cache_dir = os.path.join(Globals.root,'models',name_or_path) if type(name_or_path) == str else None - # FIX: this commented block is causing the cache read to fail in from_pretrained() - # if self.precision == 'float16': - # print(' | Using faster float16 precision') - - # if not isinstance(name, Path): - # # hub has no explicit API for different data types, but the main Stable Diffusion - # # releases set a precedent for putting float16 weights in a fp16 branch. - # try: - # hf_hub_download(name, "model_index.json", revision="fp16", cache_dir=cache_dir) - # except RevisionNotFoundError: - # pass # no such branch, assume we should use the default. - # else: - # pipeline_args.update(revision="fp16") - - # pipeline_args.update(torch_dtype=torch.float16) - # else: - # # TODO: more accurately, "using the model's default precision." - # # How do we find out what that is? - # print(' | Using more accurate float32 precision') - - pipeline = StableDiffusionGeneratorPipeline.from_pretrained( - name_or_path, - # TODO: Safety checker is currently handled at a different stage in the code: - # ldm.invoke.generator.base.Generator.safety_check - # We might want to move that here for consistency with diffusers API, or we might - # want to leave it as a separate processing node. It ends up using the same diffusers - # code either way, so we can table it for now. - safety_checker=None, - cache_dir=cache_dir, -# local_files_only=True, - **pipeline_args, - ) + cache_dir = None if isinstance(name_or_path,Path) else os.path.join(Globals.root,'models',name_or_path) + if self.precision == 'float16': + print(' | Using faster float16 precision') + using_fp16 = True + pipeline_args.update(revision="fp16") + pipeline_args.update(torch_dtype=torch.float16) + else: + # TODO: more accurately, "using the model's default precision." + # How do we find out what that is? 
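# A minimal sketch, not part of this patch, of the start-up connectivity probe the
# commit message floats for deciding whether from_pretrained() should be called with
# local_files_only=True on firewalled systems. The probe host and timeout are assumptions.
import socket

def have_internet(host: str = "huggingface.co", port: int = 443, timeout: float = 3.0) -> bool:
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except OSError:
        return False

# e.g. pipeline_args.update(local_files_only=not have_internet())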
+ print(' | Using more accurate float32 precision') + try: + pipeline = StableDiffusionGeneratorPipeline.from_pretrained( + name_or_path, + safety_checker=None, + cache_dir=cache_dir, + # local_files_only=True, + **pipeline_args, + ) + except Exception as e: + if using_fp16: + pipeline_args.pop('revision') + pipeline = StableDiffusionGeneratorPipeline.from_pretrained( + name_or_path, + safety_checker=None, + cache_dir=cache_dir, + # local_files_only=True, + **pipeline_args, + ) + pipeline.to(self.device) - width = pipeline.vae.block_out_channels[-1] + width = pipeline.vae.block_out_channels[-2] height = pipeline.vae.block_out_channels[-1] return pipeline, width, height, model_hash - def model_name_or_path(self, model_name:str) -> str | Path: - if model_name not in self.config: + def model_name_or_path(self, model_name:Union[str,DictConfig]) -> str | Path: + if isinstance(model_name,DictConfig): + mconfig = model_name + elif model_name in self.config: + mconfig = self.config[model_name] + else: raise ValueError(f'"{model_name}" is not a known model name. Please check your models.yaml file') - mconfig = self.config[model_name] if 'path' in mconfig: path = Path(mconfig['path']) if not path.is_absolute(): @@ -484,27 +468,36 @@ def _invalidate_cached_model(self,model_name:str) -> None: self.models.pop(model_name,None) def _model_to_cpu(self,model): - if self.device != 'cpu': - try: - model.cond_stage_model.device = 'cpu' - model.first_stage_model.to('cpu') - model.cond_stage_model.to('cpu') - model.model.to('cpu') - except AttributeError as e: - warnings.warn(f"TODO: clean up legacy model-management: {e}") - return model.to('cpu') - else: + if self.device == 'cpu': return model + # diffusers really really doesn't like us moving a float16 model onto CPU + import logging + logging.getLogger('diffusers.pipeline_utils').setLevel(logging.CRITICAL) + model.cond_stage_model.device = 'cpu' + model.to('cpu') + logging.getLogger('pipeline_utils').setLevel(logging.INFO) + + for submodel in ('first_stage_model','cond_stage_model','model'): + try: + getattr(model,submodel).to('cpu') + except AttributeError: + pass + return model + def _model_from_cpu(self,model): - if self.device != 'cpu': - model.to(self.device) + if self.device == 'cpu': + return model + + model.to(self.device) + model.cond_stage_model.device = self.device + + for submodel in ('first_stage_model','cond_stage_model','model'): try: - model.first_stage_model.to(self.device) - model.cond_stage_model.to(self.device) - model.cond_stage_model.device = self.device - except AttributeError as e: - warnings.warn(f"TODO: clean up legacy model-management: {e}") + getattr(model,submodel).to(self.device) + except AttributeError: + pass + return model def _pop_oldest_model(self): @@ -552,40 +545,31 @@ def _cached_sha256(self,path,data) -> Union[str, bytes]: def _load_vae(self, vae_config): vae_args = {} - - if 'path' in vae_config: - name_or_path = Path(vae_config['path']) - if not name_or_path.is_absolute(): - name_or_path = Path(Globals.root, name_or_path).resolve() - elif 'repo_id' in vae_config: - name_or_path = vae_config['repo_id'] - elif 'repo_name' in vae_config: - print('** "repo_name" is deprecated in models.yaml. 
Use "repo_id" instead.') - name_or_path = vae_config['repo_name'] - else: - raise ValueError("VAE config must specify either repo_id or path.") + name_or_path = self.model_name_or_path(vae_config) + using_fp16 = False print(f'>> Loading diffusers VAE from {name_or_path}') if self.precision == 'float16': print(' | Using faster float16 precision') - - if not isinstance(name_or_path, Path): - try: - hf_hub_download(name_or_path, "model_index.json", revision="fp16") - except RevisionNotFoundError: - pass - else: - vae_args.update(revision="fp16") - + using_fp16 = True + vae_args.update(revision="fp16") vae_args.update(torch_dtype=torch.float16) else: # TODO: more accurately, "using the model's default precision." # How do we find out what that is? print(' | Using more accurate float32 precision') + # At some point we might need to be able to use different classes here? But for now I think + # all Stable Diffusion VAE are AutoencoderKL. + try: + vae = AutoencoderKL.from_pretrained(name_or_path, **vae_args) + except Exception as e: + if using_fp16: + vae_args.pop('revision') + vae = AutoencoderKL.from_pretrained(name_or_path, **vae_args) + + # comment by lstein: I don't know what this does if 'subfolder' in vae_config: vae_args['subfolder'] = vae_config['subfolder'] - # At some point we might need to be able to use different classes here? But for now I think - # all Stable Diffusion VAE are AutoencoderKL. - return AutoencoderKL.from_pretrained(name_or_path, **vae_args) + return vae diff --git a/ldm/models/diffusion/ddpm.py b/ldm/models/diffusion/ddpm.py index d9fa762f0b9..7c7ba9f5fdf 100644 --- a/ldm/models/diffusion/ddpm.py +++ b/ldm/models/diffusion/ddpm.py @@ -679,7 +679,11 @@ def __init__( self.embedding_manager = self.instantiate_embedding_manager( personalization_config, self.cond_stage_model ) - self.textual_inversion_manager = TextualInversionManager(self.cond_stage_model, full_precision=True) + self.textual_inversion_manager = TextualInversionManager( + tokenizer = self.cond_stage_model.tokenizer, + text_encoder = self.cond_stage_model.transformer, + full_precision = True + ) # this circular component dependency is gross and bad, needs to be rethought self.cond_stage_model.set_textual_inversion_manager(self.textual_inversion_manager) diff --git a/scripts/configure_invokeai.py b/scripts/configure_invokeai.py index 40f24585030..edc1957b12c 100755 --- a/scripts/configure_invokeai.py +++ b/scripts/configure_invokeai.py @@ -25,7 +25,9 @@ from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline from getpass_asterisk import getpass_asterisk from huggingface_hub import HfFolder, hf_hub_url, login as hf_hub_login, whoami as hf_whoami +from huggingface_hub.utils._errors import RevisionNotFoundError from omegaconf import OmegaConf +from omegaconf.dictconfig import DictConfig from tqdm import tqdm from transformers import CLIPTokenizer, CLIPTextModel @@ -158,9 +160,10 @@ def select_datasets(action:str): ''' ) for ds in Datasets.keys(): - recommended = '(recommended)' if Datasets[ds]['recommended'] else '' - print(f'[{counter}] {ds}:\n {Datasets[ds]["description"]} {recommended}') - if yes_or_no(' Download?',default_yes=Datasets[ds]['recommended']): + recommended = Datasets[ds].get('recommended',False) + r_str = '(recommended)' if recommended else '' + print(f'[{counter}] {ds}:\n {Datasets[ds]["description"]} {r_str}') + if yes_or_no(' Download?',default_yes=recommended): datasets[ds]=counter counter += 1 else: @@ -329,17 +332,101 @@ def 
download_weight_datasets(models:dict, access_token:str): migrate_models_ckpt() successful = dict() for mod in models.keys(): - repo_id = Datasets[mod]['repo_id'] - model_class = StableDiffusionGeneratorPipeline if Datasets[mod]['format']=='diffusers' else AutoencoderKL + print(f'{mod}...',file=sys.stderr,end='') + successful[mod] = _download_repo_or_file(Datasets[mod], access_token) + return successful + +def _download_repo_or_file(mconfig:DictConfig, access_token:str)->Path: + path = None + if mconfig['format'] == 'ckpt': + path = _download_ckpt_weights(mconfig, access_token) + else: + path = _download_diffusion_weights(mconfig, access_token) + return path + +def _download_ckpt_weights(mconfig:DictConfig, access_token:str)->Path: + repo_id = mconfig['repo_id'] + filename = mconfig['file'] + cache_dir = os.path.join(Globals.root, Model_dir, Weights_dir) + return hf_download_with_resume( + repo_id=repo_id, + model_dir=cache_dir, + model_name=filename, + access_token=access_token + ) + +def _download_diffusion_weights(mconfig:DictConfig, access_token:str): + repo_id = mconfig['repo_id'] + model_class = StableDiffusionGeneratorPipeline if mconfig['format']=='diffusers' else AutoencoderKL + path = None + try: path = download_from_hf( model_class, repo_id, safety_checker=None, + revision='fp16', ) - if path: - successful[mod] = path - return successful + except OSError: + path = download_from_hf( + model_class, + repo_id, + safety_checker=None, + ) + except Exception as e: + print(f'could not download; exception = {type(e)}') + return path + +#--------------------------------------------- +def hf_download_with_resume(repo_id:str, model_dir:str, model_name:str, access_token:str=None)->Path: + model_dest = Path(os.path.join(model_dir, model_name)) + os.makedirs(model_dir, exist_ok=True) + + url = hf_hub_url(repo_id, model_name) + + header = {"Authorization": f'Bearer {access_token}'} if access_token else {} + open_mode = 'wb' + exist_size = 0 + + if os.path.exists(model_dest): + exist_size = os.path.getsize(model_dest) + header['Range'] = f'bytes={exist_size}-' + open_mode = 'ab' + + resp = requests.get(url, headers=header, stream=True) + total = int(resp.headers.get('content-length', 0)) + + if resp.status_code==416: # "range not satisfiable", which means nothing to return + print(f'* {model_name}: complete file found. Skipping.') + return model_dest + elif resp.status_code != 200: + print(f'** An error occurred during downloading {model_name}: {resp.reason}') + elif exist_size > 0: + print(f'* {model_name}: partial file found. 
Resuming...') + else: + print(f'* {model_name}: Downloading...') + try: + if total < 2000: + print(f'*** ERROR DOWNLOADING {model_name}: {resp.text}') + return None + + with open(model_dest, open_mode) as file, tqdm( + desc=model_name, + initial=exist_size, + total=total+exist_size, + unit='iB', + unit_scale=True, + unit_divisor=1000, + ) as bar: + for data in resp.iter_content(chunk_size=1024): + size = file.write(data) + bar.update(size) + except Exception as e: + print(f'An error occurred while downloading {model_name}: {str(e)}') + return None + return model_dest + +# ----------------------------------------------------------------------------------- #--------------------------------------------- def is_huggingface_authenticated(): # huggingface_hub 0.10 API isn't great for this, it could be OSError, ValueError, @@ -405,24 +492,23 @@ def new_config_file_contents(successfully_downloaded:dict, config_file:str)->str default_selected = False for model in successfully_downloaded: - # TODO: fix VAEs - if Datasets[model]['format']=='vae': - continue stanza = conf[model] if model in conf else { } - stanza['description'] = Datasets[model]['description'] - stanza['repo_id'] = Datasets[model]['repo_id'] - stanza['format'] = 'diffusers' - stanza['width'] = Datasets[model]['width'] - stanza['height'] = Datasets[model]['height'] + mod = Datasets[model] + stanza['description'] = mod['description'] + stanza['repo_id'] = mod['repo_id'] + stanza['format'] = mod['format'] + stanza['width'] = mod['width'] + stanza['height'] = mod['height'] + if 'file' in mod: + stanza['weights'] = os.path.relpath(successfully_downloaded[model], start=Globals.root) + stanza['config'] = os.path.normpath(os.path.join(SD_Configs,mod['config'])) + if 'vae' in mod: + if 'file' in mod['vae']: + stanza['vae'] = os.path.normpath(os.path.join(Model_dir, Weights_dir,mod['vae']['file'])) + else: + stanza['vae'] = mod['vae'] stanza.pop('default',None) # this will be set later - # FIX: handle the VAE - # if vaes: - # for target in vaes: - # if re.search(target, model, flags=re.IGNORECASE): - # stanza['vae'] = os.path.normpath(os.path.join(Model_dir,Weights_dir,vaes[target])) - # else: - # stanza['vae'] = os.path.normpath(os.path.join(Model_dir,Weights_dir,vaes['default'])) # BUG - the first stanza is always the default. User should select. if not default_selected: stanza['default'] = True From ebbebd6f0827f4824ac3707f1101f05584bd6342 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 22 Dec 2022 18:01:47 +0000 Subject: [PATCH 083/199] proper support for float32/float16 - configure script now correctly detects user's preference for fp16/32 and downloads the correct diffuser version. If fp16 version not available, falls back to fp32 version. 
- misc code cleanup and simplification in model_cache --- configs/INITIAL_MODELS.yaml | 7 ++- ldm/invoke/model_cache.py | 89 +++++++++++++++++++---------------- scripts/configure_invokeai.py | 56 ++++++++++++---------- 3 files changed, 86 insertions(+), 66 deletions(-) diff --git a/configs/INITIAL_MODELS.yaml b/configs/INITIAL_MODELS.yaml index 68be4ca6801..feea63065ed 100644 --- a/configs/INITIAL_MODELS.yaml +++ b/configs/INITIAL_MODELS.yaml @@ -1,5 +1,10 @@ +stable-diffusion-2.1: + description: Stable Diffusion version 2.1 diffusers model (5.21 GB) + repo_id: stabilityai/stable-diffusion-2-1 + format: diffusers + recommended: True stable-diffusion-1.5: - description: The newest Stable Diffusion version 1.5 weight file (4.27 GB) + description: Stable Diffusion version 1.5 weight file (4.27 GB) repo_id: runwayml/stable-diffusion-v1-5 format: diffusers recommended: True diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 867ce576caf..a67da0ef5e5 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -319,45 +319,49 @@ def _load_ckpt_model(self, model_name, mconfig): return model, width, height, model_hash def _load_diffusers_model(self, mconfig): - pipeline_args = {} name_or_path = self.model_name_or_path(mconfig) model_hash = 'FIXME' - using_fp16 = False + using_fp16 = self.precision == 'float16' + print(f'>> Loading diffusers model from {name_or_path}') + if using_fp16: + print(f' | Using faster float16 precision') + else: + print(f' | Using more accurate float32 precision') # TODO: scan weights maybe? + pipeline_args = dict( + safety_checker=None, + #local_files_only=True + ) if 'vae' in mconfig: vae = self._load_vae(mconfig['vae']) pipeline_args.update(vae=vae) - - cache_dir = None if isinstance(name_or_path,Path) else os.path.join(Globals.root,'models',name_or_path) - if self.precision == 'float16': - print(' | Using faster float16 precision') - using_fp16 = True - pipeline_args.update(revision="fp16") + if not isinstance(name_or_path,Path): + pipeline_args.update(cache_dir=os.path.join(Globals.root,'models',name_or_path)) + if using_fp16: pipeline_args.update(torch_dtype=torch.float16) + fp_args_list = [{'revision':'fp16'},{}] else: - # TODO: more accurately, "using the model's default precision." - # How do we find out what that is? 
- print(' | Using more accurate float32 precision') - try: - pipeline = StableDiffusionGeneratorPipeline.from_pretrained( - name_or_path, - safety_checker=None, - cache_dir=cache_dir, - # local_files_only=True, - **pipeline_args, - ) - except Exception as e: - if using_fp16: - pipeline_args.pop('revision') + fp_args_list = [{}] + + pipeline = None + for fp_args in fp_args_list: + try: pipeline = StableDiffusionGeneratorPipeline.from_pretrained( name_or_path, - safety_checker=None, - cache_dir=cache_dir, - # local_files_only=True, **pipeline_args, + **fp_args, ) + except OSError as e: + if str(e).startswith('fp16 is not a valid'): + print(f'Could not fetch half-precision version of model {repo_id}; fetching full-precision instead') + else: + print(f'An unexpected error occurred while downloading the model: {e})') + if pipeline: + break + + assert pipeline is not None, OSError(f'"{model_name}" could not be loaded') pipeline.to(self.device) @@ -546,27 +550,30 @@ def _cached_sha256(self,path,data) -> Union[str, bytes]: def _load_vae(self, vae_config): vae_args = {} name_or_path = self.model_name_or_path(vae_config) - using_fp16 = False + using_fp16 = self.precision == 'float16' print(f'>> Loading diffusers VAE from {name_or_path}') - if self.precision == 'float16': - print(' | Using faster float16 precision') - using_fp16 = True - vae_args.update(revision="fp16") + if using_fp16: + print(f' | Using faster float16 precision') vae_args.update(torch_dtype=torch.float16) + fp_args_list = [{'revision':'fp16'},{}] else: - # TODO: more accurately, "using the model's default precision." - # How do we find out what that is? - print(' | Using more accurate float32 precision') + print(f' | Using more accurate float32 precision') + fp_args_list = [{}] - # At some point we might need to be able to use different classes here? But for now I think - # all Stable Diffusion VAE are AutoencoderKL. - try: - vae = AutoencoderKL.from_pretrained(name_or_path, **vae_args) - except Exception as e: - if using_fp16: - vae_args.pop('revision') - vae = AutoencoderKL.from_pretrained(name_or_path, **vae_args) + vae = None + for fp_args in fp_args_list: + # At some point we might need to be able to use different classes here? But for now I think + # all Stable Diffusion VAE are AutoencoderKL. 
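# A standalone sketch of the fp16-first, full-precision-fallback pattern this commit
# applies to both the pipeline and the VAE; the repo id and error handling below are
# illustrative assumptions rather than code taken from the patch.
import torch
from diffusers import AutoencoderKL

def load_vae_with_fallback(repo_id: str, want_fp16: bool):
    # Hub repos of this era publish half-precision weights on an 'fp16' branch;
    # if that branch is missing, retry with the default (float32) revision.
    attempts = [dict(revision='fp16', torch_dtype=torch.float16), {}] if want_fp16 else [{}]
    last_error = None
    for extra_args in attempts:
        try:
            return AutoencoderKL.from_pretrained(repo_id, **extra_args)
        except OSError as e:
            last_error = e
    raise last_error

# e.g. vae = load_vae_with_fallback('stabilityai/sd-vae-ft-mse', want_fp16=True)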
+ try: + vae = AutoencoderKL.from_pretrained(name_or_path, **vae_args, **fp_args) + except OSError as e: + if str(e).startswith('fp16 is not a valid'): + print(f'Could not fetch half-precision version of model {repo_id}; fetching full-precision instead') + else: + print(f'An unexpected error occurred while downloading the model: {e})') + if vae: + break # comment by lstein: I don't know what this does if 'subfolder' in vae_config: diff --git a/scripts/configure_invokeai.py b/scripts/configure_invokeai.py index edc1957b12c..c26359cc357 100755 --- a/scripts/configure_invokeai.py +++ b/scripts/configure_invokeai.py @@ -23,6 +23,7 @@ import transformers from diffusers import StableDiffusionPipeline, AutoencoderKL from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline +from ldm.invoke.devices import choose_precision, choose_torch_device from getpass_asterisk import getpass_asterisk from huggingface_hub import HfFolder, hf_hub_url, login as hf_hub_login, whoami as hf_whoami from huggingface_hub.utils._errors import RevisionNotFoundError @@ -328,20 +329,20 @@ def migrate_models_ckpt(): os.replace(os.path.join(model_path,'model.ckpt'),os.path.join(model_path,new_name)) #--------------------------------------------- -def download_weight_datasets(models:dict, access_token:str): +def download_weight_datasets(models:dict, access_token:str, precision:str='float32'): migrate_models_ckpt() successful = dict() for mod in models.keys(): print(f'{mod}...',file=sys.stderr,end='') - successful[mod] = _download_repo_or_file(Datasets[mod], access_token) + successful[mod] = _download_repo_or_file(Datasets[mod], access_token, precision=precision) return successful -def _download_repo_or_file(mconfig:DictConfig, access_token:str)->Path: +def _download_repo_or_file(mconfig:DictConfig, access_token:str, precision:str='float32')->Path: path = None if mconfig['format'] == 'ckpt': path = _download_ckpt_weights(mconfig, access_token) else: - path = _download_diffusion_weights(mconfig, access_token) + path = _download_diffusion_weights(mconfig, access_token, precision=precision) return path def _download_ckpt_weights(mconfig:DictConfig, access_token:str)->Path: @@ -355,25 +356,26 @@ def _download_ckpt_weights(mconfig:DictConfig, access_token:str)->Path: access_token=access_token ) -def _download_diffusion_weights(mconfig:DictConfig, access_token:str): +def _download_diffusion_weights(mconfig:DictConfig, access_token:str, precision:str='float32'): repo_id = mconfig['repo_id'] model_class = StableDiffusionGeneratorPipeline if mconfig['format']=='diffusers' else AutoencoderKL + extra_arg_list = [{'revision':'fp16'},{}] if precision=='float16' else [{}] path = None - try: - path = download_from_hf( - model_class, - repo_id, - safety_checker=None, - revision='fp16', - ) - except OSError: - path = download_from_hf( - model_class, - repo_id, - safety_checker=None, - ) - except Exception as e: - print(f'could not download; exception = {type(e)}') + for extra_args in extra_arg_list: + try: + path = download_from_hf( + model_class, + repo_id, + safety_checker=None, + **extra_args, + ) + except OSError as e: + if str(e).startswith('fp16 is not a valid'): + print(f'Could not fetch half-precision version of model {repo_id}; fetching full-precision instead') + else: + print(f'An unexpected error occurred while downloading the model: {e})') + if path: + break return path #--------------------------------------------- @@ -497,8 +499,12 @@ def new_config_file_contents(successfully_downloaded:dict, 
config_file:str)->str stanza['description'] = mod['description'] stanza['repo_id'] = mod['repo_id'] stanza['format'] = mod['format'] - stanza['width'] = mod['width'] - stanza['height'] = mod['height'] + # diffusers don't need width and height (probably .ckpt doesn't either) + # so we no longer require these in INITIAL_MODELS.yaml + if 'width' in mod: + stanza['width'] = mod['width'] + if 'height' in mod: + stanza['height'] = mod['height'] if 'file' in mod: stanza['weights'] = os.path.relpath(successfully_downloaded[model], start=Globals.root) stanza['config'] = os.path.normpath(os.path.join(SD_Configs,mod['config'])) @@ -633,11 +639,13 @@ def download_safety_checker(): #------------------------------------- def download_weights(opt:dict) -> Union[str, None]: + precision = 'float32' if opt.full_precision else choose_precision(torch.device(choose_torch_device())) + if opt.yes_to_all: models = recommended_datasets() access_token = authenticate(opt.yes_to_all) if len(models)>0: - successfully_downloaded = download_weight_datasets(models, access_token) + successfully_downloaded = download_weight_datasets(models, access_token, precision=precision) update_config_file(successfully_downloaded,opt) return @@ -659,7 +667,7 @@ def download_weights(opt:dict) -> Union[str, None]: HfFolder.save_token(access_token) print('\n** DOWNLOADING WEIGHTS **') - successfully_downloaded = download_weight_datasets(models, access_token) + successfully_downloaded = download_weight_datasets(models, access_token, precision=precision) update_config_file(successfully_downloaded,opt) if len(successfully_downloaded) < len(models): From c73112d12a347c0a6bba3ec0885650513c896a7b Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 22 Dec 2022 20:03:29 +0000 Subject: [PATCH 084/199] add on-the-fly conversion of .ckpt to diffusers models 1. On-the-fly conversion code can be found in the file ldm/invoke/ckpt_to_diffusers.py. 2. A new !optimize command has been added to the CLI. Should be ported to Web GUI. User experience on the CLI is this: ``` invoke> !optimize /home/lstein/invokeai/models/ldm/stable-diffusion-v1/sd-v1-4.ckpt INFO: Converting legacy weights file /home/lstein/invokeai/models/ldm/stable-diffusion-v1/sd-v1-4.ckpt to optimized diffuser model. This operation will take 30-60s to complete. Success. Optimized model is now located at /home/lstein/tmp/invokeai/models/optimized-ckpts/sd-v1-4 Writing new config file entry for sd-v1-4... >> New configuration: sd-v1-4: description: Optimized version of sd-v1-4 format: diffusers path: /home/lstein/tmp/invokeai/models/optimized-ckpts/sd-v1-4 OK to import [n]? y >> Verifying that new model loads... >> Current VRAM usage: 2.60G >> Offloading stable-diffusion-2.1 to CPU >> Loading diffusers model from /home/lstein/tmp/invokeai/models/optimized-ckpts/sd-v1-4 | Using faster float16 precision You have disabled the safety checker for by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion \ license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances,\ disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 . 
| training width x height = (512 x 512) >> Model loaded in 3.48s >> Max VRAM used to load the model: 2.17G >> Current VRAM usage:2.17G >> Textual inversions available: >> Setting Sampler to k_lms (LMSDiscreteScheduler) Keep model loaded? [y] ``` --- configs/stable-diffusion/v2-inference-v.yaml | 68 ++ ldm/invoke/CLI.py | 50 +- ldm/invoke/args.py | 11 +- ldm/invoke/ckpt_to_diffuser.py | 943 +++++++++++++++++++ ldm/invoke/model_cache.py | 16 +- ldm/invoke/readline.py | 4 +- 6 files changed, 1079 insertions(+), 13 deletions(-) create mode 100644 configs/stable-diffusion/v2-inference-v.yaml create mode 100644 ldm/invoke/ckpt_to_diffuser.py diff --git a/configs/stable-diffusion/v2-inference-v.yaml b/configs/stable-diffusion/v2-inference-v.yaml new file mode 100644 index 00000000000..8ec8dfbfefe --- /dev/null +++ b/configs/stable-diffusion/v2-inference-v.yaml @@ -0,0 +1,68 @@ +model: + base_learning_rate: 1.0e-4 + target: ldm.models.diffusion.ddpm.LatentDiffusion + params: + parameterization: "v" + linear_start: 0.00085 + linear_end: 0.0120 + num_timesteps_cond: 1 + log_every_t: 200 + timesteps: 1000 + first_stage_key: "jpg" + cond_stage_key: "txt" + image_size: 64 + channels: 4 + cond_stage_trainable: false + conditioning_key: crossattn + monitor: val/loss_simple_ema + scale_factor: 0.18215 + use_ema: False # we set this to false because this is an inference only config + + unet_config: + target: ldm.modules.diffusionmodules.openaimodel.UNetModel + params: + use_checkpoint: True + use_fp16: True + image_size: 32 # unused + in_channels: 4 + out_channels: 4 + model_channels: 320 + attention_resolutions: [ 4, 2, 1 ] + num_res_blocks: 2 + channel_mult: [ 1, 2, 4, 4 ] + num_head_channels: 64 # need to fix for flash-attn + use_spatial_transformer: True + use_linear_in_transformer: True + transformer_depth: 1 + context_dim: 1024 + legacy: False + + first_stage_config: + target: ldm.models.autoencoder.AutoencoderKL + params: + embed_dim: 4 + monitor: val/rec_loss + ddconfig: + #attn_type: "vanilla-xformers" + double_z: true + z_channels: 4 + resolution: 256 + in_channels: 3 + out_ch: 3 + ch: 128 + ch_mult: + - 1 + - 2 + - 4 + - 4 + num_res_blocks: 2 + attn_resolutions: [] + dropout: 0.0 + lossconfig: + target: torch.nn.Identity + + cond_stage_config: + target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder + params: + freeze: True + layer: "penultimate" diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index 09dc2193aca..c49349de322 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -460,6 +460,17 @@ def do_command(command:str, gen, opt:Args, completer) -> tuple: completer.add_history(command) operation = None + elif command.startswith('!optimize'): + path = shlex.split(command) + if len(path) < 2: + print('** please provide a path to a .ckpt file') + elif not os.path.exists(path[1]): + print(f'** {path[1]}: file not found') + else: + optimize_model(path[1], gen, opt, completer) + completer.add_history(command) + operation = None + elif command.startswith('!edit'): path = shlex.split(command) if len(path) < 2: @@ -532,6 +543,7 @@ def add_weights_to_config(model_path:str, gen, opt, completer): new_config = {} new_config['weights'] = model_path + new_config['format'] = 'ckpt' done = False while not done: @@ -579,6 +591,36 @@ def add_weights_to_config(model_path:str, gen, opt, completer): if write_config_file(opt.conf, gen, model_name, new_config, make_default=make_default): completer.add_model(model_name) +def optimize_model(model_path:str, gen, opt, completer): + from 
ldm.invoke.ckpt_to_diffuser import convert_ckpt_to_diffuser + import transformers + basename = os.path.basename(os.path.splitext(model_path)[0]) + dump_path = os.path.join(Globals.root, 'models','optimized-ckpts',basename) + if os.path.exists(dump_path): + print(f'ERROR: The path {dump_path} already exists. Please move or remove it and try again.') + return + + print(f'INFO: Converting legacy weights file {model_path} to optimized diffuser model.') + print(f' This operation will take 30-60s to complete.') + try: + verbosity =transformers.logging.get_verbosity() + transformers.logging.set_verbosity_error() + convert_ckpt_to_diffuser(model_path, dump_path) + transformers.logging.set_verbosity(verbosity) + print(f'Success. Optimized model is now located at {dump_path}') + print(f'Writing new config file entry for {basename}...') + model_name = basename + new_config = dict( + path=dump_path, + description=f'Optimized version of {basename}', + format='diffusers', + ) + if write_config_file(opt.conf, gen, model_name, new_config): + completer.add_model(model_name) + except Exception as e: + print(f'** Conversion failed: {str(e)}') + traceback.print_exc() + def del_config(model_name:str, gen, opt, completer): current_model = gen.model_name if model_name == current_model: @@ -601,7 +643,7 @@ def edit_config(model_name:str, gen, opt, completer): conf = config[model_name] new_config = {} completer.complete_extensions(('.yaml','.yml','.ckpt','.vae.pt')) - for field in ('description', 'weights', 'vae', 'config', 'width','height'): + for field in ('description', 'weights', 'vae', 'config', 'width', 'height', 'format'): completer.linebuffer = str(conf[field]) if field in conf else '' new_value = input(f'{field}: ') new_config[field] = int(new_value) if field in ('width','height') else new_value @@ -625,8 +667,12 @@ def write_config_file(conf_path, gen, model_name, new_config, clobber=False, mak gen.model_cache.add_model(model_name, new_config, clobber) assert gen.set_model(model_name) is not None, 'model failed to load' except AssertionError as e: + traceback.print_exc() print(f'** aborting **') - gen.model_cache.del_model(model_name) + try: + gen.model_cache.del_model(model_name) + except Exception: + pass return False if make_default: diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index 7fa6e7a22c6..c7417900b69 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -724,11 +724,12 @@ def _create_dream_cmd_parser(self): !NN retrieves the NNth command from the history *Model manipulation* - !models -- list models in configs/models.yaml - !switch -- switch to model named - !import_model path/to/weights/file.ckpt -- adds a model to your config - !edit_model -- edit a model's description - !del_model -- delete a model + !models -- list models in configs/models.yaml + !switch -- switch to model named + !import_model path/to/weights/file.ckpt -- adds a .ckpt model to your config + !optimize_model path/to/weights/file.ckpt -- converts a .ckpt file model a diffusers model + !edit_model -- edit a model's description + !del_model -- delete a model """ ) render_group = parser.add_argument_group('General rendering') diff --git a/ldm/invoke/ckpt_to_diffuser.py b/ldm/invoke/ckpt_to_diffuser.py new file mode 100644 index 00000000000..15702ccb35b --- /dev/null +++ b/ldm/invoke/ckpt_to_diffuser.py @@ -0,0 +1,943 @@ +# coding=utf-8 +# Copyright 2022 The HuggingFace Inc. team. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Adapted for use as a module by Lincoln Stein +""" Conversion script for the LDM checkpoints. """ + +import os +import re +import torch +from ldm.invoke.globals import Globals + +try: + from omegaconf import OmegaConf +except ImportError: + raise ImportError( + "OmegaConf is required to convert the LDM checkpoints. Please install it with `pip install OmegaConf`." + ) + +from diffusers import ( + AutoencoderKL, + DDIMScheduler, + DPMSolverMultistepScheduler, + EulerAncestralDiscreteScheduler, + EulerDiscreteScheduler, + HeunDiscreteScheduler, + LDMTextToImagePipeline, + LMSDiscreteScheduler, + PNDMScheduler, + StableDiffusionPipeline, + UNet2DConditionModel, +) +from diffusers.pipelines.latent_diffusion.pipeline_latent_diffusion import LDMBertConfig, LDMBertModel +from diffusers.pipelines.paint_by_example import PaintByExampleImageEncoder, PaintByExamplePipeline +from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker +from transformers import AutoFeatureExtractor, BertTokenizerFast, CLIPTextModel, CLIPTokenizer, CLIPVisionConfig + +def shave_segments(path, n_shave_prefix_segments=1): + """ + Removes segments. Positive values shave the first segments, negative shave the last segments. 
+ """ + if n_shave_prefix_segments >= 0: + return ".".join(path.split(".")[n_shave_prefix_segments:]) + else: + return ".".join(path.split(".")[:n_shave_prefix_segments]) + + +def renew_resnet_paths(old_list, n_shave_prefix_segments=0): + """ + Updates paths inside resnets to the new naming scheme (local renaming) + """ + mapping = [] + for old_item in old_list: + new_item = old_item.replace("in_layers.0", "norm1") + new_item = new_item.replace("in_layers.2", "conv1") + + new_item = new_item.replace("out_layers.0", "norm2") + new_item = new_item.replace("out_layers.3", "conv2") + + new_item = new_item.replace("emb_layers.1", "time_emb_proj") + new_item = new_item.replace("skip_connection", "conv_shortcut") + + new_item = shave_segments(new_item, n_shave_prefix_segments=n_shave_prefix_segments) + + mapping.append({"old": old_item, "new": new_item}) + + return mapping + + +def renew_vae_resnet_paths(old_list, n_shave_prefix_segments=0): + """ + Updates paths inside resnets to the new naming scheme (local renaming) + """ + mapping = [] + for old_item in old_list: + new_item = old_item + + new_item = new_item.replace("nin_shortcut", "conv_shortcut") + new_item = shave_segments(new_item, n_shave_prefix_segments=n_shave_prefix_segments) + + mapping.append({"old": old_item, "new": new_item}) + + return mapping + + +def renew_attention_paths(old_list, n_shave_prefix_segments=0): + """ + Updates paths inside attentions to the new naming scheme (local renaming) + """ + mapping = [] + for old_item in old_list: + new_item = old_item + + # new_item = new_item.replace('norm.weight', 'group_norm.weight') + # new_item = new_item.replace('norm.bias', 'group_norm.bias') + + # new_item = new_item.replace('proj_out.weight', 'proj_attn.weight') + # new_item = new_item.replace('proj_out.bias', 'proj_attn.bias') + + # new_item = shave_segments(new_item, n_shave_prefix_segments=n_shave_prefix_segments) + + mapping.append({"old": old_item, "new": new_item}) + + return mapping + + +def renew_vae_attention_paths(old_list, n_shave_prefix_segments=0): + """ + Updates paths inside attentions to the new naming scheme (local renaming) + """ + mapping = [] + for old_item in old_list: + new_item = old_item + + new_item = new_item.replace("norm.weight", "group_norm.weight") + new_item = new_item.replace("norm.bias", "group_norm.bias") + + new_item = new_item.replace("q.weight", "query.weight") + new_item = new_item.replace("q.bias", "query.bias") + + new_item = new_item.replace("k.weight", "key.weight") + new_item = new_item.replace("k.bias", "key.bias") + + new_item = new_item.replace("v.weight", "value.weight") + new_item = new_item.replace("v.bias", "value.bias") + + new_item = new_item.replace("proj_out.weight", "proj_attn.weight") + new_item = new_item.replace("proj_out.bias", "proj_attn.bias") + + new_item = shave_segments(new_item, n_shave_prefix_segments=n_shave_prefix_segments) + + mapping.append({"old": old_item, "new": new_item}) + + return mapping + + +def assign_to_checkpoint( + paths, checkpoint, old_checkpoint, attention_paths_to_split=None, additional_replacements=None, config=None +): + """ + This does the final conversion step: take locally converted weights and apply a global renaming + to them. It splits attention layers, and takes into account additional replacements + that may arise. + + Assigns the weights to the new checkpoint. + """ + assert isinstance(paths, list), "Paths should be a list of dicts containing 'old' and 'new' keys." + + # Splits the attention layers into three variables. 
+ if attention_paths_to_split is not None: + for path, path_map in attention_paths_to_split.items(): + old_tensor = old_checkpoint[path] + channels = old_tensor.shape[0] // 3 + + target_shape = (-1, channels) if len(old_tensor.shape) == 3 else (-1) + + num_heads = old_tensor.shape[0] // config["num_head_channels"] // 3 + + old_tensor = old_tensor.reshape((num_heads, 3 * channels // num_heads) + old_tensor.shape[1:]) + query, key, value = old_tensor.split(channels // num_heads, dim=1) + + checkpoint[path_map["query"]] = query.reshape(target_shape) + checkpoint[path_map["key"]] = key.reshape(target_shape) + checkpoint[path_map["value"]] = value.reshape(target_shape) + + for path in paths: + new_path = path["new"] + + # These have already been assigned + if attention_paths_to_split is not None and new_path in attention_paths_to_split: + continue + + # Global renaming happens here + new_path = new_path.replace("middle_block.0", "mid_block.resnets.0") + new_path = new_path.replace("middle_block.1", "mid_block.attentions.0") + new_path = new_path.replace("middle_block.2", "mid_block.resnets.1") + + if additional_replacements is not None: + for replacement in additional_replacements: + new_path = new_path.replace(replacement["old"], replacement["new"]) + + # proj_attn.weight has to be converted from conv 1D to linear + if "proj_attn.weight" in new_path: + checkpoint[new_path] = old_checkpoint[path["old"]][:, :, 0] + else: + checkpoint[new_path] = old_checkpoint[path["old"]] + + +def conv_attn_to_linear(checkpoint): + keys = list(checkpoint.keys()) + attn_keys = ["query.weight", "key.weight", "value.weight"] + for key in keys: + if ".".join(key.split(".")[-2:]) in attn_keys: + if checkpoint[key].ndim > 2: + checkpoint[key] = checkpoint[key][:, :, 0, 0] + elif "proj_attn.weight" in key: + if checkpoint[key].ndim > 2: + checkpoint[key] = checkpoint[key][:, :, 0] + + +def create_unet_diffusers_config(original_config, image_size: int): + """ + Creates a config for the diffusers based on the config of the LDM model. 
+ """ + unet_params = original_config.model.params.unet_config.params + vae_params = original_config.model.params.first_stage_config.params.ddconfig + + block_out_channels = [unet_params.model_channels * mult for mult in unet_params.channel_mult] + + down_block_types = [] + resolution = 1 + for i in range(len(block_out_channels)): + block_type = "CrossAttnDownBlock2D" if resolution in unet_params.attention_resolutions else "DownBlock2D" + down_block_types.append(block_type) + if i != len(block_out_channels) - 1: + resolution *= 2 + + up_block_types = [] + for i in range(len(block_out_channels)): + block_type = "CrossAttnUpBlock2D" if resolution in unet_params.attention_resolutions else "UpBlock2D" + up_block_types.append(block_type) + resolution //= 2 + + vae_scale_factor = 2 ** (len(vae_params.ch_mult) - 1) + + head_dim = unet_params.num_heads if "num_heads" in unet_params else None + use_linear_projection = ( + unet_params.use_linear_in_transformer if "use_linear_in_transformer" in unet_params else False + ) + if use_linear_projection: + # stable diffusion 2-base-512 and 2-768 + if head_dim is None: + head_dim = [5, 10, 20, 20] + + config = dict( + sample_size=image_size // vae_scale_factor, + in_channels=unet_params.in_channels, + out_channels=unet_params.out_channels, + down_block_types=tuple(down_block_types), + up_block_types=tuple(up_block_types), + block_out_channels=tuple(block_out_channels), + layers_per_block=unet_params.num_res_blocks, + cross_attention_dim=unet_params.context_dim, + attention_head_dim=head_dim, + use_linear_projection=use_linear_projection, + ) + + return config + + +def create_vae_diffusers_config(original_config, image_size: int): + """ + Creates a config for the diffusers based on the config of the LDM model. + """ + vae_params = original_config.model.params.first_stage_config.params.ddconfig + _ = original_config.model.params.first_stage_config.params.embed_dim + + block_out_channels = [vae_params.ch * mult for mult in vae_params.ch_mult] + down_block_types = ["DownEncoderBlock2D"] * len(block_out_channels) + up_block_types = ["UpDecoderBlock2D"] * len(block_out_channels) + + config = dict( + sample_size=image_size, + in_channels=vae_params.in_channels, + out_channels=vae_params.out_ch, + down_block_types=tuple(down_block_types), + up_block_types=tuple(up_block_types), + block_out_channels=tuple(block_out_channels), + latent_channels=vae_params.z_channels, + layers_per_block=vae_params.num_res_blocks, + ) + return config + + +def create_diffusers_schedular(original_config): + schedular = DDIMScheduler( + num_train_timesteps=original_config.model.params.timesteps, + beta_start=original_config.model.params.linear_start, + beta_end=original_config.model.params.linear_end, + beta_schedule="scaled_linear", + ) + return schedular + + +def create_ldm_bert_config(original_config): + bert_params = original_config.model.params.cond_stage_config.params + config = LDMBertConfig( + d_model=bert_params.n_embed, + encoder_layers=bert_params.n_layer, + encoder_ffn_dim=bert_params.n_embed * 4, + ) + return config + + +def convert_ldm_unet_checkpoint(checkpoint, config, path=None, extract_ema=False): + """ + Takes a state dict and a config, and returns a converted checkpoint. + """ + + # extract state_dict for UNet + unet_state_dict = {} + keys = list(checkpoint.keys()) + + unet_key = "model.diffusion_model." 
+ # at least a 100 parameters have to start with `model_ema` in order for the checkpoint to be EMA + if sum(k.startswith("model_ema") for k in keys) > 100: + print(f"Checkpoint {path} has both EMA and non-EMA weights.") + if extract_ema: + print( + "In this conversion only the EMA weights are extracted. If you want to instead extract the non-EMA" + " weights (useful to continue fine-tuning), please make sure to remove the `--extract_ema` flag." + ) + for key in keys: + if key.startswith("model.diffusion_model"): + flat_ema_key = "model_ema." + "".join(key.split(".")[1:]) + unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(flat_ema_key) + else: + print( + "In this conversion only the non-EMA weights are extracted. If you want to instead extract the EMA" + " weights (usually better for inference), please make sure to add the `--extract_ema` flag." + ) + + for key in keys: + if key.startswith(unet_key): + unet_state_dict[key.replace(unet_key, "")] = checkpoint.pop(key) + + new_checkpoint = {} + + new_checkpoint["time_embedding.linear_1.weight"] = unet_state_dict["time_embed.0.weight"] + new_checkpoint["time_embedding.linear_1.bias"] = unet_state_dict["time_embed.0.bias"] + new_checkpoint["time_embedding.linear_2.weight"] = unet_state_dict["time_embed.2.weight"] + new_checkpoint["time_embedding.linear_2.bias"] = unet_state_dict["time_embed.2.bias"] + + new_checkpoint["conv_in.weight"] = unet_state_dict["input_blocks.0.0.weight"] + new_checkpoint["conv_in.bias"] = unet_state_dict["input_blocks.0.0.bias"] + + new_checkpoint["conv_norm_out.weight"] = unet_state_dict["out.0.weight"] + new_checkpoint["conv_norm_out.bias"] = unet_state_dict["out.0.bias"] + new_checkpoint["conv_out.weight"] = unet_state_dict["out.2.weight"] + new_checkpoint["conv_out.bias"] = unet_state_dict["out.2.bias"] + + # Retrieves the keys for the input blocks only + num_input_blocks = len({".".join(layer.split(".")[:2]) for layer in unet_state_dict if "input_blocks" in layer}) + input_blocks = { + layer_id: [key for key in unet_state_dict if f"input_blocks.{layer_id}" in key] + for layer_id in range(num_input_blocks) + } + + # Retrieves the keys for the middle blocks only + num_middle_blocks = len({".".join(layer.split(".")[:2]) for layer in unet_state_dict if "middle_block" in layer}) + middle_blocks = { + layer_id: [key for key in unet_state_dict if f"middle_block.{layer_id}" in key] + for layer_id in range(num_middle_blocks) + } + + # Retrieves the keys for the output blocks only + num_output_blocks = len({".".join(layer.split(".")[:2]) for layer in unet_state_dict if "output_blocks" in layer}) + output_blocks = { + layer_id: [key for key in unet_state_dict if f"output_blocks.{layer_id}" in key] + for layer_id in range(num_output_blocks) + } + + for i in range(1, num_input_blocks): + block_id = (i - 1) // (config["layers_per_block"] + 1) + layer_in_block_id = (i - 1) % (config["layers_per_block"] + 1) + + resnets = [ + key for key in input_blocks[i] if f"input_blocks.{i}.0" in key and f"input_blocks.{i}.0.op" not in key + ] + attentions = [key for key in input_blocks[i] if f"input_blocks.{i}.1" in key] + + if f"input_blocks.{i}.0.op.weight" in unet_state_dict: + new_checkpoint[f"down_blocks.{block_id}.downsamplers.0.conv.weight"] = unet_state_dict.pop( + f"input_blocks.{i}.0.op.weight" + ) + new_checkpoint[f"down_blocks.{block_id}.downsamplers.0.conv.bias"] = unet_state_dict.pop( + f"input_blocks.{i}.0.op.bias" + ) + + paths = renew_resnet_paths(resnets) + meta_path = {"old": f"input_blocks.{i}.0", "new": 
f"down_blocks.{block_id}.resnets.{layer_in_block_id}"} + assign_to_checkpoint( + paths, new_checkpoint, unet_state_dict, additional_replacements=[meta_path], config=config + ) + + if len(attentions): + paths = renew_attention_paths(attentions) + meta_path = {"old": f"input_blocks.{i}.1", "new": f"down_blocks.{block_id}.attentions.{layer_in_block_id}"} + assign_to_checkpoint( + paths, new_checkpoint, unet_state_dict, additional_replacements=[meta_path], config=config + ) + + resnet_0 = middle_blocks[0] + attentions = middle_blocks[1] + resnet_1 = middle_blocks[2] + + resnet_0_paths = renew_resnet_paths(resnet_0) + assign_to_checkpoint(resnet_0_paths, new_checkpoint, unet_state_dict, config=config) + + resnet_1_paths = renew_resnet_paths(resnet_1) + assign_to_checkpoint(resnet_1_paths, new_checkpoint, unet_state_dict, config=config) + + attentions_paths = renew_attention_paths(attentions) + meta_path = {"old": "middle_block.1", "new": "mid_block.attentions.0"} + assign_to_checkpoint( + attentions_paths, new_checkpoint, unet_state_dict, additional_replacements=[meta_path], config=config + ) + + for i in range(num_output_blocks): + block_id = i // (config["layers_per_block"] + 1) + layer_in_block_id = i % (config["layers_per_block"] + 1) + output_block_layers = [shave_segments(name, 2) for name in output_blocks[i]] + output_block_list = {} + + for layer in output_block_layers: + layer_id, layer_name = layer.split(".")[0], shave_segments(layer, 1) + if layer_id in output_block_list: + output_block_list[layer_id].append(layer_name) + else: + output_block_list[layer_id] = [layer_name] + + if len(output_block_list) > 1: + resnets = [key for key in output_blocks[i] if f"output_blocks.{i}.0" in key] + attentions = [key for key in output_blocks[i] if f"output_blocks.{i}.1" in key] + + resnet_0_paths = renew_resnet_paths(resnets) + paths = renew_resnet_paths(resnets) + + meta_path = {"old": f"output_blocks.{i}.0", "new": f"up_blocks.{block_id}.resnets.{layer_in_block_id}"} + assign_to_checkpoint( + paths, new_checkpoint, unet_state_dict, additional_replacements=[meta_path], config=config + ) + + if ["conv.weight", "conv.bias"] in output_block_list.values(): + index = list(output_block_list.values()).index(["conv.weight", "conv.bias"]) + new_checkpoint[f"up_blocks.{block_id}.upsamplers.0.conv.weight"] = unet_state_dict[ + f"output_blocks.{i}.{index}.conv.weight" + ] + new_checkpoint[f"up_blocks.{block_id}.upsamplers.0.conv.bias"] = unet_state_dict[ + f"output_blocks.{i}.{index}.conv.bias" + ] + + # Clear attentions as they have been attributed above. + if len(attentions) == 2: + attentions = [] + + if len(attentions): + paths = renew_attention_paths(attentions) + meta_path = { + "old": f"output_blocks.{i}.1", + "new": f"up_blocks.{block_id}.attentions.{layer_in_block_id}", + } + assign_to_checkpoint( + paths, new_checkpoint, unet_state_dict, additional_replacements=[meta_path], config=config + ) + else: + resnet_0_paths = renew_resnet_paths(output_block_layers, n_shave_prefix_segments=1) + for path in resnet_0_paths: + old_path = ".".join(["output_blocks", str(i), path["old"]]) + new_path = ".".join(["up_blocks", str(block_id), "resnets", str(layer_in_block_id), path["new"]]) + + new_checkpoint[new_path] = unet_state_dict[old_path] + + return new_checkpoint + + +def convert_ldm_vae_checkpoint(checkpoint, config): + # extract state dict for VAE + vae_state_dict = {} + vae_key = "first_stage_model." 
+ keys = list(checkpoint.keys()) + for key in keys: + if key.startswith(vae_key): + vae_state_dict[key.replace(vae_key, "")] = checkpoint.get(key) + + new_checkpoint = {} + + new_checkpoint["encoder.conv_in.weight"] = vae_state_dict["encoder.conv_in.weight"] + new_checkpoint["encoder.conv_in.bias"] = vae_state_dict["encoder.conv_in.bias"] + new_checkpoint["encoder.conv_out.weight"] = vae_state_dict["encoder.conv_out.weight"] + new_checkpoint["encoder.conv_out.bias"] = vae_state_dict["encoder.conv_out.bias"] + new_checkpoint["encoder.conv_norm_out.weight"] = vae_state_dict["encoder.norm_out.weight"] + new_checkpoint["encoder.conv_norm_out.bias"] = vae_state_dict["encoder.norm_out.bias"] + + new_checkpoint["decoder.conv_in.weight"] = vae_state_dict["decoder.conv_in.weight"] + new_checkpoint["decoder.conv_in.bias"] = vae_state_dict["decoder.conv_in.bias"] + new_checkpoint["decoder.conv_out.weight"] = vae_state_dict["decoder.conv_out.weight"] + new_checkpoint["decoder.conv_out.bias"] = vae_state_dict["decoder.conv_out.bias"] + new_checkpoint["decoder.conv_norm_out.weight"] = vae_state_dict["decoder.norm_out.weight"] + new_checkpoint["decoder.conv_norm_out.bias"] = vae_state_dict["decoder.norm_out.bias"] + + new_checkpoint["quant_conv.weight"] = vae_state_dict["quant_conv.weight"] + new_checkpoint["quant_conv.bias"] = vae_state_dict["quant_conv.bias"] + new_checkpoint["post_quant_conv.weight"] = vae_state_dict["post_quant_conv.weight"] + new_checkpoint["post_quant_conv.bias"] = vae_state_dict["post_quant_conv.bias"] + + # Retrieves the keys for the encoder down blocks only + num_down_blocks = len({".".join(layer.split(".")[:3]) for layer in vae_state_dict if "encoder.down" in layer}) + down_blocks = { + layer_id: [key for key in vae_state_dict if f"down.{layer_id}" in key] for layer_id in range(num_down_blocks) + } + + # Retrieves the keys for the decoder up blocks only + num_up_blocks = len({".".join(layer.split(".")[:3]) for layer in vae_state_dict if "decoder.up" in layer}) + up_blocks = { + layer_id: [key for key in vae_state_dict if f"up.{layer_id}" in key] for layer_id in range(num_up_blocks) + } + + for i in range(num_down_blocks): + resnets = [key for key in down_blocks[i] if f"down.{i}" in key and f"down.{i}.downsample" not in key] + + if f"encoder.down.{i}.downsample.conv.weight" in vae_state_dict: + new_checkpoint[f"encoder.down_blocks.{i}.downsamplers.0.conv.weight"] = vae_state_dict.pop( + f"encoder.down.{i}.downsample.conv.weight" + ) + new_checkpoint[f"encoder.down_blocks.{i}.downsamplers.0.conv.bias"] = vae_state_dict.pop( + f"encoder.down.{i}.downsample.conv.bias" + ) + + paths = renew_vae_resnet_paths(resnets) + meta_path = {"old": f"down.{i}.block", "new": f"down_blocks.{i}.resnets"} + assign_to_checkpoint(paths, new_checkpoint, vae_state_dict, additional_replacements=[meta_path], config=config) + + mid_resnets = [key for key in vae_state_dict if "encoder.mid.block" in key] + num_mid_res_blocks = 2 + for i in range(1, num_mid_res_blocks + 1): + resnets = [key for key in mid_resnets if f"encoder.mid.block_{i}" in key] + + paths = renew_vae_resnet_paths(resnets) + meta_path = {"old": f"mid.block_{i}", "new": f"mid_block.resnets.{i - 1}"} + assign_to_checkpoint(paths, new_checkpoint, vae_state_dict, additional_replacements=[meta_path], config=config) + + mid_attentions = [key for key in vae_state_dict if "encoder.mid.attn" in key] + paths = renew_vae_attention_paths(mid_attentions) + meta_path = {"old": "mid.attn_1", "new": "mid_block.attentions.0"} + assign_to_checkpoint(paths, 
new_checkpoint, vae_state_dict, additional_replacements=[meta_path], config=config) + conv_attn_to_linear(new_checkpoint) + + for i in range(num_up_blocks): + block_id = num_up_blocks - 1 - i + resnets = [ + key for key in up_blocks[block_id] if f"up.{block_id}" in key and f"up.{block_id}.upsample" not in key + ] + + if f"decoder.up.{block_id}.upsample.conv.weight" in vae_state_dict: + new_checkpoint[f"decoder.up_blocks.{i}.upsamplers.0.conv.weight"] = vae_state_dict[ + f"decoder.up.{block_id}.upsample.conv.weight" + ] + new_checkpoint[f"decoder.up_blocks.{i}.upsamplers.0.conv.bias"] = vae_state_dict[ + f"decoder.up.{block_id}.upsample.conv.bias" + ] + + paths = renew_vae_resnet_paths(resnets) + meta_path = {"old": f"up.{block_id}.block", "new": f"up_blocks.{i}.resnets"} + assign_to_checkpoint(paths, new_checkpoint, vae_state_dict, additional_replacements=[meta_path], config=config) + + mid_resnets = [key for key in vae_state_dict if "decoder.mid.block" in key] + num_mid_res_blocks = 2 + for i in range(1, num_mid_res_blocks + 1): + resnets = [key for key in mid_resnets if f"decoder.mid.block_{i}" in key] + + paths = renew_vae_resnet_paths(resnets) + meta_path = {"old": f"mid.block_{i}", "new": f"mid_block.resnets.{i - 1}"} + assign_to_checkpoint(paths, new_checkpoint, vae_state_dict, additional_replacements=[meta_path], config=config) + + mid_attentions = [key for key in vae_state_dict if "decoder.mid.attn" in key] + paths = renew_vae_attention_paths(mid_attentions) + meta_path = {"old": "mid.attn_1", "new": "mid_block.attentions.0"} + assign_to_checkpoint(paths, new_checkpoint, vae_state_dict, additional_replacements=[meta_path], config=config) + conv_attn_to_linear(new_checkpoint) + return new_checkpoint + + +def convert_ldm_bert_checkpoint(checkpoint, config): + def _copy_attn_layer(hf_attn_layer, pt_attn_layer): + hf_attn_layer.q_proj.weight.data = pt_attn_layer.to_q.weight + hf_attn_layer.k_proj.weight.data = pt_attn_layer.to_k.weight + hf_attn_layer.v_proj.weight.data = pt_attn_layer.to_v.weight + + hf_attn_layer.out_proj.weight = pt_attn_layer.to_out.weight + hf_attn_layer.out_proj.bias = pt_attn_layer.to_out.bias + + def _copy_linear(hf_linear, pt_linear): + hf_linear.weight = pt_linear.weight + hf_linear.bias = pt_linear.bias + + def _copy_layer(hf_layer, pt_layer): + # copy layer norms + _copy_linear(hf_layer.self_attn_layer_norm, pt_layer[0][0]) + _copy_linear(hf_layer.final_layer_norm, pt_layer[1][0]) + + # copy attn + _copy_attn_layer(hf_layer.self_attn, pt_layer[0][1]) + + # copy MLP + pt_mlp = pt_layer[1][1] + _copy_linear(hf_layer.fc1, pt_mlp.net[0][0]) + _copy_linear(hf_layer.fc2, pt_mlp.net[2]) + + def _copy_layers(hf_layers, pt_layers): + for i, hf_layer in enumerate(hf_layers): + if i != 0: + i += i + pt_layer = pt_layers[i : i + 2] + _copy_layer(hf_layer, pt_layer) + + hf_model = LDMBertModel(config).eval() + + # copy embeds + hf_model.model.embed_tokens.weight = checkpoint.transformer.token_emb.weight + hf_model.model.embed_positions.weight.data = checkpoint.transformer.pos_emb.emb.weight + + # copy layer norm + _copy_linear(hf_model.model.layer_norm, checkpoint.transformer.norm) + + # copy hidden layers + _copy_layers(hf_model.model.layers, checkpoint.transformer.attn_layers.layers) + + _copy_linear(hf_model.to_logits, checkpoint.transformer.to_logits) + + return hf_model + + +def convert_ldm_clip_checkpoint(checkpoint): + text_model = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14") + + keys = list(checkpoint.keys()) + + text_model_dict = {} + + for 
key in keys: + if key.startswith("cond_stage_model.transformer"): + text_model_dict[key[len("cond_stage_model.transformer.") :]] = checkpoint[key] + + text_model.load_state_dict(text_model_dict) + + return text_model + + +textenc_conversion_lst = [ + ("cond_stage_model.model.positional_embedding", "text_model.embeddings.position_embedding.weight"), + ("cond_stage_model.model.token_embedding.weight", "text_model.embeddings.token_embedding.weight"), + ("cond_stage_model.model.ln_final.weight", "text_model.final_layer_norm.weight"), + ("cond_stage_model.model.ln_final.bias", "text_model.final_layer_norm.bias"), +] +textenc_conversion_map = {x[0]: x[1] for x in textenc_conversion_lst} + +textenc_transformer_conversion_lst = [ + # (stable-diffusion, HF Diffusers) + ("resblocks.", "text_model.encoder.layers."), + ("ln_1", "layer_norm1"), + ("ln_2", "layer_norm2"), + (".c_fc.", ".fc1."), + (".c_proj.", ".fc2."), + (".attn", ".self_attn"), + ("ln_final.", "transformer.text_model.final_layer_norm."), + ("token_embedding.weight", "transformer.text_model.embeddings.token_embedding.weight"), + ("positional_embedding", "transformer.text_model.embeddings.position_embedding.weight"), +] +protected = {re.escape(x[0]): x[1] for x in textenc_transformer_conversion_lst} +textenc_pattern = re.compile("|".join(protected.keys())) + + +def convert_paint_by_example_checkpoint(checkpoint): + config = CLIPVisionConfig.from_pretrained("openai/clip-vit-large-patch14") + model = PaintByExampleImageEncoder(config) + + keys = list(checkpoint.keys()) + + text_model_dict = {} + + for key in keys: + if key.startswith("cond_stage_model.transformer"): + text_model_dict[key[len("cond_stage_model.transformer.") :]] = checkpoint[key] + + # load clip vision + model.model.load_state_dict(text_model_dict) + + # load mapper + keys_mapper = { + k[len("cond_stage_model.mapper.res") :]: v + for k, v in checkpoint.items() + if k.startswith("cond_stage_model.mapper") + } + + MAPPING = { + "attn.c_qkv": ["attn1.to_q", "attn1.to_k", "attn1.to_v"], + "attn.c_proj": ["attn1.to_out.0"], + "ln_1": ["norm1"], + "ln_2": ["norm3"], + "mlp.c_fc": ["ff.net.0.proj"], + "mlp.c_proj": ["ff.net.2"], + } + + mapped_weights = {} + for key, value in keys_mapper.items(): + prefix = key[: len("blocks.i")] + suffix = key.split(prefix)[-1].split(".")[-1] + name = key.split(prefix)[-1].split(suffix)[0][1:-1] + mapped_names = MAPPING[name] + + num_splits = len(mapped_names) + for i, mapped_name in enumerate(mapped_names): + new_name = ".".join([prefix, mapped_name, suffix]) + shape = value.shape[0] // num_splits + mapped_weights[new_name] = value[i * shape : (i + 1) * shape] + + model.mapper.load_state_dict(mapped_weights) + + # load final layer norm + model.final_layer_norm.load_state_dict( + { + "bias": checkpoint["cond_stage_model.final_ln.bias"], + "weight": checkpoint["cond_stage_model.final_ln.weight"], + } + ) + + # load final proj + model.proj_out.load_state_dict( + { + "bias": checkpoint["proj_out.bias"], + "weight": checkpoint["proj_out.weight"], + } + ) + + # load uncond vector + model.uncond_vector.data = torch.nn.Parameter(checkpoint["learnable_vector"]) + return model + + +def convert_open_clip_checkpoint(checkpoint): + text_model = CLIPTextModel.from_pretrained("stabilityai/stable-diffusion-2", subfolder="text_encoder") + + keys = list(checkpoint.keys()) + + text_model_dict = {} + + d_model = int(checkpoint["cond_stage_model.model.text_projection"].shape[0]) + + text_model_dict["text_model.embeddings.position_ids"] = 
text_model.text_model.embeddings.get_buffer("position_ids") + + for key in keys: + if "resblocks.23" in key: # Diffusers drops the final layer and only uses the penultimate layer + continue + if key in textenc_conversion_map: + text_model_dict[textenc_conversion_map[key]] = checkpoint[key] + if key.startswith("cond_stage_model.model.transformer."): + new_key = key[len("cond_stage_model.model.transformer.") :] + if new_key.endswith(".in_proj_weight"): + new_key = new_key[: -len(".in_proj_weight")] + new_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], new_key) + text_model_dict[new_key + ".q_proj.weight"] = checkpoint[key][:d_model, :] + text_model_dict[new_key + ".k_proj.weight"] = checkpoint[key][d_model : d_model * 2, :] + text_model_dict[new_key + ".v_proj.weight"] = checkpoint[key][d_model * 2 :, :] + elif new_key.endswith(".in_proj_bias"): + new_key = new_key[: -len(".in_proj_bias")] + new_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], new_key) + text_model_dict[new_key + ".q_proj.bias"] = checkpoint[key][:d_model] + text_model_dict[new_key + ".k_proj.bias"] = checkpoint[key][d_model : d_model * 2] + text_model_dict[new_key + ".v_proj.bias"] = checkpoint[key][d_model * 2 :] + else: + new_key = textenc_pattern.sub(lambda m: protected[re.escape(m.group(0))], new_key) + + text_model_dict[new_key] = checkpoint[key] + + text_model.load_state_dict(text_model_dict) + + return text_model + +def convert_ckpt_to_diffuser(checkpoint_path:str, + dump_path:str, + original_config_file:str=None, + num_in_channels:int=None, + scheduler_type:str='pndm', + pipeline_type:str=None, + image_size:int=None, + prediction_type:str=None, + extract_ema:bool=False, + upcast_attn:bool=False, + ): + + checkpoint = torch.load(checkpoint_path) + + # Sometimes models don't have the global_step item + if "global_step" in checkpoint: + global_step = checkpoint["global_step"] + else: + print("global_step key not found in model") + global_step = None + checkpoint = checkpoint["state_dict"] + + upcast_attention = False + if original_config_file is None: + key_name = "model.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weight" + + if key_name in checkpoint and checkpoint[key_name].shape[-1] == 1024: + original_config_file = os.path.join(Globals.root,'configs','stable-diffusion','v2-inference-v.yaml') + + if global_step == 110000: + # v2.1 needs to upcast attention + upcast_attention = True + else: + original_config_file = os.path.join(Globals.root,'configs','stable-diffusion','v1-inference.yaml') + + original_config = OmegaConf.load(original_config_file) + + if num_in_channels is not None: + original_config["model"]["params"]["unet_config"]["params"]["in_channels"] = num_in_channels + + if ( + "parameterization" in original_config["model"]["params"] + and original_config["model"]["params"]["parameterization"] == "v" + ): + if prediction_type is None: + # NOTE: For stable diffusion 2 base it is recommended to pass `prediction_type=="epsilon"` + # as it relies on a brittle global step parameter here + prediction_type = "epsilon" if global_step == 875000 else "v_prediction" + if image_size is None: + # NOTE: For stable diffusion 2 base one has to pass `image_size==512` + # as it relies on a brittle global step parameter here + image_size = 512 if global_step == 875000 else 768 + else: + if prediction_type is None: + prediction_type = "epsilon" + if image_size is None: + image_size = 512 + + num_train_timesteps = original_config.model.params.timesteps + beta_start 
= original_config.model.params.linear_start + beta_end = original_config.model.params.linear_end + + scheduler = DDIMScheduler( + beta_end=beta_end, + beta_schedule="scaled_linear", + beta_start=beta_start, + num_train_timesteps=num_train_timesteps, + steps_offset=1, + clip_sample=False, + set_alpha_to_one=False, + prediction_type=prediction_type, + ) + # make sure scheduler works correctly with DDIM + scheduler.register_to_config(clip_sample=False) + + if scheduler_type == "pndm": + config = dict(scheduler.config) + config["skip_prk_steps"] = True + scheduler = PNDMScheduler.from_config(config) + elif scheduler_type == "lms": + scheduler = LMSDiscreteScheduler.from_config(scheduler.config) + elif scheduler_type == "heun": + scheduler = HeunDiscreteScheduler.from_config(scheduler.config) + elif scheduler_type == "euler": + scheduler = EulerDiscreteScheduler.from_config(scheduler.config) + elif scheduler_type == "euler-ancestral": + scheduler = EulerAncestralDiscreteScheduler.from_config(scheduler.config) + elif scheduler_type == "dpm": + scheduler = DPMSolverMultistepScheduler.from_config(scheduler.config) + elif scheduler_type == "ddim": + scheduler = scheduler + else: + raise ValueError(f"Scheduler of type {scheduler_type} doesn't exist!") + + # Convert the UNet2DConditionModel model. + unet_config = create_unet_diffusers_config(original_config, image_size=image_size) + unet_config["upcast_attention"] = upcast_attention + unet = UNet2DConditionModel(**unet_config) + + converted_unet_checkpoint = convert_ldm_unet_checkpoint( + checkpoint, unet_config, path=checkpoint_path, extract_ema=extract_ema + ) + + unet.load_state_dict(converted_unet_checkpoint) + + # Convert the VAE model. + vae_config = create_vae_diffusers_config(original_config, image_size=image_size) + converted_vae_checkpoint = convert_ldm_vae_checkpoint(checkpoint, vae_config) + + vae = AutoencoderKL(**vae_config) + vae.load_state_dict(converted_vae_checkpoint) + + # Convert the text model. 
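+    # The text encoder type comes from the pipeline_type argument if given,
+    # otherwise it is inferred from the LDM config's cond_stage_config target,
+    # and the matching diffusers pipeline is assembled below.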
+ model_type = pipeline_type + if model_type is None: + model_type = original_config.model.params.cond_stage_config.target.split(".")[-1] + + if model_type == "FrozenOpenCLIPEmbedder": + text_model = convert_open_clip_checkpoint(checkpoint) + tokenizer = CLIPTokenizer.from_pretrained("stabilityai/stable-diffusion-2", subfolder="tokenizer") + pipe = StableDiffusionPipeline( + vae=vae, + text_encoder=text_model, + tokenizer=tokenizer, + unet=unet, + scheduler=scheduler, + safety_checker=None, + feature_extractor=None, + requires_safety_checker=False, + ) + elif model_type == "PaintByExample": + vision_model = convert_paint_by_example_checkpoint(checkpoint) + tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14") + feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker") + pipe = PaintByExamplePipeline( + vae=vae, + image_encoder=vision_model, + unet=unet, + scheduler=scheduler, + safety_checker=None, + feature_extractor=feature_extractor, + ) + elif model_type in ['FrozenCLIPEmbedder','WeightedFrozenCLIPEmbedder']: + text_model = convert_ldm_clip_checkpoint(checkpoint) + tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14") + safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker") + feature_extractor = AutoFeatureExtractor.from_pretrained("CompVis/stable-diffusion-safety-checker") + pipe = StableDiffusionPipeline( + vae=vae, + text_encoder=text_model, + tokenizer=tokenizer, + unet=unet, + scheduler=scheduler, + safety_checker=safety_checker, + feature_extractor=feature_extractor, + ) + else: + text_config = create_ldm_bert_config(original_config) + text_model = convert_ldm_bert_checkpoint(checkpoint, text_config) + tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased") + pipe = LDMTextToImagePipeline(vqvae=vae, bert=text_model, tokenizer=tokenizer, unet=unet, scheduler=scheduler) + + pipe.save_pretrained(dump_path) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index a67da0ef5e5..bdeb85082d3 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -180,7 +180,7 @@ def del_model(self, model_name:str) -> None: if model_name in self.stack: self.stack.remove(model_name) - def add_model(self, model_name:str, model_attributes:dict, clobber=False) -> None: + def add_model(self, model_name:str, model_attributes:dict, clobber:bool=False) -> None: ''' Update the named model with a dictionary of attributes. Will fail with an assertion error if the name already exists. Pass clobber=True to overwrite. @@ -189,8 +189,14 @@ def add_model(self, model_name:str, model_attributes:dict, clobber=False) -> Non attributes are incorrect or the model name is missing. 
''' omega = self.config - for field in ('description','weights','height','width','config'): - assert field in model_attributes, f'required field {field} is missing' + assert 'format' in model_attributes, f'missing required field "format"' + if model_attributes['format']=='diffusers': + assert 'description' in model_attributes, 'required field "description" is missing' + assert 'path' in model_attributes or 'repo_id' in model_attributes,'model must have either the "path" or "repo_id" fields defined' + else: + for field in ('description','weights','height','width','config'): + assert field in model_attributes, f'required field {field} is missing' + assert (clobber or model_name not in omega), f'attempt to overwrite existing model definition "{model_name}"' config = omega[model_name] if model_name in omega else {} @@ -368,6 +374,8 @@ def _load_diffusers_model(self, mconfig): width = pipeline.vae.block_out_channels[-2] height = pipeline.vae.block_out_channels[-1] + print(f' | training width x height = ({width} x {height})') + return pipeline, width, height, model_hash def model_name_or_path(self, model_name:Union[str,DictConfig]) -> str | Path: @@ -569,7 +577,7 @@ def _load_vae(self, vae_config): vae = AutoencoderKL.from_pretrained(name_or_path, **vae_args, **fp_args) except OSError as e: if str(e).startswith('fp16 is not a valid'): - print(f'Could not fetch half-precision version of model {repo_id}; fetching full-precision instead') + print(f'Could not fetch half-precision version of model {name_or_path}; fetching full-precision instead') else: print(f'An unexpected error occurred while downloading the model: {e})') if vae: diff --git a/ldm/invoke/readline.py b/ldm/invoke/readline.py index 376e0092960..d54d96409b3 100644 --- a/ldm/invoke/readline.py +++ b/ldm/invoke/readline.py @@ -59,7 +59,7 @@ '--png_compression','-z', '--text_mask','-tm', '!fix','!fetch','!replay','!history','!search','!clear', - '!models','!switch','!import_model','!edit_model','!del_model', + '!models','!switch','!import_model','!optimize_model','!edit_model','!del_model', '!mask', ) MODEL_COMMANDS = ( @@ -68,7 +68,7 @@ '!del_model', ) WEIGHT_COMMANDS = ( - '!import_model', + '!import_model','!optimize_model', ) IMG_PATH_COMMANDS = ( '--outdir[=\s]', From b71b789186c63c45f8f24bbe7b2527b39e3a0e31 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 22 Dec 2022 15:48:30 -0500 Subject: [PATCH 085/199] add parallel set of generator files for ckpt legacy generation --- ldm/generate.py | 66 ++- ldm/invoke/ckpt_generator/__init__.py | 4 + ldm/invoke/ckpt_generator/base.py | 338 +++++++++++++++ ldm/invoke/ckpt_generator/embiggen.py | 501 +++++++++++++++++++++++ ldm/invoke/ckpt_generator/img2img.py | 97 +++++ ldm/invoke/ckpt_generator/inpaint.py | 358 ++++++++++++++++ ldm/invoke/ckpt_generator/omnibus.py | 175 ++++++++ ldm/invoke/ckpt_generator/txt2img.py | 88 ++++ ldm/invoke/ckpt_generator/txt2img2img.py | 182 ++++++++ ldm/invoke/model_cache.py | 15 + 10 files changed, 1783 insertions(+), 41 deletions(-) create mode 100644 ldm/invoke/ckpt_generator/__init__.py create mode 100644 ldm/invoke/ckpt_generator/base.py create mode 100644 ldm/invoke/ckpt_generator/embiggen.py create mode 100644 ldm/invoke/ckpt_generator/img2img.py create mode 100644 ldm/invoke/ckpt_generator/inpaint.py create mode 100644 ldm/invoke/ckpt_generator/omnibus.py create mode 100644 ldm/invoke/ckpt_generator/txt2img.py create mode 100644 ldm/invoke/ckpt_generator/txt2img2img.py diff --git a/ldm/generate.py b/ldm/generate.py index cea0ee05c33..5f246ee84fa 
100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -10,6 +10,7 @@ import sys import time import traceback +import importlib import cv2 import numpy as np @@ -179,7 +180,6 @@ def __init__( self.sampler = None self.device = None self.session_peakmem = None - self.generators = {} self.base_generator = None self.seed = None self.outdir = outdir @@ -772,55 +772,36 @@ def _make_images( return init_image,init_mask - # lots o' repeated code here! Turn into a make_func() def _make_base(self): - if not self.generators.get('base'): - from ldm.invoke.generator import Generator - self.generators['base'] = Generator(self.model, self.precision) - return self.generators['base'] + return self._load_generator('generator','Generator') + + def _make_txt2img(self): + return self._load_generator('txt2img','Txt2Img') def _make_img2img(self): - if not self.generators.get('img2img'): - from ldm.invoke.generator.img2img import Img2Img - self.generators['img2img'] = Img2Img(self.model, self.precision) - self.generators['img2img'].free_gpu_mem = self.free_gpu_mem - return self.generators['img2img'] + return self._load_generator('img2img','Img2Img') def _make_embiggen(self): - if not self.generators.get('embiggen'): - from ldm.invoke.generator.embiggen import Embiggen - self.generators['embiggen'] = Embiggen(self.model, self.precision) - return self.generators['embiggen'] - - def _make_txt2img(self): - if not self.generators.get('txt2img'): - from ldm.invoke.generator.txt2img import Txt2Img - self.generators['txt2img'] = Txt2Img(self.model, self.precision) - self.generators['txt2img'].free_gpu_mem = self.free_gpu_mem - return self.generators['txt2img'] - + return self._load_generator('embiggen','Embiggen') + def _make_txt2img2img(self): - if not self.generators.get('txt2img2'): - from ldm.invoke.generator.txt2img2img import Txt2Img2Img - self.generators['txt2img2'] = Txt2Img2Img(self.model, self.precision) - self.generators['txt2img2'].free_gpu_mem = self.free_gpu_mem - return self.generators['txt2img2'] + return self._load_generator('txt2img2img','Txt2Img2Img') def _make_inpaint(self): - if not self.generators.get('inpaint'): - from ldm.invoke.generator.inpaint import Inpaint - self.generators['inpaint'] = Inpaint(self.model, self.precision) - self.generators['inpaint'].free_gpu_mem = self.free_gpu_mem - return self.generators['inpaint'] - - # "omnibus" supports the runwayML custom inpainting model, which does - # txt2img, img2img and inpainting using slight variations on the same code + return self._load_generator('inpaint','Inpaint') + def _make_omnibus(self): - if not self.generators.get('omnibus'): - from ldm.invoke.generator.omnibus import Omnibus - self.generators['omnibus'] = Omnibus(self.model, self.precision) - self.generators['omnibus'].free_gpu_mem = self.free_gpu_mem - return self.generators['omnibus'] + return self._load_generator('omnibus','Omnibus') + + def _load_generator(self, module, class_name): + if self.is_legacy_model(self.model_name): + mn = f'ldm.invoke.ckpt_{module}' + cn = f'Ckpt{class_name}' + else: + mn = f'ldm.invoke.{module}' + cn = class_name + importlib.import_module(mn) + return cn(self.model, self.precision) def load_model(self): ''' @@ -981,6 +962,9 @@ def sample_to_image(self, samples): def sample_to_lowres_estimated_image(self, samples): return self._make_base().sample_to_lowres_estimated_image(samples) + def is_legacy_model(model_name)->bool: + return self.model_cache.is_legacy(model_name) + def _set_sampler(self): if isinstance(self.model, DiffusionPipeline): return 
self._set_scheduler() diff --git a/ldm/invoke/ckpt_generator/__init__.py b/ldm/invoke/ckpt_generator/__init__.py new file mode 100644 index 00000000000..2fa5573c84d --- /dev/null +++ b/ldm/invoke/ckpt_generator/__init__.py @@ -0,0 +1,4 @@ +''' +Initialization file for the ldm.invoke.generator package +''' +from .base import Generator diff --git a/ldm/invoke/ckpt_generator/base.py b/ldm/invoke/ckpt_generator/base.py new file mode 100644 index 00000000000..c73bb50c9bf --- /dev/null +++ b/ldm/invoke/ckpt_generator/base.py @@ -0,0 +1,338 @@ +''' +Base class for ldm.invoke.ckpt_generator.* +including img2img, txt2img, and inpaint + +THESE MODULES ARE TRANSITIONAL AND WILL BE REMOVED AT A FUTURE DATE +WHEN LEGACY CKPT MODEL SUPPORT IS DISCONTINUED. +''' +import torch +import numpy as np +import random +import os +import os.path as osp +import traceback +from tqdm import tqdm, trange +from PIL import Image, ImageFilter, ImageChops +import cv2 as cv +from einops import rearrange, repeat +from pytorch_lightning import seed_everything +from ldm.invoke.devices import choose_autocast +from ldm.models.diffusion.cross_attention_map_saving import AttentionMapSaver +from ldm.util import rand_perlin_2d + +downsampling = 8 +CAUTION_IMG = 'assets/caution.png' + +class CkptGenerator(): + def __init__(self, model, precision): + self.model = model + self.precision = precision + self.seed = None + self.latent_channels = model.channels + self.downsampling_factor = downsampling # BUG: should come from model or config + self.safety_checker = None + self.perlin = 0.0 + self.threshold = 0 + self.variation_amount = 0 + self.with_variations = [] + self.use_mps_noise = False + self.free_gpu_mem = None + self.caution_img = None + + # this is going to be overridden in img2img.py, txt2img.py and inpaint.py + def get_make_image(self,prompt,**kwargs): + """ + Returns a function returning an image derived from the prompt and the initial image + Return value depends on the seed at the time you call it + """ + raise NotImplementedError("image_iterator() must be implemented in a descendent class") + + def set_variation(self, seed, variation_amount, with_variations): + self.seed = seed + self.variation_amount = variation_amount + self.with_variations = with_variations + + def generate(self,prompt,init_image,width,height,sampler, iterations=1,seed=None, + image_callback=None, step_callback=None, threshold=0.0, perlin=0.0, + safety_checker:dict=None, + attention_maps_callback = None, + **kwargs): + scope = choose_autocast(self.precision) + self.safety_checker = safety_checker + attention_maps_images = [] + attention_maps_callback = lambda saver: attention_maps_images.append(saver.get_stacked_maps_image()) + make_image = self.get_make_image( + prompt, + sampler = sampler, + init_image = init_image, + width = width, + height = height, + step_callback = step_callback, + threshold = threshold, + perlin = perlin, + attention_maps_callback = attention_maps_callback, + **kwargs + ) + results = [] + seed = seed if seed is not None and seed >= 0 else self.new_seed() + first_seed = seed + seed, initial_noise = self.generate_initial_noise(seed, width, height) + + # There used to be an additional self.model.ema_scope() here, but it breaks + # the inpaint-1.5 model. Not sure what it did.... ? 
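+        # Each iteration draws its starting noise: fresh noise from the per-image
+        # seed, the fixed initial noise, or a slerp blend of the two when a
+        # variation amount was requested.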
+ with scope(self.model.device.type): + for n in trange(iterations, desc='Generating'): + x_T = None + if self.variation_amount > 0: + seed_everything(seed) + target_noise = self.get_noise(width,height) + x_T = self.slerp(self.variation_amount, initial_noise, target_noise) + elif initial_noise is not None: + # i.e. we specified particular variations + x_T = initial_noise + else: + seed_everything(seed) + try: + x_T = self.get_noise(width,height) + except: + print('** An error occurred while getting initial noise **') + print(traceback.format_exc()) + + image = make_image(x_T) + + if self.safety_checker is not None: + image = self.safety_check(image) + + results.append([image, seed]) + + if image_callback is not None: + attention_maps_image = None if len(attention_maps_images)==0 else attention_maps_images[-1] + image_callback(image, seed, first_seed=first_seed, attention_maps_image=attention_maps_image) + + seed = self.new_seed() + + return results + + def sample_to_image(self,samples)->Image.Image: + """ + Given samples returned from a sampler, converts + it into a PIL Image + """ + x_samples = self.model.decode_first_stage(samples) + x_samples = torch.clamp((x_samples + 1.0) / 2.0, min=0.0, max=1.0) + if len(x_samples) != 1: + raise Exception( + f'>> expected to get a single image, but got {len(x_samples)}') + x_sample = 255.0 * rearrange( + x_samples[0].cpu().numpy(), 'c h w -> h w c' + ) + return Image.fromarray(x_sample.astype(np.uint8)) + + # write an approximate RGB image from latent samples for a single step to PNG + + def repaste_and_color_correct(self, result: Image.Image, init_image: Image.Image, init_mask: Image.Image, mask_blur_radius: int = 8) -> Image.Image: + if init_image is None or init_mask is None: + return result + + # Get the original alpha channel of the mask if there is one. + # Otherwise it is some other black/white image format ('1', 'L' or 'RGB') + pil_init_mask = init_mask.getchannel('A') if init_mask.mode == 'RGBA' else init_mask.convert('L') + pil_init_image = init_image.convert('RGBA') # Add an alpha channel if one doesn't exist + + # Build an image with only visible pixels from source to use as reference for color-matching. 
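+        # The generated pixels under the mask are matched to the source image's
+        # per-channel mean and standard deviation before the original is pasted
+        # back through the (optionally blurred) mask.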
+ init_rgb_pixels = np.asarray(init_image.convert('RGB'), dtype=np.uint8) + init_a_pixels = np.asarray(pil_init_image.getchannel('A'), dtype=np.uint8) + init_mask_pixels = np.asarray(pil_init_mask, dtype=np.uint8) + + # Get numpy version of result + np_image = np.asarray(result, dtype=np.uint8) + + # Mask and calculate mean and standard deviation + mask_pixels = init_a_pixels * init_mask_pixels > 0 + np_init_rgb_pixels_masked = init_rgb_pixels[mask_pixels, :] + np_image_masked = np_image[mask_pixels, :] + + if np_init_rgb_pixels_masked.size > 0: + init_means = np_init_rgb_pixels_masked.mean(axis=0) + init_std = np_init_rgb_pixels_masked.std(axis=0) + gen_means = np_image_masked.mean(axis=0) + gen_std = np_image_masked.std(axis=0) + + # Color correct + np_matched_result = np_image.copy() + np_matched_result[:,:,:] = (((np_matched_result[:,:,:].astype(np.float32) - gen_means[None,None,:]) / gen_std[None,None,:]) * init_std[None,None,:] + init_means[None,None,:]).clip(0, 255).astype(np.uint8) + matched_result = Image.fromarray(np_matched_result, mode='RGB') + else: + matched_result = Image.fromarray(np_image, mode='RGB') + + # Blur the mask out (into init image) by specified amount + if mask_blur_radius > 0: + nm = np.asarray(pil_init_mask, dtype=np.uint8) + nmd = cv.erode(nm, kernel=np.ones((3,3), dtype=np.uint8), iterations=int(mask_blur_radius / 2)) + pmd = Image.fromarray(nmd, mode='L') + blurred_init_mask = pmd.filter(ImageFilter.BoxBlur(mask_blur_radius)) + else: + blurred_init_mask = pil_init_mask + + multiplied_blurred_init_mask = ImageChops.multiply(blurred_init_mask, self.pil_image.split()[-1]) + + # Paste original on color-corrected generation (using blurred mask) + matched_result.paste(init_image, (0,0), mask = multiplied_blurred_init_mask) + return matched_result + + + + def sample_to_lowres_estimated_image(self,samples): + # origingally adapted from code by @erucipe and @keturn here: + # https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/7 + + # these updated numbers for v1.5 are from @torridgristle + v1_5_latent_rgb_factors = torch.tensor([ + # R G B + [ 0.3444, 0.1385, 0.0670], # L1 + [ 0.1247, 0.4027, 0.1494], # L2 + [-0.3192, 0.2513, 0.2103], # L3 + [-0.1307, -0.1874, -0.7445] # L4 + ], dtype=samples.dtype, device=samples.device) + + latent_image = samples[0].permute(1, 2, 0) @ v1_5_latent_rgb_factors + latents_ubyte = (((latent_image + 1) / 2) + .clamp(0, 1) # change scale from -1..1 to 0..1 + .mul(0xFF) # to 0..255 + .byte()).cpu() + + return Image.fromarray(latents_ubyte.numpy()) + + def generate_initial_noise(self, seed, width, height): + initial_noise = None + if self.variation_amount > 0 or len(self.with_variations) > 0: + # use fixed initial noise plus random noise per iteration + seed_everything(seed) + initial_noise = self.get_noise(width,height) + for v_seed, v_weight in self.with_variations: + seed = v_seed + seed_everything(seed) + next_noise = self.get_noise(width,height) + initial_noise = self.slerp(v_weight, initial_noise, next_noise) + if self.variation_amount > 0: + random.seed() # reset RNG to an actually random state, so we can get a random seed for variations + seed = random.randrange(0,np.iinfo(np.uint32).max) + return (seed, initial_noise) + else: + return (seed, None) + + # returns a tensor filled with random numbers from a normal distribution + def get_noise(self,width,height): + """ + Returns a tensor filled with random numbers, either form a normal distribution + (txt2img) or from the latent image (img2img, inpaint) + 
""" + raise NotImplementedError("get_noise() must be implemented in a descendent class") + + def get_perlin_noise(self,width,height): + fixdevice = 'cpu' if (self.model.device.type == 'mps') else self.model.device + return torch.stack([rand_perlin_2d((height, width), (8, 8), device = self.model.device).to(fixdevice) for _ in range(self.latent_channels)], dim=0).to(self.model.device) + + def new_seed(self): + self.seed = random.randrange(0, np.iinfo(np.uint32).max) + return self.seed + + def slerp(self, t, v0, v1, DOT_THRESHOLD=0.9995): + ''' + Spherical linear interpolation + Args: + t (float/np.ndarray): Float value between 0.0 and 1.0 + v0 (np.ndarray): Starting vector + v1 (np.ndarray): Final vector + DOT_THRESHOLD (float): Threshold for considering the two vectors as + colineal. Not recommended to alter this. + Returns: + v2 (np.ndarray): Interpolation vector between v0 and v1 + ''' + inputs_are_torch = False + if not isinstance(v0, np.ndarray): + inputs_are_torch = True + v0 = v0.detach().cpu().numpy() + if not isinstance(v1, np.ndarray): + inputs_are_torch = True + v1 = v1.detach().cpu().numpy() + + dot = np.sum(v0 * v1 / (np.linalg.norm(v0) * np.linalg.norm(v1))) + if np.abs(dot) > DOT_THRESHOLD: + v2 = (1 - t) * v0 + t * v1 + else: + theta_0 = np.arccos(dot) + sin_theta_0 = np.sin(theta_0) + theta_t = theta_0 * t + sin_theta_t = np.sin(theta_t) + s0 = np.sin(theta_0 - theta_t) / sin_theta_0 + s1 = sin_theta_t / sin_theta_0 + v2 = s0 * v0 + s1 * v1 + + if inputs_are_torch: + v2 = torch.from_numpy(v2).to(self.model.device) + + return v2 + + def safety_check(self,image:Image.Image): + ''' + If the CompViz safety checker flags an NSFW image, we + blur it out. + ''' + import diffusers + + checker = self.safety_checker['checker'] + extractor = self.safety_checker['extractor'] + features = extractor([image], return_tensors="pt") + features.to(self.model.device) + + # unfortunately checker requires the numpy version, so we have to convert back + x_image = np.array(image).astype(np.float32) / 255.0 + x_image = x_image[None].transpose(0, 3, 1, 2) + + diffusers.logging.set_verbosity_error() + checked_image, has_nsfw_concept = checker(images=x_image, clip_input=features.pixel_values) + if has_nsfw_concept[0]: + print('** An image with potential non-safe content has been detected. A blurred image will be returned. **') + return self.blur(image) + else: + return image + + def blur(self,input): + blurry = input.filter(filter=ImageFilter.GaussianBlur(radius=32)) + try: + caution = self.get_caution_img() + if caution: + blurry.paste(caution,(0,0),caution) + except FileNotFoundError: + pass + return blurry + + def get_caution_img(self): + path = None + if self.caution_img: + return self.caution_img + # Find the caution image. If we are installed in the package directory it will + # be six levels up. If we are in the repo directory it will be three levels up. + for dots in ('../../..','../../../../../..'): + caution_path = osp.join(osp.dirname(__file__),dots,CAUTION_IMG) + if osp.exists(caution_path): + path = caution_path + break + if not path: + return + caution = Image.open(path) + self.caution_img = caution.resize((caution.width // 2, caution.height //2)) + return self.caution_img + + # this is a handy routine for debugging use. Given a generated sample, + # convert it into a PNG image and store it at the indicated path + def save_sample(self, sample, filepath): + image = self.sample_to_image(sample) + dirname = os.path.dirname(filepath) or '.' 
+ if not os.path.exists(dirname): + print(f'** creating directory {dirname}') + os.makedirs(dirname, exist_ok=True) + image.save(filepath,'PNG') + + diff --git a/ldm/invoke/ckpt_generator/embiggen.py b/ldm/invoke/ckpt_generator/embiggen.py new file mode 100644 index 00000000000..0b43d3d19b3 --- /dev/null +++ b/ldm/invoke/ckpt_generator/embiggen.py @@ -0,0 +1,501 @@ +''' +ldm.invoke.ckpt_generator.embiggen descends from ldm.invoke.ckpt_generator +and generates with ldm.invoke.ckpt_generator.img2img +''' + +import torch +import numpy as np +from tqdm import trange +from PIL import Image +from ldm.invoke.ckpt_generator.base import CkptGenerator +from ldm.invoke.ckpt_generator.img2img import CkptImg2Img +from ldm.invoke.devices import choose_autocast +from ldm.models.diffusion.ddim import DDIMSampler + +class CkptEmbiggen(CkptGenerator): + def __init__(self, model, precision): + super().__init__(model, precision) + self.init_latent = None + + # Replace generate because Embiggen doesn't need/use most of what it does normallly + def generate(self,prompt,iterations=1,seed=None, + image_callback=None, step_callback=None, + **kwargs): + + scope = choose_autocast(self.precision) + make_image = self.get_make_image( + prompt, + step_callback = step_callback, + **kwargs + ) + results = [] + seed = seed if seed else self.new_seed() + + # Noise will be generated by the Img2Img generator when called + with scope(self.model.device.type), self.model.ema_scope(): + for n in trange(iterations, desc='Generating'): + # make_image will call Img2Img which will do the equivalent of get_noise itself + image = make_image() + results.append([image, seed]) + if image_callback is not None: + image_callback(image, seed, prompt_in=prompt) + seed = self.new_seed() + return results + + @torch.no_grad() + def get_make_image( + self, + prompt, + sampler, + steps, + cfg_scale, + ddim_eta, + conditioning, + init_img, + strength, + width, + height, + embiggen, + embiggen_tiles, + step_callback=None, + **kwargs + ): + """ + Returns a function returning an image derived from the prompt and multi-stage twice-baked potato layering over the img2img on the initial image + Return value depends on the seed at the time you call it + """ + assert not sampler.uses_inpainting_model(), "--embiggen is not supported by inpainting models" + + # Construct embiggen arg array, and sanity check arguments + if embiggen == None: # embiggen can also be called with just embiggen_tiles + embiggen = [1.0] # If not specified, assume no scaling + elif embiggen[0] < 0: + embiggen[0] = 1.0 + print( + '>> Embiggen scaling factor cannot be negative, fell back to the default of 1.0 !') + if len(embiggen) < 2: + embiggen.append(0.75) + elif embiggen[1] > 1.0 or embiggen[1] < 0: + embiggen[1] = 0.75 + print('>> Embiggen upscaling strength for ESRGAN must be between 0 and 1, fell back to the default of 0.75 !') + if len(embiggen) < 3: + embiggen.append(0.25) + elif embiggen[2] < 0: + embiggen[2] = 0.25 + print('>> Overlap size for Embiggen must be a positive ratio between 0 and 1 OR a number of pixels, fell back to the default of 0.25 !') + + # Convert tiles from their user-freindly count-from-one to count-from-zero, because we need to do modulo math + # and then sort them, because... people. + if embiggen_tiles: + embiggen_tiles = list(map(lambda n: n-1, embiggen_tiles)) + embiggen_tiles.sort() + + if strength >= 0.5: + print(f'* WARNING: Embiggen may produce mirror motifs if the strength (-f) is too high (currently {strength}). 
Try values between 0.35-0.45.') + + # Prep img2img generator, since we wrap over it + gen_img2img = CkptImg2Img(self.model,self.precision) + + # Open original init image (not a tensor) to manipulate + initsuperimage = Image.open(init_img) + + with Image.open(init_img) as img: + initsuperimage = img.convert('RGB') + + # Size of the target super init image in pixels + initsuperwidth, initsuperheight = initsuperimage.size + + # Increase by scaling factor if not already resized, using ESRGAN as able + if embiggen[0] != 1.0: + initsuperwidth = round(initsuperwidth*embiggen[0]) + initsuperheight = round(initsuperheight*embiggen[0]) + if embiggen[1] > 0: # No point in ESRGAN upscaling if strength is set zero + from ldm.invoke.restoration.realesrgan import ESRGAN + esrgan = ESRGAN() + print( + f'>> ESRGAN upscaling init image prior to cutting with Embiggen with strength {embiggen[1]}') + if embiggen[0] > 2: + initsuperimage = esrgan.process( + initsuperimage, + embiggen[1], # upscale strength + self.seed, + 4, # upscale scale + ) + else: + initsuperimage = esrgan.process( + initsuperimage, + embiggen[1], # upscale strength + self.seed, + 2, # upscale scale + ) + # We could keep recursively re-running ESRGAN for a requested embiggen[0] larger than 4x + # but from personal experiance it doesn't greatly improve anything after 4x + # Resize to target scaling factor resolution + initsuperimage = initsuperimage.resize( + (initsuperwidth, initsuperheight), Image.Resampling.LANCZOS) + + # Use width and height as tile widths and height + # Determine buffer size in pixels + if embiggen[2] < 1: + if embiggen[2] < 0: + embiggen[2] = 0 + overlap_size_x = round(embiggen[2] * width) + overlap_size_y = round(embiggen[2] * height) + else: + overlap_size_x = round(embiggen[2]) + overlap_size_y = round(embiggen[2]) + + # With overall image width and height known, determine how many tiles we need + def ceildiv(a, b): + return -1 * (-a // b) + + # X and Y needs to be determined independantly (we may have savings on one based on the buffer pixel count) + # (initsuperwidth - width) is the area remaining to the right that we need to layers tiles to fill + # (width - overlap_size_x) is how much new we can fill with a single tile + emb_tiles_x = 1 + emb_tiles_y = 1 + if (initsuperwidth - width) > 0: + emb_tiles_x = ceildiv(initsuperwidth - width, + width - overlap_size_x) + 1 + if (initsuperheight - height) > 0: + emb_tiles_y = ceildiv(initsuperheight - height, + height - overlap_size_y) + 1 + # Sanity + assert emb_tiles_x > 1 or emb_tiles_y > 1, f'ERROR: Based on the requested dimensions of {initsuperwidth}x{initsuperheight} and tiles of {width}x{height} you don\'t need to Embiggen! Check your arguments.' 
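+        # Worked example (illustrative numbers only): a 1280x1024 super-image with
+        # 512x512 tiles and an overlap ratio of 0.25 gives overlap_size_x =
+        # overlap_size_y = 128, so each additional tile advances by 512-128 = 384
+        # pixels; emb_tiles_x = ceildiv(1280-512, 384) + 1 = 3 and
+        # emb_tiles_y = ceildiv(1024-512, 384) + 1 = 3, i.e. a 3x3 grid of nine
+        # img2img tiles, with the right-most and bottom-most tiles clamped to the
+        # super-image edge.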
+ + # Prep alpha layers -------------- + # https://stackoverflow.com/questions/69321734/how-to-create-different-transparency-like-gradient-with-python-pil + # agradientL is Left-side transparent + agradientL = Image.linear_gradient('L').rotate( + 90).resize((overlap_size_x, height)) + # agradientT is Top-side transparent + agradientT = Image.linear_gradient('L').resize((width, overlap_size_y)) + # radial corner is the left-top corner, made full circle then cut to just the left-top quadrant + agradientC = Image.new('L', (256, 256)) + for y in range(256): + for x in range(256): + # Find distance to lower right corner (numpy takes arrays) + distanceToLR = np.sqrt([(255 - x) ** 2 + (255 - y) ** 2])[0] + # Clamp values to max 255 + if distanceToLR > 255: + distanceToLR = 255 + #Place the pixel as invert of distance + agradientC.putpixel((x, y), round(255 - distanceToLR)) + + # Create alternative asymmetric diagonal corner to use on "tailing" intersections to prevent hard edges + # Fits for a left-fading gradient on the bottom side and full opacity on the right side. + agradientAsymC = Image.new('L', (256, 256)) + for y in range(256): + for x in range(256): + value = round(max(0, x-(255-y)) * (255 / max(1,y))) + #Clamp values + value = max(0, value) + value = min(255, value) + agradientAsymC.putpixel((x, y), value) + + # Create alpha layers default fully white + alphaLayerL = Image.new("L", (width, height), 255) + alphaLayerT = Image.new("L", (width, height), 255) + alphaLayerLTC = Image.new("L", (width, height), 255) + # Paste gradients into alpha layers + alphaLayerL.paste(agradientL, (0, 0)) + alphaLayerT.paste(agradientT, (0, 0)) + alphaLayerLTC.paste(agradientL, (0, 0)) + alphaLayerLTC.paste(agradientT, (0, 0)) + alphaLayerLTC.paste(agradientC.resize((overlap_size_x, overlap_size_y)), (0, 0)) + # make masks with an asymmetric upper-right corner so when the curved transparent corner of the next tile + # to its right is placed it doesn't reveal a hard trailing semi-transparent edge in the overlapping space + alphaLayerTaC = alphaLayerT.copy() + alphaLayerTaC.paste(agradientAsymC.rotate(270).resize((overlap_size_x, overlap_size_y)), (width - overlap_size_x, 0)) + alphaLayerLTaC = alphaLayerLTC.copy() + alphaLayerLTaC.paste(agradientAsymC.rotate(270).resize((overlap_size_x, overlap_size_y)), (width - overlap_size_x, 0)) + + if embiggen_tiles: + # Individual unconnected sides + alphaLayerR = Image.new("L", (width, height), 255) + alphaLayerR.paste(agradientL.rotate( + 180), (width - overlap_size_x, 0)) + alphaLayerB = Image.new("L", (width, height), 255) + alphaLayerB.paste(agradientT.rotate( + 180), (0, height - overlap_size_y)) + alphaLayerTB = Image.new("L", (width, height), 255) + alphaLayerTB.paste(agradientT, (0, 0)) + alphaLayerTB.paste(agradientT.rotate( + 180), (0, height - overlap_size_y)) + alphaLayerLR = Image.new("L", (width, height), 255) + alphaLayerLR.paste(agradientL, (0, 0)) + alphaLayerLR.paste(agradientL.rotate( + 180), (width - overlap_size_x, 0)) + + # Sides and corner Layers + alphaLayerRBC = Image.new("L", (width, height), 255) + alphaLayerRBC.paste(agradientL.rotate( + 180), (width - overlap_size_x, 0)) + alphaLayerRBC.paste(agradientT.rotate( + 180), (0, height - overlap_size_y)) + alphaLayerRBC.paste(agradientC.rotate(180).resize( + (overlap_size_x, overlap_size_y)), (width - overlap_size_x, height - overlap_size_y)) + alphaLayerLBC = Image.new("L", (width, height), 255) + alphaLayerLBC.paste(agradientL, (0, 0)) + alphaLayerLBC.paste(agradientT.rotate( + 180), (0, 
height - overlap_size_y)) + alphaLayerLBC.paste(agradientC.rotate(90).resize( + (overlap_size_x, overlap_size_y)), (0, height - overlap_size_y)) + alphaLayerRTC = Image.new("L", (width, height), 255) + alphaLayerRTC.paste(agradientL.rotate( + 180), (width - overlap_size_x, 0)) + alphaLayerRTC.paste(agradientT, (0, 0)) + alphaLayerRTC.paste(agradientC.rotate(270).resize( + (overlap_size_x, overlap_size_y)), (width - overlap_size_x, 0)) + + # All but X layers + alphaLayerABT = Image.new("L", (width, height), 255) + alphaLayerABT.paste(alphaLayerLBC, (0, 0)) + alphaLayerABT.paste(agradientL.rotate( + 180), (width - overlap_size_x, 0)) + alphaLayerABT.paste(agradientC.rotate(180).resize( + (overlap_size_x, overlap_size_y)), (width - overlap_size_x, height - overlap_size_y)) + alphaLayerABL = Image.new("L", (width, height), 255) + alphaLayerABL.paste(alphaLayerRTC, (0, 0)) + alphaLayerABL.paste(agradientT.rotate( + 180), (0, height - overlap_size_y)) + alphaLayerABL.paste(agradientC.rotate(180).resize( + (overlap_size_x, overlap_size_y)), (width - overlap_size_x, height - overlap_size_y)) + alphaLayerABR = Image.new("L", (width, height), 255) + alphaLayerABR.paste(alphaLayerLBC, (0, 0)) + alphaLayerABR.paste(agradientT, (0, 0)) + alphaLayerABR.paste(agradientC.resize( + (overlap_size_x, overlap_size_y)), (0, 0)) + alphaLayerABB = Image.new("L", (width, height), 255) + alphaLayerABB.paste(alphaLayerRTC, (0, 0)) + alphaLayerABB.paste(agradientL, (0, 0)) + alphaLayerABB.paste(agradientC.resize( + (overlap_size_x, overlap_size_y)), (0, 0)) + + # All-around layer + alphaLayerAA = Image.new("L", (width, height), 255) + alphaLayerAA.paste(alphaLayerABT, (0, 0)) + alphaLayerAA.paste(agradientT, (0, 0)) + alphaLayerAA.paste(agradientC.resize( + (overlap_size_x, overlap_size_y)), (0, 0)) + alphaLayerAA.paste(agradientC.rotate(270).resize( + (overlap_size_x, overlap_size_y)), (width - overlap_size_x, 0)) + + # Clean up temporary gradients + del agradientL + del agradientT + del agradientC + + def make_image(): + # Make main tiles ------------------------------------------------- + if embiggen_tiles: + print(f'>> Making {len(embiggen_tiles)} Embiggen tiles...') + else: + print( + f'>> Making {(emb_tiles_x * emb_tiles_y)} Embiggen tiles ({emb_tiles_x}x{emb_tiles_y})...') + + emb_tile_store = [] + # Although we could use the same seed for every tile for determinism, at higher strengths this may + # produce duplicated structures for each tile and make the tiling effect more obvious + # instead track and iterate a local seed we pass to Img2Img + seed = self.seed + seedintlimit = np.iinfo(np.uint32).max - 1 # only retreive this one from numpy + + for tile in range(emb_tiles_x * emb_tiles_y): + # Don't iterate on first tile + if tile != 0: + if seed < seedintlimit: + seed += 1 + else: + seed = 0 + + # Determine if this is a re-run and replace + if embiggen_tiles and not tile in embiggen_tiles: + continue + # Get row and column entries + emb_row_i = tile // emb_tiles_x + emb_column_i = tile % emb_tiles_x + # Determine bounds to cut up the init image + # Determine upper-left point + if emb_column_i + 1 == emb_tiles_x: + left = initsuperwidth - width + else: + left = round(emb_column_i * (width - overlap_size_x)) + if emb_row_i + 1 == emb_tiles_y: + top = initsuperheight - height + else: + top = round(emb_row_i * (height - overlap_size_y)) + right = left + width + bottom = top + height + + # Cropped image of above dimension (does not modify the original) + newinitimage = initsuperimage.crop((left, top, right, 
bottom)) + # DEBUG: + # newinitimagepath = init_img[0:-4] + f'_emb_Ti{tile}.png' + # newinitimage.save(newinitimagepath) + + if embiggen_tiles: + print( + f'Making tile #{tile + 1} ({embiggen_tiles.index(tile) + 1} of {len(embiggen_tiles)} requested)') + else: + print( + f'Starting {tile + 1} of {(emb_tiles_x * emb_tiles_y)} tiles') + + # create a torch tensor from an Image + newinitimage = np.array( + newinitimage).astype(np.float32) / 255.0 + newinitimage = newinitimage[None].transpose(0, 3, 1, 2) + newinitimage = torch.from_numpy(newinitimage) + newinitimage = 2.0 * newinitimage - 1.0 + newinitimage = newinitimage.to(self.model.device) + + tile_results = gen_img2img.generate( + prompt, + iterations = 1, + seed = seed, + sampler = DDIMSampler(self.model, device=self.model.device), + steps = steps, + cfg_scale = cfg_scale, + conditioning = conditioning, + ddim_eta = ddim_eta, + image_callback = None, # called only after the final image is generated + step_callback = step_callback, # called after each intermediate image is generated + width = width, + height = height, + init_image = newinitimage, # notice that init_image is different from init_img + mask_image = None, + strength = strength, + ) + + emb_tile_store.append(tile_results[0][0]) + # DEBUG (but, also has other uses), worth saving if you want tiles without a transparency overlap to manually composite + # emb_tile_store[-1].save(init_img[0:-4] + f'_emb_To{tile}.png') + del newinitimage + + # Sanity check we have them all + if len(emb_tile_store) == (emb_tiles_x * emb_tiles_y) or (embiggen_tiles != [] and len(emb_tile_store) == len(embiggen_tiles)): + outputsuperimage = Image.new( + "RGBA", (initsuperwidth, initsuperheight)) + if embiggen_tiles: + outputsuperimage.alpha_composite( + initsuperimage.convert('RGBA'), (0, 0)) + for tile in range(emb_tiles_x * emb_tiles_y): + if embiggen_tiles: + if tile in embiggen_tiles: + intileimage = emb_tile_store.pop(0) + else: + continue + else: + intileimage = emb_tile_store[tile] + intileimage = intileimage.convert('RGBA') + # Get row and column entries + emb_row_i = tile // emb_tiles_x + emb_column_i = tile % emb_tiles_x + if emb_row_i == 0 and emb_column_i == 0 and not embiggen_tiles: + left = 0 + top = 0 + else: + # Determine upper-left point + if emb_column_i + 1 == emb_tiles_x: + left = initsuperwidth - width + else: + left = round(emb_column_i * + (width - overlap_size_x)) + if emb_row_i + 1 == emb_tiles_y: + top = initsuperheight - height + else: + top = round(emb_row_i * (height - overlap_size_y)) + # Handle gradients for various conditions + # Handle emb_rerun case + if embiggen_tiles: + # top of image + if emb_row_i == 0: + if emb_column_i == 0: + if (tile+1) in embiggen_tiles: # Look-ahead right + if (tile+emb_tiles_x) not in embiggen_tiles: # Look-ahead down + intileimage.putalpha(alphaLayerB) + # Otherwise do nothing on this tile + elif (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down only + intileimage.putalpha(alphaLayerR) + else: + intileimage.putalpha(alphaLayerRBC) + elif emb_column_i == emb_tiles_x - 1: + if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down + intileimage.putalpha(alphaLayerL) + else: + intileimage.putalpha(alphaLayerLBC) + else: + if (tile+1) in embiggen_tiles: # Look-ahead right + if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down + intileimage.putalpha(alphaLayerL) + else: + intileimage.putalpha(alphaLayerLBC) + elif (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down only + intileimage.putalpha(alphaLayerLR) + else: + 
intileimage.putalpha(alphaLayerABT) + # bottom of image + elif emb_row_i == emb_tiles_y - 1: + if emb_column_i == 0: + if (tile+1) in embiggen_tiles: # Look-ahead right + intileimage.putalpha(alphaLayerTaC) + else: + intileimage.putalpha(alphaLayerRTC) + elif emb_column_i == emb_tiles_x - 1: + # No tiles to look ahead to + intileimage.putalpha(alphaLayerLTC) + else: + if (tile+1) in embiggen_tiles: # Look-ahead right + intileimage.putalpha(alphaLayerLTaC) + else: + intileimage.putalpha(alphaLayerABB) + # vertical middle of image + else: + if emb_column_i == 0: + if (tile+1) in embiggen_tiles: # Look-ahead right + if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down + intileimage.putalpha(alphaLayerTaC) + else: + intileimage.putalpha(alphaLayerTB) + elif (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down only + intileimage.putalpha(alphaLayerRTC) + else: + intileimage.putalpha(alphaLayerABL) + elif emb_column_i == emb_tiles_x - 1: + if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down + intileimage.putalpha(alphaLayerLTC) + else: + intileimage.putalpha(alphaLayerABR) + else: + if (tile+1) in embiggen_tiles: # Look-ahead right + if (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down + intileimage.putalpha(alphaLayerLTaC) + else: + intileimage.putalpha(alphaLayerABR) + elif (tile+emb_tiles_x) in embiggen_tiles: # Look-ahead down only + intileimage.putalpha(alphaLayerABB) + else: + intileimage.putalpha(alphaLayerAA) + # Handle normal tiling case (much simpler - since we tile left to right, top to bottom) + else: + if emb_row_i == 0 and emb_column_i >= 1: + intileimage.putalpha(alphaLayerL) + elif emb_row_i >= 1 and emb_column_i == 0: + if emb_column_i + 1 == emb_tiles_x: # If we don't have anything that can be placed to the right + intileimage.putalpha(alphaLayerT) + else: + intileimage.putalpha(alphaLayerTaC) + else: + if emb_column_i + 1 == emb_tiles_x: # If we don't have anything that can be placed to the right + intileimage.putalpha(alphaLayerLTC) + else: + intileimage.putalpha(alphaLayerLTaC) + # Layer tile onto final image + outputsuperimage.alpha_composite(intileimage, (left, top)) + else: + print(f'Error: could not find all Embiggen output tiles in memory? Something must have gone wrong with img2img generation.') + + # after internal loops and patching up return Embiggen image + return outputsuperimage + # end of function declaration + return make_image diff --git a/ldm/invoke/ckpt_generator/img2img.py b/ldm/invoke/ckpt_generator/img2img.py new file mode 100644 index 00000000000..e1f12b542e6 --- /dev/null +++ b/ldm/invoke/ckpt_generator/img2img.py @@ -0,0 +1,97 @@ +''' +ldm.invoke.ckpt_generator.img2img descends from ldm.invoke.generator +''' + +import torch +import numpy as np +import PIL +from torch import Tensor +from PIL import Image +from ldm.invoke.devices import choose_autocast +from ldm.invoke.ckpt_generator.base import CkptGenerator +from ldm.models.diffusion.ddim import DDIMSampler +from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent + +class CkptImg2Img(CkptGenerator): + def __init__(self, model, precision): + super().__init__(model, precision) + self.init_latent = None # by get_noise() + + def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, + conditioning,init_image,strength,step_callback=None,threshold=0.0,perlin=0.0,**kwargs): + """ + Returns a function returning an image derived from the prompt and the initial image + Return value depends on the seed at the time you call it. 
+ """ + self.perlin = perlin + + sampler.make_schedule( + ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False + ) + + if isinstance(init_image, PIL.Image.Image): + init_image = self._image_to_tensor(init_image.convert('RGB')) + + scope = choose_autocast(self.precision) + with scope(self.model.device.type): + self.init_latent = self.model.get_first_stage_encoding( + self.model.encode_first_stage(init_image) + ) # move to latent space + + t_enc = int(strength * steps) + uc, c, extra_conditioning_info = conditioning + + def make_image(x_T): + # encode (scaled latent) + z_enc = sampler.stochastic_encode( + self.init_latent, + torch.tensor([t_enc - 1]).to(self.model.device), + noise=x_T + ) + + if self.free_gpu_mem and self.model.model.device != self.model.device: + self.model.model.to(self.model.device) + + # decode it + samples = sampler.decode( + z_enc, + c, + t_enc, + img_callback = step_callback, + unconditional_guidance_scale=cfg_scale, + unconditional_conditioning=uc, + init_latent = self.init_latent, # changes how noising is performed in ksampler + extra_conditioning_info = extra_conditioning_info, + all_timesteps_count = steps + ) + + if self.free_gpu_mem: + self.model.model.to("cpu") + + return self.sample_to_image(samples) + + return make_image + + def get_noise(self,width,height): + device = self.model.device + init_latent = self.init_latent + assert init_latent is not None,'call to get_noise() when init_latent not set' + if device.type == 'mps': + x = torch.randn_like(init_latent, device='cpu').to(device) + else: + x = torch.randn_like(init_latent, device=device) + if self.perlin > 0.0: + shape = init_latent.shape + x = (1-self.perlin)*x + self.perlin*self.get_perlin_noise(shape[3], shape[2]) + return x + + def _image_to_tensor(self, image:Image, normalize:bool=True)->Tensor: + image = np.array(image).astype(np.float32) / 255.0 + if len(image.shape) == 2: # 'L' image, as in a mask + image = image[None,None] + else: # 'RGB' image + image = image[None].transpose(0, 3, 1, 2) + image = torch.from_numpy(image) + if normalize: + image = 2.0 * image - 1.0 + return image.to(self.model.device) diff --git a/ldm/invoke/ckpt_generator/inpaint.py b/ldm/invoke/ckpt_generator/inpaint.py new file mode 100644 index 00000000000..ac99430a12d --- /dev/null +++ b/ldm/invoke/ckpt_generator/inpaint.py @@ -0,0 +1,358 @@ +''' +ldm.invoke.ckpt_generator.inpaint descends from ldm.invoke.ckpt_generator +''' + +import math +import torch +import torchvision.transforms as T +import numpy as np +import cv2 as cv +import PIL +from PIL import Image, ImageFilter, ImageOps, ImageChops +from skimage.exposure.histogram_matching import match_histograms +from einops import rearrange, repeat +from ldm.invoke.devices import choose_autocast +from ldm.invoke.ckpt_generator.img2img import CkptImg2Img +from ldm.models.diffusion.ddim import DDIMSampler +from ldm.models.diffusion.ksampler import KSampler +from ldm.invoke.generator.base import downsampling +from ldm.util import debug_image +from ldm.invoke.patchmatch import PatchMatch +from ldm.invoke.globals import Globals + +def infill_methods()->list[str]: + methods = list() + if PatchMatch.patchmatch_available(): + methods.append('patchmatch') + methods.append('tile') + return methods + +class Inpaint(CkptImg2Img): + def __init__(self, model, precision): + self.init_latent = None + self.pil_image = None + self.pil_mask = None + self.mask_blur_radius = 0 + self.infill_method = None + super().__init__(model, precision) + + # Outpaint support code + def get_tile_images(self, 
image: np.ndarray, width=8, height=8): + _nrows, _ncols, depth = image.shape + _strides = image.strides + + nrows, _m = divmod(_nrows, height) + ncols, _n = divmod(_ncols, width) + if _m != 0 or _n != 0: + return None + + return np.lib.stride_tricks.as_strided( + np.ravel(image), + shape=(nrows, ncols, height, width, depth), + strides=(height * _strides[0], width * _strides[1], *_strides), + writeable=False + ) + + def infill_patchmatch(self, im: Image.Image) -> Image: + if im.mode != 'RGBA': + return im + + # Skip patchmatch if patchmatch isn't available + if not PatchMatch.patchmatch_available(): + return im + + # Patchmatch (note, we may want to expose patch_size? Increasing it significantly impacts performance though) + im_patched_np = PatchMatch.inpaint(im.convert('RGB'), ImageOps.invert(im.split()[-1]), patch_size = 3) + im_patched = Image.fromarray(im_patched_np, mode = 'RGB') + return im_patched + + def tile_fill_missing(self, im: Image.Image, tile_size: int = 16, seed: int = None) -> Image: + # Only fill if there's an alpha layer + if im.mode != 'RGBA': + return im + + a = np.asarray(im, dtype=np.uint8) + + tile_size = (tile_size, tile_size) + + # Get the image as tiles of a specified size + tiles = self.get_tile_images(a,*tile_size).copy() + + # Get the mask as tiles + tiles_mask = tiles[:,:,:,:,3] + + # Find any mask tiles with any fully transparent pixels (we will be replacing these later) + tmask_shape = tiles_mask.shape + tiles_mask = tiles_mask.reshape(math.prod(tiles_mask.shape)) + n,ny = (math.prod(tmask_shape[0:2])), math.prod(tmask_shape[2:]) + tiles_mask = (tiles_mask > 0) + tiles_mask = tiles_mask.reshape((n,ny)).all(axis = 1) + + # Get RGB tiles in single array and filter by the mask + tshape = tiles.shape + tiles_all = tiles.reshape((math.prod(tiles.shape[0:2]), * tiles.shape[2:])) + filtered_tiles = tiles_all[tiles_mask] + + if len(filtered_tiles) == 0: + return im + + # Find all invalid tiles and replace with a random valid tile + replace_count = (tiles_mask == False).sum() + rng = np.random.default_rng(seed = seed) + tiles_all[np.logical_not(tiles_mask)] = filtered_tiles[rng.choice(filtered_tiles.shape[0], replace_count),:,:,:] + + # Convert back to an image + tiles_all = tiles_all.reshape(tshape) + tiles_all = tiles_all.swapaxes(1,2) + st = tiles_all.reshape((math.prod(tiles_all.shape[0:2]), math.prod(tiles_all.shape[2:4]), tiles_all.shape[4])) + si = Image.fromarray(st, mode='RGBA') + + return si + + + def mask_edge(self, mask: Image, edge_size: int, edge_blur: int) -> Image: + npimg = np.asarray(mask, dtype=np.uint8) + + # Detect any partially transparent regions + npgradient = np.uint8(255 * (1.0 - np.floor(np.abs(0.5 - np.float32(npimg) / 255.0) * 2.0))) + + # Detect hard edges + npedge = cv.Canny(npimg, threshold1=100, threshold2=200) + + # Combine + npmask = npgradient + npedge + + # Expand + npmask = cv.dilate(npmask, np.ones((3,3), np.uint8), iterations = int(edge_size / 2)) + + new_mask = Image.fromarray(npmask) + + if edge_blur > 0: + new_mask = new_mask.filter(ImageFilter.BoxBlur(edge_blur)) + + return ImageOps.invert(new_mask) + + + def seam_paint(self, + im: Image.Image, + seam_size: int, + seam_blur: int, + prompt,sampler,steps,cfg_scale,ddim_eta, + conditioning,strength, + noise, + step_callback + ) -> Image.Image: + hard_mask = self.pil_image.split()[-1].copy() + mask = self.mask_edge(hard_mask, seam_size, seam_blur) + + make_image = self.get_make_image( + prompt, + sampler, + steps, + cfg_scale, + ddim_eta, + conditioning, + init_image = 
im.copy().convert('RGBA'), + mask_image = mask.convert('RGB'), # Code currently requires an RGB mask + strength = strength, + mask_blur_radius = 0, + seam_size = 0, + step_callback = step_callback, + inpaint_width = im.width, + inpaint_height = im.height + ) + + seam_noise = self.get_noise(im.width, im.height) + + result = make_image(seam_noise) + + return result + + + @torch.no_grad() + def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, + conditioning,init_image,mask_image,strength, + mask_blur_radius: int = 8, + # Seam settings - when 0, doesn't fill seam + seam_size: int = 0, + seam_blur: int = 0, + seam_strength: float = 0.7, + seam_steps: int = 10, + tile_size: int = 32, + step_callback=None, + inpaint_replace=False, enable_image_debugging=False, + infill_method = None, + inpaint_width=None, + inpaint_height=None, + **kwargs): + """ + Returns a function returning an image derived from the prompt and + the initial image + mask. Return value depends on the seed at + the time you call it. kwargs are 'init_latent' and 'strength' + """ + + self.enable_image_debugging = enable_image_debugging + self.infill_method = infill_method or infill_methods()[0], # The infill method to use + + self.inpaint_width = inpaint_width + self.inpaint_height = inpaint_height + + if isinstance(init_image, PIL.Image.Image): + self.pil_image = init_image.copy() + + # Do infill + if infill_method == 'patchmatch' and PatchMatch.patchmatch_available(): + init_filled = self.infill_patchmatch(self.pil_image.copy()) + else: # if infill_method == 'tile': # Only two methods right now, so always use 'tile' if not patchmatch + init_filled = self.tile_fill_missing( + self.pil_image.copy(), + seed = self.seed, + tile_size = tile_size + ) + init_filled.paste(init_image, (0,0), init_image.split()[-1]) + + # Resize if requested for inpainting + if inpaint_width and inpaint_height: + init_filled = init_filled.resize((inpaint_width, inpaint_height)) + + debug_image(init_filled, "init_filled", debug_status=self.enable_image_debugging) + + # Create init tensor + init_image = self._image_to_tensor(init_filled.convert('RGB')) + + if isinstance(mask_image, PIL.Image.Image): + self.pil_mask = mask_image.copy() + debug_image(mask_image, "mask_image BEFORE multiply with pil_image", debug_status=self.enable_image_debugging) + + mask_image = ImageChops.multiply(mask_image, self.pil_image.split()[-1].convert('RGB')) + self.pil_mask = mask_image + + # Resize if requested for inpainting + if inpaint_width and inpaint_height: + mask_image = mask_image.resize((inpaint_width, inpaint_height)) + + debug_image(mask_image, "mask_image AFTER multiply with pil_image", debug_status=self.enable_image_debugging) + mask_image = mask_image.resize( + ( + mask_image.width // downsampling, + mask_image.height // downsampling + ), + resample=Image.Resampling.NEAREST + ) + mask_image = self._image_to_tensor(mask_image,normalize=False) + + self.mask_blur_radius = mask_blur_radius + + # klms samplers not supported yet, so ignore previous sampler + if isinstance(sampler,KSampler): + print( + f">> Using recommended DDIM sampler for inpainting." + ) + sampler = DDIMSampler(self.model, device=self.model.device) + + sampler.make_schedule( + ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False + ) + + mask_image = mask_image[0][0].unsqueeze(0).repeat(4,1,1).unsqueeze(0) + mask_image = repeat(mask_image, '1 ... 
-> b ...', b=1) + + scope = choose_autocast(self.precision) + with scope(self.model.device.type): + self.init_latent = self.model.get_first_stage_encoding( + self.model.encode_first_stage(init_image) + ) # move to latent space + + t_enc = int(strength * steps) + # todo: support cross-attention control + uc, c, _ = conditioning + + print(f">> target t_enc is {t_enc} steps") + + @torch.no_grad() + def make_image(x_T): + # encode (scaled latent) + z_enc = sampler.stochastic_encode( + self.init_latent, + torch.tensor([t_enc - 1]).to(self.model.device), + noise=x_T + ) + + # to replace masked area with latent noise, weighted by inpaint_replace strength + if inpaint_replace > 0.0: + print(f'>> inpaint will replace what was under the mask with a strength of {inpaint_replace}') + l_noise = self.get_noise(kwargs['width'],kwargs['height']) + inverted_mask = 1.0-mask_image # there will be 1s where the mask is + masked_region = (1.0-inpaint_replace) * inverted_mask * z_enc + inpaint_replace * inverted_mask * l_noise + z_enc = z_enc * mask_image + masked_region + + if self.free_gpu_mem and self.model.model.device != self.model.device: + self.model.model.to(self.model.device) + + # decode it + samples = sampler.decode( + z_enc, + c, + t_enc, + img_callback = step_callback, + unconditional_guidance_scale = cfg_scale, + unconditional_conditioning = uc, + mask = mask_image, + init_latent = self.init_latent + ) + + result = self.sample_to_image(samples) + + # Seam paint if this is our first pass (seam_size set to 0 during seam painting) + if seam_size > 0: + old_image = self.pil_image or init_image + old_mask = self.pil_mask or mask_image + + result = self.seam_paint( + result, + seam_size, + seam_blur, + prompt, + sampler, + seam_steps, + cfg_scale, + ddim_eta, + conditioning, + seam_strength, + x_T, + step_callback) + + # Restore original settings + self.get_make_image(prompt,sampler,steps,cfg_scale,ddim_eta, + conditioning, + old_image, + old_mask, + strength, + mask_blur_radius, seam_size, seam_blur, seam_strength, + seam_steps, tile_size, step_callback, + inpaint_replace, enable_image_debugging, + inpaint_width = inpaint_width, + inpaint_height = inpaint_height, + infill_method = infill_method, + **kwargs) + + return result + + return make_image + + + def sample_to_image(self, samples)->Image.Image: + gen_result = super().sample_to_image(samples).convert('RGB') + debug_image(gen_result, "gen_result", debug_status=self.enable_image_debugging) + + # Resize if necessary + if self.inpaint_width and self.inpaint_height: + gen_result = gen_result.resize(self.pil_image.size) + + if self.pil_image is None or self.pil_mask is None: + return gen_result + + corrected_result = super().repaste_and_color_correct(gen_result, self.pil_image, self.pil_mask, self.mask_blur_radius) + debug_image(corrected_result, "corrected_result", debug_status=self.enable_image_debugging) + + return corrected_result diff --git a/ldm/invoke/ckpt_generator/omnibus.py b/ldm/invoke/ckpt_generator/omnibus.py new file mode 100644 index 00000000000..b728e6b4488 --- /dev/null +++ b/ldm/invoke/ckpt_generator/omnibus.py @@ -0,0 +1,175 @@ +"""omnibus module to be used with the runwayml 9-channel custom inpainting model""" + +import torch +import numpy as np +from einops import repeat +from PIL import Image, ImageOps, ImageChops +from ldm.invoke.devices import choose_autocast +from ldm.invoke.ckpt_generator.base import downsampling +from ldm.invoke.ckpt_generator.img2img import CkptImg2Img +from ldm.invoke.ckpt_generator.txt2img import CkptTxt2Img 
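+
+# The "9-channel" inpainting UNet named in the module docstring takes the usual
+# 4 noisy latent channels plus a concatenated conditioning, built below from the
+# downsampled mask (1 channel) and the VAE-encoded masked image (4 channels),
+# i.e. 4 + 1 + 4 = 9. Rough shape sketch, assuming a 512x512 input and the
+# standard 8x latent downsampling (illustrative values; the channel order
+# follows model.concat_keys):
+#
+#   noisy latents x_T             : [1, 4, 64, 64]
+#   mask, interpolated to latents : [1, 1, 64, 64]
+#   encoded masked image          : [1, 4, 64, 64]
+#   c_concat = torch.cat(...)     : [1, 5, 64, 64]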
+ +class CkptOmnibus(CkptImg2Img,CkptTxt2Img): + def __init__(self, model, precision): + super().__init__(model, precision) + self.pil_mask = None + self.pil_image = None + + def get_make_image( + self, + prompt, + sampler, + steps, + cfg_scale, + ddim_eta, + conditioning, + width, + height, + init_image = None, + mask_image = None, + strength = None, + step_callback=None, + threshold=0.0, + perlin=0.0, + mask_blur_radius: int = 8, + **kwargs): + """ + Returns a function returning an image derived from the prompt and the initial image + Return value depends on the seed at the time you call it. + """ + self.perlin = perlin + num_samples = 1 + + sampler.make_schedule( + ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False + ) + + if isinstance(init_image, Image.Image): + self.pil_image = init_image + if init_image.mode != 'RGB': + init_image = init_image.convert('RGB') + init_image = self._image_to_tensor(init_image) + + if isinstance(mask_image, Image.Image): + self.pil_mask = mask_image + + mask_image = ImageChops.multiply(mask_image.convert('L'), self.pil_image.split()[-1]) + mask_image = self._image_to_tensor(ImageOps.invert(mask_image), normalize=False) + + self.mask_blur_radius = mask_blur_radius + + t_enc = steps + + if init_image is not None and mask_image is not None: # inpainting + masked_image = init_image * (1 - mask_image) # masked image is the image masked by mask - masked regions zero + + elif init_image is not None: # img2img + scope = choose_autocast(self.precision) + + with scope(self.model.device.type): + self.init_latent = self.model.get_first_stage_encoding( + self.model.encode_first_stage(init_image) + ) # move to latent space + + # create a completely black mask (1s) + mask_image = torch.ones(1, 1, init_image.shape[2], init_image.shape[3], device=self.model.device) + # and the masked image is just a copy of the original + masked_image = init_image + + else: # txt2img + init_image = torch.zeros(1, 3, height, width, device=self.model.device) + mask_image = torch.ones(1, 1, height, width, device=self.model.device) + masked_image = init_image + + self.init_latent = init_image + height = init_image.shape[2] + width = init_image.shape[3] + model = self.model + + def make_image(x_T): + with torch.no_grad(): + scope = choose_autocast(self.precision) + with scope(self.model.device.type): + + batch = self.make_batch_sd( + init_image, + mask_image, + masked_image, + prompt=prompt, + device=model.device, + num_samples=num_samples, + ) + + c = model.cond_stage_model.encode(batch["txt"]) + c_cat = list() + for ck in model.concat_keys: + cc = batch[ck].float() + if ck != model.masked_image_key: + bchw = [num_samples, 4, height//8, width//8] + cc = torch.nn.functional.interpolate(cc, size=bchw[-2:]) + else: + cc = model.get_first_stage_encoding(model.encode_first_stage(cc)) + c_cat.append(cc) + c_cat = torch.cat(c_cat, dim=1) + + # cond + cond={"c_concat": [c_cat], "c_crossattn": [c]} + + # uncond cond + uc_cross = model.get_unconditional_conditioning(num_samples, "") + uc_full = {"c_concat": [c_cat], "c_crossattn": [uc_cross]} + shape = [model.channels, height//8, width//8] + + samples, _ = sampler.sample( + batch_size = 1, + S = steps, + x_T = x_T, + conditioning = cond, + shape = shape, + verbose = False, + unconditional_guidance_scale = cfg_scale, + unconditional_conditioning = uc_full, + eta = 1.0, + img_callback = step_callback, + threshold = threshold, + ) + if self.free_gpu_mem: + self.model.model.to("cpu") + return self.sample_to_image(samples) + + return make_image + + def 
make_batch_sd(
+            self,
+            image,
+            mask,
+            masked_image,
+            prompt,
+            device,
+            num_samples=1):
+        batch = {
+                "image": repeat(image.to(device=device), "1 ... -> n ...", n=num_samples),
+                "txt": num_samples * [prompt],
+                "mask": repeat(mask.to(device=device), "1 ... -> n ...", n=num_samples),
+                "masked_image": repeat(masked_image.to(device=device), "1 ... -> n ...", n=num_samples),
+                }
+        return batch
+
+    def get_noise(self, width:int, height:int):
+        if self.init_latent is not None:
+            height = self.init_latent.shape[2]
+            width = self.init_latent.shape[3]
+        return CkptTxt2Img.get_noise(self,width,height)
+
+
+    def sample_to_image(self, samples)->Image.Image:
+        gen_result = super().sample_to_image(samples).convert('RGB')
+
+        if self.pil_image is None or self.pil_mask is None:
+            return gen_result
+        if self.pil_image.size != self.pil_mask.size:
+            return gen_result
+
+        corrected_result = super(CkptImg2Img, self).repaste_and_color_correct(gen_result, self.pil_image, self.pil_mask, self.mask_blur_radius)
+
+        return corrected_result
diff --git a/ldm/invoke/ckpt_generator/txt2img.py b/ldm/invoke/ckpt_generator/txt2img.py
new file mode 100644
index 00000000000..48b83be2edb
--- /dev/null
+++ b/ldm/invoke/ckpt_generator/txt2img.py
@@ -0,0 +1,88 @@
+'''
+ldm.invoke.ckpt_generator.txt2img inherits from ldm.invoke.ckpt_generator
+'''
+
+import torch
+import numpy as np
+from ldm.invoke.ckpt_generator.base import CkptGenerator
+from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent
+import gc
+
+
+class CkptTxt2Img(CkptGenerator):
+    def __init__(self, model, precision):
+        super().__init__(model, precision)
+
+    @torch.no_grad()
+    def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,
+                       conditioning,width,height,step_callback=None,threshold=0.0,perlin=0.0,
+                       attention_maps_callback=None,
+                       **kwargs):
+        """
+        Returns a function returning an image derived from the prompt and the initial image
+        Return value depends on the seed at the time you call it
+        kwargs are 'width' and 'height'
+        """
+        self.perlin = perlin
+        uc, c, extra_conditioning_info = conditioning
+
+        @torch.no_grad()
+        def make_image(x_T):
+            shape = [
+                self.latent_channels,
+                height // self.downsampling_factor,
+                width // self.downsampling_factor,
+            ]
+
+            if self.free_gpu_mem and self.model.model.device != self.model.device:
+                self.model.model.to(self.model.device)
+
+            sampler.make_schedule(ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False)
+
+            samples, _ = sampler.sample(
+                batch_size                   = 1,
+                S                            = steps,
+                x_T                          = x_T,
+                conditioning                 = c,
+                shape                        = shape,
+                verbose                      = False,
+                unconditional_guidance_scale = cfg_scale,
+                unconditional_conditioning   = uc,
+                extra_conditioning_info      = extra_conditioning_info,
+                eta                          = ddim_eta,
+                img_callback                 = step_callback,
+                threshold                    = threshold,
+                attention_maps_callback      = attention_maps_callback,
+            )
+
+            if self.free_gpu_mem:
+                self.model.model.to('cpu')
+                self.model.cond_stage_model.device = 'cpu'
+                self.model.cond_stage_model.to('cpu')
+                gc.collect()
+                torch.cuda.empty_cache()
+
+            return self.sample_to_image(samples)
+
+        return make_image
+
+
+    # returns a tensor filled with random numbers from a normal distribution
+    def get_noise(self,width,height):
+        device = self.model.device
+        if self.use_mps_noise or device.type == 'mps':
+            x = torch.randn([1,
+                             self.latent_channels,
+                             height // self.downsampling_factor,
+                             width  // self.downsampling_factor],
+                            device='cpu').to(device)
+        else:
+            x = torch.randn([1,
+                             self.latent_channels,
+                             height // self.downsampling_factor,
+                             width  // self.downsampling_factor],
+                            device=device)
+        if self.perlin > 0.0:
+            x = (1-self.perlin)*x + self.perlin*self.get_perlin_noise(width  // self.downsampling_factor, height // self.downsampling_factor)
+        return x
+
diff --git a/ldm/invoke/ckpt_generator/txt2img2img.py b/ldm/invoke/ckpt_generator/txt2img2img.py
new file mode 100644
index 00000000000..8c650f9ebda
--- /dev/null
+++ b/ldm/invoke/ckpt_generator/txt2img2img.py
@@ -0,0 +1,182 @@
+'''
+ldm.invoke.ckpt_generator.txt2img2img inherits from ldm.invoke.ckpt_generator
+'''
+
+import torch
+import numpy as np
+import math
+import gc
+from ldm.invoke.ckpt_generator.base import CkptGenerator
+from ldm.invoke.ckpt_generator.omnibus import CkptOmnibus
+from ldm.models.diffusion.ddim import DDIMSampler
+from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent
+from PIL import Image
+
+class CkptTxt2Img2Img(CkptGenerator):
+    def __init__(self, model, precision):
+        super().__init__(model, precision)
+        self.init_latent = None    # for get_noise()
+
+    @torch.no_grad()
+    def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta,
+                       conditioning,width,height,strength,step_callback=None,**kwargs):
+        """
+        Returns a function returning an image derived from the prompt and the initial image
+        Return value depends on the seed at the time you call it
+        kwargs are 'width' and 'height'
+        """
+        uc, c, extra_conditioning_info = conditioning
+        scale_dim = min(width, height)
+        scale = 512 / scale_dim
+
+        init_width = math.ceil(scale * width / 64) * 64
+        init_height = math.ceil(scale * height / 64) * 64
+
+        @torch.no_grad()
+        def make_image(x_T):
+
+            shape = [
+                self.latent_channels,
+                init_height // self.downsampling_factor,
+                init_width // self.downsampling_factor,
+            ]
+
+            sampler.make_schedule(
+                ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False
+            )
+
+            #x = self.get_noise(init_width, init_height)
+            x = x_T
+
+            if self.free_gpu_mem and self.model.model.device != self.model.device:
+                self.model.model.to(self.model.device)
+
+            samples, _ = sampler.sample(
+                batch_size                   = 1,
+                S                            = steps,
+                x_T                          = x,
+                conditioning                 = c,
+                shape                        = shape,
+                verbose                      = False,
+                unconditional_guidance_scale = cfg_scale,
+                unconditional_conditioning   = uc,
+                eta                          = ddim_eta,
+                img_callback                 = step_callback,
+                extra_conditioning_info      = extra_conditioning_info
+            )
+
+            print(
+                f"\n>> Interpolating from {init_width}x{init_height} to {width}x{height} using DDIM sampling"
+            )
+
+            # resizing
+            samples = torch.nn.functional.interpolate(
+                samples,
+                size=(height // self.downsampling_factor, width // self.downsampling_factor),
+                mode="bilinear"
+            )
+
+            t_enc = int(strength * steps)
+            ddim_sampler = DDIMSampler(self.model, device=self.model.device)
+            ddim_sampler.make_schedule(
+                ddim_num_steps=steps, ddim_eta=ddim_eta, verbose=False
+            )
+
+            z_enc = ddim_sampler.stochastic_encode(
+                samples,
+                torch.tensor([t_enc-1]).to(self.model.device),
+                noise=self.get_noise(width,height,False)
+            )
+
+            # decode it
+            samples = ddim_sampler.decode(
+                z_enc,
+                c,
+                t_enc,
+                img_callback = step_callback,
+                unconditional_guidance_scale=cfg_scale,
+                unconditional_conditioning=uc,
+                extra_conditioning_info=extra_conditioning_info,
+                all_timesteps_count=steps
+            )
+
+            if self.free_gpu_mem:
+                self.model.model.to('cpu')
+                self.model.cond_stage_model.device = 'cpu'
+                self.model.cond_stage_model.to('cpu')
+                gc.collect()
+                torch.cuda.empty_cache()
+
+            return self.sample_to_image(samples)
+
+        # in the case of the inpainting model being loaded, the trick of
+        # providing an interpolated latent doesn't 
work, so we transiently + # create a 512x512 PIL image, upscale it, and run the inpainting + # over it in img2img mode. Because the inpaing model is so conservative + # it doesn't change the image (much) + def inpaint_make_image(x_T): + omnibus = CkptOmnibus(self.model,self.precision) + result = omnibus.generate( + prompt, + sampler=sampler, + width=init_width, + height=init_height, + step_callback=step_callback, + steps = steps, + cfg_scale = cfg_scale, + ddim_eta = ddim_eta, + conditioning = conditioning, + **kwargs + ) + assert result is not None and len(result)>0,'** txt2img failed **' + image = result[0][0] + interpolated_image = image.resize((width,height),resample=Image.Resampling.LANCZOS) + print(kwargs.pop('init_image',None)) + result = omnibus.generate( + prompt, + sampler=sampler, + init_image=interpolated_image, + width=width, + height=height, + seed=result[0][1], + step_callback=step_callback, + steps = steps, + cfg_scale = cfg_scale, + ddim_eta = ddim_eta, + conditioning = conditioning, + **kwargs + ) + return result[0][0] + + if sampler.uses_inpainting_model(): + return inpaint_make_image + else: + return make_image + + # returns a tensor filled with random numbers from a normal distribution + def get_noise(self,width,height,scale = True): + # print(f"Get noise: {width}x{height}") + if scale: + trained_square = 512 * 512 + actual_square = width * height + scale = math.sqrt(trained_square / actual_square) + scaled_width = math.ceil(scale * width / 64) * 64 + scaled_height = math.ceil(scale * height / 64) * 64 + else: + scaled_width = width + scaled_height = height + + device = self.model.device + if self.use_mps_noise or device.type == 'mps': + return torch.randn([1, + self.latent_channels, + scaled_height // self.downsampling_factor, + scaled_width // self.downsampling_factor], + device='cpu').to(device) + else: + return torch.randn([1, + self.latent_channels, + scaled_height // self.downsampling_factor, + scaled_width // self.downsampling_factor], + device=device) + diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index bdeb85082d3..859dc360ab5 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -132,6 +132,21 @@ def set_default_model(self,model_name:str) -> None: config[model].pop('default',None) config[model_name]['default'] = True + def model_info(self, model_name:str)->dict: + ''' + Given a model name returns the config object describing it. 
+ ''' + if model_name not in self.config: + return None + return self.config[model_name] + + def is_legacy(self,model_name:str)->bool: + ''' + Return true if this is a legacy (.ckpt) model + ''' + info = self.model_info(model_name) + return info['format']=='ckpt' if info else False + def list_models(self) -> dict: ''' Return a dict of models in the format: From 10b2870dafe794d23a49b6caccdf6b85d9daa3fd Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 22 Dec 2022 21:04:51 +0000 Subject: [PATCH 086/199] generation using legacy ckpt models now working --- ldm/generate.py | 21 +++++++++++---------- ldm/invoke/ckpt_generator/__init__.py | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index 5f246ee84fa..3ec72614d32 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -773,19 +773,19 @@ def _make_images( return init_image,init_mask def _make_base(self): - return self._load_generator('generator','Generator') + return self._load_generator('','Generator') def _make_txt2img(self): - return self._load_generator('txt2img','Txt2Img') + return self._load_generator('.txt2img','Txt2Img') def _make_img2img(self): - return self._load_generator('img2img','Img2Img') + return self._load_generator('.img2img','Img2Img') def _make_embiggen(self): - return self._load_generator('embiggen','Embiggen') + return self._load_generator('.embiggen','Embiggen') def _make_txt2img2img(self): - return self._load_generator('txt2img2img','Txt2Img2Img') + return self._load_generator('.txt2img2img','Txt2Img2Img') def _make_inpaint(self): return self._load_generator('inpaint','Inpaint') @@ -795,13 +795,14 @@ def _make_omnibus(self): def _load_generator(self, module, class_name): if self.is_legacy_model(self.model_name): - mn = f'ldm.invoke.ckpt_{module}' + mn = f'ldm.invoke.ckpt_generator{module}' cn = f'Ckpt{class_name}' else: - mn = f'ldm.invoke.{module}' + mn = f'ldm.invoke.generator{module}' cn = class_name - importlib.import_module(mn) - return cn(self.model, self.precision) + module = importlib.import_module(mn) + constructor = getattr(module,cn) + return constructor(self.model, self.precision) def load_model(self): ''' @@ -962,7 +963,7 @@ def sample_to_image(self, samples): def sample_to_lowres_estimated_image(self, samples): return self._make_base().sample_to_lowres_estimated_image(samples) - def is_legacy_model(model_name)->bool: + def is_legacy_model(self,model_name)->bool: return self.model_cache.is_legacy(model_name) def _set_sampler(self): diff --git a/ldm/invoke/ckpt_generator/__init__.py b/ldm/invoke/ckpt_generator/__init__.py index 2fa5573c84d..d25e192149c 100644 --- a/ldm/invoke/ckpt_generator/__init__.py +++ b/ldm/invoke/ckpt_generator/__init__.py @@ -1,4 +1,4 @@ ''' Initialization file for the ldm.invoke.generator package ''' -from .base import Generator +from .base import CkptGenerator From a4f3d8a11473e24c3f56a4aadd0d860a6f1144de Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Thu, 22 Dec 2022 20:42:19 -0800 Subject: [PATCH 087/199] diffusers: fix missing attention_maps_callback fix for 23eb80b40421b2bb8f4b6d3dd30490d11c447b36 --- ldm/invoke/generator/inpaint.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ldm/invoke/generator/inpaint.py b/ldm/invoke/generator/inpaint.py index a55276d66c3..bbee1e4b0ae 100644 --- a/ldm/invoke/generator/inpaint.py +++ b/ldm/invoke/generator/inpaint.py @@ -16,6 +16,7 @@ from ldm.invoke.patchmatch import PatchMatch from ldm.util import debug_image + def 
infill_methods()->list[str]: methods = list() if PatchMatch.patchmatch_available(): @@ -180,6 +181,7 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, infill_method = None, inpaint_width=None, inpaint_height=None, + attention_maps_callback=None, **kwargs): """ Returns a function returning an image derived from the prompt and From 3a61258d84c7412fa7d1f2076f8d673343641a2f Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 23 Dec 2022 16:58:53 +0000 Subject: [PATCH 088/199] associate legacy CrossAttention with .ckpt models --- ldm/invoke/model_cache.py | 8 ++++---- ldm/models/diffusion/cross_attention_control.py | 4 +++- ldm/modules/attention.py | 1 - 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 859dc360ab5..9f01ab9baff 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -389,7 +389,7 @@ def _load_diffusers_model(self, mconfig): width = pipeline.vae.block_out_channels[-2] height = pipeline.vae.block_out_channels[-1] - print(f' | training width x height = ({width} x {height})') + print(f' | default image dimensions = {width} x {height}') return pipeline, width, height, model_hash @@ -575,7 +575,7 @@ def _load_vae(self, vae_config): name_or_path = self.model_name_or_path(vae_config) using_fp16 = self.precision == 'float16' - print(f'>> Loading diffusers VAE from {name_or_path}') + print(f' | Loading diffusers VAE from {name_or_path}') if using_fp16: print(f' | Using faster float16 precision') vae_args.update(torch_dtype=torch.float16) @@ -592,9 +592,9 @@ def _load_vae(self, vae_config): vae = AutoencoderKL.from_pretrained(name_or_path, **vae_args, **fp_args) except OSError as e: if str(e).startswith('fp16 is not a valid'): - print(f'Could not fetch half-precision version of model {name_or_path}; fetching full-precision instead') + print(f' | Half-precision version of model not available; fetching full-precision instead') else: - print(f'An unexpected error occurred while downloading the model: {e})') + print(f'** An unexpected error occurred while downloading the model: {e})') if vae: break diff --git a/ldm/models/diffusion/cross_attention_control.py b/ldm/models/diffusion/cross_attention_control.py index 66c5567ebdb..eab76e2cbfa 100644 --- a/ldm/models/diffusion/cross_attention_control.py +++ b/ldm/models/diffusion/cross_attention_control.py @@ -6,6 +6,7 @@ import torch import diffusers from torch import nn +from diffusers.models.unet_2d_condition import UNet2DConditionModel # adapted from bloc97's CrossAttentionControl colab # https://github.com/bloc97/CrossAttentionControl @@ -338,7 +339,8 @@ def setup_cross_attention_control(model, context: Context): def get_cross_attention_modules(model, which: CrossAttentionType) -> list[tuple[str, InvokeAICrossAttentionMixin]]: - cross_attention_class: type = InvokeAIDiffusersCrossAttention + from ldm.modules.attention import CrossAttention # avoid circular import + cross_attention_class: type = InvokeAIDiffusersCrossAttention if isinstance(model,UNet2DConditionModel) else CrossAttention which_attn = "attn1" if which is CrossAttentionType.SELF else "attn2" attention_module_tuples = [(name,module) for name, module in model.named_modules() if isinstance(module, cross_attention_class) and which_attn in name] diff --git a/ldm/modules/attention.py b/ldm/modules/attention.py index 5d9c6db7e83..6737ed2060d 100644 --- a/ldm/modules/attention.py +++ b/ldm/modules/attention.py @@ -164,7 +164,6 @@ def get_mem_free_total(device): class 
CrossAttention(nn.Module, InvokeAICrossAttentionMixin): def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.): - print(f"Warning! ldm.modules.attention.CrossAttention is no longer being maintained. Please use InvokeAICrossAttention instead.") super().__init__() InvokeAICrossAttentionMixin.__init__(self) inner_dim = dim_head * heads From b72c878e44e34657fba8d3ef5cbbe027b82aa3b9 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 23 Dec 2022 18:46:41 +0000 Subject: [PATCH 089/199] enable autoconvert New --autoconvert CLI option will scan a designated directory for new .ckpt files, convert them into diffuser models, and import them into models.yaml. Works like this: invoke.py --autoconvert /path/to/weights/directory In ModelCache added two new methods: autoconvert_weights(config_path, weights_directory_path, models_directory_path) convert_and_import(ckpt_path, diffuser_path) --- ldm/invoke/CLI.py | 46 ++++++++------------ ldm/invoke/args.py | 6 +++ ldm/invoke/globals.py | 14 ++++++ ldm/invoke/model_cache.py | 91 +++++++++++++++++++++++++++++++++++++-- 4 files changed, 125 insertions(+), 32 deletions(-) diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index c49349de322..b412d8b8a84 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -17,6 +17,7 @@ from ldm.invoke.image_util import make_grid from ldm.invoke.log import write_log from ldm.invoke.concepts_lib import HuggingFaceConceptsLibrary +from ldm.invoke.model_cache import ModelCache from omegaconf import OmegaConf from pathlib import Path import pyparsing @@ -126,6 +127,14 @@ def main(): emergency_model_reconfigure(opt) sys.exit(-1) + # try to autoconvert new models + # autoimport new .ckpt files + if path := opt.autoconvert: + gen.model_cache.autoconvert_weights( + conf_path=opt.conf, + weights_directory=path, + ) + # web server loops forever if opt.web or opt.gui: invoke_ai_web_server_loop(gen, gfpgan, codeformer, esrgan) @@ -591,35 +600,16 @@ def add_weights_to_config(model_path:str, gen, opt, completer): if write_config_file(opt.conf, gen, model_name, new_config, make_default=make_default): completer.add_model(model_name) -def optimize_model(model_path:str, gen, opt, completer): - from ldm.invoke.ckpt_to_diffuser import convert_ckpt_to_diffuser - import transformers - basename = os.path.basename(os.path.splitext(model_path)[0]) - dump_path = os.path.join(Globals.root, 'models','optimized-ckpts',basename) - if os.path.exists(dump_path): - print(f'ERROR: The path {dump_path} already exists. Please move or remove it and try again.') +def optimize_model(ckpt_path:str, gen, opt, completer): + ckpt_path = Path(ckpt_path) + basename = ckpt_path.stem + diffuser_path = Path(Globals.root, 'models','optimized-ckpts',basename) + if diffuser_path.exists(): + print(f'** {basename} is already optimized. Will not overwrite.') return - - print(f'INFO: Converting legacy weights file {model_path} to optimized diffuser model.') - print(f' This operation will take 30-60s to complete.') - try: - verbosity =transformers.logging.get_verbosity() - transformers.logging.set_verbosity_error() - convert_ckpt_to_diffuser(model_path, dump_path) - transformers.logging.set_verbosity(verbosity) - print(f'Success. 
Optimized model is now located at {dump_path}') - print(f'Writing new config file entry for {basename}...') - model_name = basename - new_config = dict( - path=dump_path, - description=f'Optimized version of {basename}', - format='diffusers', - ) - if write_config_file(opt.conf, gen, model_name, new_config): - completer.add_model(model_name) - except Exception as e: - print(f'** Conversion failed: {str(e)}') - traceback.print_exc() + new_config = gen.model_cache.convert_and_import(ckpt_path, diffuser_path) + if write_config_file(opt.conf, gen, basename, new_config, clobber=False): + completer.add_model(basename) def del_config(model_name:str, gen, opt, completer): current_model = gen.model_name diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index c7417900b69..3d5e28bab61 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -508,6 +508,12 @@ def _create_arg_parser(self): default=False, help='Check for and blur potentially NSFW images. Use --no-nsfw_checker to disable.', ) + model_group.add_argument( + '--autoconvert', + default=None, + type=str, + help='Check the indicated directory for .ckpt weights files at startup and import as optimized diffuser models', + ) model_group.add_argument( '--patchmatch', action=argparse.BooleanOptionalAction, diff --git a/ldm/invoke/globals.py b/ldm/invoke/globals.py index 8dd64b9ea85..10a673a08a8 100644 --- a/ldm/invoke/globals.py +++ b/ldm/invoke/globals.py @@ -13,6 +13,7 @@ import os import os.path as osp +from pathlib import Path from argparse import Namespace Globals = Namespace() @@ -27,9 +28,22 @@ # Where to look for the initialization file Globals.initfile = 'invokeai.init' +Globals.models_dir = 'models' +Globals.config_dir = 'configs' +Globals.autoscan_dir = 'weights' # Try loading patchmatch Globals.try_patchmatch = True # Use CPU even if GPU is available (main use case is for debugging MPS issues) Globals.always_use_cpu = False + +def global_config_dir()->str: + return Path(Globals.root, Globals.config_dir) + +def global_models_dir()->str: + return Path(Globals.root, Globals.models_dir) + +def global_autoscan_dir()->str: + return Path(Globals.root, Globals.autoscan_dir) + diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 9f01ab9baff..c874da81173 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -21,7 +21,7 @@ import torch import transformers -from diffusers import AutoencoderKL +from diffusers import AutoencoderKL, logging as dlogging from huggingface_hub import hf_hub_download from huggingface_hub.utils import RevisionNotFoundError from omegaconf import OmegaConf @@ -30,7 +30,7 @@ from picklescan.scanner import scan_file_path from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline -from ldm.invoke.globals import Globals +from ldm.invoke.globals import Globals, global_config_dir, global_models_dir, global_autoscan_dir from ldm.util import instantiate_from_config, ask_user DEFAULT_MAX_MODELS=2 @@ -96,7 +96,7 @@ def get_model(self, model_name:str): except Exception as e: print(f'** model {model_name} could not be loaded: {str(e)}') - print(traceback.format_exc()) + traceback.print_exc() assert self.current_model,'** FATAL: no current model to restore to' print(f'** restoring {self.current_model}') self.get_model(self.current_model) @@ -366,6 +366,9 @@ def _load_diffusers_model(self, mconfig): else: fp_args_list = [{}] + verbosity = dlogging.get_verbosity() + dlogging.set_verbosity_error() + pipeline = None for fp_args in fp_args_list: try: @@ -374,6 +377,7 @@ 
def _load_diffusers_model(self, mconfig): **pipeline_args, **fp_args, ) + except OSError as e: if str(e).startswith('fp16 is not a valid'): print(f'Could not fetch half-precision version of model {repo_id}; fetching full-precision instead') @@ -382,6 +386,7 @@ def _load_diffusers_model(self, mconfig): if pipeline: break + dlogging.set_verbosity_error() assert pipeline is not None, OSError(f'"{model_name}" could not be loaded') pipeline.to(self.device) @@ -430,6 +435,10 @@ def offload_model(self, model_name:str) -> None: torch.cuda.empty_cache() def scan_model(self, model_name, checkpoint): + ''' + Apply picklescanner to the indicated checkpoint and issue a warning + and option to exit if an infected file is identified. + ''' # scan model print(f'>> Scanning Model: {model_name}') scan_result = scan_file_path(checkpoint) @@ -448,7 +457,81 @@ def scan_model(self, model_name, checkpoint): print("### Exiting InvokeAI") sys.exit() else: - print('>> Model Scanned. OK!!') + print('>> Model scanned ok!') + + def autoconvert_weights( + self, + conf_path:Path, + weights_directory:Path=None, + dest_directory:Path=None, + ): + ''' + Scan the indicated directory for .ckpt files, convert into diffuser models, + and import. + ''' + weights_directory = weights_directory or global_autoscan_dir() + dest_directory = dest_directory or Path(global_models_dir(), 'optimized-ckpts') + + print('>> Checking for unconverted .ckpt files in {weights_directory}') + ckpt_files = dict() + for root, dirs, files in os.walk(weights_directory): + for f in files: + if not f.endswith('.ckpt'): + continue + basename = Path(f).stem + dest = Path(dest_directory,basename) + if not dest.exists(): + ckpt_files[Path(root,f)]=dest + + if len(ckpt_files)==0: + return + + print(f'>> New .ckpt file(s) found in {weights_directory}. Optimizing and importing...') + for ckpt in ckpt_files: + self.convert_and_import(ckpt, ckpt_files[ckpt]) + self.commit(conf_path) + + def convert_and_import(self, ckpt_path:Path, diffuser_path:Path)->dict: + ''' + Convert a legacy ckpt weights file to diffuser model and import + into models.yaml. + ''' + from ldm.invoke.ckpt_to_diffuser import convert_ckpt_to_diffuser + import transformers + if diffuser_path.exists(): + print(f'ERROR: The path {str(diffuser_path)} already exists. Please move or remove it and try again.') + return + + print(f'>> {ckpt_path.name}: optimizing (30-60s).') + try: + model_name = diffuser_path.name + verbosity =transformers.logging.get_verbosity() + transformers.logging.set_verbosity_error() + convert_ckpt_to_diffuser(ckpt_path, diffuser_path) + transformers.logging.set_verbosity(verbosity) + print(f'>> Success. Optimized model is now located at {str(diffuser_path)}') + print(f'>> Writing new config file entry for {model_name}...',end='') + new_config = dict( + path=str(diffuser_path), + description=f'Optimized version of {model_name}', + format='diffusers', + ) + self.add_model(model_name, new_config, True) + print('done.') + except Exception as e: + print(f'** Conversion failed: {str(e)}') + traceback.print_exc() + return new_config + + def del_config(model_name:str, gen, opt, completer): + current_model = gen.model_name + if model_name == current_model: + print("** Can't delete active model. !switch to another model first. 
**") + return + gen.model_cache.del_model(model_name) + gen.model_cache.commit(opt.conf) + print(f'** {model_name} deleted') + completer.del_model(model_name) def _make_cache_room(self) -> None: num_loaded_models = len(self.models) From 2e3d42d010b6eacb17acdafe7f9ab8739781bfef Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Fri, 23 Dec 2022 18:59:29 -0800 Subject: [PATCH 090/199] diffusers: update to diffusers 0.11 (from 0.10.2) --- binary_installer/requirements.in | 2 +- environments-and-requirements/environment-lin-amd.yml | 2 +- environments-and-requirements/environment-lin-cuda.yml | 2 +- environments-and-requirements/environment-mac.yml | 2 +- environments-and-requirements/environment-win-cuda.yml | 2 +- environments-and-requirements/requirements-base.txt | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/binary_installer/requirements.in b/binary_installer/requirements.in index c90da4e390b..66e0618f789 100644 --- a/binary_installer/requirements.in +++ b/binary_installer/requirements.in @@ -4,7 +4,7 @@ --trusted-host https://download.pytorch.org accelerate~=0.15 albumentations -diffusers[torch]~=0.10 +diffusers[torch]~=0.11 einops eventlet flask_cors diff --git a/environments-and-requirements/environment-lin-amd.yml b/environments-and-requirements/environment-lin-amd.yml index 1ad046ea848..c100b70b872 100644 --- a/environments-and-requirements/environment-lin-amd.yml +++ b/environments-and-requirements/environment-lin-amd.yml @@ -10,7 +10,7 @@ dependencies: - pip: - --extra-index-url https://download.pytorch.org/whl/rocm5.2/ - albumentations==0.4.3 - - diffusers~=0.10 + - diffusers~=0.11 - einops==0.3.0 - eventlet - flask==2.1.3 diff --git a/environments-and-requirements/environment-lin-cuda.yml b/environments-and-requirements/environment-lin-cuda.yml index 8bb073d832b..fa22d6c843b 100644 --- a/environments-and-requirements/environment-lin-cuda.yml +++ b/environments-and-requirements/environment-lin-cuda.yml @@ -14,7 +14,7 @@ dependencies: - pip: - accelerate~=0.13 - albumentations==0.4.3 - - diffusers~=0.10 + - diffusers~=0.11 - einops==0.3.0 - eventlet - flask==2.1.3 diff --git a/environments-and-requirements/environment-mac.yml b/environments-and-requirements/environment-mac.yml index 18de1ce6c4d..de5330f40c4 100644 --- a/environments-and-requirements/environment-mac.yml +++ b/environments-and-requirements/environment-mac.yml @@ -22,7 +22,7 @@ dependencies: - albumentations=1.2 - coloredlogs=15.0 - - diffusers~=0.10 + - diffusers~=0.11 - einops=0.3 - eventlet - grpcio=1.46 diff --git a/environments-and-requirements/environment-win-cuda.yml b/environments-and-requirements/environment-win-cuda.yml index dd1404e1d02..306ffd67246 100644 --- a/environments-and-requirements/environment-win-cuda.yml +++ b/environments-and-requirements/environment-win-cuda.yml @@ -13,7 +13,7 @@ dependencies: - cudatoolkit=11.6 - pip: - albumentations==0.4.3 - - diffusers~=0.10 + - diffusers~=0.11 - einops==0.3.0 - eventlet - flask==2.1.3 diff --git a/environments-and-requirements/requirements-base.txt b/environments-and-requirements/requirements-base.txt index a7b9d48c13b..f84ebbaf6f9 100644 --- a/environments-and-requirements/requirements-base.txt +++ b/environments-and-requirements/requirements-base.txt @@ -1,6 +1,6 @@ # pip will resolve the version which matches torch albumentations -diffusers[torch]~=0.10 +diffusers[torch]~=0.11 einops eventlet facexlib From 3309baf168284711aca10c05c246af1841c619c8 Mon Sep 17 00:00:00 2001 From: Lincoln Stein 
Date: Sat, 24 Dec 2022 03:01:28 +0000 Subject: [PATCH 091/199] fix vae loading & width/height calculation --- ldm/invoke/ckpt_to_diffuser.py | 5 ++++- ldm/invoke/model_cache.py | 12 +++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/ldm/invoke/ckpt_to_diffuser.py b/ldm/invoke/ckpt_to_diffuser.py index 15702ccb35b..fcbe0b7ec5d 100644 --- a/ldm/invoke/ckpt_to_diffuser.py +++ b/ldm/invoke/ckpt_to_diffuser.py @@ -940,4 +940,7 @@ def convert_ckpt_to_diffuser(checkpoint_path:str, tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased") pipe = LDMTextToImagePipeline(vqvae=vae, bert=text_model, tokenizer=tokenizer, unet=unet, scheduler=scheduler) - pipe.save_pretrained(dump_path) + pipe.save_pretrained( + dump_path, +# safe_serialization=1, + ) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index c874da81173..b947504412d 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -356,8 +356,8 @@ def _load_diffusers_model(self, mconfig): #local_files_only=True ) if 'vae' in mconfig: - vae = self._load_vae(mconfig['vae']) - pipeline_args.update(vae=vae) + vae = self._load_vae(mconfig['vae']) + pipeline_args.update(vae=vae) if not isinstance(name_or_path,Path): pipeline_args.update(cache_dir=os.path.join(Globals.root,'models',name_or_path)) if using_fp16: @@ -391,8 +391,9 @@ def _load_diffusers_model(self, mconfig): pipeline.to(self.device) - width = pipeline.vae.block_out_channels[-2] - height = pipeline.vae.block_out_channels[-1] + # square images??? + width = pipeline.unet.config.sample_size * pipeline.vae_scale_factor + height = width print(f' | default image dimensions = {width} x {height}') @@ -657,7 +658,8 @@ def _load_vae(self, vae_config): vae_args = {} name_or_path = self.model_name_or_path(vae_config) using_fp16 = self.precision == 'float16' - + vae_args.update(cache_dir=os.path.join(Globals.root,'models',name_or_path)) + print(f' | Loading diffusers VAE from {name_or_path}') if using_fp16: print(f' | Using faster float16 precision') From aaff507bdd9307c820c1ee42557823723dee6000 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Fri, 23 Dec 2022 19:58:53 -0800 Subject: [PATCH 092/199] refactor: encapsulate these conditioning data into one container --- ldm/invoke/generator/diffusers_pipeline.py | 103 +++++++++++---------- ldm/invoke/generator/img2img.py | 6 +- ldm/invoke/generator/inpaint.py | 8 +- ldm/invoke/generator/txt2img.py | 10 +- ldm/invoke/generator/txt2img2img.py | 15 ++- 5 files changed, 70 insertions(+), 72 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 771184c4824..cbe38ee5ebe 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -1,10 +1,11 @@ from __future__ import annotations import secrets +import sys import warnings from dataclasses import dataclass -import sys from typing import List, Optional, Union, Callable, Type, TypeVar, Generic, Any + if sys.version_info < (3, 10): from typing_extensions import ParamSpec else: @@ -12,7 +13,6 @@ import PIL.Image import einops -import numpy as np import torch import torchvision.transforms as T from diffusers.models import attention @@ -39,7 +39,6 @@ from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent from ldm.modules.textual_inversion_manager import TextualInversionManager -from ldm.modules.encoders.modules import WeightedFrozenCLIPEmbedder @dataclass @@ -189,6 +188,25 
@@ def __call__(self, *args: ParamType.args, raise AssertionError("why was that an empty generator?") return result + +@dataclass(frozen=True) +class ConditioningData: + unconditioned_embeddings: torch.Tensor + text_embeddings: torch.Tensor + guidance_scale: float + """ + Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). + `guidance_scale` is defined as `w` of equation 2. of [Imagen Paper](https://arxiv.org/pdf/2205.11487.pdf). + Guidance scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate + images that are closely linked to the text `prompt`, usually at the expense of lower image quality. + """ + extra: Optional[InvokeAIDiffuserComponent.ExtraConditioningInfo] = None + + @property + def dtype(self): + return self.text_embeddings.dtype + + @dataclass class InvokeAIStableDiffusionPipelineOutput(StableDiffusionPipelineOutput): r""" @@ -275,32 +293,24 @@ def __init__( self.enable_xformers_memory_efficient_attention() def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, - text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, - guidance_scale: float, + conditioning_data: ConditioningData, *, callback: Callable[[PipelineIntermediateState], None]=None, - extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo=None, run_id=None, **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: r""" Function invoked when calling the pipeline for generation. + :param conditioning_data: :param latents: Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image generation. Can be used to tweak the same generation with different prompts. :param num_inference_steps: The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. - :param text_embeddings: - :param guidance_scale: Guidance scale as defined in [Classifier-Free Diffusion Guidance](https://arxiv.org/abs/2207.12598). - `guidance_scale` is defined as `w` of equation 2. of [Imagen Paper](https://arxiv.org/pdf/2205.11487.pdf). - Guidance scale is enabled by setting `guidance_scale > 1`. Higher guidance scale encourages to generate - images that are closely linked to the text `prompt`, usually at the expense of lower image quality. 
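# A minimal, self-contained sketch of the ConditioningData container introduced
# above. Field names follow the diff; the placeholder embedding shapes are
# assumptions for illustration only.
from dataclasses import dataclass
from typing import Any, Optional

import torch


@dataclass(frozen=True)
class ConditioningData:
    unconditioned_embeddings: torch.Tensor
    text_embeddings: torch.Tensor
    guidance_scale: float
    extra: Optional[Any] = None  # ExtraConditioningInfo in the real class

    @property
    def dtype(self) -> torch.dtype:
        return self.text_embeddings.dtype


# Generators bundle (uc, c, cfg_scale, extra) once and pass the bundle around,
# instead of threading four separate arguments through every pipeline call.
uc = torch.zeros(2, 77, 768)  # placeholder unconditioned embeddings
c = torch.ones(2, 77, 768)    # placeholder text embeddings
conditioning_data = ConditioningData(uc, c, guidance_scale=7.5)
assert conditioning_data.dtype == torch.float32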
:param callback: - :param extra_conditioning_info: :param run_id: :param extra_step_kwargs: """ result_latents, result_attention_map_saver = self.latents_from_embeddings( - latents, num_inference_steps, text_embeddings, unconditioned_embeddings, guidance_scale, - extra_conditioning_info=extra_conditioning_info, + latents, num_inference_steps, conditioning_data, run_id=run_id, callback=callback, **extra_step_kwargs ) # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 @@ -309,42 +319,40 @@ def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, with torch.inference_mode(): image = self.decode_latents(result_latents) output = InvokeAIStableDiffusionPipelineOutput(images=image, nsfw_content_detected=[], attention_map_saver=result_attention_map_saver) - return self.check_for_safety(output, dtype=text_embeddings.dtype) + return self.check_for_safety(output, dtype=conditioning_data.dtype) def latents_from_embeddings( self, latents: torch.Tensor, num_inference_steps: int, - text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, - guidance_scale: float, + conditioning_data: ConditioningData, *, timesteps = None, - extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, additional_guidance: List[Callable] = None, run_id=None, callback: Callable[[PipelineIntermediateState], None]=None, **extra_step_kwargs - ) -> torch.Tensor: + ) -> tuple[torch.Tensor, Optional[AttentionMapSaver]]: if timesteps is None: self.scheduler.set_timesteps(num_inference_steps, device=self.unet.device) timesteps = self.scheduler.timesteps infer_latents_from_embeddings = GeneratorToCallbackinator(self.generate_latents_from_embeddings, PipelineIntermediateState) result: PipelineIntermediateState = infer_latents_from_embeddings( - latents, timesteps, text_embeddings, unconditioned_embeddings, guidance_scale, - extra_conditioning_info=extra_conditioning_info, + latents, timesteps, conditioning_data, additional_guidance=additional_guidance, run_id=run_id, callback=callback, **extra_step_kwargs) return result.latents, result.attention_map_saver - def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, text_embeddings: torch.Tensor, - unconditioned_embeddings: torch.Tensor, guidance_scale: float, *, + def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, + conditioning_data: ConditioningData, + *, run_id: str = None, - extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, additional_guidance: List[Callable] = None, **extra_step_kwargs): if run_id is None: run_id = secrets.token_urlsafe(self.ID_LENGTH) if additional_guidance is None: additional_guidance = [] + extra_conditioning_info = conditioning_data.extra if extra_conditioning_info is not None and extra_conditioning_info.wants_cross_attention_control: self.invokeai_diffuser.setup_cross_attention_control(extra_conditioning_info, step_count=len(self.scheduler.timesteps)) @@ -360,12 +368,11 @@ def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, tex batched_t = torch.full((batch_size,), timesteps[0], dtype=timesteps.dtype, device=self.unet.device) - attention_map_saver: AttentionMapSaver = None + attention_map_saver: Optional[AttentionMapSaver] = None self.invokeai_diffuser.remove_attention_map_saving() for i, t in enumerate(self.progress_bar(timesteps)): batched_t.fill_(t) - step_output = self.step(batched_t, latents, guidance_scale, - text_embeddings, unconditioned_embeddings, + step_output = 
self.step(batched_t, latents, conditioning_data, i, additional_guidance=additional_guidance, **extra_step_kwargs) latents = step_output.prev_sample @@ -384,8 +391,8 @@ def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, tex return latents, attention_map_saver @torch.inference_mode() - def step(self, t: torch.Tensor, latents: torch.Tensor, guidance_scale: float, - text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, + def step(self, t: torch.Tensor, latents: torch.Tensor, + conditioning_data: ConditioningData, step_index:int | None = None, additional_guidance: List[Callable] = None, **extra_step_kwargs): # invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value @@ -401,8 +408,8 @@ def step(self, t: torch.Tensor, latents: torch.Tensor, guidance_scale: float, # predict the noise residual noise_pred = self.invokeai_diffuser.do_diffusion_step( latent_model_input, t, - unconditioned_embeddings, text_embeddings, - guidance_scale, + conditioning_data.unconditioned_embeddings, conditioning_data.text_embeddings, + conditioning_data.guidance_scale, step_index=step_index) # compute the previous noisy sample x_t -> x_t-1 @@ -412,7 +419,7 @@ def step(self, t: torch.Tensor, latents: torch.Tensor, guidance_scale: float, # But the way things are now, scheduler runs _after_ that, so there was # no way to use it to apply an operation that happens after the last scheduler.step. for guidance in additional_guidance: - step_output = guidance(step_output, timestep, (unconditioned_embeddings, text_embeddings)) + step_output = guidance(step_output, timestep, conditioning_data) return step_output @@ -424,10 +431,8 @@ def img2img_from_embeddings(self, init_image: Union[torch.FloatTensor, PIL.Image.Image], strength: float, num_inference_steps: int, - text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, - guidance_scale: float, + conditioning_data: ConditioningData, *, callback: Callable[[PipelineIntermediateState], None] = None, - extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, run_id=None, noise_func=None, **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: @@ -442,13 +447,15 @@ def img2img_from_embeddings(self, latents_dtype = self.unet.dtype initial_latents = self.non_noised_latents_from_image(init_image, device=device, dtype=latents_dtype) - return self.img2img_from_latents_and_embeddings(initial_latents, num_inference_steps, text_embeddings, - unconditioned_embeddings, guidance_scale, strength, - extra_conditioning_info, noise_func, run_id, callback, - **extra_step_kwargs) + return self.img2img_from_latents_and_embeddings(initial_latents, num_inference_steps, + conditioning_data, + strength, + noise_func, run_id, callback, + **extra_step_kwargs) - def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_steps, text_embeddings, - unconditioned_embeddings, guidance_scale, strength, extra_conditioning_info, + def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_steps, + conditioning_data: ConditioningData, + strength, noise_func, run_id=None, callback=None, **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: device = self.unet.device batch_size = initial_latents.size(0) @@ -461,8 +468,7 @@ def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_ste latents = noised_latents result_latents, result_attention_maps = self.latents_from_embeddings( - latents, num_inference_steps, text_embeddings, 
unconditioned_embeddings, guidance_scale, - extra_conditioning_info=extra_conditioning_info, + latents, num_inference_steps, conditioning_data, timesteps=timesteps, callback=callback, run_id=run_id, **extra_step_kwargs) @@ -473,7 +479,7 @@ def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_ste with torch.inference_mode(): image = self.decode_latents(result_latents) output = InvokeAIStableDiffusionPipelineOutput(images=image, nsfw_content_detected=[], attention_map_saver=result_attention_maps) - return self.check_for_safety(output, dtype=text_embeddings.dtype) + return self.check_for_safety(output, dtype=conditioning_data.dtype) def inpaint_from_embeddings( self, @@ -481,10 +487,8 @@ def inpaint_from_embeddings( mask: torch.FloatTensor, strength: float, num_inference_steps: int, - text_embeddings: torch.Tensor, unconditioned_embeddings: torch.Tensor, - guidance_scale: float, + conditioning_data: ConditioningData, *, callback: Callable[[PipelineIntermediateState], None] = None, - extra_conditioning_info: InvokeAIDiffuserComponent.ExtraConditioningInfo = None, run_id=None, noise_func=None, **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: @@ -531,8 +535,7 @@ def inpaint_from_embeddings( try: result_latents, result_attention_maps = self.latents_from_embeddings( - latents, num_inference_steps, text_embeddings, unconditioned_embeddings, guidance_scale, - extra_conditioning_info=extra_conditioning_info, + latents, num_inference_steps, conditioning_data, timesteps=timesteps, run_id=run_id, additional_guidance=guidance, callback=callback, @@ -546,7 +549,7 @@ def inpaint_from_embeddings( with torch.inference_mode(): image = self.decode_latents(result_latents) output = InvokeAIStableDiffusionPipelineOutput(images=image, nsfw_content_detected=[], attention_map_saver=result_attention_maps) - return self.check_for_safety(output, dtype=text_embeddings.dtype) + return self.check_for_safety(output, dtype=conditioning_data.dtype) def non_noised_latents_from_image(self, init_image, *, device, dtype): init_image = init_image.to(device=device, dtype=dtype) diff --git a/ldm/invoke/generator/img2img.py b/ldm/invoke/generator/img2img.py index 1a470d1ebf1..551fa64ab50 100644 --- a/ldm/invoke/generator/img2img.py +++ b/ldm/invoke/generator/img2img.py @@ -5,7 +5,7 @@ import torch from ldm.invoke.generator.base import Generator -from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline +from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline, ConditioningData class Img2Img(Generator): @@ -24,6 +24,7 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, self.perlin = perlin uc, c, extra_conditioning_info = conditioning + conditioning_data = ConditioningData(uc, c, cfg_scale, extra_conditioning_info) # noinspection PyTypeChecker pipeline: StableDiffusionGeneratorPipeline = self.model @@ -32,8 +33,7 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, def make_image(x_T): # FIXME: use x_T for initial seeded noise pipeline_output = pipeline.img2img_from_embeddings( - init_image, strength, steps, c, uc, cfg_scale, - extra_conditioning_info=extra_conditioning_info, + init_image, strength, steps, conditioning_data, noise_func=self.get_noise_like, callback=step_callback ) diff --git a/ldm/invoke/generator/inpaint.py b/ldm/invoke/generator/inpaint.py index bbee1e4b0ae..f29eaa77bb0 100644 --- a/ldm/invoke/generator/inpaint.py +++ b/ldm/invoke/generator/inpaint.py @@ -11,7 +11,8 @@ import torch from PIL 
import Image, ImageFilter, ImageOps, ImageChops -from ldm.invoke.generator.diffusers_pipeline import image_resized_to_grid_as_tensor, StableDiffusionGeneratorPipeline +from ldm.invoke.generator.diffusers_pipeline import image_resized_to_grid_as_tensor, StableDiffusionGeneratorPipeline, \ + ConditioningData from ldm.invoke.generator.img2img import Img2Img from ldm.invoke.patchmatch import PatchMatch from ldm.util import debug_image @@ -242,6 +243,7 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, # todo: support cross-attention control uc, c, _ = conditioning + conditioning_data = ConditioningData(uc, c, cfg_scale) # noinspection PyTypeChecker pipeline: StableDiffusionGeneratorPipeline = self.model @@ -262,9 +264,7 @@ def make_image(x_T): mask=1 - mask, # expects white means "paint here." strength=strength, num_inference_steps=steps, - text_embeddings=c, - unconditioned_embeddings=uc, - guidance_scale=cfg_scale, + conditioning_data=conditioning_data, noise_func=self.get_noise_like, callback=step_callback, ) diff --git a/ldm/invoke/generator/txt2img.py b/ldm/invoke/generator/txt2img.py index 6c4c7a3f13a..fc003c4af86 100644 --- a/ldm/invoke/generator/txt2img.py +++ b/ldm/invoke/generator/txt2img.py @@ -5,7 +5,7 @@ import torch from .base import Generator -from .diffusers_pipeline import StableDiffusionGeneratorPipeline +from .diffusers_pipeline import StableDiffusionGeneratorPipeline, ConditioningData class Txt2Img(Generator): @@ -24,6 +24,7 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, """ self.perlin = perlin uc, c, extra_conditioning_info = conditioning + conditioning_data = ConditioningData(uc, c, cfg_scale, extra_conditioning_info) # noinspection PyTypeChecker pipeline: StableDiffusionGeneratorPipeline = self.model @@ -33,11 +34,8 @@ def make_image(x_T) -> PIL.Image.Image: pipeline_output = pipeline.image_from_embeddings( latents=x_T, num_inference_steps=steps, - text_embeddings=c, - unconditioned_embeddings=uc, - guidance_scale=cfg_scale, - callback=step_callback, - extra_conditioning_info=extra_conditioning_info + conditioning_data=conditioning_data, + callback=step_callback # TODO: eta = ddim_eta, # TODO: threshold = threshold, ) diff --git a/ldm/invoke/generator/txt2img2img.py b/ldm/invoke/generator/txt2img2img.py index 56ebcc5bf43..b4aa6b57749 100644 --- a/ldm/invoke/generator/txt2img2img.py +++ b/ldm/invoke/generator/txt2img2img.py @@ -8,7 +8,8 @@ import torch from ldm.invoke.generator.base import Generator -from ldm.invoke.generator.diffusers_pipeline import trim_to_multiple_of, StableDiffusionGeneratorPipeline +from ldm.invoke.generator.diffusers_pipeline import trim_to_multiple_of, StableDiffusionGeneratorPipeline, \ + ConditioningData class Txt2Img2Img(Generator): @@ -25,6 +26,7 @@ def get_make_image(self, prompt:str, sampler, steps:int, cfg_scale:float, ddim_e kwargs are 'width' and 'height' """ uc, c, extra_conditioning_info = conditioning + conditioning_data = ConditioningData(uc, c, cfg_scale, extra_conditioning_info) scale_dim = min(width, height) scale = 512 / scale_dim @@ -39,11 +41,8 @@ def make_image(x_T): first_pass_latent_output, _ = pipeline.latents_from_embeddings( latents=x_T, num_inference_steps=steps, - text_embeddings=c, - unconditioned_embeddings=uc, - guidance_scale=cfg_scale, + conditioning_data=conditioning_data, callback=step_callback, - extra_conditioning_info=extra_conditioning_info, # TODO: eta = ddim_eta, # TODO: threshold = threshold, ) @@ -62,10 +61,8 @@ def make_image(x_T): pipeline_output = 
pipeline.img2img_from_latents_and_embeddings( resized_latents, num_inference_steps=steps, - text_embeddings=c, - unconditioned_embeddings=uc, - guidance_scale=cfg_scale, strength=strength, - extra_conditioning_info=extra_conditioning_info, + conditioning_data=conditioning_data, + strength=strength, noise_func=self.get_noise_like, callback=step_callback) From 644ba5edab4b8df6a3a61e8a937faa50c6faeffb Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Fri, 23 Dec 2022 20:35:34 -0800 Subject: [PATCH 093/199] diffusers: fix some noise-scaling issues by pushing the noise-mixing down to the common function --- ldm/invoke/generator/diffusers_pipeline.py | 68 +++++++++++----------- ldm/invoke/generator/img2img.py | 2 + ldm/invoke/generator/inpaint.py | 9 --- ldm/invoke/generator/txt2img.py | 3 +- ldm/invoke/generator/txt2img2img.py | 7 ++- 5 files changed, 42 insertions(+), 47 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index cbe38ee5ebe..22bada77eb1 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -294,25 +294,31 @@ def __init__( def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, conditioning_data: ConditioningData, - *, callback: Callable[[PipelineIntermediateState], None]=None, + *, + noise: torch.Tensor, + callback: Callable[[PipelineIntermediateState], None]=None, run_id=None, **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: r""" Function invoked when calling the pipeline for generation. :param conditioning_data: - :param latents: Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for + :param latents: Pre-generated un-noised latents, to be used as inputs for image generation. Can be used to tweak the same generation with different prompts. :param num_inference_steps: The number of denoising steps. More denoising steps usually lead to a higher quality image at the expense of slower inference. + :param noise: Noise to add to the latents, sampled from a Gaussian distribution. 
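# A minimal sketch of the noise handling this patch centralizes: callers now
# hand the pipeline un-noised latents plus a separate noise tensor, and the
# scheduler mixes them in at the starting timestep instead of each call site
# scaling by init_noise_sigma. DDIMScheduler and the shapes below are
# illustrative assumptions, not taken from the diff.
import torch
from diffusers import DDIMScheduler

scheduler = DDIMScheduler(num_train_timesteps=1000)
scheduler.set_timesteps(30)

latents = torch.zeros(1, 4, 64, 64)  # txt2img now starts from zero latents...
noise = torch.randn(1, 4, 64, 64)    # ...and passes the seeded noise separately

t0 = int(scheduler.timesteps[0])
batched_t = torch.full((latents.shape[0],), t0, dtype=torch.long)
noised_latents = scheduler.add_noise(latents, noise, batched_t)
print(noised_latents.std())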
:param callback: :param run_id: :param extra_step_kwargs: """ result_latents, result_attention_map_saver = self.latents_from_embeddings( - latents, num_inference_steps, conditioning_data, - run_id=run_id, callback=callback, **extra_step_kwargs - ) + latents, num_inference_steps, + conditioning_data, + noise=noise, + run_id=run_id, + callback=callback, + **extra_step_kwargs) # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 torch.cuda.empty_cache() @@ -321,22 +327,20 @@ def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, output = InvokeAIStableDiffusionPipelineOutput(images=image, nsfw_content_detected=[], attention_map_saver=result_attention_map_saver) return self.check_for_safety(output, dtype=conditioning_data.dtype) - def latents_from_embeddings( - self, latents: torch.Tensor, num_inference_steps: int, - conditioning_data: ConditioningData, - *, - timesteps = None, - additional_guidance: List[Callable] = None, - run_id=None, - callback: Callable[[PipelineIntermediateState], None]=None, - **extra_step_kwargs - ) -> tuple[torch.Tensor, Optional[AttentionMapSaver]]: + def latents_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, + conditioning_data: ConditioningData, + *, + noise: torch.Tensor, + timesteps=None, + additional_guidance: List[Callable] = None, run_id=None, + callback: Callable[[PipelineIntermediateState], None] = None, **extra_step_kwargs) -> tuple[torch.Tensor, Optional[AttentionMapSaver]]: if timesteps is None: self.scheduler.set_timesteps(num_inference_steps, device=self.unet.device) timesteps = self.scheduler.timesteps infer_latents_from_embeddings = GeneratorToCallbackinator(self.generate_latents_from_embeddings, PipelineIntermediateState) result: PipelineIntermediateState = infer_latents_from_embeddings( latents, timesteps, conditioning_data, + noise=noise, additional_guidance=additional_guidance, run_id=run_id, callback=callback, @@ -346,6 +350,7 @@ def latents_from_embeddings( def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, conditioning_data: ConditioningData, *, + noise: torch.Tensor, run_id: str = None, additional_guidance: List[Callable] = None, **extra_step_kwargs): if run_id is None: @@ -359,14 +364,13 @@ def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, else: self.invokeai_diffuser.remove_cross_attention_control() - # scale the initial noise by the standard deviation required by the scheduler - latents *= self.scheduler.init_noise_sigma yield PipelineIntermediateState(run_id=run_id, step=-1, timestep=self.scheduler.num_train_timesteps, latents=latents) batch_size = latents.shape[0] batched_t = torch.full((batch_size,), timesteps[0], dtype=timesteps.dtype, device=self.unet.device) + latents = self.scheduler.add_noise(latents, noise, batched_t) attention_map_saver: Optional[AttentionMapSaver] = None self.invokeai_diffuser.remove_attention_map_saving() @@ -446,32 +450,30 @@ def img2img_from_embeddings(self, device = self.unet.device latents_dtype = self.unet.dtype initial_latents = self.non_noised_latents_from_image(init_image, device=device, dtype=latents_dtype) + noise = noise_func(initial_latents) return self.img2img_from_latents_and_embeddings(initial_latents, num_inference_steps, conditioning_data, strength, - noise_func, run_id, callback, + noise, run_id, callback, **extra_step_kwargs) def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_steps, conditioning_data: ConditioningData, strength, - 
noise_func, run_id=None, callback=None, **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: + noise: torch.Tensor, run_id=None, callback=None, **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: device = self.unet.device - batch_size = initial_latents.size(0) img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components) img2img_pipeline.scheduler.set_timesteps(num_inference_steps, device=device) timesteps, _ = img2img_pipeline.get_timesteps(num_inference_steps, strength, device=device) - latent_timestep = timesteps[:1].repeat(batch_size) - noise = noise_func(initial_latents) - noised_latents = self.scheduler.add_noise(initial_latents, noise, latent_timestep) - latents = noised_latents result_latents, result_attention_maps = self.latents_from_embeddings( - latents, num_inference_steps, conditioning_data, + initial_latents, num_inference_steps, conditioning_data, timesteps=timesteps, + noise=noise, + run_id=run_id, callback=callback, - run_id=run_id, **extra_step_kwargs) + **extra_step_kwargs) # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 torch.cuda.empty_cache() @@ -494,8 +496,6 @@ def inpaint_from_embeddings( **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: device = self.unet.device latents_dtype = self.unet.dtype - batch_size = 1 - num_images_per_prompt = 1 if isinstance(init_image, PIL.Image.Image): init_image = image_resized_to_grid_as_tensor(init_image.convert('RGB')) @@ -512,16 +512,14 @@ def inpaint_from_embeddings( assert img2img_pipeline.scheduler is self.scheduler # 6. Prepare latent variables - latent_timestep = timesteps[:1].repeat(batch_size * num_images_per_prompt) # can't quite use upstream StableDiffusionImg2ImgPipeline.prepare_latents # because we have our own noise function init_image_latents = self.non_noised_latents_from_image(init_image, device=device, dtype=latents_dtype) noise = noise_func(init_image_latents) - latents = self.scheduler.add_noise(init_image_latents, noise, latent_timestep) if mask.dim() == 3: mask = mask.unsqueeze(0) - mask = tv_resize(mask, latents.shape[-2:], T.InterpolationMode.BILINEAR) \ + mask = tv_resize(mask, init_image_latents.shape[-2:], T.InterpolationMode.BILINEAR) \ .to(device=device, dtype=latents_dtype) guidance: List[Callable] = [] @@ -535,10 +533,10 @@ def inpaint_from_embeddings( try: result_latents, result_attention_maps = self.latents_from_embeddings( - latents, num_inference_steps, conditioning_data, - timesteps=timesteps, - run_id=run_id, additional_guidance=guidance, - callback=callback, + init_image_latents, num_inference_steps, + conditioning_data, noise=noise, timesteps=timesteps, + additional_guidance=guidance, + run_id=run_id, callback=callback, **extra_step_kwargs) finally: self.invokeai_diffuser.model_forward_callback = self._unet_forward diff --git a/ldm/invoke/generator/img2img.py b/ldm/invoke/generator/img2img.py index 551fa64ab50..52dec6f198d 100644 --- a/ldm/invoke/generator/img2img.py +++ b/ldm/invoke/generator/img2img.py @@ -32,6 +32,8 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, def make_image(x_T): # FIXME: use x_T for initial seeded noise + # We're not at the moment because the pipeline automatically resizes init_image if + # necessary, which the x_T input might not match. 
pipeline_output = pipeline.img2img_from_embeddings( init_image, strength, steps, conditioning_data, noise_func=self.get_noise_like, diff --git a/ldm/invoke/generator/inpaint.py b/ldm/invoke/generator/inpaint.py index f29eaa77bb0..263644176c6 100644 --- a/ldm/invoke/generator/inpaint.py +++ b/ldm/invoke/generator/inpaint.py @@ -250,15 +250,6 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, pipeline.scheduler = sampler def make_image(x_T): - # FIXME: some of this z_enc and inpaint_replace stuff was probably important - # # to replace masked area with latent noise, weighted by inpaint_replace strength - # if inpaint_replace > 0.0: - # print(f'>> inpaint will replace what was under the mask with a strength of {inpaint_replace}') - # l_noise = self.get_noise(kwargs['width'],kwargs['height']) - # inverted_mask = 1.0-mask # there will be 1s where the mask is - # masked_region = (1.0-inpaint_replace) * inverted_mask * z_enc + inpaint_replace * inverted_mask * l_noise - # z_enc = z_enc * mask + masked_region - pipeline_output = pipeline.inpaint_from_embeddings( init_image=init_image, mask=1 - mask, # expects white means "paint here." diff --git a/ldm/invoke/generator/txt2img.py b/ldm/invoke/generator/txt2img.py index fc003c4af86..768f20abd14 100644 --- a/ldm/invoke/generator/txt2img.py +++ b/ldm/invoke/generator/txt2img.py @@ -32,7 +32,8 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, def make_image(x_T) -> PIL.Image.Image: pipeline_output = pipeline.image_from_embeddings( - latents=x_T, + latents=torch.zeros_like(x_T), + noise=x_T, num_inference_steps=steps, conditioning_data=conditioning_data, callback=step_callback diff --git a/ldm/invoke/generator/txt2img2img.py b/ldm/invoke/generator/txt2img2img.py index b4aa6b57749..a1743988f8f 100644 --- a/ldm/invoke/generator/txt2img2img.py +++ b/ldm/invoke/generator/txt2img2img.py @@ -39,9 +39,10 @@ def get_make_image(self, prompt:str, sampler, steps:int, cfg_scale:float, ddim_e def make_image(x_T): first_pass_latent_output, _ = pipeline.latents_from_embeddings( - latents=x_T, + latents=torch.zeros_like(x_T), num_inference_steps=steps, conditioning_data=conditioning_data, + noise=x_T, callback=step_callback, # TODO: eta = ddim_eta, # TODO: threshold = threshold, @@ -58,12 +59,14 @@ def make_image(x_T): mode="bilinear" ) + second_pass_noise = self.get_noise_like(resized_latents) + pipeline_output = pipeline.img2img_from_latents_and_embeddings( resized_latents, num_inference_steps=steps, conditioning_data=conditioning_data, strength=strength, - noise_func=self.get_noise_like, + noise=second_pass_noise, callback=step_callback) return pipeline.numpy_to_pil(pipeline_output.images)[0] From 003a7f87e1d67866b21db48e1a4ef063832a77a6 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 24 Dec 2022 12:54:31 +0000 Subject: [PATCH 094/199] add support for safetensors and accelerate --- configs/INITIAL_MODELS.yaml | 20 ++++++++++++++++++- .../environment-lin-aarch64.yml | 2 ++ .../environment-lin-amd.yml | 2 ++ .../environment-lin-cuda.yml | 1 + .../environment-mac.yml | 3 ++- .../environment-win-cuda.yml | 2 ++ .../requirements-base.txt | 2 ++ ldm/invoke/ckpt_to_diffuser.py | 2 +- ldm/invoke/model_cache.py | 4 +++- 9 files changed, 34 insertions(+), 4 deletions(-) diff --git a/configs/INITIAL_MODELS.yaml b/configs/INITIAL_MODELS.yaml index feea63065ed..64158186faf 100644 --- a/configs/INITIAL_MODELS.yaml +++ b/configs/INITIAL_MODELS.yaml @@ -21,7 +21,7 @@ inpainting-1.5: height: 512 stable-diffusion-1.4: description: The 
original Stable Diffusion version 1.4 weight file (4.27 GB) - repo_id: CompVis/stable-diffusion-v-1-4-original + repo_id: CompVis/stable-diffusion-v1-4 recommended: False format: diffusers vae: @@ -80,3 +80,21 @@ voxel_art-1.0: recommended: False width: 512 height: 512 +ft-mse-improved-autoencoder-840000: + description: StabilityAI improved autoencoder fine-tuned for human faces for legacy .ckpt models only (335 MB) + repo_id: stabilityai/sd-vae-ft-mse-original + format: ckpt + config: VAE/default + file: vae-ft-mse-840000-ema-pruned.ckpt + recommended: True + width: 512 + height: 512 +trinart_vae: + description: Custom autoencoder for trinart_characters for legacy .ckpt models only (335 MB) + repo_id: naclbit/trinart_characters_19.2m_stable_diffusion_v1 + config: VAE/trinart + format: ckpt + file: autoencoder_fix_kl-f8-trinart_characters.ckpt + recommended: False + width: 512 + height: 512 diff --git a/environments-and-requirements/environment-lin-aarch64.yml b/environments-and-requirements/environment-lin-aarch64.yml index 01fc4c30c28..b2944066576 100644 --- a/environments-and-requirements/environment-lin-aarch64.yml +++ b/environments-and-requirements/environment-lin-aarch64.yml @@ -30,11 +30,13 @@ dependencies: - torchvision - transformers~=4.25 - pip: + - accelerate - getpass_asterisk - omegaconf==2.1.1 - picklescan - pyreadline3 - realesrgan + - safetensors - taming-transformers-rom1504 - test-tube>=0.7.5 - git+https://github.com/openai/CLIP.git@main#egg=clip diff --git a/environments-and-requirements/environment-lin-amd.yml b/environments-and-requirements/environment-lin-amd.yml index c100b70b872..3c061a9a507 100644 --- a/environments-and-requirements/environment-lin-amd.yml +++ b/environments-and-requirements/environment-lin-amd.yml @@ -9,6 +9,7 @@ dependencies: - numpy=1.23.3 - pip: - --extra-index-url https://download.pytorch.org/whl/rocm5.2/ + - accelerate - albumentations==0.4.3 - diffusers~=0.11 - einops==0.3.0 @@ -28,6 +29,7 @@ dependencies: - pyreadline3 - pytorch-lightning==1.7.7 - realesrgan + - safetensors - send2trash==1.8.0 - streamlit==1.12.0 - taming-transformers-rom1504 diff --git a/environments-and-requirements/environment-lin-cuda.yml b/environments-and-requirements/environment-lin-cuda.yml index fa22d6c843b..f48f2f3110b 100644 --- a/environments-and-requirements/environment-lin-cuda.yml +++ b/environments-and-requirements/environment-lin-cuda.yml @@ -32,6 +32,7 @@ dependencies: - pyreadline3 - pytorch-lightning==1.7.7 - realesrgan + - safetensors - send2trash==1.8.0 - streamlit==1.12.0 - taming-transformers-rom1504 diff --git a/environments-and-requirements/environment-mac.yml b/environments-and-requirements/environment-mac.yml index de5330f40c4..99b4093a0c5 100644 --- a/environments-and-requirements/environment-mac.yml +++ b/environments-and-requirements/environment-mac.yml @@ -19,7 +19,7 @@ dependencies: # sed -E 's/invokeai/invokeai-updated/;20,99s/- ([^=]+)==.+/- \1/' environment-mac.yml > environment-mac-updated.yml # CONDA_SUBDIR=osx-arm64 conda env create -f environment-mac-updated.yml && conda list -n invokeai-updated | awk ' {print " - " $1 "==" $2;} ' # ``` - + - accelerate - albumentations=1.2 - coloredlogs=15.0 - diffusers~=0.11 @@ -44,6 +44,7 @@ dependencies: - pudb=2019.2 - protobuf=3.20 - py-opencv=4.6 + - safetensors - scipy=1.9 - streamlit=1.12 - sympy=1.10 diff --git a/environments-and-requirements/environment-win-cuda.yml b/environments-and-requirements/environment-win-cuda.yml index 306ffd67246..9f91d66cb9b 100644 --- 
a/environments-and-requirements/environment-win-cuda.yml +++ b/environments-and-requirements/environment-win-cuda.yml @@ -12,6 +12,7 @@ dependencies: - pytorch=1.12.1 - cudatoolkit=11.6 - pip: + - accelerate - albumentations==0.4.3 - diffusers~=0.11 - einops==0.3.0 @@ -31,6 +32,7 @@ dependencies: - pyreadline3 - pytorch-lightning==1.7.7 - realesrgan + - safetensors - send2trash==1.8.0 - streamlit==1.12.0 - taming-transformers-rom1504 diff --git a/environments-and-requirements/requirements-base.txt b/environments-and-requirements/requirements-base.txt index f84ebbaf6f9..ba95913651e 100644 --- a/environments-and-requirements/requirements-base.txt +++ b/environments-and-requirements/requirements-base.txt @@ -1,4 +1,5 @@ # pip will resolve the version which matches torch +accelerate albumentations diffusers[torch]~=0.11 einops @@ -25,6 +26,7 @@ pyreadline3 pytorch-lightning==1.7.7 realesrgan requests==2.25.1 +safetensors scikit-image>=0.19 send2trash streamlit diff --git a/ldm/invoke/ckpt_to_diffuser.py b/ldm/invoke/ckpt_to_diffuser.py index fcbe0b7ec5d..7d8510f3166 100644 --- a/ldm/invoke/ckpt_to_diffuser.py +++ b/ldm/invoke/ckpt_to_diffuser.py @@ -942,5 +942,5 @@ def convert_ckpt_to_diffuser(checkpoint_path:str, pipe.save_pretrained( dump_path, -# safe_serialization=1, + safe_serialization=1, ) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index b947504412d..4324e8d303c 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -243,7 +243,9 @@ def _load_model(self, model_name:str): print(f'>> Loading {model_name} from {weights}') model, width, height, model_hash = self._load_ckpt_model(model_name, mconfig) elif model_format == 'diffusers': - model, width, height, model_hash = self._load_diffusers_model(mconfig) + with warnings.catch_warnings(): + warnings.simplefilter('ignore') + model, width, height, model_hash = self._load_diffusers_model(mconfig) else: raise NotImplementedError(f"Unknown model format {model_name}: {model_format}") From 8ce7c45fef4703b8bad2b446f3be14419a36177b Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 24 Dec 2022 13:52:56 +0000 Subject: [PATCH 095/199] set local_files_only when internet unreachable --- ldm/invoke/CLI.py | 15 +++++++++++++++ ldm/invoke/args.py | 7 +++++++ ldm/invoke/globals.py | 4 ++++ ldm/invoke/model_cache.py | 20 ++++++++++++++------ 4 files changed, 40 insertions(+), 6 deletions(-) diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index b412d8b8a84..5bbd4e1b1fe 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -48,6 +48,8 @@ def main(): # alert - setting a global here Globals.try_patchmatch = args.patchmatch Globals.always_use_cpu = args.always_use_cpu + Globals.internet_available = args.internet_available and check_internet() + print(f'>> Internet connectivity is {Globals.internet_available}') if not args.conf: if not os.path.exists(os.path.join(Globals.root,'configs','models.yaml')): @@ -1003,3 +1005,16 @@ def emergency_model_reconfigure(opt): import configure_invokeai configure_invokeai.main() + +def check_internet()->bool: + ''' + Return true if the internet is reachable. + It does this by pinging huggingface.co. + ''' + import urllib.request + host = 'http://huggingface.co' + try: + urllib.request.urlopen(host,timeout=1) + return True + except: + return False diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index 905230f4ade..6b21df47a34 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -500,6 +500,13 @@ def _create_arg_parser(self): help=f'Set model precision. 
Defaults to auto selected based on device. Options: {", ".join(PRECISION_CHOICES)}', default='auto', ) + model_group.add_argument( + '--internet', + action=argparse.BooleanOptionalAction, + dest='internet_available', + default=True, + help='Indicate whether internet is available for just-in-time model downloading (default: probe automatically).', + ) model_group.add_argument( '--nsfw_checker', '--safety_checker', diff --git a/ldm/invoke/globals.py b/ldm/invoke/globals.py index 10a673a08a8..7c5b765cbc3 100644 --- a/ldm/invoke/globals.py +++ b/ldm/invoke/globals.py @@ -38,6 +38,10 @@ # Use CPU even if GPU is available (main use case is for debugging MPS issues) Globals.always_use_cpu = False +# Whether the internet is reachable for dynamic downloads +# The CLI will test connectivity at startup time. +Globals.internet_available = True + def global_config_dir()->str: return Path(Globals.root, Globals.config_dir) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 4324e8d303c..cb40007ddf5 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -22,7 +22,7 @@ import torch import transformers from diffusers import AutoencoderKL, logging as dlogging -from huggingface_hub import hf_hub_download +from huggingface_hub import hf_hub_download, logging as hlogging from huggingface_hub.utils import RevisionNotFoundError from omegaconf import OmegaConf from omegaconf.dictconfig import DictConfig @@ -355,7 +355,7 @@ def _load_diffusers_model(self, mconfig): # TODO: scan weights maybe? pipeline_args = dict( safety_checker=None, - #local_files_only=True + local_files_only=not Globals.internet_available ) if 'vae' in mconfig: vae = self._load_vae(mconfig['vae']) @@ -397,7 +397,7 @@ def _load_diffusers_model(self, mconfig): width = pipeline.unet.config.sample_size * pipeline.vae_scale_factor height = width - print(f' | default image dimensions = {width} x {height}') + print(f' | Default image dimensions = {width} x {height}') return pipeline, width, height, model_hash @@ -660,11 +660,14 @@ def _load_vae(self, vae_config): vae_args = {} name_or_path = self.model_name_or_path(vae_config) using_fp16 = self.precision == 'float16' - vae_args.update(cache_dir=os.path.join(Globals.root,'models',name_or_path)) + + vae_args.update( + cache_dir=os.path.join(Globals.root,'models',name_or_path), + local_files_only=not Globals.internet_available, + ) print(f' | Loading diffusers VAE from {name_or_path}') if using_fp16: - print(f' | Using faster float16 precision') vae_args.update(torch_dtype=torch.float16) fp_args_list = [{'revision':'fp16'},{}] else: @@ -672,6 +675,8 @@ def _load_vae(self, vae_config): fp_args_list = [{}] vae = None + deferred_error = None + for fp_args in fp_args_list: # At some point we might need to be able to use different classes here? But for now I think # all Stable Diffusion VAE are AutoencoderKL. 
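# Condensed, runnable sketch of the connectivity gating added in this patch:
# probe huggingface.co once at startup, then let every loader derive
# local_files_only from the result. The loader_kwargs dict below is
# illustrative, mirroring the pipeline_args/vae_args updates, not code copied
# from the diff.
import urllib.request


def check_internet(host: str = 'http://huggingface.co', timeout: float = 1.0) -> bool:
    try:
        urllib.request.urlopen(host, timeout=timeout)
        return True
    except Exception:
        return False


internet_available = check_internet()
loader_kwargs = dict(
    safety_checker=None,
    local_files_only=not internet_available,
)
print(f'>> Internet connectivity is {internet_available}')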
@@ -681,10 +686,13 @@ def _load_vae(self, vae_config): if str(e).startswith('fp16 is not a valid'): print(f' | Half-precision version of model not available; fetching full-precision instead') else: - print(f'** An unexpected error occurred while downloading the model: {e})') + deferred_error = e if vae: break + if not vae and deferred_error: + print(f'** Could not load VAE {name_or_path}: {str(deferred_error)}') + # comment by lstein: I don't know what this does if 'subfolder' in vae_config: vae_args['subfolder'] = vae_config['subfolder'] From f0fb555c4fce0a678e3788636869a4aff0cabdcf Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 24 Dec 2022 10:24:51 -0800 Subject: [PATCH 096/199] diffusers: fix error-handling path when model repo has no fp16 branch --- ldm/invoke/model_cache.py | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index cb40007ddf5..83d5ca12184 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -17,20 +17,18 @@ import traceback import warnings from pathlib import Path -from typing import Union +from typing import Union, Any import torch import transformers from diffusers import AutoencoderKL, logging as dlogging -from huggingface_hub import hf_hub_download, logging as hlogging -from huggingface_hub.utils import RevisionNotFoundError from omegaconf import OmegaConf from omegaconf.dictconfig import DictConfig from omegaconf.errors import ConfigAttributeError from picklescan.scanner import scan_file_path from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline -from ldm.invoke.globals import Globals, global_config_dir, global_models_dir, global_autoscan_dir +from ldm.invoke.globals import Globals, global_models_dir, global_autoscan_dir from ldm.util import instantiate_from_config, ask_user DEFAULT_MAX_MODELS=2 @@ -146,7 +144,7 @@ def is_legacy(self,model_name:str)->bool: ''' info = self.model_info(model_name) return info['format']=='ckpt' if info else False - + def list_models(self) -> dict: ''' Return a dict of models in the format: @@ -353,7 +351,7 @@ def _load_diffusers_model(self, mconfig): print(f' | Using more accurate float32 precision') # TODO: scan weights maybe? 
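# Self-contained sketch of the fp16-first loading strategy refined in this
# patch: try revision='fp16', then fall back to full precision if the repo has
# no fp16 branch. The loader below is a stand-in for from_pretrained() so the
# control flow can run on its own; 'some/repo' is a made-up name.
def fake_from_pretrained(name_or_path: str, **kwargs) -> str:
    if kwargs.get('revision') == 'fp16':
        raise OSError('fp16 is not a valid git identifier')  # repo without an fp16 branch
    return f'full-precision weights for {name_or_path}'


using_fp16 = True
fp_args_list = [{'revision': 'fp16'}, {}] if using_fp16 else [{}]

pipeline = None
for fp_args in fp_args_list:
    try:
        pipeline = fake_from_pretrained('some/repo', **fp_args)
    except OSError as e:
        if str(e).startswith('fp16 is not a valid'):
            print('Could not fetch half-precision version of model; fetching full-precision instead')
        else:
            raise
    if pipeline:
        break

assert pipeline == 'full-precision weights for some/repo'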
- pipeline_args = dict( + pipeline_args: dict[str, Any] = dict( safety_checker=None, local_files_only=not Globals.internet_available ) @@ -366,7 +364,7 @@ def _load_diffusers_model(self, mconfig): pipeline_args.update(torch_dtype=torch.float16) fp_args_list = [{'revision':'fp16'},{}] else: - fp_args_list = [{}] + fp_args_list = [{}] verbosity = dlogging.get_verbosity() dlogging.set_verbosity_error() @@ -379,17 +377,17 @@ def _load_diffusers_model(self, mconfig): **pipeline_args, **fp_args, ) - + except OSError as e: if str(e).startswith('fp16 is not a valid'): - print(f'Could not fetch half-precision version of model {repo_id}; fetching full-precision instead') + print(f'Could not fetch half-precision version of model {name_or_path}; fetching full-precision instead') else: print(f'An unexpected error occurred while downloading the model: {e})') if pipeline: break - dlogging.set_verbosity_error() - assert pipeline is not None, OSError(f'"{model_name}" could not be loaded') + dlogging.set_verbosity(verbosity) + assert pipeline is not None, OSError(f'"{name_or_path}" could not be loaded') pipeline.to(self.device) @@ -405,7 +403,7 @@ def model_name_or_path(self, model_name:Union[str,DictConfig]) -> str | Path: if isinstance(model_name,DictConfig): mconfig = model_name elif model_name in self.config: - mconfig = self.config[model_name] + mconfig = self.config[model_name] else: raise ValueError(f'"{model_name}" is not a known model name. Please check your models.yaml file') @@ -474,7 +472,7 @@ def autoconvert_weights( ''' weights_directory = weights_directory or global_autoscan_dir() dest_directory = dest_directory or Path(global_models_dir(), 'optimized-ckpts') - + print('>> Checking for unconverted .ckpt files in {weights_directory}') ckpt_files = dict() for root, dirs, files in os.walk(weights_directory): @@ -485,7 +483,7 @@ def autoconvert_weights( dest = Path(dest_directory,basename) if not dest.exists(): ckpt_files[Path(root,f)]=dest - + if len(ckpt_files)==0: return @@ -526,7 +524,7 @@ def convert_and_import(self, ckpt_path:Path, diffuser_path:Path)->dict: traceback.print_exc() return new_config - def del_config(model_name:str, gen, opt, completer): + def del_config(self, model_name:str, gen, opt, completer): current_model = gen.model_name if model_name == current_model: print("** Can't delete active model. !switch to another model first. 
**") @@ -601,7 +599,7 @@ def _model_to_cpu(self,model): def _model_from_cpu(self,model): if self.device == 'cpu': return model - + model.to(self.device) model.cond_stage_model.device = self.device @@ -660,12 +658,12 @@ def _load_vae(self, vae_config): vae_args = {} name_or_path = self.model_name_or_path(vae_config) using_fp16 = self.precision == 'float16' - + vae_args.update( cache_dir=os.path.join(Globals.root,'models',name_or_path), local_files_only=not Globals.internet_available, ) - + print(f' | Loading diffusers VAE from {name_or_path}') if using_fp16: vae_args.update(torch_dtype=torch.float16) From 76075dad06a72bcf14c35f143649a2904a222687 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 24 Dec 2022 18:33:44 +0000 Subject: [PATCH 097/199] fix generatorinpaint error Fixes : "ModuleNotFoundError: No module named 'ldm.invoke.generatorinpaint' https://github.com/invoke-ai/InvokeAI/pull/1583#issuecomment-1363634318 --- ldm/generate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index c36f6b90298..73bd4829bdf 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -790,10 +790,10 @@ def _make_txt2img2img(self): return self._load_generator('.txt2img2img','Txt2Img2Img') def _make_inpaint(self): - return self._load_generator('inpaint','Inpaint') + return self._load_generator('.inpaint','Inpaint') def _make_omnibus(self): - return self._load_generator('omnibus','Omnibus') + return self._load_generator('.omnibus','Omnibus') def _load_generator(self, module, class_name): if self.is_legacy_model(self.model_name): From 3e8346be9e723f9340846311569cf4faf687abe0 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 24 Dec 2022 18:52:44 +0000 Subject: [PATCH 098/199] quench diffuser safety-checker warning --- ldm/invoke/generator/img2img.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ldm/invoke/generator/img2img.py b/ldm/invoke/generator/img2img.py index 52dec6f198d..15e79b918ae 100644 --- a/ldm/invoke/generator/img2img.py +++ b/ldm/invoke/generator/img2img.py @@ -6,7 +6,7 @@ from ldm.invoke.generator.base import Generator from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline, ConditioningData - +from diffusers import logging class Img2Img(Generator): def __init__(self, model, precision): @@ -34,6 +34,7 @@ def make_image(x_T): # FIXME: use x_T for initial seeded noise # We're not at the moment because the pipeline automatically resizes init_image if # necessary, which the x_T input might not match. 
+ logging.set_verbosity_error() # quench safety check warnings pipeline_output = pipeline.img2img_from_embeddings( init_image, strength, steps, conditioning_data, noise_func=self.get_noise_like, From 50f131c4b4481a86fcb47c2f20079aa35e29cfad Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Tue, 27 Dec 2022 23:58:41 -0800 Subject: [PATCH 099/199] diffusers: support stochastic DDIM eta parameter --- ldm/generate.py | 4 +- ldm/invoke/generator/diffusers_pipeline.py | 60 +++++++++++++--------- ldm/invoke/generator/img2img.py | 11 ++-- ldm/invoke/generator/inpaint.py | 10 ++-- ldm/invoke/generator/txt2img.py | 8 +-- ldm/invoke/generator/txt2img2img.py | 13 ++--- 6 files changed, 62 insertions(+), 44 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index 73bd4829bdf..542b3f4d615 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -168,7 +168,7 @@ def __init__( self.steps = 50 self.cfg_scale = 7.5 self.sampler_name = sampler_name - self.ddim_eta = 0.0 # same seed always produces same image + self.ddim_eta = ddim_eta # same seed always produces same image self.precision = precision self.strength = 0.75 self.seamless = False @@ -785,7 +785,7 @@ def _make_img2img(self): def _make_embiggen(self): return self._load_generator('.embiggen','Embiggen') - + def _make_txt2img2img(self): return self._load_generator('.txt2img2img','Txt2Img2Img') diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 22bada77eb1..7174ffac0f1 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -1,9 +1,11 @@ from __future__ import annotations +import dataclasses +import inspect import secrets import sys import warnings -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import List, Optional, Union, Callable, Type, TypeVar, Generic, Any if sys.version_info < (3, 10): @@ -201,11 +203,25 @@ class ConditioningData: images that are closely linked to the text `prompt`, usually at the expense of lower image quality. """ extra: Optional[InvokeAIDiffuserComponent.ExtraConditioningInfo] = None + scheduler_args: dict[str, Any] = field(default_factory=dict) + """Additional arguments to pass to scheduler.step.""" @property def dtype(self): return self.text_embeddings.dtype + def add_scheduler_args_if_applicable(self, scheduler, **kwargs): + scheduler_args = dict(self.scheduler_args) + step_method = inspect.signature(scheduler.step) + for name, value in kwargs.items(): + try: + step_method.bind_partial(**{name: value}) + except TypeError: + # FIXME: don't silently discard arguments + pass # debug("%s does not accept argument named %r", scheduler, name) + else: + scheduler_args[name] = value + return dataclasses.replace(self, scheduler_args=scheduler_args) @dataclass class InvokeAIStableDiffusionPipelineOutput(StableDiffusionPipelineOutput): @@ -297,8 +313,7 @@ def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, *, noise: torch.Tensor, callback: Callable[[PipelineIntermediateState], None]=None, - run_id=None, - **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: + run_id=None) -> InvokeAIStableDiffusionPipelineOutput: r""" Function invoked when calling the pipeline for generation. @@ -310,15 +325,13 @@ def image_from_embeddings(self, latents: torch.Tensor, num_inference_steps: int, :param noise: Noise to add to the latents, sampled from a Gaussian distribution. 
:param callback: :param run_id: - :param extra_step_kwargs: """ result_latents, result_attention_map_saver = self.latents_from_embeddings( latents, num_inference_steps, conditioning_data, noise=noise, run_id=run_id, - callback=callback, - **extra_step_kwargs) + callback=callback) # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 torch.cuda.empty_cache() @@ -333,7 +346,8 @@ def latents_from_embeddings(self, latents: torch.Tensor, num_inference_steps: in noise: torch.Tensor, timesteps=None, additional_guidance: List[Callable] = None, run_id=None, - callback: Callable[[PipelineIntermediateState], None] = None, **extra_step_kwargs) -> tuple[torch.Tensor, Optional[AttentionMapSaver]]: + callback: Callable[[PipelineIntermediateState], None] = None + ) -> tuple[torch.Tensor, Optional[AttentionMapSaver]]: if timesteps is None: self.scheduler.set_timesteps(num_inference_steps, device=self.unet.device) timesteps = self.scheduler.timesteps @@ -343,8 +357,7 @@ def latents_from_embeddings(self, latents: torch.Tensor, num_inference_steps: in noise=noise, additional_guidance=additional_guidance, run_id=run_id, - callback=callback, - **extra_step_kwargs) + callback=callback) return result.latents, result.attention_map_saver def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, @@ -352,7 +365,7 @@ def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, *, noise: torch.Tensor, run_id: str = None, - additional_guidance: List[Callable] = None, **extra_step_kwargs): + additional_guidance: List[Callable] = None): if run_id is None: run_id = secrets.token_urlsafe(self.ID_LENGTH) if additional_guidance is None: @@ -377,8 +390,7 @@ def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, for i, t in enumerate(self.progress_bar(timesteps)): batched_t.fill_(t) step_output = self.step(batched_t, latents, conditioning_data, - i, additional_guidance=additional_guidance, - **extra_step_kwargs) + i, additional_guidance=additional_guidance) latents = step_output.prev_sample predicted_original = getattr(step_output, 'pred_original_sample', None) @@ -397,8 +409,7 @@ def generate_latents_from_embeddings(self, latents: torch.Tensor, timesteps, @torch.inference_mode() def step(self, t: torch.Tensor, latents: torch.Tensor, conditioning_data: ConditioningData, - step_index:int | None = None, additional_guidance: List[Callable] = None, - **extra_step_kwargs): + step_index:int | None = None, additional_guidance: List[Callable] = None): # invokeai_diffuser has batched timesteps, but diffusers schedulers expect a single value timestep = t[0] @@ -417,7 +428,8 @@ def step(self, t: torch.Tensor, latents: torch.Tensor, step_index=step_index) # compute the previous noisy sample x_t -> x_t-1 - step_output = self.scheduler.step(noise_pred, timestep, latents, **extra_step_kwargs) + step_output = self.scheduler.step(noise_pred, timestep, latents, + **conditioning_data.scheduler_args) # TODO: this additional_guidance extension point feels redundant with InvokeAIDiffusionComponent. 
# But the way things are now, scheduler runs _after_ that, so there was @@ -438,8 +450,8 @@ def img2img_from_embeddings(self, conditioning_data: ConditioningData, *, callback: Callable[[PipelineIntermediateState], None] = None, run_id=None, - noise_func=None, - **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: + noise_func=None + ) -> InvokeAIStableDiffusionPipelineOutput: if isinstance(init_image, PIL.Image.Image): init_image = image_resized_to_grid_as_tensor(init_image.convert('RGB')) @@ -455,13 +467,13 @@ def img2img_from_embeddings(self, return self.img2img_from_latents_and_embeddings(initial_latents, num_inference_steps, conditioning_data, strength, - noise, run_id, callback, - **extra_step_kwargs) + noise, run_id, callback) def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_steps, conditioning_data: ConditioningData, strength, - noise: torch.Tensor, run_id=None, callback=None, **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: + noise: torch.Tensor, run_id=None, callback=None + ) -> InvokeAIStableDiffusionPipelineOutput: device = self.unet.device img2img_pipeline = StableDiffusionImg2ImgPipeline(**self.components) img2img_pipeline.scheduler.set_timesteps(num_inference_steps, device=device) @@ -472,8 +484,7 @@ def img2img_from_latents_and_embeddings(self, initial_latents, num_inference_ste timesteps=timesteps, noise=noise, run_id=run_id, - callback=callback, - **extra_step_kwargs) + callback=callback) # https://discuss.huggingface.co/t/memory-usage-by-later-pipeline-stages/23699 torch.cuda.empty_cache() @@ -493,7 +504,7 @@ def inpaint_from_embeddings( *, callback: Callable[[PipelineIntermediateState], None] = None, run_id=None, noise_func=None, - **extra_step_kwargs) -> InvokeAIStableDiffusionPipelineOutput: + ) -> InvokeAIStableDiffusionPipelineOutput: device = self.unet.device latents_dtype = self.unet.dtype @@ -536,8 +547,7 @@ def inpaint_from_embeddings( init_image_latents, num_inference_steps, conditioning_data, noise=noise, timesteps=timesteps, additional_guidance=guidance, - run_id=run_id, callback=callback, - **extra_step_kwargs) + run_id=run_id, callback=callback) finally: self.invokeai_diffuser.model_forward_callback = self._unet_forward diff --git a/ldm/invoke/generator/img2img.py b/ldm/invoke/generator/img2img.py index 15e79b918ae..0c51648bb55 100644 --- a/ldm/invoke/generator/img2img.py +++ b/ldm/invoke/generator/img2img.py @@ -3,10 +3,11 @@ ''' import torch +from diffusers import logging from ldm.invoke.generator.base import Generator from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline, ConditioningData -from diffusers import logging + class Img2Img(Generator): def __init__(self, model, precision): @@ -23,13 +24,15 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, """ self.perlin = perlin - uc, c, extra_conditioning_info = conditioning - conditioning_data = ConditioningData(uc, c, cfg_scale, extra_conditioning_info) - # noinspection PyTypeChecker pipeline: StableDiffusionGeneratorPipeline = self.model pipeline.scheduler = sampler + uc, c, extra_conditioning_info = conditioning + conditioning_data = (ConditioningData(uc, c, cfg_scale, extra_conditioning_info) + .add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta)) + + def make_image(x_T): # FIXME: use x_T for initial seeded noise # We're not at the moment because the pipeline automatically resizes init_image if diff --git a/ldm/invoke/generator/inpaint.py b/ldm/invoke/generator/inpaint.py index 
263644176c6..62078311ae3 100644 --- a/ldm/invoke/generator/inpaint.py +++ b/ldm/invoke/generator/inpaint.py @@ -241,14 +241,16 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, self.mask_blur_radius = mask_blur_radius - # todo: support cross-attention control - uc, c, _ = conditioning - conditioning_data = ConditioningData(uc, c, cfg_scale) - # noinspection PyTypeChecker pipeline: StableDiffusionGeneratorPipeline = self.model pipeline.scheduler = sampler + # todo: support cross-attention control + uc, c, _ = conditioning + conditioning_data = (ConditioningData(uc, c, cfg_scale) + .add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta)) + + def make_image(x_T): pipeline_output = pipeline.inpaint_from_embeddings( init_image=init_image, diff --git a/ldm/invoke/generator/txt2img.py b/ldm/invoke/generator/txt2img.py index 768f20abd14..71749432625 100644 --- a/ldm/invoke/generator/txt2img.py +++ b/ldm/invoke/generator/txt2img.py @@ -23,13 +23,16 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, kwargs are 'width' and 'height' """ self.perlin = perlin - uc, c, extra_conditioning_info = conditioning - conditioning_data = ConditioningData(uc, c, cfg_scale, extra_conditioning_info) # noinspection PyTypeChecker pipeline: StableDiffusionGeneratorPipeline = self.model pipeline.scheduler = sampler + uc, c, extra_conditioning_info = conditioning + conditioning_data = (ConditioningData(uc, c, cfg_scale, extra_conditioning_info) + .add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta)) + + def make_image(x_T) -> PIL.Image.Image: pipeline_output = pipeline.image_from_embeddings( latents=torch.zeros_like(x_T), @@ -37,7 +40,6 @@ def make_image(x_T) -> PIL.Image.Image: num_inference_steps=steps, conditioning_data=conditioning_data, callback=step_callback - # TODO: eta = ddim_eta, # TODO: threshold = threshold, ) if pipeline_output.attention_map_saver is not None and attention_maps_callback is not None: diff --git a/ldm/invoke/generator/txt2img2img.py b/ldm/invoke/generator/txt2img2img.py index a1743988f8f..2c71194532d 100644 --- a/ldm/invoke/generator/txt2img2img.py +++ b/ldm/invoke/generator/txt2img2img.py @@ -26,16 +26,18 @@ def get_make_image(self, prompt:str, sampler, steps:int, cfg_scale:float, ddim_e kwargs are 'width' and 'height' """ uc, c, extra_conditioning_info = conditioning - conditioning_data = ConditioningData(uc, c, cfg_scale, extra_conditioning_info) - scale_dim = min(width, height) - scale = 512 / scale_dim - - init_width, init_height = trim_to_multiple_of(scale * width, scale * height) # noinspection PyTypeChecker pipeline: StableDiffusionGeneratorPipeline = self.model pipeline.scheduler = sampler + conditioning_data = (ConditioningData(uc, c, cfg_scale, extra_conditioning_info) + .add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta)) + scale_dim = min(width, height) + scale = 512 / scale_dim + + init_width, init_height = trim_to_multiple_of(scale * width, scale * height) + def make_image(x_T): first_pass_latent_output, _ = pipeline.latents_from_embeddings( @@ -44,7 +46,6 @@ def make_image(x_T): conditioning_data=conditioning_data, noise=x_T, callback=step_callback, - # TODO: eta = ddim_eta, # TODO: threshold = threshold, ) From 72f5cbb2123932423d20f96032cd55dbe7986e5b Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Wed, 28 Dec 2022 15:07:42 +0100 Subject: [PATCH 100/199] fix conda env creation on macos --- environments-and-requirements/environment-mac.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) 
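Returning briefly to the DDIM eta patch above: ConditioningData.add_scheduler_args_if_applicable decides whether a keyword such as eta may be forwarded to scheduler.step() by probing that method's signature. A standalone sketch of the check, with illustrative names and assuming a diffusers-style scheduler object:

import inspect
from typing import Any

def kwargs_accepted_by_step(scheduler, **kwargs) -> dict[str, Any]:
    # DDIMScheduler.step() takes eta; most other schedulers do not, and
    # bind_partial() raises TypeError for keywords a signature cannot accept.
    accepted: dict[str, Any] = {}
    step_signature = inspect.signature(scheduler.step)
    for name, value in kwargs.items():
        try:
            step_signature.bind_partial(**{name: value})
        except TypeError:
            continue  # silently dropped, mirroring the FIXME in the patch
        accepted[name] = value
    return accepted

# e.g. kwargs_accepted_by_step(DDIMScheduler(), eta=0.3) -> {'eta': 0.3}
#      kwargs_accepted_by_step(PNDMScheduler(), eta=0.3) -> {}
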
diff --git a/environments-and-requirements/environment-mac.yml b/environments-and-requirements/environment-mac.yml index 99b4093a0c5..e46d214db5f 100644 --- a/environments-and-requirements/environment-mac.yml +++ b/environments-and-requirements/environment-mac.yml @@ -1,6 +1,7 @@ name: invokeai channels: - pytorch + - huggingface - conda-forge - defaults dependencies: @@ -44,7 +45,6 @@ dependencies: - pudb=2019.2 - protobuf=3.20 - py-opencv=4.6 - - safetensors - scipy=1.9 - streamlit=1.12 - sympy=1.10 @@ -52,6 +52,7 @@ dependencies: - tensorboard=2.10 - transformers~=4.25 - pip: + - safetensors~=0.2 - getpass_asterisk - picklescan - taming-transformers-rom1504 From bac090158c108dafea32bfb1ba7e20e8be4627cc Mon Sep 17 00:00:00 2001 From: Damian Stewart Date: Wed, 28 Dec 2022 15:19:31 +0100 Subject: [PATCH 101/199] fix cross-attention with diffusers 0.11 --- ldm/models/diffusion/cross_attention_control.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ldm/models/diffusion/cross_attention_control.py b/ldm/models/diffusion/cross_attention_control.py index eab76e2cbfa..7415f1435be 100644 --- a/ldm/models/diffusion/cross_attention_control.py +++ b/ldm/models/diffusion/cross_attention_control.py @@ -450,10 +450,12 @@ def __init__(self, **kwargs): super().__init__(**kwargs) InvokeAICrossAttentionMixin.__init__(self) - def _attention(self, query, key, value): + def _attention(self, query, key, value, attention_mask=None): #default_result = super()._attention(query, key, value) - damian_result = self.get_invokeai_attention_mem_efficient(query, key, value) + if attention_mask is not None: + print(f"{type(self).__name__} ignoring passed-in attention_mask") + attention_result = self.get_invokeai_attention_mem_efficient(query, key, value) - hidden_states = self.reshape_batch_dim_to_heads(damian_result) + hidden_states = self.reshape_batch_dim_to_heads(attention_result) return hidden_states From e294fcabeb88f8b56b7b9d9fc57a1e6ac5b72de0 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 28 Dec 2022 11:38:59 -0800 Subject: [PATCH 102/199] diffusers: the VAE needs to be tiling as well as the U-Net --- ldm/generate.py | 9 +++++---- ldm/invoke/seamless.py | 1 + 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index 542b3f4d615..9c9cc9bff53 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -4,13 +4,13 @@ # Copyright (c) 2022 Robin Rombach and Patrick Esser and contributors import gc +import importlib import os import random import re import sys import time import traceback -import importlib import cv2 import numpy as np @@ -35,18 +35,18 @@ from ldm.invoke.concepts_lib import HuggingFaceConceptsLibrary from ldm.invoke.conditioning import get_uc_and_c_and_ec from ldm.invoke.devices import choose_torch_device, choose_precision +from ldm.invoke.generator.inpaint import infill_methods from ldm.invoke.globals import Globals from ldm.invoke.image_util import InitImageResizer from ldm.invoke.model_cache import ModelCache from ldm.invoke.pngwriter import PngWriter from ldm.invoke.seamless import configure_model_padding -from ldm.invoke.txt2mask import Txt2Mask, SegmentedGrayscale -from ldm.invoke.generator.inpaint import infill_methods - +from ldm.invoke.txt2mask import Txt2Mask from ldm.models.diffusion.ddim import DDIMSampler from ldm.models.diffusion.ksampler import KSampler from ldm.models.diffusion.plms import PLMSSampler + def fix_func(orig): if hasattr(torch.backends, 'mps') and 
torch.backends.mps.is_available(): def new_func(*args, **kw): @@ -408,6 +408,7 @@ def process_image(image,seed): if isinstance(model, DiffusionPipeline): configure_model_padding(model.unet, seamless, seamless_axes) + configure_model_padding(model.vae, seamless, seamless_axes) else: configure_model_padding(model, seamless, seamless_axes) diff --git a/ldm/invoke/seamless.py b/ldm/invoke/seamless.py index 3a010ef118e..fda363eb7be 100644 --- a/ldm/invoke/seamless.py +++ b/ldm/invoke/seamless.py @@ -12,6 +12,7 @@ def configure_model_padding(model, seamless, seamless_axes): """ Modifies the 2D convolution layers to use a circular padding mode based on the `seamless` and `seamless_axes` options. """ + # TODO: get an explicit interface for this in diffusers: https://github.com/huggingface/diffusers/issues/556 for m in model.modules(): if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)): if seamless: From 795a5c75099b7674e95277fe2032a6ba6fde11ad Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 28 Dec 2022 12:45:30 -0800 Subject: [PATCH 103/199] diffusers: comment on subfolders --- ldm/invoke/model_cache.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index f66bac737d6..d60cd3f334f 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -701,6 +701,10 @@ def _load_vae(self, vae_config): vae = None deferred_error = None + # A VAE may be in a subfolder of a model's repository. + if 'subfolder' in vae_config: + vae_args['subfolder'] = vae_config['subfolder'] + for fp_args in fp_args_list: # At some point we might need to be able to use different classes here? But for now I think # all Stable Diffusion VAE are AutoencoderKL. @@ -717,8 +721,4 @@ def _load_vae(self, vae_config): if not vae and deferred_error: print(f'** Could not load VAE {name_or_path}: {str(deferred_error)}') - # comment by lstein: I don't know what this does - if 'subfolder' in vae_config: - vae_args['subfolder'] = vae_config['subfolder'] - return vae From 716616597277467ab99a1433fbc218e7f9a23fa8 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 28 Dec 2022 13:35:40 -0800 Subject: [PATCH 104/199] diffusers: embiggen! 
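Before the embiggen diff below, a short illustration of the seamless-tiling fix from the VAE patch above: with diffusers, both the U-Net and the VAE need their 2D convolutions switched to circular padding. This is a simplified sketch; the real configure_model_padding() also offers per-axis control and handles transposed convolutions, which are omitted here.

import torch.nn as nn

def make_seamless(module: nn.Module, seamless: bool = True) -> None:
    # Circular padding makes generated textures wrap at the image borders.
    for m in module.modules():
        if isinstance(m, nn.Conv2d):
            m.padding_mode = 'circular' if seamless else 'zeros'

# As the patch above notes, the U-Net alone is not enough for diffusers models:
#     make_seamless(pipeline.unet)
#     make_seamless(pipeline.vae)
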
--- ldm/invoke/generator/embiggen.py | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/ldm/invoke/generator/embiggen.py b/ldm/invoke/generator/embiggen.py index 2fec0606f77..e7505aab2fe 100644 --- a/ldm/invoke/generator/embiggen.py +++ b/ldm/invoke/generator/embiggen.py @@ -8,10 +8,8 @@ from PIL import Image from tqdm import trange -from ldm.invoke.devices import choose_autocast from ldm.invoke.generator.base import Generator from ldm.invoke.generator.img2img import Img2Img -from ldm.models.diffusion.ddim import DDIMSampler class Embiggen(Generator): @@ -24,7 +22,6 @@ def generate(self,prompt,iterations=1,seed=None, image_callback=None, step_callback=None, **kwargs): - scope = choose_autocast(self.precision) make_image = self.get_make_image( prompt, step_callback = step_callback, @@ -34,14 +31,13 @@ def generate(self,prompt,iterations=1,seed=None, seed = seed if seed else self.new_seed() # Noise will be generated by the Img2Img generator when called - with scope(self.model.device.type), self.model.ema_scope(): - for n in trange(iterations, desc='Generating'): - # make_image will call Img2Img which will do the equivalent of get_noise itself - image = make_image() - results.append([image, seed]) - if image_callback is not None: - image_callback(image, seed, prompt_in=prompt) - seed = self.new_seed() + for _ in trange(iterations, desc='Generating'): + # make_image will call Img2Img which will do the equivalent of get_noise itself + image = make_image() + results.append([image, seed]) + if image_callback is not None: + image_callback(image, seed, prompt_in=prompt) + seed = self.new_seed() return results @torch.no_grad() @@ -355,7 +351,7 @@ def make_image(): prompt, iterations = 1, seed = seed, - sampler = DDIMSampler(self.model, device=self.model.device), + sampler = sampler, steps = steps, cfg_scale = cfg_scale, conditioning = conditioning, From 3811d07e75996f669d9c81f36919c4113062aa67 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Thu, 29 Dec 2022 09:32:13 -0800 Subject: [PATCH 105/199] diffusers: make model_cache.list_models serializable --- ldm/invoke/model_cache.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 4551ddc7f49..896185de68a 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -162,6 +162,9 @@ def list_models(self) -> dict: default = self.config[name].default if 'default' in self.config[name] else False vae = self.config[name].vae if 'vae' in self.config[name] else '' + if isinstance(vae, DictConfig): + vae = OmegaConf.to_object(vae) # so it can be JSON-serialized + if self.current_model == name: status = 'active' elif name in self.models: From aa174a9cb25597d2b0e5faa1f4d932469060060e Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 31 Dec 2022 08:10:20 -0800 Subject: [PATCH 106/199] diffusers(inpaint): restore scaling functionality --- ldm/invoke/generator/inpaint.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ldm/invoke/generator/inpaint.py b/ldm/invoke/generator/inpaint.py index 62078311ae3..dd43d3cde71 100644 --- a/ldm/invoke/generator/inpaint.py +++ b/ldm/invoke/generator/inpaint.py @@ -264,7 +264,8 @@ def make_image(x_T): if pipeline_output.attention_map_saver is not None and attention_maps_callback is not None: attention_maps_callback(pipeline_output.attention_map_saver) - result = pipeline.numpy_to_pil(pipeline_output.images)[0] 
+ + result = self.postprocess_size_and_mask(pipeline.numpy_to_pil(pipeline_output.images)[0]) # Seam paint if this is our first pass (seam_size set to 0 during seam painting) if seam_size > 0: @@ -295,6 +296,10 @@ def make_image(x_T): def sample_to_image(self, samples)->Image.Image: gen_result = super().sample_to_image(samples).convert('RGB') + return self.postprocess_size_and_mask(gen_result) + + + def postprocess_size_and_mask(self, gen_result: Image.Image) -> Image.Image: debug_image(gen_result, "gen_result", debug_status=self.enable_image_debugging) # Resize if necessary @@ -304,7 +309,7 @@ def sample_to_image(self, samples)->Image.Image: if self.pil_image is None or self.pil_mask is None: return gen_result - corrected_result = super().repaste_and_color_correct(gen_result, self.pil_image, self.pil_mask, self.mask_blur_radius) + corrected_result = self.repaste_and_color_correct(gen_result, self.pil_image, self.pil_mask, self.mask_blur_radius) debug_image(corrected_result, "corrected_result", debug_status=self.enable_image_debugging) return corrected_result From 45e34d52b95027b8edf840292154c07fc57b57e1 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 31 Dec 2022 14:20:24 -0500 Subject: [PATCH 107/199] fix requirements clash between numba and numpy 1.24 --- environments-and-requirements/requirements-base.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environments-and-requirements/requirements-base.txt b/environments-and-requirements/requirements-base.txt index ba95913651e..16a2c7ca002 100644 --- a/environments-and-requirements/requirements-base.txt +++ b/environments-and-requirements/requirements-base.txt @@ -15,7 +15,7 @@ huggingface-hub imageio imageio-ffmpeg kornia -numpy +numpy==1.23.* omegaconf opencv-python picklescan From 4e3f5cc1ecc5202d2f9a499b9bb4548c2e51f88c Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 31 Dec 2022 19:09:13 -0800 Subject: [PATCH 108/199] diffusers: allow inpainting model to do non-inpainting tasks --- ldm/invoke/generator/diffusers_pipeline.py | 19 +++++++++++++++---- ldm/invoke/generator/txt2img.py | 6 ++++-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 7174ffac0f1..0e8d82f935c 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -73,10 +73,10 @@ class AddsMaskLatents: This class assumes the same mask and base image should apply to all items in the batch. """ forward: Callable[[torch.Tensor, torch.Tensor, torch.Tensor], torch.Tensor] - mask: torch.FloatTensor - initial_image_latents: torch.FloatTensor + mask: torch.Tensor + initial_image_latents: torch.Tensor - def __call__(self, latents: torch.FloatTensor, t: torch.Tensor, text_embeddings: torch.FloatTensor) -> torch.Tensor: + def __call__(self, latents: torch.Tensor, t: torch.Tensor, text_embeddings: torch.Tensor) -> torch.Tensor: model_input = self.add_mask_channels(latents) return self.forward(model_input, t, text_embeddings) @@ -440,7 +440,18 @@ def step(self, t: torch.Tensor, latents: torch.Tensor, return step_output def _unet_forward(self, latents, t, text_embeddings): - # predict the noise residual + """predict the noise residual""" + if is_inpainting_model(self.unet) and latents.size(1) == 4: + # Pad out normal non-inpainting inputs for an inpainting model. 
+ # FIXME: There are too many layers of functions and we have too many different ways of + # overriding things! This should get handled in a way more consistent with the other + # use of AddsMaskLatents. + latents = AddsMaskLatents( + self._unet_forward, + mask=torch.ones_like(latents[:1, :1], device=latents.device, dtype=latents.dtype), + initial_image_latents=torch.zeros_like(latents[:1], device=latents.device, dtype=latents.dtype) + ).add_mask_channels(latents) + return self.unet(latents, t, encoder_hidden_states=text_embeddings).sample def img2img_from_embeddings(self, diff --git a/ldm/invoke/generator/txt2img.py b/ldm/invoke/generator/txt2img.py index 71749432625..e8e3a90d195 100644 --- a/ldm/invoke/generator/txt2img.py +++ b/ldm/invoke/generator/txt2img.py @@ -52,15 +52,17 @@ def make_image(x_T) -> PIL.Image.Image: # returns a tensor filled with random numbers from a normal distribution def get_noise(self,width,height): device = self.model.device + # limit noise to only the diffusion image channels, not the mask channels + input_channels = min(self.latent_channels, 4) if self.use_mps_noise or device.type == 'mps': x = torch.randn([1, - self.latent_channels, + input_channels, height // self.downsampling_factor, width // self.downsampling_factor], device='cpu').to(device) else: x = torch.randn([1, - self.latent_channels, + input_channels, height // self.downsampling_factor, width // self.downsampling_factor], device=device) From 9a9068823cb2df99ea00edfa1c9f0d754a7a5efe Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sun, 1 Jan 2023 15:10:10 -0500 Subject: [PATCH 109/199] start expanding model_cache functionality --- configs/INITIAL_MODELS.yaml | 1 + ldm/invoke/model_cache.py | 49 ++++++++++++++++--------------------- 2 files changed, 22 insertions(+), 28 deletions(-) diff --git a/configs/INITIAL_MODELS.yaml b/configs/INITIAL_MODELS.yaml index 64158186faf..2432e1f453c 100644 --- a/configs/INITIAL_MODELS.yaml +++ b/configs/INITIAL_MODELS.yaml @@ -37,6 +37,7 @@ waifu-diffusion-1.3: vae: repo_id: stabilityai/sd-vae-ft-mse-original file: vae-ft-mse-840000-ema-pruned.ckpt + recommended: False width: 512 height: 512 trinart-2.0: diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_cache.py index 896185de68a..c15e3ffadf6 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_cache.py @@ -137,6 +137,12 @@ def model_info(self, model_name:str)->dict: return None return self.config[model_name] + def models(self)->list[str]: + ''' + Return a list consisting of all the names of models defined in models.yaml + ''' + return self.config.keys() + def is_legacy(self,model_name:str)->bool: ''' Return true if this is a legacy (.ckpt) model @@ -151,37 +157,22 @@ def list_models(self) -> dict: 'description': description, }, model_name2: { etc } + Please use model_cache.models() to get all the model names, + model_cache.model_info('model-name') to get the stanza for the model + named 'model-name', and model_cache.config to get the full OmegaConf + object derived from models.yaml ''' models = {} for name in self.config: - description = self.config[name].description if 'description' in self.config[name] else '' - weights = self.config[name].weights if 'weights' in self.config[name] else '' - config = self.config[name].config if 'config' in self.config[name] else '' - width = self.config[name].width if 'width' in self.config[name] else 512 - height = self.config[name].height if 'height' in self.config[name] else 512 - default = self.config[name].default if 'default' in self.config[name] else False 
- vae = self.config[name].vae if 'vae' in self.config[name] else '' - - if isinstance(vae, DictConfig): - vae = OmegaConf.to_object(vae) # so it can be JSON-serialized - - if self.current_model == name: - status = 'active' - elif name in self.models: - status = 'cached' - else: - status = 'not loaded' - - models[name]={ - 'status' : status, - 'description' : description, - 'weights': weights, - 'config': config, - 'width': width, - 'height': height, - 'vae': vae, - 'default': default - } + stanza = self.config[name] + format = stanza.get('format','diffusers') + config = stanza.get('config','no config') + models[name] = dict( + description = stanza.get('description',None), + format = 'vae' if 'VAE/default' in config else format, + status = 'active' if self.current_model == name else 'cached' if name is self.models else 'not loaded', + ) + return models def print_models(self) -> None: @@ -190,6 +181,8 @@ def print_models(self) -> None: ''' models = self.list_models() for name in models: + if models[name]['format'] == 'vae': + continue line = f'{name:25s} {models[name]["status"]:>10s} {models[name]["description"]}' if models[name]['status'] == 'active': line = f'\033[1m{line}\033[0m' From 76fbadeaa62ee391bc430a86d49a2452662f1f51 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 2 Jan 2023 16:35:43 -0500 Subject: [PATCH 110/199] add import_ckpt_model() and import_diffuser_model() methods to model_manager - in addition, model_cache.py is now renamed to model_manager.py --- backend/invoke_ai_web_server.py | 22 ++-- ldm/generate.py | 14 +-- ldm/invoke/CLI.py | 20 ++-- .../{model_cache.py => model_manager.py} | 111 ++++++++++++++++-- ldm/util.py | 33 ++++++ scripts/configure_invokeai.py | 4 +- 6 files changed, 165 insertions(+), 39 deletions(-) rename ldm/invoke/{model_cache.py => model_manager.py} (85%) diff --git a/backend/invoke_ai_web_server.py b/backend/invoke_ai_web_server.py index e89eaa66e8f..5b64e8f7aef 100644 --- a/backend/invoke_ai_web_server.py +++ b/backend/invoke_ai_web_server.py @@ -296,7 +296,7 @@ def load_socketio_listeners(self, socketio): def handle_request_capabilities(): print(f">> System config requested") config = self.get_system_config() - config["model_list"] = self.generate.model_cache.list_models() + config["model_list"] = self.generate.model_manager.list_models() config["infill_methods"] = infill_methods() socketio.emit("systemConfig", config) @@ -309,7 +309,7 @@ def handle_search_models(search_folder: str): {'search_folder': None, 'found_models': None}, ) else: - search_folder, found_models = self.generate.model_cache.search_models(search_folder) + search_folder, found_models = self.generate.model_manager.search_models(search_folder) socketio.emit( "foundModels", {'search_folder': search_folder, 'found_models': found_models}, @@ -328,17 +328,17 @@ def handle_add_model(new_model_config: dict): del new_model_config['name'] model_attributes = new_model_config update = False - current_model_list = self.generate.model_cache.list_models() + current_model_list = self.generate.model_manager.list_models() if model_name in current_model_list: update = True print(f">> Adding New Model: {model_name}") - self.generate.model_cache.add_model( + self.generate.model_manager.add_model( model_name=model_name, model_attributes=model_attributes, clobber=True) - self.generate.model_cache.commit(opt.conf) + self.generate.model_manager.commit(opt.conf) - new_model_list = self.generate.model_cache.list_models() + new_model_list = self.generate.model_manager.list_models() socketio.emit( 
"newModelAdded", {"new_model_name": model_name, @@ -356,9 +356,9 @@ def handle_add_model(new_model_config: dict): def handle_delete_model(model_name: str): try: print(f">> Deleting Model: {model_name}") - self.generate.model_cache.del_model(model_name) - self.generate.model_cache.commit(opt.conf) - updated_model_list = self.generate.model_cache.list_models() + self.generate.model_manager.del_model(model_name) + self.generate.model_manager.commit(opt.conf) + updated_model_list = self.generate.model_manager.list_models() socketio.emit( "modelDeleted", {"deleted_model_name": model_name, @@ -377,7 +377,7 @@ def handle_set_model(model_name: str): try: print(f">> Model change requested: {model_name}") model = self.generate.set_model(model_name) - model_list = self.generate.model_cache.list_models() + model_list = self.generate.model_manager.list_models() if model is None: socketio.emit( "modelChangeFailed", @@ -789,7 +789,7 @@ def handle_delete_image(url, thumbnail, uuid, category): # App Functions def get_system_config(self): - model_list: dict = self.generate.model_cache.list_models() + model_list: dict = self.generate.model_manager.list_models() active_model_name = None for model_name, model_dict in model_list.items(): diff --git a/ldm/generate.py b/ldm/generate.py index 9c9cc9bff53..faa3689940c 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -38,7 +38,7 @@ from ldm.invoke.generator.inpaint import infill_methods from ldm.invoke.globals import Globals from ldm.invoke.image_util import InitImageResizer -from ldm.invoke.model_cache import ModelCache +from ldm.invoke.model_manager import ModelCache from ldm.invoke.pngwriter import PngWriter from ldm.invoke.seamless import configure_model_padding from ldm.invoke.txt2mask import Txt2Mask @@ -163,7 +163,7 @@ def __init__( mconfig = OmegaConf.load(conf) self.height = None self.width = None - self.model_cache = None + self.model_manager = None self.iterations = 1 self.steps = 50 self.cfg_scale = 7.5 @@ -210,8 +210,8 @@ def __init__( self.precision = choose_precision(self.device) # model caching system for fast switching - self.model_cache = ModelCache(mconfig,self.device,self.precision,max_loaded_models=max_loaded_models) - self.model_name = model or self.model_cache.default_model() or FALLBACK_MODEL_NAME + self.model_manager = ModelCache(mconfig,self.device,self.precision,max_loaded_models=max_loaded_models) + self.model_name = model or self.model_manager.default_model() or FALLBACK_MODEL_NAME # for VRAM usage statistics self.session_peakmem = torch.cuda.max_memory_allocated() if self._has_cuda else None @@ -822,7 +822,7 @@ def set_model(self,model_name): return self.model # the model cache does the loading and offloading - cache = self.model_cache + cache = self.model_manager if not cache.valid_model(model_name): print(f'** "{model_name}" is not a known model name. 
Please check your models.yaml file') return self.model @@ -967,7 +967,7 @@ def sample_to_lowres_estimated_image(self, samples): return self._make_base().sample_to_lowres_estimated_image(samples) def is_legacy_model(self,model_name)->bool: - return self.model_cache.is_legacy(model_name) + return self.model_manager.is_legacy(model_name) def _set_sampler(self): if isinstance(self.model, DiffusionPipeline): @@ -1027,7 +1027,7 @@ def _set_scheduler(self): sampler_class = scheduler_map[self.sampler_name] msg = f'>> Setting Sampler to {self.sampler_name} ({sampler_class.__name__})' self.sampler = sampler_class.from_pretrained( - self.model_cache.model_name_or_path(self.model_name), + self.model_manager.model_name_or_path(self.model_name), subfolder="scheduler" ) else: diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index b9203aa09d6..995364e709c 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -130,7 +130,7 @@ def main(): # try to autoconvert new models # autoimport new .ckpt files if path := opt.autoconvert: - gen.model_cache.autoconvert_weights( + gen.model_manager.autoconvert_weights( conf_path=opt.conf, weights_directory=path, ) @@ -454,7 +454,7 @@ def do_command(command:str, gen, opt:Args, completer) -> tuple: operation = None elif command.startswith('!models'): - gen.model_cache.print_models() + gen.model_manager.print_models() completer.add_history(command) operation = None @@ -607,7 +607,7 @@ def optimize_model(ckpt_path:str, gen, opt, completer): if diffuser_path.exists(): print(f'** {basename} is already optimized. Will not overwrite.') return - new_config = gen.model_cache.convert_and_import(ckpt_path, diffuser_path) + new_config = gen.model_manager.convert_and_import(ckpt_path, diffuser_path) if write_config_file(opt.conf, gen, basename, new_config, clobber=False): completer.add_model(basename) @@ -616,13 +616,13 @@ def del_config(model_name:str, gen, opt, completer): if model_name == current_model: print("** Can't delete active model. !switch to another model first. **") return - gen.model_cache.del_model(model_name) - gen.model_cache.commit(opt.conf) + gen.model_manager.del_model(model_name) + gen.model_manager.commit(opt.conf) print(f'** {model_name} deleted') completer.del_model(model_name) def edit_config(model_name:str, gen, opt, completer): - config = gen.model_cache.config + config = gen.model_manager.config if model_name not in config: print(f'** Unknown model {model_name}') @@ -654,22 +654,22 @@ def write_config_file(conf_path, gen, model_name, new_config, clobber=False, mak try: print('>> Verifying that new model loads...') - gen.model_cache.add_model(model_name, new_config, clobber) + gen.model_manager.add_model(model_name, new_config, clobber) assert gen.set_model(model_name) is not None, 'model failed to load' except AssertionError as e: traceback.print_exc() print(f'** aborting **') try: - gen.model_cache.del_model(model_name) + gen.model_manager.del_model(model_name) except Exception: pass return False if make_default: print('making this default') - gen.model_cache.set_default_model(model_name) + gen.model_manager.set_default_model(model_name) - gen.model_cache.commit(conf_path) + gen.model_manager.commit(conf_path) do_switch = input(f'Keep model loaded? 
[y]') if len(do_switch)==0 or do_switch[0] in ('y','Y'): diff --git a/ldm/invoke/model_cache.py b/ldm/invoke/model_manager.py similarity index 85% rename from ldm/invoke/model_cache.py rename to ldm/invoke/model_manager.py index 977c53955e6..33cdca5e07e 100644 --- a/ldm/invoke/model_cache.py +++ b/ldm/invoke/model_manager.py @@ -18,6 +18,7 @@ import warnings from pathlib import Path from typing import Union, Any +from ldm.util import download_with_progress_bar import torch import transformers @@ -120,7 +121,7 @@ def default_model(self) -> str | None: def set_default_model(self,model_name:str) -> None: ''' Set the default model. The change will not take - effect until you call model_cache.commit() + effect until you call model_manager.commit() ''' assert model_name in self.models,f"unknown model '{model_name}'" @@ -131,17 +132,17 @@ def set_default_model(self,model_name:str) -> None: def model_info(self, model_name:str)->dict: ''' - Given a model name returns the config object describing it. + Given a model name returns the OmegaConf (dict-like) object describing it. ''' if model_name not in self.config: return None return self.config[model_name] - def models(self)->list[str]: + def model_names(self)->list[str]: ''' Return a list consisting of all the names of models defined in models.yaml ''' - return self.config.keys() + return list(self.config.keys()) def is_legacy(self,model_name:str)->bool: ''' @@ -157,9 +158,9 @@ def list_models(self) -> dict: 'description': description, }, model_name2: { etc } - Please use model_cache.models() to get all the model names, - model_cache.model_info('model-name') to get the stanza for the model - named 'model-name', and model_cache.config to get the full OmegaConf + Please use model_manager.models() to get all the model names, + model_manager.model_info('model-name') to get the stanza for the model + named 'model-name', and model_manager.config to get the full OmegaConf object derived from models.yaml ''' models = {} @@ -469,6 +470,83 @@ def scan_model(self, model_name, checkpoint): else: print('>> Model scanned ok!') + def import_diffuser_model(self, + repo_or_path:Union[str,Path], + model_name:str=None, + description:str=None, + commit_to_conf:Path=None, + )->bool: + ''' + Attempts to install the indicated diffuser model and returns True if successful. + + "repo_or_path" can be either a repo-id or a path-like object corresponding to the + top of a downloaded diffusers directory. + + You can optionally provide a model name and/or description. If not provided, + then these will be derived from the repo name. If you provide a commit_to_conf + path to the configuration file, then the new entry will be committed to the + models.yaml file. + ''' + model_name = model_name or Path(repo_or_path).stem + description = description or f'imported diffusers model {model_name}' + new_config = dict( + description=description, + format='diffusers', + ) + if isinstance(repo_or_path,Path) and repo_or_path.exists(): + new_config.update(path=repo_or_path) + else: + new_config.update(repo_id=repo_or_path) + + self.add_model(model_name, new_config, True) + if commit_to_conf: + self.commit(commit_to_conf) + return True + + def import_ckpt_model(self, + weights:Union[str,Path], + config:Union[str,Path]='configs/stable-diffusion/v1-inference.yaml', + model_name:str=None, + description:str=None, + commit_to_conf:Path=None, + )->bool: + ''' + Attempts to install the indicated ckpt file and returns True if successful. 
+ + "weights" can be either a path-like object corresponding to a local .ckpt file + or a http/https URL pointing to a remote model. + + "config" is the model config file to use with this ckpt file. It defaults to + v1-inference.yaml. If a URL is provided, the config will be downloaded. + + You can optionally provide a model name and/or description. If not provided, + then these will be derived from the weight file name. If you provide a commit_to_conf + path to the configuration file, then the new entry will be committed to the + models.yaml file. + ''' + weights_path = self._resolve_path(weights,'models/ldm/stable-diffusion-v1') + config_path = self._resolve_path(config,'configs/stable-diffusion') + + if weights_path is None or not weights_path.exists(): + return False + if config_path is None or not config_path.exists(): + return False + + model_name = model_name or Path(basename).stem + description = description or f'imported stable diffusion weights file {model_name}' + new_config = dict( + weights=str(weights_path), + config=str(config_path), + description=description, + format='ckpt', + width=512, + height=512 + ) + self.add_model(model_name, new_config, True) + if commit_to_conf: + self.commit(commit_to_conf) + return True + def autoconvert_weights( self, conf_path:Path, @@ -538,8 +616,8 @@ def del_config(self, model_name:str, gen, opt, completer): if model_name == current_model: print("** Can't delete active model. !switch to another model first. **") return - gen.model_cache.del_model(model_name) - gen.model_cache.commit(opt.conf) + gen.model_manager.del_model(model_name) + gen.model_manager.commit(opt.conf) print(f'** {model_name} deleted') completer.del_model(model_name) @@ -599,6 +677,21 @@ def preamble(self) -> str: # was trained on. ''') + def _resolve_path(self, source:Union[str,Path], dest_directory:str)->Path: + resolved_path = None + if source.startswith('http'): + basename = os.path.basename(source) + if not os.path.isabs(dest_directory): + dest_directory = os.path.join(Globals.root,dest_directory) + dest = os.path.join(dest_directory,basename) + if download_with_progress_bar(source,dest): + resolved_path = Path(dest) + else: + if not os.path.isabs(source): + source = os.path.join(Globals.root,source) + resolved_path = Path(source) + return resolved_path + def _invalidate_cached_model(self,model_name:str) -> None: self.offload_model(model_name) if model_name in self.stack: diff --git a/ldm/util.py b/ldm/util.py index 2039faf0f78..282a56c3e58 100644 --- a/ldm/util.py +++ b/ldm/util.py @@ -5,9 +5,14 @@ from inspect import isfunction from queue import Queue from threading import Thread +from urllib import request +from tqdm import tqdm +from pathlib import Path import numpy as np import torch +import os +import traceback from PIL import Image, ImageDraw, ImageFont @@ -259,3 +264,31 @@ def debug_image(debug_image, debug_text, debug_show=True, debug_result=False, de if debug_result: return image_copy +#------------------------------------- +class ProgressBar(): + def __init__(self,model_name='file'): + self.pbar = None + self.name = model_name + + def __call__(self, block_num, block_size, total_size): + if not self.pbar: + self.pbar=tqdm(desc=self.name, + initial=0, + unit='iB', + unit_scale=True, + unit_divisor=1000, + total=total_size) + self.pbar.update(block_size) + +def download_with_progress_bar(url:str, dest:Path)->bool: + try: + if not os.path.exists(dest): + os.makedirs((os.path.dirname(dest) or '.'), exist_ok=True) + 
request.urlretrieve(url,dest,ProgressBar(os.path.basename(dest))) + return True + else: + return True + except OSError: + print(traceback.format_exc()) + return False + diff --git a/scripts/configure_invokeai.py b/scripts/configure_invokeai.py index c26359cc357..202c53e7926 100755 --- a/scripts/configure_invokeai.py +++ b/scripts/configure_invokeai.py @@ -40,10 +40,10 @@ transformers.logging.set_verbosity_error() try: - from ldm.invoke.model_cache import ModelCache + from ldm.invoke.model_manager import ModelCache except ImportError: sys.path.append('.') - from ldm.invoke.model_cache import ModelCache + from ldm.invoke.model_manager import ModelCache #--------------------------globals----------------------- Model_dir = 'models' From 8ef3c6824a2806891def7d219fa9bccafe2aa4dd Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 2 Jan 2023 17:13:10 -0500 Subject: [PATCH 111/199] allow "recommended" flag to be optional in INITIAL_MODELS.yaml --- configs/INITIAL_MODELS.yaml | 4 ++-- scripts/configure_invokeai.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/configs/INITIAL_MODELS.yaml b/configs/INITIAL_MODELS.yaml index 2432e1f453c..f994db5c7b4 100644 --- a/configs/INITIAL_MODELS.yaml +++ b/configs/INITIAL_MODELS.yaml @@ -82,12 +82,12 @@ voxel_art-1.0: width: 512 height: 512 ft-mse-improved-autoencoder-840000: - description: StabilityAI improved autoencoder fine-tuned for human faces for legacy .ckpt models only (335 MB) + description: StabilityAI improved autoencoder fine-tuned for human faces. Use with legacy .ckpt models ONLY (335 MB) repo_id: stabilityai/sd-vae-ft-mse-original format: ckpt config: VAE/default file: vae-ft-mse-840000-ema-pruned.ckpt - recommended: True + recommended: False width: 512 height: 512 trinart_vae: diff --git a/scripts/configure_invokeai.py b/scripts/configure_invokeai.py index 202c53e7926..4a91c13d286 100755 --- a/scripts/configure_invokeai.py +++ b/scripts/configure_invokeai.py @@ -169,7 +169,7 @@ def select_datasets(action:str): counter += 1 else: for ds in Datasets.keys(): - if Datasets[ds]['recommended']: + if Datasets[ds].get('recommended',False): datasets[ds]=counter counter += 1 @@ -193,7 +193,7 @@ def select_datasets(action:str): def recommended_datasets()->dict: datasets = dict() for ds in Datasets.keys(): - if Datasets[ds]['recommended']: + if Datasets[ds].get('recommended',False): datasets[ds]=True return datasets From d8897b7a65ddc15115aa7edb568c15acc2d517cb Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 2 Jan 2023 17:24:43 -0500 Subject: [PATCH 112/199] configure_invokeai now downloads VAE diffusers in advance --- scripts/configure_invokeai.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scripts/configure_invokeai.py b/scripts/configure_invokeai.py index 4a91c13d286..d547a02aabc 100755 --- a/scripts/configure_invokeai.py +++ b/scripts/configure_invokeai.py @@ -343,6 +343,8 @@ def _download_repo_or_file(mconfig:DictConfig, access_token:str, precision:str=' path = _download_ckpt_weights(mconfig, access_token) else: path = _download_diffusion_weights(mconfig, access_token, precision=precision) + if 'vae' in mconfig and 'repo_id' in mconfig['vae']: + _download_diffusion_weights(mconfig['vae'], access_token, precision=precision) return path def _download_ckpt_weights(mconfig:DictConfig, access_token:str)->Path: @@ -358,7 +360,7 @@ def _download_ckpt_weights(mconfig:DictConfig, access_token:str)->Path: def _download_diffusion_weights(mconfig:DictConfig, access_token:str, precision:str='float32'): 
repo_id = mconfig['repo_id'] - model_class = StableDiffusionGeneratorPipeline if mconfig['format']=='diffusers' else AutoencoderKL + model_class = StableDiffusionGeneratorPipeline if mconfig.get('format',None)=='diffusers' else AutoencoderKL extra_arg_list = [{'revision':'fp16'},{}] if precision=='float16' else [{}] path = None for extra_args in extra_arg_list: From dfa5f56004518c3c661d6e89374474f3b787c2bf Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 2 Jan 2023 17:32:23 -0500 Subject: [PATCH 113/199] rename ModelCache to ModelManager --- ldm/generate.py | 4 ++-- ldm/invoke/model_manager.py | 2 +- scripts/configure_invokeai.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index faa3689940c..70a887a693a 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -38,7 +38,7 @@ from ldm.invoke.generator.inpaint import infill_methods from ldm.invoke.globals import Globals from ldm.invoke.image_util import InitImageResizer -from ldm.invoke.model_manager import ModelCache +from ldm.invoke.model_manager import ModelManager from ldm.invoke.pngwriter import PngWriter from ldm.invoke.seamless import configure_model_padding from ldm.invoke.txt2mask import Txt2Mask @@ -210,7 +210,7 @@ def __init__( self.precision = choose_precision(self.device) # model caching system for fast switching - self.model_manager = ModelCache(mconfig,self.device,self.precision,max_loaded_models=max_loaded_models) + self.model_manager = ModelManager(mconfig,self.device,self.precision,max_loaded_models=max_loaded_models) self.model_name = model or self.model_manager.default_model() or FALLBACK_MODEL_NAME # for VRAM usage statistics diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index 33cdca5e07e..d699468e55c 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -33,7 +33,7 @@ DEFAULT_MAX_MODELS=2 -class ModelCache(object): +class ModelManager(object): def __init__(self, config:OmegaConf, device_type:str, precision:str, max_loaded_models=DEFAULT_MAX_MODELS): ''' Initialize with the path to the models.yaml config file, diff --git a/scripts/configure_invokeai.py b/scripts/configure_invokeai.py index d547a02aabc..c3a583aff2c 100755 --- a/scripts/configure_invokeai.py +++ b/scripts/configure_invokeai.py @@ -40,10 +40,10 @@ transformers.logging.set_verbosity_error() try: - from ldm.invoke.model_manager import ModelCache + from ldm.invoke.model_manager import ModelManager except ImportError: sys.path.append('.') - from ldm.invoke.model_manager import ModelCache + from ldm.invoke.model_manager import ModelManager #--------------------------globals----------------------- Model_dir = 'models' From faa30e4a26eed4af1ac4703577d6bd0129481538 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 2 Jan 2023 17:33:08 -0500 Subject: [PATCH 114/199] remove support for `repo_name` in models.yaml --- ldm/invoke/model_manager.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index d699468e55c..a217f567f0e 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -424,8 +424,6 @@ def model_name_or_path(self, model_name:Union[str,DictConfig]) -> str | Path: return path elif 'repo_id' in mconfig: return mconfig['repo_id'] - elif 'repo_name' in mconfig: - return mconfig['repo_name'] else: raise ValueError("Model config must specify either repo_id or path.") From c32a7d2bee4a35856506c580fda579a137d1d0dd Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 2 
Jan 2023 18:46:25 -0500 Subject: [PATCH 115/199] check for and refuse to load embeddings trained on incompatible models --- ldm/modules/textual_inversion_manager.py | 13 +++++++++---- scripts/invoke.py | 2 +- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/ldm/modules/textual_inversion_manager.py b/ldm/modules/textual_inversion_manager.py index 543fa52111e..f5eecaa949e 100644 --- a/ldm/modules/textual_inversion_manager.py +++ b/ldm/modules/textual_inversion_manager.py @@ -96,10 +96,13 @@ def _add_textual_inversion(self, trigger_str, embedding) -> int: )) return trigger_token_id - except ValueError: - traceback.print_exc() - print(f">> TextualInversionManager was unable to add a textual inversion with trigger string {trigger_str}.") - raise + except ValueError as e: + if str(e).startswith('Warning'): + print(f">> {str(e)}") + else: + traceback.print_exc() + print(f">> TextualInversionManager was unable to add a textual inversion with trigger string {trigger_str}.") + raise def has_textual_inversion_for_trigger_string(self, trigger_string: str) -> bool: @@ -156,6 +159,8 @@ def _get_or_create_token_id_and_assign_embedding(self, token_str: str, embedding token_id = self.tokenizer.convert_tokens_to_ids(token_str) if token_id == self.tokenizer.unk_token_id: raise RuntimeError(f"Unable to find token id for token '{token_str}'") + if self.text_encoder.get_input_embeddings().weight.data[token_id].shape != embedding.shape: + raise ValueError(f"Warning. Cannot load embedding for {token_str}. It was trained on a model with token dimension {embedding.shape[0]}, but the current model has token dimension {self.text_encoder.get_input_embeddings().weight.data[token_id].shape[0]}.") self.text_encoder.get_input_embeddings().weight.data[token_id] = embedding return token_id diff --git a/scripts/invoke.py b/scripts/invoke.py index d38bda4cb5b..cfe6b061144 100755 --- a/scripts/invoke.py +++ b/scripts/invoke.py @@ -2,7 +2,7 @@ import sys import os -if sys.platform == 'Darsin': +if sys.platform == 'darwin': os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1" import ldm.invoke.CLI From 0ee5fc3222a4281accdce24551a9ae0134456f96 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Mon, 2 Jan 2023 15:46:43 -0800 Subject: [PATCH 116/199] models.yaml.example: s/repo_name/repo_id and remove extra INITIAL_MODELS now that the main one has diffusers models in it. 
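# A minimal standalone sketch (illustration only, not part of the patch) of the
# stanza shape a diffusers entry in models.yaml takes after the
# repo_name -> repo_id rename, and of the repo_id/path fallback that
# ModelManager.model_name_or_path() applies.  The entry mirrors models.yaml.example.
from omegaconf import OmegaConf

config = OmegaConf.create("""
diffusers-1.5:
  description: Stable Diffusion v1.5
  format: diffusers
  repo_id: runwayml/stable-diffusion-v1-5
""")
mconfig = config['diffusers-1.5']
if 'path' in mconfig:
    source = mconfig['path']
elif 'repo_id' in mconfig:
    source = mconfig['repo_id']
else:
    raise ValueError("Model config must specify either repo_id or path.")
print(source)   # -> runwayml/stable-diffusion-v1-5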
--- configs/INITIAL_MODELS.diffusers.yaml | 56 --------------------------- configs/models.yaml.example | 10 ++--- 2 files changed, 5 insertions(+), 61 deletions(-) delete mode 100644 configs/INITIAL_MODELS.diffusers.yaml diff --git a/configs/INITIAL_MODELS.diffusers.yaml b/configs/INITIAL_MODELS.diffusers.yaml deleted file mode 100644 index 73a8d77c578..00000000000 --- a/configs/INITIAL_MODELS.diffusers.yaml +++ /dev/null @@ -1,56 +0,0 @@ -stable-diffusion-1.5: - description: The newest Stable Diffusion version 1.5 weight file (4.27 GB) - repo_id: runwayml/stable-diffusion-v1-5 - format: diffusers - recommended: true - width: 512 - height: 512 -inpainting-1.5: - description: RunwayML SD 1.5 model optimized for inpainting (4.27 GB) - repo_id: runwayml/stable-diffusion-inpainting - format: diffusers - recommended: True - width: 512 - height: 512 -ft-mse-improved-autoencoder-840000: - description: StabilityAI improved autoencoder fine-tuned for human faces (recommended; 335 MB) - repo_id: stabilityai/sd-vae-ft-mse - format: diffusers - recommended: True - width: 512 - height: 512 -stable-diffusion-1.4: - description: The original Stable Diffusion version 1.4 weight file (4.27 GB) - repo_id: CompVis/stable-diffusion-v1-4 - format: diffusers - recommended: False - width: 512 - height: 512 -waifu-diffusion-1.3: - description: Stable Diffusion 1.4 fine tuned on anime-styled images (4.27) - repo_id: hakurei/waifu-diffusion - format: diffusers - recommended: False - width: 512 - height: 512 -trinart-2.0: - description: An SD model finetuned with ~40,000 assorted high resolution manga/anime-style pictures (2.13 GB) - repo_id: naclbit/trinart_stable_diffusion_v2 - format: diffusers - recommended: False - width: 512 - height: 512 -papercut-1.0: - description: SD 1.5 fine-tuned for papercut art (use "PaperCut" in your prompts) (2.13 GB) - repo_id: Fictiverse/Stable_Diffusion_PaperCut_Model - format: diffusers - recommended: False - width: 512 - height: 512 -voxel_art-1.0: - description: Stable Diffusion trained on voxel art (use "VoxelArt" in your prompts) (4.27 GB) - repo_id: Fictiverse/Stable_Diffusion_VoxelArt_Model - format: diffusers - recommended: False - width: 512 - height: 512 diff --git a/configs/models.yaml.example b/configs/models.yaml.example index 5187d29b3ea..98f8f77e62c 100644 --- a/configs/models.yaml.example +++ b/configs/models.yaml.example @@ -8,22 +8,22 @@ diffusers-1.4: description: 🤗🧨 Stable Diffusion v1.4 format: diffusers - repo_name: CompVis/stable-diffusion-v1-4 + repo_id: CompVis/stable-diffusion-v1-4 diffusers-1.5: description: 🤗🧨 Stable Diffusion v1.5 format: diffusers - repo_name: runwayml/stable-diffusion-v1-5 + repo_id: runwayml/stable-diffusion-v1-5 default: true diffusers-1.5+mse: description: 🤗🧨 Stable Diffusion v1.5 + MSE-finetuned VAE format: diffusers - repo_name: runwayml/stable-diffusion-v1-5 + repo_id: runwayml/stable-diffusion-v1-5 vae: - repo_name: stabilityai/sd-vae-ft-mse + repo_id: stabilityai/sd-vae-ft-mse diffusers-inpainting-1.5: description: 🤗🧨 inpainting for Stable Diffusion v1.5 format: diffusers - repo_name: runwayml/stable-diffusion-inpainting + repo_id: runwayml/stable-diffusion-inpainting stable-diffusion-1.5: description: The newest Stable Diffusion version 1.5 weight file (4.27 GB) weights: models/ldm/stable-diffusion-v1/v1-5-pruned-emaonly.ckpt From 33d4ee6279107d26b428ec344f86d235a7b65e09 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 2 Jan 2023 19:39:05 -0500 Subject: [PATCH 117/199] add MVP textual inversion script --- 
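# A standalone sketch (illustration only) of the artifact save_progress() in the
# new script writes: a {placeholder_token: embedding} dict saved with torch.save.
# The embedding's first dimension is the "token dimension" that the compatibility
# check added in the previous patch compares against the current model.  The
# trigger string and the 768-dim size (SD 1.x) are examples.
import torch

placeholder_token = "<my-concept>"            # hypothetical trigger string
learned_embeds = torch.randn(768).cpu()
torch.save({placeholder_token: learned_embeds}, "learned_embeds.bin")

reloaded = torch.load("learned_embeds.bin")
assert reloaded[placeholder_token].shape[0] == 768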
.../requirements-base.txt | 1 + scripts/textual_inversion.py | 764 ++++++++++++++++++ 2 files changed, 765 insertions(+) create mode 100755 scripts/textual_inversion.py diff --git a/environments-and-requirements/requirements-base.txt b/environments-and-requirements/requirements-base.txt index 16a2c7ca002..35072d309e4 100644 --- a/environments-and-requirements/requirements-base.txt +++ b/environments-and-requirements/requirements-base.txt @@ -1,6 +1,7 @@ # pip will resolve the version which matches torch accelerate albumentations +datasets diffusers[torch]~=0.11 einops eventlet diff --git a/scripts/textual_inversion.py b/scripts/textual_inversion.py new file mode 100755 index 00000000000..04e22aaf017 --- /dev/null +++ b/scripts/textual_inversion.py @@ -0,0 +1,764 @@ +#!/usr/bin/env python + +# This script was copied from +# https://github.com/huggingface/diffusers/blob/main/examples/textual_inversion/textual_inversion.py +# on January 2, 2023 +# and modified slightly by Lincoln Stein (@lstein) to work with InvokeAI + +import argparse +import logging +import math +import os +import random +from pathlib import Path +from typing import Optional + +import numpy as np +import torch +import torch.nn.functional as F +import torch.utils.checkpoint +from torch.utils.data import Dataset + +import datasets +import diffusers +import PIL +import transformers +from accelerate import Accelerator +from accelerate.logging import get_logger +from accelerate.utils import set_seed +from diffusers import AutoencoderKL, DDPMScheduler, StableDiffusionPipeline, UNet2DConditionModel +from diffusers.optimization import get_scheduler +from diffusers.utils import check_min_version +from diffusers.utils.import_utils import is_xformers_available +from huggingface_hub import HfFolder, Repository, whoami + +# invokeai stuff +from ldm.invoke.globals import Globals +from omegaconf import OmegaConf + +# TODO: remove and import from diffusers.utils when the new version of diffusers is released +from packaging import version +from PIL import Image +from torchvision import transforms +from tqdm.auto import tqdm +from transformers import CLIPTextModel, CLIPTokenizer + +if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"): + PIL_INTERPOLATION = { + "linear": PIL.Image.Resampling.BILINEAR, + "bilinear": PIL.Image.Resampling.BILINEAR, + "bicubic": PIL.Image.Resampling.BICUBIC, + "lanczos": PIL.Image.Resampling.LANCZOS, + "nearest": PIL.Image.Resampling.NEAREST, + } +else: + PIL_INTERPOLATION = { + "linear": PIL.Image.LINEAR, + "bilinear": PIL.Image.BILINEAR, + "bicubic": PIL.Image.BICUBIC, + "lanczos": PIL.Image.LANCZOS, + "nearest": PIL.Image.NEAREST, + } +# ------------------------------------------------------------------------------ + + +# Will error if the minimal version of diffusers is not installed. Remove at your own risks. 
+check_min_version("0.10.0.dev0") + + +logger = get_logger(__name__) + + +def save_progress(text_encoder, placeholder_token_id, accelerator, args, save_path): + logger.info("Saving embeddings") + learned_embeds = accelerator.unwrap_model(text_encoder).get_input_embeddings().weight[placeholder_token_id] + learned_embeds_dict = {args.placeholder_token: learned_embeds.detach().cpu()} + torch.save(learned_embeds_dict, save_path) + + +def parse_args(): + parser = argparse.ArgumentParser(description="Simple example of a training script.") + parser.add_argument( + "--save_steps", + type=int, + default=500, + help="Save learned_embeds.bin every X updates steps.", + ) + parser.add_argument( + '--root_dir','--root', + type=str, + default=Globals.root, + help="Path to the invokeai runtime directory", + ) + parser.add_argument( + "--only_save_embeds", + action="store_true", + default=False, + help="Save only the embeddings for the new concept.", + ) + parser.add_argument( + "--model", + type=str, + default=None, + required=True, + help="Name of the diffusers model to train against, as defined in configs/models.yaml.", + ) + parser.add_argument( + "--revision", + type=str, + default=None, + required=False, + help="Revision of pretrained model identifier from huggingface.co/models.", + ) + parser.add_argument( + "--tokenizer_name", + type=str, + default=None, + help="Pretrained tokenizer name or path if not the same as model_name", + ) + parser.add_argument( + "--train_data_dir", type=str, default=None, required=True, help="A folder containing the training data." + ) + parser.add_argument( + "--placeholder_token", + type=str, + default=None, + required=True, + help="A token to use as a placeholder for the concept.", + ) + parser.add_argument( + "--initializer_token", type=str, default=None, required=True, help="A token to use as initializer word." + ) + parser.add_argument("--learnable_property", type=str, default="object", help="Choose between 'object' and 'style'") + parser.add_argument("--repeats", type=int, default=100, help="How many times to repeat the training data.") + parser.add_argument( + "--output_dir", + type=str, + default=f'{Globals.root}/text-inversion-model', + help="The output directory where the model predictions and checkpoints will be written.", + ) + parser.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.") + parser.add_argument( + "--resolution", + type=int, + default=512, + help=( + "The resolution for input images, all the images in the train/validation dataset will be resized to this" + " resolution" + ), + ) + parser.add_argument( + "--center_crop", action="store_true", help="Whether to center crop images before resizing to resolution" + ) + parser.add_argument( + "--train_batch_size", type=int, default=16, help="Batch size (per device) for the training dataloader." + ) + parser.add_argument("--num_train_epochs", type=int, default=100) + parser.add_argument( + "--max_train_steps", + type=int, + default=5000, + help="Total number of training steps to perform. 
If provided, overrides num_train_epochs.", + ) + parser.add_argument( + "--gradient_accumulation_steps", + type=int, + default=1, + help="Number of updates steps to accumulate before performing a backward/update pass.", + ) + parser.add_argument( + "--gradient_checkpointing", + action="store_true", + help="Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass.", + ) + parser.add_argument( + "--learning_rate", + type=float, + default=1e-4, + help="Initial learning rate (after the potential warmup period) to use.", + ) + parser.add_argument( + "--scale_lr", + action="store_true", + default=True, + help="Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.", + ) + parser.add_argument( + "--lr_scheduler", + type=str, + default="constant", + help=( + 'The scheduler type to use. Choose between ["linear", "cosine", "cosine_with_restarts", "polynomial",' + ' "constant", "constant_with_warmup"]' + ), + ) + parser.add_argument( + "--lr_warmup_steps", type=int, default=500, help="Number of steps for the warmup in the lr scheduler." + ) + parser.add_argument("--adam_beta1", type=float, default=0.9, help="The beta1 parameter for the Adam optimizer.") + parser.add_argument("--adam_beta2", type=float, default=0.999, help="The beta2 parameter for the Adam optimizer.") + parser.add_argument("--adam_weight_decay", type=float, default=1e-2, help="Weight decay to use.") + parser.add_argument("--adam_epsilon", type=float, default=1e-08, help="Epsilon value for the Adam optimizer") + parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.") + parser.add_argument("--hub_token", type=str, default=None, help="The token to use to push to the Model Hub.") + parser.add_argument( + "--logging_dir", + type=str, + default="logs", + help=( + "[TensorBoard](https://www.tensorflow.org/tensorboard) log directory. Will default to" + " *output_dir/runs/**CURRENT_DATETIME_HOSTNAME***." + ), + ) + parser.add_argument( + "--mixed_precision", + type=str, + default="no", + choices=["no", "fp16", "bf16"], + help=( + "Whether to use mixed precision. Choose" + "between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >= 1.10." + "and an Nvidia Ampere GPU." + ), + ) + parser.add_argument( + "--allow_tf32", + action="store_true", + help=( + "Whether or not to allow TF32 on Ampere GPUs. Can be used to speed up training. For more information, see" + " https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices" + ), + ) + parser.add_argument( + "--report_to", + type=str, + default="tensorboard", + help=( + 'The integration to report the results and logs to. Supported platforms are `"tensorboard"`' + ' (default), `"wandb"` and `"comet_ml"`. Use `"all"` to report to all integrations.' + ), + ) + parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank") + parser.add_argument( + "--checkpointing_steps", + type=int, + default=500, + help=( + "Save a checkpoint of the training state every X updates. These checkpoints are only suitable for resuming" + " training using `--resume_from_checkpoint`." + ), + ) + parser.add_argument( + "--resume_from_checkpoint", + type=str, + default=None, + help=( + "Whether training should be resumed from a previous checkpoint. Use a path saved by" + ' `--checkpointing_steps`, or `"latest"` to automatically select the last available checkpoint.' 
+ ), + ) + parser.add_argument( + "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers." + ) + + args = parser.parse_args() + env_local_rank = int(os.environ.get("LOCAL_RANK", -1)) + if env_local_rank != -1 and env_local_rank != args.local_rank: + args.local_rank = env_local_rank + + if args.train_data_dir is None: + raise ValueError("You must specify a train data directory.") + + return args + + +imagenet_templates_small = [ + "a photo of a {}", + "a rendering of a {}", + "a cropped photo of the {}", + "the photo of a {}", + "a photo of a clean {}", + "a photo of a dirty {}", + "a dark photo of the {}", + "a photo of my {}", + "a photo of the cool {}", + "a close-up photo of a {}", + "a bright photo of the {}", + "a cropped photo of a {}", + "a photo of the {}", + "a good photo of the {}", + "a photo of one {}", + "a close-up photo of the {}", + "a rendition of the {}", + "a photo of the clean {}", + "a rendition of a {}", + "a photo of a nice {}", + "a good photo of a {}", + "a photo of the nice {}", + "a photo of the small {}", + "a photo of the weird {}", + "a photo of the large {}", + "a photo of a cool {}", + "a photo of a small {}", +] + +imagenet_style_templates_small = [ + "a painting in the style of {}", + "a rendering in the style of {}", + "a cropped painting in the style of {}", + "the painting in the style of {}", + "a clean painting in the style of {}", + "a dirty painting in the style of {}", + "a dark painting in the style of {}", + "a picture in the style of {}", + "a cool painting in the style of {}", + "a close-up painting in the style of {}", + "a bright painting in the style of {}", + "a cropped painting in the style of {}", + "a good painting in the style of {}", + "a close-up painting in the style of {}", + "a rendition in the style of {}", + "a nice painting in the style of {}", + "a small painting in the style of {}", + "a weird painting in the style of {}", + "a large painting in the style of {}", +] + + +class TextualInversionDataset(Dataset): + def __init__( + self, + data_root, + tokenizer, + learnable_property="object", # [object, style] + size=512, + repeats=100, + interpolation="bicubic", + flip_p=0.5, + set="train", + placeholder_token="*", + center_crop=False, + ): + self.data_root = data_root + self.tokenizer = tokenizer + self.learnable_property = learnable_property + self.size = size + self.placeholder_token = placeholder_token + self.center_crop = center_crop + self.flip_p = flip_p + + self.image_paths = [os.path.join(self.data_root, file_path) for file_path in os.listdir(self.data_root)] + + self.num_images = len(self.image_paths) + self._length = self.num_images + + if set == "train": + self._length = self.num_images * repeats + + self.interpolation = { + "linear": PIL_INTERPOLATION["linear"], + "bilinear": PIL_INTERPOLATION["bilinear"], + "bicubic": PIL_INTERPOLATION["bicubic"], + "lanczos": PIL_INTERPOLATION["lanczos"], + }[interpolation] + + self.templates = imagenet_style_templates_small if learnable_property == "style" else imagenet_templates_small + self.flip_transform = transforms.RandomHorizontalFlip(p=self.flip_p) + + def __len__(self): + return self._length + + def __getitem__(self, i): + example = {} + image = Image.open(self.image_paths[i % self.num_images]) + + if not image.mode == "RGB": + image = image.convert("RGB") + + placeholder_string = self.placeholder_token + text = random.choice(self.templates).format(placeholder_string) + + example["input_ids"] = self.tokenizer( + text, + 
padding="max_length", + truncation=True, + max_length=self.tokenizer.model_max_length, + return_tensors="pt", + ).input_ids[0] + + # default to score-sde preprocessing + img = np.array(image).astype(np.uint8) + + if self.center_crop: + crop = min(img.shape[0], img.shape[1]) + h, w, = ( + img.shape[0], + img.shape[1], + ) + img = img[(h - crop) // 2 : (h + crop) // 2, (w - crop) // 2 : (w + crop) // 2] + + image = Image.fromarray(img) + image = image.resize((self.size, self.size), resample=self.interpolation) + + image = self.flip_transform(image) + image = np.array(image).astype(np.uint8) + image = (image / 127.5 - 1.0).astype(np.float32) + + example["pixel_values"] = torch.from_numpy(image).permute(2, 0, 1) + return example + + +def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None): + if token is None: + token = HfFolder.get_token() + if organization is None: + username = whoami(token)["name"] + return f"{username}/{model_id}" + else: + return f"{organization}/{model_id}" + + +def main(): + args = parse_args() + + # setting up things the way invokeai expects them + if os.path.exists(args.root_dir): + Globals.root = args.root_dir + if not os.path.isabs(args.output_dir): + args.output_dir = os.path.join(Globals.root,args.output_dir) + + logging_dir = os.path.join(args.output_dir, args.logging_dir) + + accelerator = Accelerator( + gradient_accumulation_steps=args.gradient_accumulation_steps, + mixed_precision=args.mixed_precision, + log_with=args.report_to, + logging_dir=logging_dir, + ) + + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + level=logging.INFO, + ) + logger.info(accelerator.state, main_process_only=False) + if accelerator.is_local_main_process: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_warning() + diffusers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + diffusers.utils.logging.set_verbosity_error() + + # If passed along, set the training seed now. 
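# A quick standalone check (illustration only) of the pixel normalisation used in
# TextualInversionDataset.__getitem__ above: uint8 values in [0, 255] map to
# float32 in [-1.0, 1.0], then HWC is reordered to CHW for the VAE.
import numpy as np
import torch

img = np.array([[[0, 127, 255]]], dtype=np.uint8)        # a 1x1 RGB "image"
img = (img / 127.5 - 1.0).astype(np.float32)             # -> [-1.0, ~0.0, 1.0]
pixel_values = torch.from_numpy(img).permute(2, 0, 1)    # HWC -> CHW
print(pixel_values.shape)                                 # torch.Size([3, 1, 1])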
+ if args.seed is not None: + set_seed(args.seed) + + # Handle the repository creation + if accelerator.is_main_process: + if args.push_to_hub: + if args.hub_model_id is None: + repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) + else: + repo_name = args.hub_model_id + repo = Repository(args.output_dir, clone_from=repo_name) + + with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: + if "step_*" not in gitignore: + gitignore.write("step_*\n") + if "epoch_*" not in gitignore: + gitignore.write("epoch_*\n") + elif args.output_dir is not None: + os.makedirs(args.output_dir, exist_ok=True) + + models_conf = OmegaConf.load(os.path.join(Globals.root,'configs/models.yaml')) + model_conf = models_conf.get(args.model,None) + assert model_conf is not None,f'Unknown model: {args.model}' + assert model_conf.get('format','diffusers')=='diffusers', "This script only works with models of type 'diffusers'" + pretrained_model_name_or_path = model_conf.get('repo_id',None) or Path(model_conf.get('path')) + assert pretrained_model_name_or_path, f"models.yaml error: neither 'repo_id' nor 'path' is defined for {args.model}" + pipeline_args = dict() + if not isinstance(pretrained_model_name_or_path,Path): + pipeline_args.update(cache_dir=os.path.join(Globals.root,'models',pretrained_model_name_or_path)) + + # Load tokenizer + if args.tokenizer_name: + tokenizer = CLIPTokenizer.from_pretrained(args.tokenizer_name) + else: + tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer", **pipeline_args) + + # Load scheduler and models + noise_scheduler = DDPMScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler", **pipeline_args) + text_encoder = CLIPTextModel.from_pretrained( + pretrained_model_name_or_path, subfolder="text_encoder", revision=args.revision, **pipeline_args + ) + vae = AutoencoderKL.from_pretrained(pretrained_model_name_or_path, subfolder="vae", revision=args.revision, **pipeline_args) + unet = UNet2DConditionModel.from_pretrained( + pretrained_model_name_or_path, subfolder="unet", revision=args.revision, **pipeline_args + ) + + # Add the placeholder token in tokenizer + num_added_tokens = tokenizer.add_tokens(args.placeholder_token) + if num_added_tokens == 0: + raise ValueError( + f"The tokenizer already contains the token {args.placeholder_token}. Please pass a different" + " `placeholder_token` that is not already in the tokenizer." 
+ ) + + # Convert the initializer_token, placeholder_token to ids + token_ids = tokenizer.encode(args.initializer_token, add_special_tokens=False) + # Check if initializer_token is a single token or a sequence of tokens + if len(token_ids) > 1: + raise ValueError("The initializer token must be a single token.") + + initializer_token_id = token_ids[0] + placeholder_token_id = tokenizer.convert_tokens_to_ids(args.placeholder_token) + + # Resize the token embeddings as we are adding new special tokens to the tokenizer + text_encoder.resize_token_embeddings(len(tokenizer)) + + # Initialise the newly added placeholder token with the embeddings of the initializer token + token_embeds = text_encoder.get_input_embeddings().weight.data + token_embeds[placeholder_token_id] = token_embeds[initializer_token_id] + + # Freeze vae and unet + vae.requires_grad_(False) + unet.requires_grad_(False) + # Freeze all parameters except for the token embeddings in text encoder + text_encoder.text_model.encoder.requires_grad_(False) + text_encoder.text_model.final_layer_norm.requires_grad_(False) + text_encoder.text_model.embeddings.position_embedding.requires_grad_(False) + + if args.gradient_checkpointing: + # Keep unet in train mode if we are using gradient checkpointing to save memory. + # The dropout cannot be != 0 so it doesn't matter if we are in eval or train mode. + unet.train() + text_encoder.gradient_checkpointing_enable() + unet.enable_gradient_checkpointing() + + if args.enable_xformers_memory_efficient_attention: + if is_xformers_available(): + unet.enable_xformers_memory_efficient_attention() + else: + raise ValueError("xformers is not available. Make sure it is installed correctly") + + # Enable TF32 for faster training on Ampere GPUs, + # cf https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices + if args.allow_tf32: + torch.backends.cuda.matmul.allow_tf32 = True + + if args.scale_lr: + args.learning_rate = ( + args.learning_rate * args.gradient_accumulation_steps * args.train_batch_size * accelerator.num_processes + ) + + # Initialize the optimizer + optimizer = torch.optim.AdamW( + text_encoder.get_input_embeddings().parameters(), # only optimize the embeddings + lr=args.learning_rate, + betas=(args.adam_beta1, args.adam_beta2), + weight_decay=args.adam_weight_decay, + eps=args.adam_epsilon, + ) + + # Dataset and DataLoaders creation: + train_dataset = TextualInversionDataset( + data_root=args.train_data_dir, + tokenizer=tokenizer, + size=args.resolution, + placeholder_token=args.placeholder_token, + repeats=args.repeats, + learnable_property=args.learnable_property, + center_crop=args.center_crop, + set="train", + ) + train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=args.train_batch_size, shuffle=True) + + # Scheduler and math around the number of training steps. + overrode_max_train_steps = False + num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps) + if args.max_train_steps is None: + args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch + overrode_max_train_steps = True + + lr_scheduler = get_scheduler( + args.lr_scheduler, + optimizer=optimizer, + num_warmup_steps=args.lr_warmup_steps * args.gradient_accumulation_steps, + num_training_steps=args.max_train_steps * args.gradient_accumulation_steps, + ) + + # Prepare everything with our `accelerator`. 
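# A standalone check (illustration only, hypothetical sizes) of the step
# arithmetic just above: how num_update_steps_per_epoch is computed and what
# max_train_steps becomes when it is left as None.
import math

batches_per_epoch = 25                # i.e. len(train_dataloader)
gradient_accumulation_steps = 4
num_train_epochs = 100

num_update_steps_per_epoch = math.ceil(batches_per_epoch / gradient_accumulation_steps)  # 7
max_train_steps = num_train_epochs * num_update_steps_per_epoch                          # 700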
+ text_encoder, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( + text_encoder, optimizer, train_dataloader, lr_scheduler + ) + + # For mixed precision training we cast the text_encoder and vae weights to half-precision + # as these models are only used for inference, keeping weights in full precision is not required. + weight_dtype = torch.float32 + if accelerator.mixed_precision == "fp16": + weight_dtype = torch.float16 + elif accelerator.mixed_precision == "bf16": + weight_dtype = torch.bfloat16 + + # Move vae and unet to device and cast to weight_dtype + unet.to(accelerator.device, dtype=weight_dtype) + vae.to(accelerator.device, dtype=weight_dtype) + + # We need to recalculate our total training steps as the size of the training dataloader may have changed. + num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps) + if overrode_max_train_steps: + args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch + # Afterwards we recalculate our number of training epochs + args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch) + + # We need to initialize the trackers we use, and also store our configuration. + # The trackers initializes automatically on the main process. + if accelerator.is_main_process: + accelerator.init_trackers("textual_inversion", config=vars(args)) + + # Train! + total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps + + logger.info("***** Running training *****") + logger.info(f" Num examples = {len(train_dataset)}") + logger.info(f" Num Epochs = {args.num_train_epochs}") + logger.info(f" Instantaneous batch size per device = {args.train_batch_size}") + logger.info(f" Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}") + logger.info(f" Gradient Accumulation steps = {args.gradient_accumulation_steps}") + logger.info(f" Total optimization steps = {args.max_train_steps}") + global_step = 0 + first_epoch = 0 + + # Potentially load in the weights and states from a previous save + if args.resume_from_checkpoint: + if args.resume_from_checkpoint != "latest": + path = os.path.basename(args.resume_from_checkpoint) + else: + # Get the most recent checkpoint + dirs = os.listdir(args.output_dir) + dirs = [d for d in dirs if d.startswith("checkpoint")] + dirs = sorted(dirs, key=lambda x: int(x.split("-")[1])) + path = dirs[-1] + accelerator.print(f"Resuming from checkpoint {path}") + accelerator.load_state(os.path.join(args.output_dir, path)) + global_step = int(path.split("-")[1]) + + resume_global_step = global_step * args.gradient_accumulation_steps + first_epoch = resume_global_step // num_update_steps_per_epoch + resume_step = resume_global_step % num_update_steps_per_epoch + + # Only show the progress bar once on each machine. 
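# A standalone sketch (illustration only, made-up numbers) of the resume
# arithmetic above: a checkpoint directory name like "checkpoint-1500" is
# converted back into an epoch and an in-epoch step to skip to.
gradient_accumulation_steps = 2
num_update_steps_per_epoch = 400
path = "checkpoint-1500"

global_step = int(path.split("-")[1])                             # 1500
resume_global_step = global_step * gradient_accumulation_steps    # 3000
first_epoch = resume_global_step // num_update_steps_per_epoch    # 7
resume_step = resume_global_step % num_update_steps_per_epoch     # 200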
+ progress_bar = tqdm(range(global_step, args.max_train_steps), disable=not accelerator.is_local_main_process) + progress_bar.set_description("Steps") + + # keep original embeddings as reference + orig_embeds_params = accelerator.unwrap_model(text_encoder).get_input_embeddings().weight.data.clone() + + for epoch in range(first_epoch, args.num_train_epochs): + text_encoder.train() + for step, batch in enumerate(train_dataloader): + # Skip steps until we reach the resumed step + if args.resume_from_checkpoint and epoch == first_epoch and step < resume_step: + if step % args.gradient_accumulation_steps == 0: + progress_bar.update(1) + continue + + with accelerator.accumulate(text_encoder): + # Convert images to latent space + latents = vae.encode(batch["pixel_values"].to(dtype=weight_dtype)).latent_dist.sample().detach() + latents = latents * 0.18215 + + # Sample noise that we'll add to the latents + noise = torch.randn_like(latents) + bsz = latents.shape[0] + # Sample a random timestep for each image + timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (bsz,), device=latents.device) + timesteps = timesteps.long() + + # Add noise to the latents according to the noise magnitude at each timestep + # (this is the forward diffusion process) + noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps) + + # Get the text embedding for conditioning + encoder_hidden_states = text_encoder(batch["input_ids"])[0].to(dtype=weight_dtype) + + # Predict the noise residual + model_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample + + # Get the target for loss depending on the prediction type + if noise_scheduler.config.prediction_type == "epsilon": + target = noise + elif noise_scheduler.config.prediction_type == "v_prediction": + target = noise_scheduler.get_velocity(latents, noise, timesteps) + else: + raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}") + + loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean") + + accelerator.backward(loss) + + optimizer.step() + lr_scheduler.step() + optimizer.zero_grad() + + # Let's make sure we don't update any embedding weights besides the newly added token + index_no_updates = torch.arange(len(tokenizer)) != placeholder_token_id + with torch.no_grad(): + accelerator.unwrap_model(text_encoder).get_input_embeddings().weight[ + index_no_updates + ] = orig_embeds_params[index_no_updates] + + # Checks if the accelerator has performed an optimization step behind the scenes + if accelerator.sync_gradients: + progress_bar.update(1) + global_step += 1 + if global_step % args.save_steps == 0: + save_path = os.path.join(args.output_dir, f"learned_embeds-steps-{global_step}.bin") + save_progress(text_encoder, placeholder_token_id, accelerator, args, save_path) + + if global_step % args.checkpointing_steps == 0: + if accelerator.is_main_process: + save_path = os.path.join(args.output_dir, f"checkpoint-{global_step}") + accelerator.save_state(save_path) + logger.info(f"Saved state to {save_path}") + + logs = {"loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0]} + progress_bar.set_postfix(**logs) + accelerator.log(logs, step=global_step) + + if global_step >= args.max_train_steps: + break + + # Create the pipeline using using the trained modules and save it. 
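# A standalone sketch (illustration only, tiny made-up sizes) of the key trick
# in the training loop above: after each optimizer step every embedding row
# except the newly added placeholder token is restored from a saved copy, so
# only the new token's vector is actually learned.
import torch

vocab_size, dim, placeholder_token_id = 10, 4, 7
embeddings = torch.nn.Embedding(vocab_size, dim)
orig_embeds_params = embeddings.weight.data.clone()

embeddings.weight.data += 0.1          # stand-in for an optimizer step touching every row

index_no_updates = torch.arange(vocab_size) != placeholder_token_id
with torch.no_grad():
    embeddings.weight[index_no_updates] = orig_embeds_params[index_no_updates]

assert torch.equal(embeddings.weight.data[0], orig_embeds_params[0])      # frozen row restored
assert not torch.equal(embeddings.weight.data[placeholder_token_id],
                       orig_embeds_params[placeholder_token_id])          # new token kept its update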
+ accelerator.wait_for_everyone() + if accelerator.is_main_process: + if args.push_to_hub and args.only_save_embeds: + logger.warn("Enabling full model saving because --push_to_hub=True was specified.") + save_full_model = True + else: + save_full_model = not args.only_save_embeds + if save_full_model: + pipeline = StableDiffusionPipeline.from_pretrained( + pretrained_model_name_or_path, + text_encoder=accelerator.unwrap_model(text_encoder), + vae=vae, + unet=unet, + tokenizer=tokenizer, + **pipeline_args, + ) + pipeline.save_pretrained(args.output_dir) + # Save the newly trained embeddings + save_path = os.path.join(args.output_dir, "learned_embeds.bin") + save_progress(text_encoder, placeholder_token_id, accelerator, args, save_path) + + if args.push_to_hub: + repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True) + + accelerator.end_training() + + +if __name__ == "__main__": + main() From 088e5462db3372e8b38554b1ad02f4fb4ca23eee Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Mon, 2 Jan 2023 16:14:42 -0800 Subject: [PATCH 118/199] refactor(InvokeAIDiffuserComponent): factor out _combine() --- .../diffusion/shared_invokeai_diffusion.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/ldm/models/diffusion/shared_invokeai_diffusion.py b/ldm/models/diffusion/shared_invokeai_diffusion.py index bf1ace72e18..282dedefc04 100644 --- a/ldm/models/diffusion/shared_invokeai_diffusion.py +++ b/ldm/models/diffusion/shared_invokeai_diffusion.py @@ -1,11 +1,11 @@ -import traceback from math import ceil from typing import Callable, Optional, Union import torch from ldm.models.diffusion.cross_attention_control import Arguments, \ - remove_cross_attention_control, setup_cross_attention_control, Context, get_cross_attention_modules, CrossAttentionType + remove_cross_attention_control, setup_cross_attention_control, Context, get_cross_attention_modules, \ + CrossAttentionType from ldm.models.diffusion.cross_attention_map_saving import AttentionMapSaver @@ -107,12 +107,7 @@ def do_diffusion_step(self, x: torch.Tensor, sigma: torch.Tensor, else: unconditioned_next_x, conditioned_next_x = self.apply_standard_conditioning(x, sigma, unconditioning, conditioning) - # to scale how much effect conditioning has, calculate the changes it does and then scale that - scaled_delta = (conditioned_next_x - unconditioned_next_x) * unconditional_guidance_scale - combined_next_x = unconditioned_next_x + scaled_delta - - return combined_next_x - + return self._combine(unconditioned_next_x, conditioned_next_x, unconditional_guidance_scale) # methods below are called from do_diffusion_step and should be considered private to this class. 
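# A minimal standalone sketch (illustration only, made-up tensors) of what the
# newly factored-out _combine() computes: classifier-free guidance,
#   combined = unconditioned + guidance_scale * (conditioned - unconditioned)
import torch

unconditioned_next_x = torch.tensor([0.0, 1.0])
conditioned_next_x = torch.tensor([1.0, 3.0])
guidance_scale = 7.5

scaled_delta = (conditioned_next_x - unconditioned_next_x) * guidance_scale
combined_next_x = unconditioned_next_x + scaled_delta
print(combined_next_x)    # tensor([ 7.5000, 16.0000])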
@@ -183,6 +178,12 @@ def apply_cross_attention_controlled_conditioning(self, x:torch.Tensor, sigma, u return unconditioned_next_x, conditioned_next_x + def _combine(self, unconditioned_next_x, conditioned_next_x, guidance_scale): + # to scale how much effect conditioning has, calculate the changes it does and then scale that + scaled_delta = (conditioned_next_x - unconditioned_next_x) * guidance_scale + combined_next_x = unconditioned_next_x + scaled_delta + return combined_next_x + def estimate_percent_through(self, step_index, sigma): if step_index is not None and self.cross_attention_control_context is not None: # percent_through will never reach 1.0 (but this is intended) From 430f044f6f6050bf4fbfe2581f218bfba4282f61 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Mon, 2 Jan 2023 18:38:21 -0800 Subject: [PATCH 119/199] InvokeAIDiffuserComponent: implement threshold --- ldm/invoke/generator/diffusers_pipeline.py | 7 ++- ldm/invoke/generator/img2img.py | 8 +++- ldm/invoke/generator/txt2img.py | 11 +++-- ldm/invoke/generator/txt2img2img.py | 12 +++-- .../diffusion/shared_invokeai_diffusion.py | 48 ++++++++++++++++++- 5 files changed, 72 insertions(+), 14 deletions(-) diff --git a/ldm/invoke/generator/diffusers_pipeline.py b/ldm/invoke/generator/diffusers_pipeline.py index 0e8d82f935c..b4a0703f18e 100644 --- a/ldm/invoke/generator/diffusers_pipeline.py +++ b/ldm/invoke/generator/diffusers_pipeline.py @@ -39,7 +39,7 @@ from torchvision.transforms.functional import resize as tv_resize from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer -from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent +from ldm.models.diffusion.shared_invokeai_diffusion import InvokeAIDiffuserComponent, ThresholdSettings from ldm.modules.textual_inversion_manager import TextualInversionManager @@ -205,6 +205,7 @@ class ConditioningData: extra: Optional[InvokeAIDiffuserComponent.ExtraConditioningInfo] = None scheduler_args: dict[str, Any] = field(default_factory=dict) """Additional arguments to pass to scheduler.step.""" + threshold: Optional[ThresholdSettings] = None @property def dtype(self): @@ -425,7 +426,9 @@ def step(self, t: torch.Tensor, latents: torch.Tensor, latent_model_input, t, conditioning_data.unconditioned_embeddings, conditioning_data.text_embeddings, conditioning_data.guidance_scale, - step_index=step_index) + step_index=step_index, + threshold=conditioning_data.threshold + ) # compute the previous noisy sample x_t -> x_t-1 step_output = self.scheduler.step(noise_pred, timestep, latents, diff --git a/ldm/invoke/generator/img2img.py b/ldm/invoke/generator/img2img.py index 0c51648bb55..fedf6d3abc9 100644 --- a/ldm/invoke/generator/img2img.py +++ b/ldm/invoke/generator/img2img.py @@ -7,6 +7,7 @@ from ldm.invoke.generator.base import Generator from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline, ConditioningData +from ldm.models.diffusion.shared_invokeai_diffusion import ThresholdSettings class Img2Img(Generator): @@ -29,8 +30,11 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, pipeline.scheduler = sampler uc, c, extra_conditioning_info = conditioning - conditioning_data = (ConditioningData(uc, c, cfg_scale, extra_conditioning_info) - .add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta)) + conditioning_data = ( + ConditioningData( + uc, c, cfg_scale, extra_conditioning_info, + threshold = ThresholdSettings(threshold, warmup=0.2) if threshold else 
None) + .add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta)) def make_image(x_T): diff --git a/ldm/invoke/generator/txt2img.py b/ldm/invoke/generator/txt2img.py index e8e3a90d195..174c1e469dc 100644 --- a/ldm/invoke/generator/txt2img.py +++ b/ldm/invoke/generator/txt2img.py @@ -6,6 +6,7 @@ from .base import Generator from .diffusers_pipeline import StableDiffusionGeneratorPipeline, ConditioningData +from ...models.diffusion.shared_invokeai_diffusion import ThresholdSettings class Txt2Img(Generator): @@ -29,8 +30,11 @@ def get_make_image(self,prompt,sampler,steps,cfg_scale,ddim_eta, pipeline.scheduler = sampler uc, c, extra_conditioning_info = conditioning - conditioning_data = (ConditioningData(uc, c, cfg_scale, extra_conditioning_info) - .add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta)) + conditioning_data = ( + ConditioningData( + uc, c, cfg_scale, extra_conditioning_info, + threshold = ThresholdSettings(threshold, warmup=0.2) if threshold else None) + .add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta)) def make_image(x_T) -> PIL.Image.Image: @@ -39,8 +43,7 @@ def make_image(x_T) -> PIL.Image.Image: noise=x_T, num_inference_steps=steps, conditioning_data=conditioning_data, - callback=step_callback - # TODO: threshold = threshold, + callback=step_callback, ) if pipeline_output.attention_map_saver is not None and attention_maps_callback is not None: attention_maps_callback(pipeline_output.attention_map_saver) diff --git a/ldm/invoke/generator/txt2img2img.py b/ldm/invoke/generator/txt2img2img.py index 2c71194532d..e356f719c48 100644 --- a/ldm/invoke/generator/txt2img2img.py +++ b/ldm/invoke/generator/txt2img2img.py @@ -10,6 +10,7 @@ from ldm.invoke.generator.base import Generator from ldm.invoke.generator.diffusers_pipeline import trim_to_multiple_of, StableDiffusionGeneratorPipeline, \ ConditioningData +from ldm.models.diffusion.shared_invokeai_diffusion import ThresholdSettings class Txt2Img2Img(Generator): @@ -19,20 +20,23 @@ def __init__(self, model, precision): def get_make_image(self, prompt:str, sampler, steps:int, cfg_scale:float, ddim_eta, conditioning, width:int, height:int, strength:float, - step_callback:Optional[Callable]=None, **kwargs): + step_callback:Optional[Callable]=None, threshold=0.0, **kwargs): """ Returns a function returning an image derived from the prompt and the initial image Return value depends on the seed at the time you call it kwargs are 'width' and 'height' """ - uc, c, extra_conditioning_info = conditioning # noinspection PyTypeChecker pipeline: StableDiffusionGeneratorPipeline = self.model pipeline.scheduler = sampler - conditioning_data = (ConditioningData(uc, c, cfg_scale, extra_conditioning_info) - .add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta)) + uc, c, extra_conditioning_info = conditioning + conditioning_data = ( + ConditioningData( + uc, c, cfg_scale, extra_conditioning_info, + threshold = ThresholdSettings(threshold, warmup=0.2) if threshold else None) + .add_scheduler_args_if_applicable(pipeline.scheduler, eta=ddim_eta)) scale_dim = min(width, height) scale = 512 / scale_dim diff --git a/ldm/models/diffusion/shared_invokeai_diffusion.py b/ldm/models/diffusion/shared_invokeai_diffusion.py index 282dedefc04..52fe9e8ce61 100644 --- a/ldm/models/diffusion/shared_invokeai_diffusion.py +++ b/ldm/models/diffusion/shared_invokeai_diffusion.py @@ -1,6 +1,9 @@ +import math +from dataclasses import dataclass from math import ceil from typing import Callable, Optional, Union +import 
numpy as np import torch from ldm.models.diffusion.cross_attention_control import Arguments, \ @@ -9,6 +12,12 @@ from ldm.models.diffusion.cross_attention_map_saving import AttentionMapSaver +@dataclass(frozen=True) +class ThresholdSettings: + threshold: float + warmup: float + + class InvokeAIDiffuserComponent: ''' The aim of this component is to provide a single place for code that can be applied identically to @@ -18,6 +27,7 @@ class InvokeAIDiffuserComponent: * Cross attention control ("prompt2prompt") * Hybrid conditioning (used for inpainting) ''' + debug_thresholding = True class ExtraConditioningInfo: @@ -78,7 +88,8 @@ def do_diffusion_step(self, x: torch.Tensor, sigma: torch.Tensor, unconditioning: Union[torch.Tensor,dict], conditioning: Union[torch.Tensor,dict], unconditional_guidance_scale: float, - step_index: Optional[int]=None + step_index: Optional[int]=None, + threshold: Optional[ThresholdSettings]=None, ): """ :param x: current latents @@ -87,6 +98,7 @@ def do_diffusion_step(self, x: torch.Tensor, sigma: torch.Tensor, :param conditioning: embeddings for conditioned output. for hybrid conditioning this is a dict of tensors [B x 77 x 768], otherwise a single tensor [B x 77 x 768] :param unconditional_guidance_scale: aka CFG scale, controls how much effect the conditioning tensor has :param step_index: counts upwards from 0 to (step_count-1) (as passed to setup_cross_attention_control, if using). May be called multiple times for a single step, therefore do not assume that its value will monotically increase. If None, will be estimated by comparing sigma against self.model.sigmas . + :param threshold: threshold to apply after each step :return: the new latents after applying the model to x using unscaled unconditioning and CFG-scaled conditioning. """ @@ -107,7 +119,12 @@ def do_diffusion_step(self, x: torch.Tensor, sigma: torch.Tensor, else: unconditioned_next_x, conditioned_next_x = self.apply_standard_conditioning(x, sigma, unconditioning, conditioning) - return self._combine(unconditioned_next_x, conditioned_next_x, unconditional_guidance_scale) + combined_next_x = self._combine(unconditioned_next_x, conditioned_next_x, unconditional_guidance_scale) + + if threshold: + combined_next_x = self._threshold(threshold.threshold, threshold.warmup, combined_next_x, sigma) + + return combined_next_x # methods below are called from do_diffusion_step and should be considered private to this class. 
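# A standalone numeric sketch (illustration only, made-up latents) of the
# clamping performed by the _threshold() helper added in the next hunk, once
# warmup is over: out-of-range extremes are rescaled by 0.7 (the default
# carried over from PR #395) and the latents are clamped into that band.
import numpy as np
import torch

current_threshold, scale = 1.0, 0.7
latents = torch.tensor([-3.0, -0.5, 0.2, 4.0])

maxval = latents.max().item()                                    # 4.0
minval = latents.min().item()                                    # -3.0
if maxval > current_threshold:
    maxval = np.clip(maxval * scale, 1, current_threshold)       # -> 1.0
if minval < -current_threshold:
    minval = np.clip(minval * scale, -current_threshold, -1)     # -> -1.0
print(latents.clamp(minval, maxval))                             # tensor([-1.0, -0.5, 0.2, 1.0])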
@@ -184,6 +201,33 @@ def _combine(self, unconditioned_next_x, conditioned_next_x, guidance_scale): combined_next_x = unconditioned_next_x + scaled_delta return combined_next_x + def _threshold(self, threshold, warmup, latents: torch.Tensor, sigma) -> torch.Tensor: + warmup_scale = (1 - sigma.item() / 1000) / warmup if warmup else math.inf + if warmup_scale < 1: + # This arithmetic based on https://github.com/invoke-ai/InvokeAI/pull/395 + warming_threshold = 1 + (threshold - 1) * warmup_scale + current_threshold = np.clip(warming_threshold, 1, threshold) + else: + current_threshold = threshold + + if current_threshold <= 0: + return latents + maxval = latents.max().item() + minval = latents.min().item() + + scale = 0.7 # default value from #395 + + if maxval < current_threshold and minval > -current_threshold: + return latents + + if maxval > current_threshold: + maxval = np.clip(maxval * scale, 1, current_threshold) + + if minval < -current_threshold: + minval = np.clip(minval * scale, -current_threshold, -1) + + return latents.clamp(minval, maxval) + def estimate_percent_through(self, step_index, sigma): if step_index is not None and self.cross_attention_control_context is not None: # percent_through will never reach 1.0 (but this is intended) From 9737077681f13ecb7c37165ee7c34118c16a022a Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Mon, 2 Jan 2023 23:52:15 -0800 Subject: [PATCH 120/199] InvokeAIDiffuserComponent: diagnostic logs for threshold ...this does not look right --- ldm/models/diffusion/shared_invokeai_diffusion.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/ldm/models/diffusion/shared_invokeai_diffusion.py b/ldm/models/diffusion/shared_invokeai_diffusion.py index 52fe9e8ce61..06305463941 100644 --- a/ldm/models/diffusion/shared_invokeai_diffusion.py +++ b/ldm/models/diffusion/shared_invokeai_diffusion.py @@ -27,7 +27,7 @@ class InvokeAIDiffuserComponent: * Cross attention control ("prompt2prompt") * Hybrid conditioning (used for inpainting) ''' - debug_thresholding = True + debug_thresholding = False class ExtraConditioningInfo: @@ -217,6 +217,13 @@ def _threshold(self, threshold, warmup, latents: torch.Tensor, sigma) -> torch.T scale = 0.7 # default value from #395 + if self.debug_thresholding: + std, mean = [i.item() for i in torch.std_mean(latents)] + outside = torch.count_nonzero((latents < -current_threshold) | (latents > current_threshold)) + print(f"\nThreshold: 𝜎={sigma.item()} threshold={current_threshold:.3f} (of {threshold:.3f})\n" + f" | min, mean, max = {minval:.3f}, {mean:.3f}, {maxval:.3f}\tstd={std}\n" + f" | {outside / latents.numel() * 100:.2f}% values outside threshold") + if maxval < current_threshold and minval > -current_threshold: return latents @@ -226,6 +233,11 @@ def _threshold(self, threshold, warmup, latents: torch.Tensor, sigma) -> torch.T if minval < -current_threshold: minval = np.clip(minval * scale, -current_threshold, -1) + if self.debug_thresholding: + outside = torch.count_nonzero((latents < minval) | (latents > maxval)) + print(f" | min, , max = {minval:.3f}, , {maxval:.3f}\t(scaled by {scale})\n" + f" | {outside / latents.numel() * 100:.2f}% values will be clamped") + return latents.clamp(minval, maxval) def estimate_percent_through(self, step_index, sigma): From 0f7d8fd434e5517f4c4f60a94b3bcddc7ba16ddd Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 3 Jan 2023 18:52:57 -0500 Subject: [PATCH 121/199] add a curses-based frontend to textual inversion - 
not quite working yet - requires npyscreen installed - on windows will also have the windows-curses requirement, but not added to requirements yet --- .../requirements-base.txt | 1 + ldm/invoke/textual_inversion_training.py | 768 ++++++++++++++++++ scripts/textual_inversion.py | 763 +---------------- scripts/textual_inversion_fe.py | 186 +++++ 4 files changed, 959 insertions(+), 759 deletions(-) create mode 100644 ldm/invoke/textual_inversion_training.py create mode 100755 scripts/textual_inversion_fe.py diff --git a/environments-and-requirements/requirements-base.txt b/environments-and-requirements/requirements-base.txt index 35072d309e4..c0d89767ab1 100644 --- a/environments-and-requirements/requirements-base.txt +++ b/environments-and-requirements/requirements-base.txt @@ -16,6 +16,7 @@ huggingface-hub imageio imageio-ffmpeg kornia +npyscreen numpy==1.23.* omegaconf opencv-python diff --git a/ldm/invoke/textual_inversion_training.py b/ldm/invoke/textual_inversion_training.py new file mode 100644 index 00000000000..75369d96133 --- /dev/null +++ b/ldm/invoke/textual_inversion_training.py @@ -0,0 +1,768 @@ +# This code was copied from +# https://github.com/huggingface/diffusers/blob/main/examples/textual_inversion/textual_inversion.py +# on January 2, 2023 +# and modified slightly by Lincoln Stein (@lstein) to work with InvokeAI + +import argparse +from argparse import Namespace +import logging +import math +import os +import random +from pathlib import Path +from typing import Optional + +import numpy as np +import torch +import torch.nn.functional as F +import torch.utils.checkpoint +from torch.utils.data import Dataset + +import datasets +import diffusers +import PIL +import transformers +from accelerate import Accelerator +from accelerate.logging import get_logger +from accelerate.utils import set_seed +from diffusers import AutoencoderKL, DDPMScheduler, StableDiffusionPipeline, UNet2DConditionModel +from diffusers.optimization import get_scheduler +from diffusers.utils import check_min_version +from diffusers.utils.import_utils import is_xformers_available +from huggingface_hub import HfFolder, Repository, whoami + +# invokeai stuff +from ldm.invoke.globals import Globals +from omegaconf import OmegaConf + +# TODO: remove and import from diffusers.utils when the new version of diffusers is released +from packaging import version +from PIL import Image +from torchvision import transforms +from tqdm.auto import tqdm +from transformers import CLIPTextModel, CLIPTokenizer + +if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"): + PIL_INTERPOLATION = { + "linear": PIL.Image.Resampling.BILINEAR, + "bilinear": PIL.Image.Resampling.BILINEAR, + "bicubic": PIL.Image.Resampling.BICUBIC, + "lanczos": PIL.Image.Resampling.LANCZOS, + "nearest": PIL.Image.Resampling.NEAREST, + } +else: + PIL_INTERPOLATION = { + "linear": PIL.Image.LINEAR, + "bilinear": PIL.Image.BILINEAR, + "bicubic": PIL.Image.BICUBIC, + "lanczos": PIL.Image.LANCZOS, + "nearest": PIL.Image.NEAREST, + } +# ------------------------------------------------------------------------------ + + +# Will error if the minimal version of diffusers is not installed. Remove at your own risks. 
+check_min_version("0.10.0.dev0") + + +logger = get_logger(__name__) + + +def save_progress(text_encoder, placeholder_token_id, accelerator, args, save_path): + logger.info("Saving embeddings") + learned_embeds = accelerator.unwrap_model(text_encoder).get_input_embeddings().weight[placeholder_token_id] + learned_embeds_dict = {args.placeholder_token: learned_embeds.detach().cpu()} + torch.save(learned_embeds_dict, save_path) + +def parse_args(): + parser = argparse.ArgumentParser(description="Simple example of a training script.") + parser.add_argument( + "--save_steps", + type=int, + default=500, + help="Save learned_embeds.bin every X updates steps.", + ) + parser.add_argument( + '--root_dir','--root', + type=str, + default=Globals.root, + help="Path to the invokeai runtime directory", + ) + parser.add_argument( + "--only_save_embeds", + action="store_true", + default=False, + help="Save only the embeddings for the new concept.", + ) + parser.add_argument( + "--model", + type=str, + default=None, + required=False, + help="Name of the diffusers model to train against, as defined in configs/models.yaml.", + ) + parser.add_argument( + "--revision", + type=str, + default=None, + required=False, + help="Revision of pretrained model identifier from huggingface.co/models.", + ) + parser.add_argument( + "--tokenizer_name", + type=str, + default=None, + help="Pretrained tokenizer name or path if not the same as model_name", + ) + parser.add_argument( + "--train_data_dir", type=str, default=None, required=False, help="A folder containing the training data." + ) + parser.add_argument( + "--placeholder_token", + type=str, + default=None, + required=False, + help="A token to use as a placeholder for the concept.", + ) + parser.add_argument( + "--initializer_token", type=str, default=None, required=False, help="A token to use as initializer word." + ) + parser.add_argument("--learnable_property", type=str, default="object", help="Choose between 'object' and 'style'") + parser.add_argument("--repeats", type=int, default=100, help="How many times to repeat the training data.") + parser.add_argument( + "--output_dir", + type=str, + default=f'{Globals.root}/text-inversion-model', + help="The output directory where the model predictions and checkpoints will be written.", + ) + parser.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.") + parser.add_argument( + "--resolution", + type=int, + default=512, + help=( + "The resolution for input images, all the images in the train/validation dataset will be resized to this" + " resolution" + ), + ) + parser.add_argument( + "--center_crop", action="store_true", help="Whether to center crop images before resizing to resolution" + ) + parser.add_argument( + "--train_batch_size", type=int, default=16, help="Batch size (per device) for the training dataloader." + ) + parser.add_argument("--num_train_epochs", type=int, default=100) + parser.add_argument( + "--max_train_steps", + type=int, + default=5000, + help="Total number of training steps to perform. 
If provided, overrides num_train_epochs.", + ) + parser.add_argument( + "--gradient_accumulation_steps", + type=int, + default=1, + help="Number of updates steps to accumulate before performing a backward/update pass.", + ) + parser.add_argument( + "--gradient_checkpointing", + action="store_true", + help="Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass.", + ) + parser.add_argument( + "--learning_rate", + type=float, + default=1e-4, + help="Initial learning rate (after the potential warmup period) to use.", + ) + parser.add_argument( + "--scale_lr", + action="store_true", + default=True, + help="Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.", + ) + parser.add_argument( + "--lr_scheduler", + type=str, + default="constant", + help=( + 'The scheduler type to use. Choose between ["linear", "cosine", "cosine_with_restarts", "polynomial",' + ' "constant", "constant_with_warmup"]' + ), + ) + parser.add_argument( + "--lr_warmup_steps", type=int, default=500, help="Number of steps for the warmup in the lr scheduler." + ) + parser.add_argument("--adam_beta1", type=float, default=0.9, help="The beta1 parameter for the Adam optimizer.") + parser.add_argument("--adam_beta2", type=float, default=0.999, help="The beta2 parameter for the Adam optimizer.") + parser.add_argument("--adam_weight_decay", type=float, default=1e-2, help="Weight decay to use.") + parser.add_argument("--adam_epsilon", type=float, default=1e-08, help="Epsilon value for the Adam optimizer") + parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.") + parser.add_argument("--hub_token", type=str, default=None, help="The token to use to push to the Model Hub.") + parser.add_argument( + "--logging_dir", + type=str, + default="logs", + help=( + "[TensorBoard](https://www.tensorflow.org/tensorboard) log directory. Will default to" + " *output_dir/runs/**CURRENT_DATETIME_HOSTNAME***." + ), + ) + parser.add_argument( + "--mixed_precision", + type=str, + default="no", + choices=["no", "fp16", "bf16"], + help=( + "Whether to use mixed precision. Choose" + "between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >= 1.10." + "and an Nvidia Ampere GPU." + ), + ) + parser.add_argument( + "--allow_tf32", + action="store_true", + help=( + "Whether or not to allow TF32 on Ampere GPUs. Can be used to speed up training. For more information, see" + " https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices" + ), + ) + parser.add_argument( + "--report_to", + type=str, + default="tensorboard", + help=( + 'The integration to report the results and logs to. Supported platforms are `"tensorboard"`' + ' (default), `"wandb"` and `"comet_ml"`. Use `"all"` to report to all integrations.' + ), + ) + parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank") + parser.add_argument( + "--checkpointing_steps", + type=int, + default=500, + help=( + "Save a checkpoint of the training state every X updates. These checkpoints are only suitable for resuming" + " training using `--resume_from_checkpoint`." + ), + ) + parser.add_argument( + "--resume_from_checkpoint", + type=str, + default=None, + help=( + "Whether training should be resumed from a previous checkpoint. Use a path saved by" + ' `--checkpointing_steps`, or `"latest"` to automatically select the last available checkpoint.' 
+ ), + ) + parser.add_argument( + "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers." + ) + + args = parser.parse_args() + env_local_rank = int(os.environ.get("LOCAL_RANK", -1)) + if env_local_rank != -1 and env_local_rank != args.local_rank: + args.local_rank = env_local_rank + return args + + +imagenet_templates_small = [ + "a photo of a {}", + "a rendering of a {}", + "a cropped photo of the {}", + "the photo of a {}", + "a photo of a clean {}", + "a photo of a dirty {}", + "a dark photo of the {}", + "a photo of my {}", + "a photo of the cool {}", + "a close-up photo of a {}", + "a bright photo of the {}", + "a cropped photo of a {}", + "a photo of the {}", + "a good photo of the {}", + "a photo of one {}", + "a close-up photo of the {}", + "a rendition of the {}", + "a photo of the clean {}", + "a rendition of a {}", + "a photo of a nice {}", + "a good photo of a {}", + "a photo of the nice {}", + "a photo of the small {}", + "a photo of the weird {}", + "a photo of the large {}", + "a photo of a cool {}", + "a photo of a small {}", +] + +imagenet_style_templates_small = [ + "a painting in the style of {}", + "a rendering in the style of {}", + "a cropped painting in the style of {}", + "the painting in the style of {}", + "a clean painting in the style of {}", + "a dirty painting in the style of {}", + "a dark painting in the style of {}", + "a picture in the style of {}", + "a cool painting in the style of {}", + "a close-up painting in the style of {}", + "a bright painting in the style of {}", + "a cropped painting in the style of {}", + "a good painting in the style of {}", + "a close-up painting in the style of {}", + "a rendition in the style of {}", + "a nice painting in the style of {}", + "a small painting in the style of {}", + "a weird painting in the style of {}", + "a large painting in the style of {}", +] + + +class TextualInversionDataset(Dataset): + def __init__( + self, + data_root, + tokenizer, + learnable_property="object", # [object, style] + size=512, + repeats=100, + interpolation="bicubic", + flip_p=0.5, + set="train", + placeholder_token="*", + center_crop=False, + ): + self.data_root = data_root + self.tokenizer = tokenizer + self.learnable_property = learnable_property + self.size = size + self.placeholder_token = placeholder_token + self.center_crop = center_crop + self.flip_p = flip_p + + self.image_paths = [os.path.join(self.data_root, file_path) for file_path in os.listdir(self.data_root)] + + self.num_images = len(self.image_paths) + self._length = self.num_images + + if set == "train": + self._length = self.num_images * repeats + + self.interpolation = { + "linear": PIL_INTERPOLATION["linear"], + "bilinear": PIL_INTERPOLATION["bilinear"], + "bicubic": PIL_INTERPOLATION["bicubic"], + "lanczos": PIL_INTERPOLATION["lanczos"], + }[interpolation] + + self.templates = imagenet_style_templates_small if learnable_property == "style" else imagenet_templates_small + self.flip_transform = transforms.RandomHorizontalFlip(p=self.flip_p) + + def __len__(self): + return self._length + + def __getitem__(self, i): + example = {} + image = Image.open(self.image_paths[i % self.num_images]) + + if not image.mode == "RGB": + image = image.convert("RGB") + + placeholder_string = self.placeholder_token + text = random.choice(self.templates).format(placeholder_string) + + example["input_ids"] = self.tokenizer( + text, + padding="max_length", + truncation=True, + max_length=self.tokenizer.model_max_length, + 
return_tensors="pt", + ).input_ids[0] + + # default to score-sde preprocessing + img = np.array(image).astype(np.uint8) + + if self.center_crop: + crop = min(img.shape[0], img.shape[1]) + h, w, = ( + img.shape[0], + img.shape[1], + ) + img = img[(h - crop) // 2 : (h + crop) // 2, (w - crop) // 2 : (w + crop) // 2] + + image = Image.fromarray(img) + image = image.resize((self.size, self.size), resample=self.interpolation) + + image = self.flip_transform(image) + image = np.array(image).astype(np.uint8) + image = (image / 127.5 - 1.0).astype(np.float32) + + example["pixel_values"] = torch.from_numpy(image).permute(2, 0, 1) + return example + + +def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None): + if token is None: + token = HfFolder.get_token() + if organization is None: + username = whoami(token)["name"] + return f"{username}/{model_id}" + else: + return f"{organization}/{model_id}" + + +def do_textual_inversion_training(args:Namespace): + + # Out of laziness, we are getting our parameterse from an argparse Namespace + # which is a legacy of the original script. This needs to be changed. + + if args.train_data_dir is None: + raise ValueError("You must specify a training data directory with --train_data_dir.") + + if args.placeholder_token is None: + raise ValueError("You must specify a placeholder token with --placeholder_token.") + + if args.initializer__token is None: + raise ValueError("You must specify an initializer token with --initializer_token.") + + if args.model is None: + raise ValueError("You must specify a model name with --model.") + + # setting up things the way invokeai expects them + if os.path.exists(args.root_dir): + Globals.root = args.root_dir + if not os.path.isabs(args.output_dir): + args.output_dir = os.path.join(Globals.root,args.output_dir) + + logging_dir = os.path.join(args.output_dir, args.logging_dir) + + accelerator = Accelerator( + gradient_accumulation_steps=args.gradient_accumulation_steps, + mixed_precision=args.mixed_precision, + log_with=args.report_to, + logging_dir=logging_dir, + ) + + # Make one log on every process with the configuration for debugging. + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + level=logging.INFO, + ) + logger.info(accelerator.state, main_process_only=False) + if accelerator.is_local_main_process: + datasets.utils.logging.set_verbosity_warning() + transformers.utils.logging.set_verbosity_warning() + diffusers.utils.logging.set_verbosity_info() + else: + datasets.utils.logging.set_verbosity_error() + transformers.utils.logging.set_verbosity_error() + diffusers.utils.logging.set_verbosity_error() + + # If passed along, set the training seed now. 
+ if args.seed is not None: + set_seed(args.seed) + + # Handle the repository creation + if accelerator.is_main_process: + if args.push_to_hub: + if args.hub_model_id is None: + repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) + else: + repo_name = args.hub_model_id + repo = Repository(args.output_dir, clone_from=repo_name) + + with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: + if "step_*" not in gitignore: + gitignore.write("step_*\n") + if "epoch_*" not in gitignore: + gitignore.write("epoch_*\n") + elif args.output_dir is not None: + os.makedirs(args.output_dir, exist_ok=True) + + models_conf = OmegaConf.load(os.path.join(Globals.root,'configs/models.yaml')) + model_conf = models_conf.get(args.model,None) + assert model_conf is not None,f'Unknown model: {args.model}' + assert model_conf.get('format','diffusers')=='diffusers', "This script only works with models of type 'diffusers'" + pretrained_model_name_or_path = model_conf.get('repo_id',None) or Path(model_conf.get('path')) + assert pretrained_model_name_or_path, f"models.yaml error: neither 'repo_id' nor 'path' is defined for {args.model}" + pipeline_args = dict() + if not isinstance(pretrained_model_name_or_path,Path): + pipeline_args.update(cache_dir=os.path.join(Globals.root,'models',pretrained_model_name_or_path)) + + # Load tokenizer + if args.tokenizer_name: + tokenizer = CLIPTokenizer.from_pretrained(args.tokenizer_name) + else: + tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer", **pipeline_args) + + # Load scheduler and models + noise_scheduler = DDPMScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler", **pipeline_args) + text_encoder = CLIPTextModel.from_pretrained( + pretrained_model_name_or_path, subfolder="text_encoder", revision=args.revision, **pipeline_args + ) + vae = AutoencoderKL.from_pretrained(pretrained_model_name_or_path, subfolder="vae", revision=args.revision, **pipeline_args) + unet = UNet2DConditionModel.from_pretrained( + pretrained_model_name_or_path, subfolder="unet", revision=args.revision, **pipeline_args + ) + + # Add the placeholder token in tokenizer + num_added_tokens = tokenizer.add_tokens(args.placeholder_token) + if num_added_tokens == 0: + raise ValueError( + f"The tokenizer already contains the token {args.placeholder_token}. Please pass a different" + " `placeholder_token` that is not already in the tokenizer." 
+ ) + + # Convert the initializer_token, placeholder_token to ids + token_ids = tokenizer.encode(args.initializer_token, add_special_tokens=False) + # Check if initializer_token is a single token or a sequence of tokens + if len(token_ids) > 1: + raise ValueError("The initializer token must be a single token.") + + initializer_token_id = token_ids[0] + placeholder_token_id = tokenizer.convert_tokens_to_ids(args.placeholder_token) + + # Resize the token embeddings as we are adding new special tokens to the tokenizer + text_encoder.resize_token_embeddings(len(tokenizer)) + + # Initialise the newly added placeholder token with the embeddings of the initializer token + token_embeds = text_encoder.get_input_embeddings().weight.data + token_embeds[placeholder_token_id] = token_embeds[initializer_token_id] + + # Freeze vae and unet + vae.requires_grad_(False) + unet.requires_grad_(False) + # Freeze all parameters except for the token embeddings in text encoder + text_encoder.text_model.encoder.requires_grad_(False) + text_encoder.text_model.final_layer_norm.requires_grad_(False) + text_encoder.text_model.embeddings.position_embedding.requires_grad_(False) + + if args.gradient_checkpointing: + # Keep unet in train mode if we are using gradient checkpointing to save memory. + # The dropout cannot be != 0 so it doesn't matter if we are in eval or train mode. + unet.train() + text_encoder.gradient_checkpointing_enable() + unet.enable_gradient_checkpointing() + + if args.enable_xformers_memory_efficient_attention: + if is_xformers_available(): + unet.enable_xformers_memory_efficient_attention() + else: + raise ValueError("xformers is not available. Make sure it is installed correctly") + + # Enable TF32 for faster training on Ampere GPUs, + # cf https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices + if args.allow_tf32: + torch.backends.cuda.matmul.allow_tf32 = True + + if args.scale_lr: + args.learning_rate = ( + args.learning_rate * args.gradient_accumulation_steps * args.train_batch_size * accelerator.num_processes + ) + + # Initialize the optimizer + optimizer = torch.optim.AdamW( + text_encoder.get_input_embeddings().parameters(), # only optimize the embeddings + lr=args.learning_rate, + betas=(args.adam_beta1, args.adam_beta2), + weight_decay=args.adam_weight_decay, + eps=args.adam_epsilon, + ) + + # Dataset and DataLoaders creation: + train_dataset = TextualInversionDataset( + data_root=args.train_data_dir, + tokenizer=tokenizer, + size=args.resolution, + placeholder_token=args.placeholder_token, + repeats=args.repeats, + learnable_property=args.learnable_property, + center_crop=args.center_crop, + set="train", + ) + train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=args.train_batch_size, shuffle=True) + + # Scheduler and math around the number of training steps. + overrode_max_train_steps = False + num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps) + if args.max_train_steps is None: + args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch + overrode_max_train_steps = True + + lr_scheduler = get_scheduler( + args.lr_scheduler, + optimizer=optimizer, + num_warmup_steps=args.lr_warmup_steps * args.gradient_accumulation_steps, + num_training_steps=args.max_train_steps * args.gradient_accumulation_steps, + ) + + # Prepare everything with our `accelerator`. 
+ text_encoder, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( + text_encoder, optimizer, train_dataloader, lr_scheduler + ) + + # For mixed precision training we cast the text_encoder and vae weights to half-precision + # as these models are only used for inference, keeping weights in full precision is not required. + weight_dtype = torch.float32 + if accelerator.mixed_precision == "fp16": + weight_dtype = torch.float16 + elif accelerator.mixed_precision == "bf16": + weight_dtype = torch.bfloat16 + + # Move vae and unet to device and cast to weight_dtype + unet.to(accelerator.device, dtype=weight_dtype) + vae.to(accelerator.device, dtype=weight_dtype) + + # We need to recalculate our total training steps as the size of the training dataloader may have changed. + num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps) + if overrode_max_train_steps: + args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch + # Afterwards we recalculate our number of training epochs + args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch) + + # We need to initialize the trackers we use, and also store our configuration. + # The trackers initializes automatically on the main process. + if accelerator.is_main_process: + accelerator.init_trackers("textual_inversion", config=vars(args)) + + # Train! + total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps + + logger.info("***** Running training *****") + logger.info(f" Num examples = {len(train_dataset)}") + logger.info(f" Num Epochs = {args.num_train_epochs}") + logger.info(f" Instantaneous batch size per device = {args.train_batch_size}") + logger.info(f" Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}") + logger.info(f" Gradient Accumulation steps = {args.gradient_accumulation_steps}") + logger.info(f" Total optimization steps = {args.max_train_steps}") + global_step = 0 + first_epoch = 0 + + # Potentially load in the weights and states from a previous save + if args.resume_from_checkpoint: + if args.resume_from_checkpoint != "latest": + path = os.path.basename(args.resume_from_checkpoint) + else: + # Get the most recent checkpoint + dirs = os.listdir(args.output_dir) + dirs = [d for d in dirs if d.startswith("checkpoint")] + dirs = sorted(dirs, key=lambda x: int(x.split("-")[1])) + path = dirs[-1] + accelerator.print(f"Resuming from checkpoint {path}") + accelerator.load_state(os.path.join(args.output_dir, path)) + global_step = int(path.split("-")[1]) + + resume_global_step = global_step * args.gradient_accumulation_steps + first_epoch = resume_global_step // num_update_steps_per_epoch + resume_step = resume_global_step % num_update_steps_per_epoch + + # Only show the progress bar once on each machine. 
+ progress_bar = tqdm(range(global_step, args.max_train_steps), disable=not accelerator.is_local_main_process) + progress_bar.set_description("Steps") + + # keep original embeddings as reference + orig_embeds_params = accelerator.unwrap_model(text_encoder).get_input_embeddings().weight.data.clone() + + for epoch in range(first_epoch, args.num_train_epochs): + text_encoder.train() + for step, batch in enumerate(train_dataloader): + # Skip steps until we reach the resumed step + if args.resume_from_checkpoint and epoch == first_epoch and step < resume_step: + if step % args.gradient_accumulation_steps == 0: + progress_bar.update(1) + continue + + with accelerator.accumulate(text_encoder): + # Convert images to latent space + latents = vae.encode(batch["pixel_values"].to(dtype=weight_dtype)).latent_dist.sample().detach() + latents = latents * 0.18215 + + # Sample noise that we'll add to the latents + noise = torch.randn_like(latents) + bsz = latents.shape[0] + # Sample a random timestep for each image + timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (bsz,), device=latents.device) + timesteps = timesteps.long() + + # Add noise to the latents according to the noise magnitude at each timestep + # (this is the forward diffusion process) + noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps) + + # Get the text embedding for conditioning + encoder_hidden_states = text_encoder(batch["input_ids"])[0].to(dtype=weight_dtype) + + # Predict the noise residual + model_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample + + # Get the target for loss depending on the prediction type + if noise_scheduler.config.prediction_type == "epsilon": + target = noise + elif noise_scheduler.config.prediction_type == "v_prediction": + target = noise_scheduler.get_velocity(latents, noise, timesteps) + else: + raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}") + + loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean") + + accelerator.backward(loss) + + optimizer.step() + lr_scheduler.step() + optimizer.zero_grad() + + # Let's make sure we don't update any embedding weights besides the newly added token + index_no_updates = torch.arange(len(tokenizer)) != placeholder_token_id + with torch.no_grad(): + accelerator.unwrap_model(text_encoder).get_input_embeddings().weight[ + index_no_updates + ] = orig_embeds_params[index_no_updates] + + # Checks if the accelerator has performed an optimization step behind the scenes + if accelerator.sync_gradients: + progress_bar.update(1) + global_step += 1 + if global_step % args.save_steps == 0: + save_path = os.path.join(args.output_dir, f"learned_embeds-steps-{global_step}.bin") + save_progress(text_encoder, placeholder_token_id, accelerator, args, save_path) + + if global_step % args.checkpointing_steps == 0: + if accelerator.is_main_process: + save_path = os.path.join(args.output_dir, f"checkpoint-{global_step}") + accelerator.save_state(save_path) + logger.info(f"Saved state to {save_path}") + + logs = {"loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0]} + progress_bar.set_postfix(**logs) + accelerator.log(logs, step=global_step) + + if global_step >= args.max_train_steps: + break + + # Create the pipeline using using the trained modules and save it. 
+ accelerator.wait_for_everyone() + if accelerator.is_main_process: + if args.push_to_hub and args.only_save_embeds: + logger.warn("Enabling full model saving because --push_to_hub=True was specified.") + save_full_model = True + else: + save_full_model = not args.only_save_embeds + if save_full_model: + pipeline = StableDiffusionPipeline.from_pretrained( + pretrained_model_name_or_path, + text_encoder=accelerator.unwrap_model(text_encoder), + vae=vae, + unet=unet, + tokenizer=tokenizer, + **pipeline_args, + ) + pipeline.save_pretrained(args.output_dir) + # Save the newly trained embeddings + save_path = os.path.join(args.output_dir, "learned_embeds.bin") + save_progress(text_encoder, placeholder_token_id, accelerator, args, save_path) + + if args.push_to_hub: + repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True) + + accelerator.end_training() diff --git a/scripts/textual_inversion.py b/scripts/textual_inversion.py index 04e22aaf017..81784f25349 100755 --- a/scripts/textual_inversion.py +++ b/scripts/textual_inversion.py @@ -1,764 +1,9 @@ #!/usr/bin/env python -# This script was copied from -# https://github.com/huggingface/diffusers/blob/main/examples/textual_inversion/textual_inversion.py -# on January 2, 2023 -# and modified slightly by Lincoln Stein (@lstein) to work with InvokeAI - -import argparse -import logging -import math -import os -import random -from pathlib import Path -from typing import Optional - -import numpy as np -import torch -import torch.nn.functional as F -import torch.utils.checkpoint -from torch.utils.data import Dataset - -import datasets -import diffusers -import PIL -import transformers -from accelerate import Accelerator -from accelerate.logging import get_logger -from accelerate.utils import set_seed -from diffusers import AutoencoderKL, DDPMScheduler, StableDiffusionPipeline, UNet2DConditionModel -from diffusers.optimization import get_scheduler -from diffusers.utils import check_min_version -from diffusers.utils.import_utils import is_xformers_available -from huggingface_hub import HfFolder, Repository, whoami - -# invokeai stuff -from ldm.invoke.globals import Globals -from omegaconf import OmegaConf - -# TODO: remove and import from diffusers.utils when the new version of diffusers is released -from packaging import version -from PIL import Image -from torchvision import transforms -from tqdm.auto import tqdm -from transformers import CLIPTextModel, CLIPTokenizer - -if version.parse(version.parse(PIL.__version__).base_version) >= version.parse("9.1.0"): - PIL_INTERPOLATION = { - "linear": PIL.Image.Resampling.BILINEAR, - "bilinear": PIL.Image.Resampling.BILINEAR, - "bicubic": PIL.Image.Resampling.BICUBIC, - "lanczos": PIL.Image.Resampling.LANCZOS, - "nearest": PIL.Image.Resampling.NEAREST, - } -else: - PIL_INTERPOLATION = { - "linear": PIL.Image.LINEAR, - "bilinear": PIL.Image.BILINEAR, - "bicubic": PIL.Image.BICUBIC, - "lanczos": PIL.Image.LANCZOS, - "nearest": PIL.Image.NEAREST, - } -# ------------------------------------------------------------------------------ - - -# Will error if the minimal version of diffusers is not installed. Remove at your own risks. 
-check_min_version("0.10.0.dev0") - - -logger = get_logger(__name__) - - -def save_progress(text_encoder, placeholder_token_id, accelerator, args, save_path): - logger.info("Saving embeddings") - learned_embeds = accelerator.unwrap_model(text_encoder).get_input_embeddings().weight[placeholder_token_id] - learned_embeds_dict = {args.placeholder_token: learned_embeds.detach().cpu()} - torch.save(learned_embeds_dict, save_path) - - -def parse_args(): - parser = argparse.ArgumentParser(description="Simple example of a training script.") - parser.add_argument( - "--save_steps", - type=int, - default=500, - help="Save learned_embeds.bin every X updates steps.", - ) - parser.add_argument( - '--root_dir','--root', - type=str, - default=Globals.root, - help="Path to the invokeai runtime directory", - ) - parser.add_argument( - "--only_save_embeds", - action="store_true", - default=False, - help="Save only the embeddings for the new concept.", - ) - parser.add_argument( - "--model", - type=str, - default=None, - required=True, - help="Name of the diffusers model to train against, as defined in configs/models.yaml.", - ) - parser.add_argument( - "--revision", - type=str, - default=None, - required=False, - help="Revision of pretrained model identifier from huggingface.co/models.", - ) - parser.add_argument( - "--tokenizer_name", - type=str, - default=None, - help="Pretrained tokenizer name or path if not the same as model_name", - ) - parser.add_argument( - "--train_data_dir", type=str, default=None, required=True, help="A folder containing the training data." - ) - parser.add_argument( - "--placeholder_token", - type=str, - default=None, - required=True, - help="A token to use as a placeholder for the concept.", - ) - parser.add_argument( - "--initializer_token", type=str, default=None, required=True, help="A token to use as initializer word." - ) - parser.add_argument("--learnable_property", type=str, default="object", help="Choose between 'object' and 'style'") - parser.add_argument("--repeats", type=int, default=100, help="How many times to repeat the training data.") - parser.add_argument( - "--output_dir", - type=str, - default=f'{Globals.root}/text-inversion-model', - help="The output directory where the model predictions and checkpoints will be written.", - ) - parser.add_argument("--seed", type=int, default=None, help="A seed for reproducible training.") - parser.add_argument( - "--resolution", - type=int, - default=512, - help=( - "The resolution for input images, all the images in the train/validation dataset will be resized to this" - " resolution" - ), - ) - parser.add_argument( - "--center_crop", action="store_true", help="Whether to center crop images before resizing to resolution" - ) - parser.add_argument( - "--train_batch_size", type=int, default=16, help="Batch size (per device) for the training dataloader." - ) - parser.add_argument("--num_train_epochs", type=int, default=100) - parser.add_argument( - "--max_train_steps", - type=int, - default=5000, - help="Total number of training steps to perform. 
If provided, overrides num_train_epochs.", - ) - parser.add_argument( - "--gradient_accumulation_steps", - type=int, - default=1, - help="Number of updates steps to accumulate before performing a backward/update pass.", - ) - parser.add_argument( - "--gradient_checkpointing", - action="store_true", - help="Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass.", - ) - parser.add_argument( - "--learning_rate", - type=float, - default=1e-4, - help="Initial learning rate (after the potential warmup period) to use.", - ) - parser.add_argument( - "--scale_lr", - action="store_true", - default=True, - help="Scale the learning rate by the number of GPUs, gradient accumulation steps, and batch size.", - ) - parser.add_argument( - "--lr_scheduler", - type=str, - default="constant", - help=( - 'The scheduler type to use. Choose between ["linear", "cosine", "cosine_with_restarts", "polynomial",' - ' "constant", "constant_with_warmup"]' - ), - ) - parser.add_argument( - "--lr_warmup_steps", type=int, default=500, help="Number of steps for the warmup in the lr scheduler." - ) - parser.add_argument("--adam_beta1", type=float, default=0.9, help="The beta1 parameter for the Adam optimizer.") - parser.add_argument("--adam_beta2", type=float, default=0.999, help="The beta2 parameter for the Adam optimizer.") - parser.add_argument("--adam_weight_decay", type=float, default=1e-2, help="Weight decay to use.") - parser.add_argument("--adam_epsilon", type=float, default=1e-08, help="Epsilon value for the Adam optimizer") - parser.add_argument("--push_to_hub", action="store_true", help="Whether or not to push the model to the Hub.") - parser.add_argument("--hub_token", type=str, default=None, help="The token to use to push to the Model Hub.") - parser.add_argument( - "--logging_dir", - type=str, - default="logs", - help=( - "[TensorBoard](https://www.tensorflow.org/tensorboard) log directory. Will default to" - " *output_dir/runs/**CURRENT_DATETIME_HOSTNAME***." - ), - ) - parser.add_argument( - "--mixed_precision", - type=str, - default="no", - choices=["no", "fp16", "bf16"], - help=( - "Whether to use mixed precision. Choose" - "between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >= 1.10." - "and an Nvidia Ampere GPU." - ), - ) - parser.add_argument( - "--allow_tf32", - action="store_true", - help=( - "Whether or not to allow TF32 on Ampere GPUs. Can be used to speed up training. For more information, see" - " https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices" - ), - ) - parser.add_argument( - "--report_to", - type=str, - default="tensorboard", - help=( - 'The integration to report the results and logs to. Supported platforms are `"tensorboard"`' - ' (default), `"wandb"` and `"comet_ml"`. Use `"all"` to report to all integrations.' - ), - ) - parser.add_argument("--local_rank", type=int, default=-1, help="For distributed training: local_rank") - parser.add_argument( - "--checkpointing_steps", - type=int, - default=500, - help=( - "Save a checkpoint of the training state every X updates. These checkpoints are only suitable for resuming" - " training using `--resume_from_checkpoint`." - ), - ) - parser.add_argument( - "--resume_from_checkpoint", - type=str, - default=None, - help=( - "Whether training should be resumed from a previous checkpoint. Use a path saved by" - ' `--checkpointing_steps`, or `"latest"` to automatically select the last available checkpoint.' 
- ), - ) - parser.add_argument( - "--enable_xformers_memory_efficient_attention", action="store_true", help="Whether or not to use xformers." - ) - - args = parser.parse_args() - env_local_rank = int(os.environ.get("LOCAL_RANK", -1)) - if env_local_rank != -1 and env_local_rank != args.local_rank: - args.local_rank = env_local_rank - - if args.train_data_dir is None: - raise ValueError("You must specify a train data directory.") - - return args - - -imagenet_templates_small = [ - "a photo of a {}", - "a rendering of a {}", - "a cropped photo of the {}", - "the photo of a {}", - "a photo of a clean {}", - "a photo of a dirty {}", - "a dark photo of the {}", - "a photo of my {}", - "a photo of the cool {}", - "a close-up photo of a {}", - "a bright photo of the {}", - "a cropped photo of a {}", - "a photo of the {}", - "a good photo of the {}", - "a photo of one {}", - "a close-up photo of the {}", - "a rendition of the {}", - "a photo of the clean {}", - "a rendition of a {}", - "a photo of a nice {}", - "a good photo of a {}", - "a photo of the nice {}", - "a photo of the small {}", - "a photo of the weird {}", - "a photo of the large {}", - "a photo of a cool {}", - "a photo of a small {}", -] - -imagenet_style_templates_small = [ - "a painting in the style of {}", - "a rendering in the style of {}", - "a cropped painting in the style of {}", - "the painting in the style of {}", - "a clean painting in the style of {}", - "a dirty painting in the style of {}", - "a dark painting in the style of {}", - "a picture in the style of {}", - "a cool painting in the style of {}", - "a close-up painting in the style of {}", - "a bright painting in the style of {}", - "a cropped painting in the style of {}", - "a good painting in the style of {}", - "a close-up painting in the style of {}", - "a rendition in the style of {}", - "a nice painting in the style of {}", - "a small painting in the style of {}", - "a weird painting in the style of {}", - "a large painting in the style of {}", -] - - -class TextualInversionDataset(Dataset): - def __init__( - self, - data_root, - tokenizer, - learnable_property="object", # [object, style] - size=512, - repeats=100, - interpolation="bicubic", - flip_p=0.5, - set="train", - placeholder_token="*", - center_crop=False, - ): - self.data_root = data_root - self.tokenizer = tokenizer - self.learnable_property = learnable_property - self.size = size - self.placeholder_token = placeholder_token - self.center_crop = center_crop - self.flip_p = flip_p - - self.image_paths = [os.path.join(self.data_root, file_path) for file_path in os.listdir(self.data_root)] - - self.num_images = len(self.image_paths) - self._length = self.num_images - - if set == "train": - self._length = self.num_images * repeats - - self.interpolation = { - "linear": PIL_INTERPOLATION["linear"], - "bilinear": PIL_INTERPOLATION["bilinear"], - "bicubic": PIL_INTERPOLATION["bicubic"], - "lanczos": PIL_INTERPOLATION["lanczos"], - }[interpolation] - - self.templates = imagenet_style_templates_small if learnable_property == "style" else imagenet_templates_small - self.flip_transform = transforms.RandomHorizontalFlip(p=self.flip_p) - - def __len__(self): - return self._length - - def __getitem__(self, i): - example = {} - image = Image.open(self.image_paths[i % self.num_images]) - - if not image.mode == "RGB": - image = image.convert("RGB") - - placeholder_string = self.placeholder_token - text = random.choice(self.templates).format(placeholder_string) - - example["input_ids"] = self.tokenizer( - text, - 
padding="max_length", - truncation=True, - max_length=self.tokenizer.model_max_length, - return_tensors="pt", - ).input_ids[0] - - # default to score-sde preprocessing - img = np.array(image).astype(np.uint8) - - if self.center_crop: - crop = min(img.shape[0], img.shape[1]) - h, w, = ( - img.shape[0], - img.shape[1], - ) - img = img[(h - crop) // 2 : (h + crop) // 2, (w - crop) // 2 : (w + crop) // 2] - - image = Image.fromarray(img) - image = image.resize((self.size, self.size), resample=self.interpolation) - - image = self.flip_transform(image) - image = np.array(image).astype(np.uint8) - image = (image / 127.5 - 1.0).astype(np.float32) - - example["pixel_values"] = torch.from_numpy(image).permute(2, 0, 1) - return example - - -def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: Optional[str] = None): - if token is None: - token = HfFolder.get_token() - if organization is None: - username = whoami(token)["name"] - return f"{username}/{model_id}" - else: - return f"{organization}/{model_id}" - - -def main(): - args = parse_args() - - # setting up things the way invokeai expects them - if os.path.exists(args.root_dir): - Globals.root = args.root_dir - if not os.path.isabs(args.output_dir): - args.output_dir = os.path.join(Globals.root,args.output_dir) - - logging_dir = os.path.join(args.output_dir, args.logging_dir) - - accelerator = Accelerator( - gradient_accumulation_steps=args.gradient_accumulation_steps, - mixed_precision=args.mixed_precision, - log_with=args.report_to, - logging_dir=logging_dir, - ) - - # Make one log on every process with the configuration for debugging. - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - level=logging.INFO, - ) - logger.info(accelerator.state, main_process_only=False) - if accelerator.is_local_main_process: - datasets.utils.logging.set_verbosity_warning() - transformers.utils.logging.set_verbosity_warning() - diffusers.utils.logging.set_verbosity_info() - else: - datasets.utils.logging.set_verbosity_error() - transformers.utils.logging.set_verbosity_error() - diffusers.utils.logging.set_verbosity_error() - - # If passed along, set the training seed now. 
- if args.seed is not None: - set_seed(args.seed) - - # Handle the repository creation - if accelerator.is_main_process: - if args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) - else: - repo_name = args.hub_model_id - repo = Repository(args.output_dir, clone_from=repo_name) - - with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: - if "step_*" not in gitignore: - gitignore.write("step_*\n") - if "epoch_*" not in gitignore: - gitignore.write("epoch_*\n") - elif args.output_dir is not None: - os.makedirs(args.output_dir, exist_ok=True) - - models_conf = OmegaConf.load(os.path.join(Globals.root,'configs/models.yaml')) - model_conf = models_conf.get(args.model,None) - assert model_conf is not None,f'Unknown model: {args.model}' - assert model_conf.get('format','diffusers')=='diffusers', "This script only works with models of type 'diffusers'" - pretrained_model_name_or_path = model_conf.get('repo_id',None) or Path(model_conf.get('path')) - assert pretrained_model_name_or_path, f"models.yaml error: neither 'repo_id' nor 'path' is defined for {args.model}" - pipeline_args = dict() - if not isinstance(pretrained_model_name_or_path,Path): - pipeline_args.update(cache_dir=os.path.join(Globals.root,'models',pretrained_model_name_or_path)) - - # Load tokenizer - if args.tokenizer_name: - tokenizer = CLIPTokenizer.from_pretrained(args.tokenizer_name) - else: - tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer", **pipeline_args) - - # Load scheduler and models - noise_scheduler = DDPMScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler", **pipeline_args) - text_encoder = CLIPTextModel.from_pretrained( - pretrained_model_name_or_path, subfolder="text_encoder", revision=args.revision, **pipeline_args - ) - vae = AutoencoderKL.from_pretrained(pretrained_model_name_or_path, subfolder="vae", revision=args.revision, **pipeline_args) - unet = UNet2DConditionModel.from_pretrained( - pretrained_model_name_or_path, subfolder="unet", revision=args.revision, **pipeline_args - ) - - # Add the placeholder token in tokenizer - num_added_tokens = tokenizer.add_tokens(args.placeholder_token) - if num_added_tokens == 0: - raise ValueError( - f"The tokenizer already contains the token {args.placeholder_token}. Please pass a different" - " `placeholder_token` that is not already in the tokenizer." 
- ) - - # Convert the initializer_token, placeholder_token to ids - token_ids = tokenizer.encode(args.initializer_token, add_special_tokens=False) - # Check if initializer_token is a single token or a sequence of tokens - if len(token_ids) > 1: - raise ValueError("The initializer token must be a single token.") - - initializer_token_id = token_ids[0] - placeholder_token_id = tokenizer.convert_tokens_to_ids(args.placeholder_token) - - # Resize the token embeddings as we are adding new special tokens to the tokenizer - text_encoder.resize_token_embeddings(len(tokenizer)) - - # Initialise the newly added placeholder token with the embeddings of the initializer token - token_embeds = text_encoder.get_input_embeddings().weight.data - token_embeds[placeholder_token_id] = token_embeds[initializer_token_id] - - # Freeze vae and unet - vae.requires_grad_(False) - unet.requires_grad_(False) - # Freeze all parameters except for the token embeddings in text encoder - text_encoder.text_model.encoder.requires_grad_(False) - text_encoder.text_model.final_layer_norm.requires_grad_(False) - text_encoder.text_model.embeddings.position_embedding.requires_grad_(False) - - if args.gradient_checkpointing: - # Keep unet in train mode if we are using gradient checkpointing to save memory. - # The dropout cannot be != 0 so it doesn't matter if we are in eval or train mode. - unet.train() - text_encoder.gradient_checkpointing_enable() - unet.enable_gradient_checkpointing() - - if args.enable_xformers_memory_efficient_attention: - if is_xformers_available(): - unet.enable_xformers_memory_efficient_attention() - else: - raise ValueError("xformers is not available. Make sure it is installed correctly") - - # Enable TF32 for faster training on Ampere GPUs, - # cf https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices - if args.allow_tf32: - torch.backends.cuda.matmul.allow_tf32 = True - - if args.scale_lr: - args.learning_rate = ( - args.learning_rate * args.gradient_accumulation_steps * args.train_batch_size * accelerator.num_processes - ) - - # Initialize the optimizer - optimizer = torch.optim.AdamW( - text_encoder.get_input_embeddings().parameters(), # only optimize the embeddings - lr=args.learning_rate, - betas=(args.adam_beta1, args.adam_beta2), - weight_decay=args.adam_weight_decay, - eps=args.adam_epsilon, - ) - - # Dataset and DataLoaders creation: - train_dataset = TextualInversionDataset( - data_root=args.train_data_dir, - tokenizer=tokenizer, - size=args.resolution, - placeholder_token=args.placeholder_token, - repeats=args.repeats, - learnable_property=args.learnable_property, - center_crop=args.center_crop, - set="train", - ) - train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=args.train_batch_size, shuffle=True) - - # Scheduler and math around the number of training steps. - overrode_max_train_steps = False - num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps) - if args.max_train_steps is None: - args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch - overrode_max_train_steps = True - - lr_scheduler = get_scheduler( - args.lr_scheduler, - optimizer=optimizer, - num_warmup_steps=args.lr_warmup_steps * args.gradient_accumulation_steps, - num_training_steps=args.max_train_steps * args.gradient_accumulation_steps, - ) - - # Prepare everything with our `accelerator`. 
- text_encoder, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( - text_encoder, optimizer, train_dataloader, lr_scheduler - ) - - # For mixed precision training we cast the text_encoder and vae weights to half-precision - # as these models are only used for inference, keeping weights in full precision is not required. - weight_dtype = torch.float32 - if accelerator.mixed_precision == "fp16": - weight_dtype = torch.float16 - elif accelerator.mixed_precision == "bf16": - weight_dtype = torch.bfloat16 - - # Move vae and unet to device and cast to weight_dtype - unet.to(accelerator.device, dtype=weight_dtype) - vae.to(accelerator.device, dtype=weight_dtype) - - # We need to recalculate our total training steps as the size of the training dataloader may have changed. - num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps) - if overrode_max_train_steps: - args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch - # Afterwards we recalculate our number of training epochs - args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch) - - # We need to initialize the trackers we use, and also store our configuration. - # The trackers initializes automatically on the main process. - if accelerator.is_main_process: - accelerator.init_trackers("textual_inversion", config=vars(args)) - - # Train! - total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps - - logger.info("***** Running training *****") - logger.info(f" Num examples = {len(train_dataset)}") - logger.info(f" Num Epochs = {args.num_train_epochs}") - logger.info(f" Instantaneous batch size per device = {args.train_batch_size}") - logger.info(f" Total train batch size (w. parallel, distributed & accumulation) = {total_batch_size}") - logger.info(f" Gradient Accumulation steps = {args.gradient_accumulation_steps}") - logger.info(f" Total optimization steps = {args.max_train_steps}") - global_step = 0 - first_epoch = 0 - - # Potentially load in the weights and states from a previous save - if args.resume_from_checkpoint: - if args.resume_from_checkpoint != "latest": - path = os.path.basename(args.resume_from_checkpoint) - else: - # Get the most recent checkpoint - dirs = os.listdir(args.output_dir) - dirs = [d for d in dirs if d.startswith("checkpoint")] - dirs = sorted(dirs, key=lambda x: int(x.split("-")[1])) - path = dirs[-1] - accelerator.print(f"Resuming from checkpoint {path}") - accelerator.load_state(os.path.join(args.output_dir, path)) - global_step = int(path.split("-")[1]) - - resume_global_step = global_step * args.gradient_accumulation_steps - first_epoch = resume_global_step // num_update_steps_per_epoch - resume_step = resume_global_step % num_update_steps_per_epoch - - # Only show the progress bar once on each machine. 
- progress_bar = tqdm(range(global_step, args.max_train_steps), disable=not accelerator.is_local_main_process) - progress_bar.set_description("Steps") - - # keep original embeddings as reference - orig_embeds_params = accelerator.unwrap_model(text_encoder).get_input_embeddings().weight.data.clone() - - for epoch in range(first_epoch, args.num_train_epochs): - text_encoder.train() - for step, batch in enumerate(train_dataloader): - # Skip steps until we reach the resumed step - if args.resume_from_checkpoint and epoch == first_epoch and step < resume_step: - if step % args.gradient_accumulation_steps == 0: - progress_bar.update(1) - continue - - with accelerator.accumulate(text_encoder): - # Convert images to latent space - latents = vae.encode(batch["pixel_values"].to(dtype=weight_dtype)).latent_dist.sample().detach() - latents = latents * 0.18215 - - # Sample noise that we'll add to the latents - noise = torch.randn_like(latents) - bsz = latents.shape[0] - # Sample a random timestep for each image - timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (bsz,), device=latents.device) - timesteps = timesteps.long() - - # Add noise to the latents according to the noise magnitude at each timestep - # (this is the forward diffusion process) - noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps) - - # Get the text embedding for conditioning - encoder_hidden_states = text_encoder(batch["input_ids"])[0].to(dtype=weight_dtype) - - # Predict the noise residual - model_pred = unet(noisy_latents, timesteps, encoder_hidden_states).sample - - # Get the target for loss depending on the prediction type - if noise_scheduler.config.prediction_type == "epsilon": - target = noise - elif noise_scheduler.config.prediction_type == "v_prediction": - target = noise_scheduler.get_velocity(latents, noise, timesteps) - else: - raise ValueError(f"Unknown prediction type {noise_scheduler.config.prediction_type}") - - loss = F.mse_loss(model_pred.float(), target.float(), reduction="mean") - - accelerator.backward(loss) - - optimizer.step() - lr_scheduler.step() - optimizer.zero_grad() - - # Let's make sure we don't update any embedding weights besides the newly added token - index_no_updates = torch.arange(len(tokenizer)) != placeholder_token_id - with torch.no_grad(): - accelerator.unwrap_model(text_encoder).get_input_embeddings().weight[ - index_no_updates - ] = orig_embeds_params[index_no_updates] - - # Checks if the accelerator has performed an optimization step behind the scenes - if accelerator.sync_gradients: - progress_bar.update(1) - global_step += 1 - if global_step % args.save_steps == 0: - save_path = os.path.join(args.output_dir, f"learned_embeds-steps-{global_step}.bin") - save_progress(text_encoder, placeholder_token_id, accelerator, args, save_path) - - if global_step % args.checkpointing_steps == 0: - if accelerator.is_main_process: - save_path = os.path.join(args.output_dir, f"checkpoint-{global_step}") - accelerator.save_state(save_path) - logger.info(f"Saved state to {save_path}") - - logs = {"loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0]} - progress_bar.set_postfix(**logs) - accelerator.log(logs, step=global_step) - - if global_step >= args.max_train_steps: - break - - # Create the pipeline using using the trained modules and save it. 
- accelerator.wait_for_everyone() - if accelerator.is_main_process: - if args.push_to_hub and args.only_save_embeds: - logger.warn("Enabling full model saving because --push_to_hub=True was specified.") - save_full_model = True - else: - save_full_model = not args.only_save_embeds - if save_full_model: - pipeline = StableDiffusionPipeline.from_pretrained( - pretrained_model_name_or_path, - text_encoder=accelerator.unwrap_model(text_encoder), - vae=vae, - unet=unet, - tokenizer=tokenizer, - **pipeline_args, - ) - pipeline.save_pretrained(args.output_dir) - # Save the newly trained embeddings - save_path = os.path.join(args.output_dir, "learned_embeds.bin") - save_progress(text_encoder, placeholder_token_id, accelerator, args, save_path) - - if args.push_to_hub: - repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True) - - accelerator.end_training() +# Copyright 2023, Lincoln Stein @lstein +from ldm.invoke.textual_inversion_training import parse_args, do_textual_inversion_training if __name__ == "__main__": - main() + args = parse_args() + do_textual_inversion_training(args) diff --git a/scripts/textual_inversion_fe.py b/scripts/textual_inversion_fe.py new file mode 100755 index 00000000000..b07b4ede682 --- /dev/null +++ b/scripts/textual_inversion_fe.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python + +import npyscreen +import os +import sys +import curses +from ldm.invoke.globals import Globals +from omegaconf import OmegaConf +from pathlib import Path +from typing import List +from argparse import Namespace + +class textualInversionForm(npyscreen.FormMultiPageAction): + resolutions = [512, 768, 1024] + lr_schedulers = [ + "linear", "cosine", "cosine_with_restarts", + "polynomial","constant", "constant_with_warmup" + ] + + def afterEditing(self): + self.parentApp.setNextForm(None) + + def create(self): + self.model_names, default = self.get_model_names() + default_token = 'cat-toy' + + self.model = self.add_widget_intelligent( + npyscreen.TitleSelectOne, + name='Model Name', + values=self.model_names, + value=default, + max_height=len(self.model_names)+1 + ) + self.initializer_token = self.add_widget_intelligent( + npyscreen.TitleText, + name="Initializer Token", + value=default_token, + ) + self.placeholder_token = self.add_widget_intelligent( + npyscreen.TitleText, + name="Placeholder Token", + value=f'<{default_token}>' + ) + self.train_data_dir = self.add_widget_intelligent( + npyscreen.TitleFilenameCombo, + name='Data Training Directory', + select_dir=True, + must_exist=True, + value=Path(Globals.root) / 'training-data' / default_token + ) + self.output_dir = self.add_widget_intelligent( + npyscreen.TitleFilenameCombo, + name='Output Destination Directory', + select_dir=True, + must_exist=False, + value=Path(Globals.root) / 'embeddings' / default_token + ) + self.resolution = self.add_widget_intelligent( + npyscreen.TitleSelectOne, + name='Resolution', + values = self.resolutions, + value=0, + scroll_exit = True, + max_height=3 + ) + self.train_batch_size = self.add_widget_intelligent( + npyscreen.TitleSlider, + name='Batch Size', + out_of=10, + step=1, + lowest=1, + value=1 + ) + self.gradient_accumulation_steps = self.add_widget_intelligent( + npyscreen.TitleSlider, + name='Gradient Accumulation Steps', + out_of=10, + step=1, + lowest=1, + value=4 + ) + self.max_train_steps = self.add_widget_intelligent( + npyscreen.TitleSlider, + name='Max Training Steps', + out_of=10000, + step=500, + lowest=1, + value=3000 + ) + self.learning_rate = 
self.add_widget_intelligent( + npyscreen.TitleText, + name="Learning Rate", + value='5.0e-04', + ) + self.scale_lr = self.add_widget_intelligent( + npyscreen.Checkbox, + name="Scale learning rate by number GPUs, steps and batch size", + value=True + ) + self.lr_scheduler = self.add_widget_intelligent( + npyscreen.TitleSelectOne, + name='Learning rate scheduler', + values = self.lr_schedulers, + max_height=7, + scroll_exit = True, + value=4) + self.lr_warmup_steps = self.add_widget_intelligent( + npyscreen.TitleSlider, + name='Warmup Steps', + out_of=100, + step=1, + lowest=0, + value=0 + ) + self.initializer_token.when_value_edited = self.initializer_changed + + def initializer_changed(self): + new_value = self.initializer_token.value + self.placeholder_token.value = f'<{new_value}>' + self.train_data_dir.value = Path(Globals.root) / 'training-data' / new_value + self.output_dir.value = Path(Globals.root) / 'embeddings' / new_value + + def on_ok(self): + if self.validate_field_values(): + self.parentApp.setNextForm(None) + self.editing = False + self.run_textual_inversion() + else: + self.editing = True + + def validate_field_values(self)->bool: + bad_fields = [] + if self.model.value is None: + bad_fields.append('Model Name must correspond to a known model in models.yaml') + if self.train_data_dir.value is None: + bad_fields.append('Data Training Directory cannot be empty') + if self.output_dir.value is None: + bad_fields.append('The Output Destination Directory cannot be empty') + if len(bad_fields) > 0: + message = 'The following problems were detected and must be corrected:' + for problem in bad_fields: + message += f'\n* {problem}' + npyscreen.notify_confirm(message) + return False + else: + return True + + def get_model_names(self)->(List[str],int): + conf = OmegaConf.load(os.path.join(Globals.root,'configs/models.yaml')) + model_names = list(conf.keys()) + defaults = [idx for idx in range(len(model_names)) if 'default' in conf[model_names[idx]]] + return (model_names,defaults[0]) + + def create_namespace(self): + args = Namespace() + + # the choices + args.model = self.model_names[self.model.value[0]] + args.resolution = self.resolutions[self.resolution.value[0]] + args.lr_scheduler = self.lr_schedulers[self.lr_scheduler.value[0]] + + # all the strings + for attr in ('initializer_token','placeholder_token','train_data_dir','output_dir','scale_lr'): + setattr(args,attr,getattr(self,attr).value) + # all the integers + for attr in ('train_batch_size','gradient_accumulation_steps', + 'max_train_steps','lr_warmup_steps'): + setattr(args,attr,int(getattr(self,attr).value)) + # the floats (just one) + args.learning_rate = float(self.learning_rate.value) + return args + + def run_textual_inversion(self): + npyscreen.notify('Launching textual inversion training. 
This will take a while...') + from ldm.invoke.textual_inversion_training import do_textual_inversion_training, parse_args + args = parse_args() + args.root_dir = Globals.root + do_textual_inversion_training(args) + +class MyApplication(npyscreen.NPSAppManaged): + def onStart(self): + npyscreen.setTheme(npyscreen.Themes.DefaultTheme) + self.main = self.addForm('MAIN', textualInversionForm, name='Textual Inversion Settings') + +if __name__ == '__main__': + TestApp = MyApplication().run() From 1ff402f8f56fb7e8f57e9896e61ac96c4b036e2d Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 3 Jan 2023 23:42:14 -0500 Subject: [PATCH 122/199] add curses-based interface for textual inversion --- .../requirements-base.txt | 1 + installer/templates/invoke.bat.in | 9 +- installer/templates/invoke.sh.in | 11 +- ldm/invoke/textual_inversion_training.py | 257 ++++++++++-------- scripts/textual_inversion.py | 6 +- scripts/textual_inversion_fe.py | 120 +++++--- setup.py | 3 +- 7 files changed, 251 insertions(+), 156 deletions(-) diff --git a/environments-and-requirements/requirements-base.txt b/environments-and-requirements/requirements-base.txt index c0d89767ab1..fda97aaa2d8 100644 --- a/environments-and-requirements/requirements-base.txt +++ b/environments-and-requirements/requirements-base.txt @@ -37,6 +37,7 @@ test-tube>=0.7.5 torch-fidelity torchmetrics transformers~=4.25 +windows-curses; sys_platform == 'win32' https://github.com/Birch-san/k-diffusion/archive/refs/heads/mps.zip#egg=k-diffusion https://github.com/invoke-ai/PyPatchMatch/archive/refs/tags/0.1.5.zip#egg=pypatchmatch https://github.com/openai/CLIP/archive/eaa22acb90a5876642d0507623e859909230a52d.zip#egg=clip diff --git a/installer/templates/invoke.bat.in b/installer/templates/invoke.bat.in index 19885d84678..3a67cb2f577 100644 --- a/installer/templates/invoke.bat.in +++ b/installer/templates/invoke.bat.in @@ -9,8 +9,10 @@ set INVOKEAI_ROOT=. echo Do you want to generate images using the echo 1. command-line echo 2. browser-based UI -echo 3. open the developer console -set /P restore="Please enter 1, 2 or 3: " +echo 3. textual inversion training +echo 4. open the developer console +set /P restore="Please enter 1, 2, 3 or 4: [1] " +if not defined restore set restore=1 IF /I "%restore%" == "1" ( echo Starting the InvokeAI command-line.. python .venv\Scripts\invoke.py %* @@ -18,6 +20,9 @@ IF /I "%restore%" == "1" ( echo Starting the InvokeAI browser-based UI.. python .venv\Scripts\invoke.py --web %* ) ELSE IF /I "%restore%" == "3" ( + echo Starting textual inversion training.. + python .venv\Scripts\textual_inversion_fe.py --web %* +) ELSE IF /I "%restore%" == "4" ( echo Developer Console echo Python command is: where python diff --git a/installer/templates/invoke.sh.in b/installer/templates/invoke.sh.in index 7b67c95a60c..143ce49beaf 100644 --- a/installer/templates/invoke.sh.in +++ b/installer/templates/invoke.sh.in @@ -19,12 +19,15 @@ if [ "$0" != "bash" ]; then echo "Do you want to generate images using the" echo "1. command-line" echo "2. browser-based UI" - echo "3. open the developer console" - read -p "Please enter 1, 2, or 3: " yn - case $yn in + echo "3. textual inversion training" + echo "4. 
open the developer console" + read -p "Please enter 1, 2, 3 or 4: [1] " yn + choice=${yn:='1'} + case $choice in 1 ) printf "\nStarting the InvokeAI command-line..\n"; .venv/bin/python .venv/bin/invoke.py $*;; 2 ) printf "\nStarting the InvokeAI browser-based UI..\n"; .venv/bin/python .venv/bin/invoke.py --web $*;; - 3 ) printf "\nDeveloper Console:\n"; file_name=$(basename "${BASH_SOURCE[0]}"); bash --init-file "$file_name";; + 3 ) printf "\nStarting Textual Inversion:\n"; .venv/bin/python .venv/bin/textual_inversion_fe.py $*;; + 4 ) printf "\nDeveloper Console:\n"; file_name=$(basename "${BASH_SOURCE[0]}"); bash --init-file "$file_name";; * ) echo "Invalid selection"; exit;; esac else # in developer console diff --git a/ldm/invoke/textual_inversion_training.py b/ldm/invoke/textual_inversion_training.py index 75369d96133..34e83718d6c 100644 --- a/ldm/invoke/textual_inversion_training.py +++ b/ldm/invoke/textual_inversion_training.py @@ -84,7 +84,7 @@ def parse_args(): ) parser.add_argument( '--root_dir','--root', - type=str, + type=Path, default=Globals.root, help="Path to the invokeai runtime directory", ) @@ -98,7 +98,7 @@ def parse_args(): "--model", type=str, default=None, - required=False, + required=True, help="Name of the diffusers model to train against, as defined in configs/models.yaml.", ) parser.add_argument( @@ -115,23 +115,31 @@ def parse_args(): help="Pretrained tokenizer name or path if not the same as model_name", ) parser.add_argument( - "--train_data_dir", type=str, default=None, required=False, help="A folder containing the training data." + "--train_data_dir", + type=Path, + default=None, + required=True, + help="A folder containing the training data." ) parser.add_argument( "--placeholder_token", type=str, default=None, - required=False, + required=True, help="A token to use as a placeholder for the concept.", ) parser.add_argument( - "--initializer_token", type=str, default=None, required=False, help="A token to use as initializer word." + "--initializer_token", + type=str, + default=None, + required=False, + help="A token to use as initializer word." ) parser.add_argument("--learnable_property", type=str, default="object", help="Choose between 'object' and 'style'") parser.add_argument("--repeats", type=int, default=100, help="How many times to repeat the training data.") parser.add_argument( "--output_dir", - type=str, + type=Path, default=f'{Globals.root}/text-inversion-model', help="The output directory where the model predictions and checkpoints will be written.", ) @@ -201,7 +209,7 @@ def parse_args(): parser.add_argument("--hub_token", type=str, default=None, help="The token to use to push to the Model Hub.") parser.add_argument( "--logging_dir", - type=str, + type=Path, default="logs", help=( "[TensorBoard](https://www.tensorflow.org/tensorboard) log directory. Will default to" @@ -248,7 +256,7 @@ def parse_args(): ) parser.add_argument( "--resume_from_checkpoint", - type=str, + type=Path, default=None, help=( "Whether training should be resumed from a previous checkpoint. 
Use a path saved by" @@ -260,9 +268,6 @@ def parse_args(): ) args = parser.parse_args() - env_local_rank = int(os.environ.get("LOCAL_RANK", -1)) - if env_local_rank != -1 and env_local_rank != args.local_rank: - args.local_rank = env_local_rank return args @@ -412,35 +417,60 @@ def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: return f"{organization}/{model_id}" -def do_textual_inversion_training(args:Namespace): - - # Out of laziness, we are getting our parameterse from an argparse Namespace - # which is a legacy of the original script. This needs to be changed. - - if args.train_data_dir is None: - raise ValueError("You must specify a training data directory with --train_data_dir.") - - if args.placeholder_token is None: - raise ValueError("You must specify a placeholder token with --placeholder_token.") - - if args.initializer__token is None: - raise ValueError("You must specify an initializer token with --initializer_token.") - - if args.model is None: - raise ValueError("You must specify a model name with --model.") +def do_textual_inversion_training( + model:str, + train_data_dir:Path, + output_dir:Path, + placeholder_token:str, + initializer_token:str, + save_steps:int=500, + only_save_embeds:bool=False, + revision:str=None, + tokenizer_name:str=None, + learnable_property:str='object', + repeats:int=100, + seed:int=None, + resolution:int=512, + center_crop:bool=False, + train_batch_size:int=16, + num_train_epochs:int=100, + max_train_steps:int=5000, + gradient_accumulation_steps:int=1, + gradient_checkpointing:bool=False, + learning_rate:float=1e-4, + scale_lr:bool=True, + lr_scheduler:str='constant', + lr_warmup_steps:int=500, + adam_beta1:float=0.9, + adam_beta2:float=0.999, + adam_weight_decay:float=1e-02, + adam_epsilon:float=1e-08, + push_to_hub:bool=False, + hub_token:str=None, + logging_dir:Path=Path('logs'), + mixed_precision:str='no', + allow_tf32:bool=False, + report_to:str='tensorboard', + local_rank:int=-1, + checkpointing_steps:int=500, + resume_from_checkpoint:Path=None, + enable_xformers_memory_efficient_attention:bool=False, + root_dir:Path=None +): + env_local_rank = int(os.environ.get("LOCAL_RANK", -1)) + if env_local_rank != -1 and env_local_rank != local_rank: + local_rank = env_local_rank # setting up things the way invokeai expects them - if os.path.exists(args.root_dir): - Globals.root = args.root_dir - if not os.path.isabs(args.output_dir): - args.output_dir = os.path.join(Globals.root,args.output_dir) + if not os.path.isabs(output_dir): + output_dir = os.path.join(Globals.root,output_dir) - logging_dir = os.path.join(args.output_dir, args.logging_dir) + logging_dir = output_dir / logging_dir accelerator = Accelerator( - gradient_accumulation_steps=args.gradient_accumulation_steps, - mixed_precision=args.mixed_precision, - log_with=args.report_to, + gradient_accumulation_steps=gradient_accumulation_steps, + mixed_precision=mixed_precision, + log_with=report_to, logging_dir=logging_dir, ) @@ -461,68 +491,70 @@ def do_textual_inversion_training(args:Namespace): diffusers.utils.logging.set_verbosity_error() # If passed along, set the training seed now. 
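# Illustrative sketch (not part of the commit above): after this refactor the trainer takes
# plain keyword arguments instead of an argparse Namespace, so it can be called directly from
# Python. The model name, token and directory paths below are assumptions for illustration.
from pathlib import Path
from ldm.invoke.textual_inversion_training import do_textual_inversion_training

do_textual_inversion_training(
    model='stable-diffusion-1.5',                   # assumed entry in configs/models.yaml
    train_data_dir=Path('training-data/my-token'),  # hypothetical folder of training images
    output_dir=Path('embeddings/my-token'),
    placeholder_token='<my-token>',
    initializer_token='toy',
    resolution=512,
    train_batch_size=4,
    max_train_steps=3000,
)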
- if args.seed is not None: - set_seed(args.seed) + if seed is not None: + set_seed(seed) # Handle the repository creation if accelerator.is_main_process: - if args.push_to_hub: - if args.hub_model_id is None: - repo_name = get_full_repo_name(Path(args.output_dir).name, token=args.hub_token) + if push_to_hub: + if hub_model_id is None: + repo_name = get_full_repo_name(Path(output_dir).name, token=hub_token) else: - repo_name = args.hub_model_id - repo = Repository(args.output_dir, clone_from=repo_name) + repo_name = hub_model_id + repo = Repository(output_dir, clone_from=repo_name) - with open(os.path.join(args.output_dir, ".gitignore"), "w+") as gitignore: + with open(os.path.join(output_dir, ".gitignore"), "w+") as gitignore: if "step_*" not in gitignore: gitignore.write("step_*\n") if "epoch_*" not in gitignore: gitignore.write("epoch_*\n") - elif args.output_dir is not None: - os.makedirs(args.output_dir, exist_ok=True) + elif output_dir is not None: + os.makedirs(output_dir, exist_ok=True) models_conf = OmegaConf.load(os.path.join(Globals.root,'configs/models.yaml')) - model_conf = models_conf.get(args.model,None) - assert model_conf is not None,f'Unknown model: {args.model}' + model_conf = models_conf.get(model,None) + assert model_conf is not None,f'Unknown model: {model}' assert model_conf.get('format','diffusers')=='diffusers', "This script only works with models of type 'diffusers'" pretrained_model_name_or_path = model_conf.get('repo_id',None) or Path(model_conf.get('path')) - assert pretrained_model_name_or_path, f"models.yaml error: neither 'repo_id' nor 'path' is defined for {args.model}" + assert pretrained_model_name_or_path, f"models.yaml error: neither 'repo_id' nor 'path' is defined for {model}" pipeline_args = dict() if not isinstance(pretrained_model_name_or_path,Path): - pipeline_args.update(cache_dir=os.path.join(Globals.root,'models',pretrained_model_name_or_path)) + pipeline_args.update( + cache_dir=os.path.join(Globals.root,'models',pretrained_model_name_or_path) + ) # Load tokenizer - if args.tokenizer_name: - tokenizer = CLIPTokenizer.from_pretrained(args.tokenizer_name) + if tokenizer_name: + tokenizer = CLIPTokenizer.from_pretrained(tokenizer_name) else: tokenizer = CLIPTokenizer.from_pretrained(pretrained_model_name_or_path, subfolder="tokenizer", **pipeline_args) # Load scheduler and models noise_scheduler = DDPMScheduler.from_pretrained(pretrained_model_name_or_path, subfolder="scheduler", **pipeline_args) text_encoder = CLIPTextModel.from_pretrained( - pretrained_model_name_or_path, subfolder="text_encoder", revision=args.revision, **pipeline_args + pretrained_model_name_or_path, subfolder="text_encoder", revision=revision, **pipeline_args ) - vae = AutoencoderKL.from_pretrained(pretrained_model_name_or_path, subfolder="vae", revision=args.revision, **pipeline_args) + vae = AutoencoderKL.from_pretrained(pretrained_model_name_or_path, subfolder="vae", revision=revision, **pipeline_args) unet = UNet2DConditionModel.from_pretrained( - pretrained_model_name_or_path, subfolder="unet", revision=args.revision, **pipeline_args + pretrained_model_name_or_path, subfolder="unet", revision=revision, **pipeline_args ) # Add the placeholder token in tokenizer - num_added_tokens = tokenizer.add_tokens(args.placeholder_token) + num_added_tokens = tokenizer.add_tokens(placeholder_token) if num_added_tokens == 0: raise ValueError( - f"The tokenizer already contains the token {args.placeholder_token}. 
Please pass a different" + f"The tokenizer already contains the token {placeholder_token}. Please pass a different" " `placeholder_token` that is not already in the tokenizer." ) # Convert the initializer_token, placeholder_token to ids - token_ids = tokenizer.encode(args.initializer_token, add_special_tokens=False) + token_ids = tokenizer.encode(initializer_token, add_special_tokens=False) # Check if initializer_token is a single token or a sequence of tokens if len(token_ids) > 1: - raise ValueError("The initializer token must be a single token.") + raise ValueError(f"The initializer token must be a single token. Provided initializer={initializer_token}. Token ids={token_ids}") initializer_token_id = token_ids[0] - placeholder_token_id = tokenizer.convert_tokens_to_ids(args.placeholder_token) + placeholder_token_id = tokenizer.convert_tokens_to_ids(placeholder_token) # Resize the token embeddings as we are adding new special tokens to the tokenizer text_encoder.resize_token_embeddings(len(tokenizer)) @@ -539,14 +571,14 @@ def do_textual_inversion_training(args:Namespace): text_encoder.text_model.final_layer_norm.requires_grad_(False) text_encoder.text_model.embeddings.position_embedding.requires_grad_(False) - if args.gradient_checkpointing: + if gradient_checkpointing: # Keep unet in train mode if we are using gradient checkpointing to save memory. # The dropout cannot be != 0 so it doesn't matter if we are in eval or train mode. unet.train() text_encoder.gradient_checkpointing_enable() unet.enable_gradient_checkpointing() - if args.enable_xformers_memory_efficient_attention: + if enable_xformers_memory_efficient_attention: if is_xformers_available(): unet.enable_xformers_memory_efficient_attention() else: @@ -554,48 +586,48 @@ def do_textual_inversion_training(args:Namespace): # Enable TF32 for faster training on Ampere GPUs, # cf https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices - if args.allow_tf32: + if allow_tf32: torch.backends.cuda.matmul.allow_tf32 = True - if args.scale_lr: - args.learning_rate = ( - args.learning_rate * args.gradient_accumulation_steps * args.train_batch_size * accelerator.num_processes + if scale_lr: + learning_rate = ( + learning_rate * gradient_accumulation_steps * train_batch_size * accelerator.num_processes ) # Initialize the optimizer optimizer = torch.optim.AdamW( text_encoder.get_input_embeddings().parameters(), # only optimize the embeddings - lr=args.learning_rate, - betas=(args.adam_beta1, args.adam_beta2), - weight_decay=args.adam_weight_decay, - eps=args.adam_epsilon, + lr=learning_rate, + betas=(adam_beta1, adam_beta2), + weight_decay=adam_weight_decay, + eps=adam_epsilon, ) # Dataset and DataLoaders creation: train_dataset = TextualInversionDataset( - data_root=args.train_data_dir, + data_root=train_data_dir, tokenizer=tokenizer, - size=args.resolution, - placeholder_token=args.placeholder_token, - repeats=args.repeats, - learnable_property=args.learnable_property, - center_crop=args.center_crop, + size=resolution, + placeholder_token=placeholder_token, + repeats=repeats, + learnable_property=learnable_property, + center_crop=center_crop, set="train", ) - train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=args.train_batch_size, shuffle=True) + train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True) # Scheduler and math around the number of training steps. 
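# Illustrative sketch (not part of the commit): the step arithmetic used below, worked
# through with assumed numbers -- the dataloader length sets how many optimizer updates
# fit in one epoch, and max_train_steps is derived from num_train_epochs only when unset.
import math
num_batches = 38                   # e.g. 600 repeated examples at batch size 16, rounded up
gradient_accumulation_steps = 4
num_update_steps_per_epoch = math.ceil(num_batches / gradient_accumulation_steps)  # -> 10
num_train_epochs = 100
max_train_steps = num_train_epochs * num_update_steps_per_epoch                    # -> 1000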
overrode_max_train_steps = False - num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps) - if args.max_train_steps is None: - args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch + num_update_steps_per_epoch = math.ceil(len(train_dataloader) / gradient_accumulation_steps) + if max_train_steps is None: + max_train_steps = num_train_epochs * num_update_steps_per_epoch overrode_max_train_steps = True lr_scheduler = get_scheduler( - args.lr_scheduler, + lr_scheduler, optimizer=optimizer, - num_warmup_steps=args.lr_warmup_steps * args.gradient_accumulation_steps, - num_training_steps=args.max_train_steps * args.gradient_accumulation_steps, + num_warmup_steps=lr_warmup_steps * gradient_accumulation_steps, + num_training_steps=max_train_steps * gradient_accumulation_steps, ) # Prepare everything with our `accelerator`. @@ -616,61 +648,64 @@ def do_textual_inversion_training(args:Namespace): vae.to(accelerator.device, dtype=weight_dtype) # We need to recalculate our total training steps as the size of the training dataloader may have changed. - num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps) + num_update_steps_per_epoch = math.ceil(len(train_dataloader) / gradient_accumulation_steps) if overrode_max_train_steps: - args.max_train_steps = args.num_train_epochs * num_update_steps_per_epoch + max_train_steps = num_train_epochs * num_update_steps_per_epoch # Afterwards we recalculate our number of training epochs - args.num_train_epochs = math.ceil(args.max_train_steps / num_update_steps_per_epoch) + num_train_epochs = math.ceil(max_train_steps / num_update_steps_per_epoch) # We need to initialize the trackers we use, and also store our configuration. # The trackers initializes automatically on the main process. if accelerator.is_main_process: - accelerator.init_trackers("textual_inversion", config=vars(args)) + params = locals() + for k in params: # init_trackers() doesn't like objects + params[k] = str(params[k]) if isinstance(params[k],object) else params[k] + accelerator.init_trackers("textual_inversion", config=params) # Train! - total_batch_size = args.train_batch_size * accelerator.num_processes * args.gradient_accumulation_steps + total_batch_size = train_batch_size * accelerator.num_processes * gradient_accumulation_steps logger.info("***** Running training *****") logger.info(f" Num examples = {len(train_dataset)}") - logger.info(f" Num Epochs = {args.num_train_epochs}") - logger.info(f" Instantaneous batch size per device = {args.train_batch_size}") + logger.info(f" Num Epochs = {num_train_epochs}") + logger.info(f" Instantaneous batch size per device = {train_batch_size}") logger.info(f" Total train batch size (w. 
parallel, distributed & accumulation) = {total_batch_size}") - logger.info(f" Gradient Accumulation steps = {args.gradient_accumulation_steps}") - logger.info(f" Total optimization steps = {args.max_train_steps}") + logger.info(f" Gradient Accumulation steps = {gradient_accumulation_steps}") + logger.info(f" Total optimization steps = {max_train_steps}") global_step = 0 first_epoch = 0 # Potentially load in the weights and states from a previous save - if args.resume_from_checkpoint: - if args.resume_from_checkpoint != "latest": - path = os.path.basename(args.resume_from_checkpoint) + if resume_from_checkpoint: + if resume_from_checkpoint != "latest": + path = os.path.basename(resume_from_checkpoint) else: # Get the most recent checkpoint - dirs = os.listdir(args.output_dir) + dirs = os.listdir(output_dir) dirs = [d for d in dirs if d.startswith("checkpoint")] dirs = sorted(dirs, key=lambda x: int(x.split("-")[1])) path = dirs[-1] accelerator.print(f"Resuming from checkpoint {path}") - accelerator.load_state(os.path.join(args.output_dir, path)) + accelerator.load_state(os.path.join(output_dir, path)) global_step = int(path.split("-")[1]) - resume_global_step = global_step * args.gradient_accumulation_steps + resume_global_step = global_step * gradient_accumulation_steps first_epoch = resume_global_step // num_update_steps_per_epoch resume_step = resume_global_step % num_update_steps_per_epoch # Only show the progress bar once on each machine. - progress_bar = tqdm(range(global_step, args.max_train_steps), disable=not accelerator.is_local_main_process) + progress_bar = tqdm(range(global_step, max_train_steps), disable=not accelerator.is_local_main_process) progress_bar.set_description("Steps") # keep original embeddings as reference orig_embeds_params = accelerator.unwrap_model(text_encoder).get_input_embeddings().weight.data.clone() - for epoch in range(first_epoch, args.num_train_epochs): + for epoch in range(first_epoch, num_train_epochs): text_encoder.train() for step, batch in enumerate(train_dataloader): # Skip steps until we reach the resumed step - if args.resume_from_checkpoint and epoch == first_epoch and step < resume_step: - if step % args.gradient_accumulation_steps == 0: + if resume_from_checkpoint and epoch == first_epoch and step < resume_step: + if step % gradient_accumulation_steps == 0: progress_bar.update(1) continue @@ -723,13 +758,13 @@ def do_textual_inversion_training(args:Namespace): if accelerator.sync_gradients: progress_bar.update(1) global_step += 1 - if global_step % args.save_steps == 0: - save_path = os.path.join(args.output_dir, f"learned_embeds-steps-{global_step}.bin") + if global_step % save_steps == 0: + save_path = os.path.join(output_dir, f"learned_embeds-steps-{global_step}.bin") save_progress(text_encoder, placeholder_token_id, accelerator, args, save_path) - if global_step % args.checkpointing_steps == 0: + if global_step % checkpointing_steps == 0: if accelerator.is_main_process: - save_path = os.path.join(args.output_dir, f"checkpoint-{global_step}") + save_path = os.path.join(output_dir, f"checkpoint-{global_step}") accelerator.save_state(save_path) logger.info(f"Saved state to {save_path}") @@ -737,17 +772,17 @@ def do_textual_inversion_training(args:Namespace): progress_bar.set_postfix(**logs) accelerator.log(logs, step=global_step) - if global_step >= args.max_train_steps: + if global_step >= max_train_steps: break # Create the pipeline using using the trained modules and save it. 
accelerator.wait_for_everyone() if accelerator.is_main_process: - if args.push_to_hub and args.only_save_embeds: + if push_to_hub and only_save_embeds: logger.warn("Enabling full model saving because --push_to_hub=True was specified.") save_full_model = True else: - save_full_model = not args.only_save_embeds + save_full_model = not only_save_embeds if save_full_model: pipeline = StableDiffusionPipeline.from_pretrained( pretrained_model_name_or_path, @@ -757,12 +792,12 @@ def do_textual_inversion_training(args:Namespace): tokenizer=tokenizer, **pipeline_args, ) - pipeline.save_pretrained(args.output_dir) + pipeline.save_pretrained(output_dir) # Save the newly trained embeddings - save_path = os.path.join(args.output_dir, "learned_embeds.bin") + save_path = os.path.join(output_dir, "learned_embeds.bin") save_progress(text_encoder, placeholder_token_id, accelerator, args, save_path) - if args.push_to_hub: + if push_to_hub: repo.push_to_hub(commit_message="End of training", blocking=False, auto_lfs_prune=True) accelerator.end_training() diff --git a/scripts/textual_inversion.py b/scripts/textual_inversion.py index 81784f25349..24ce18a0077 100755 --- a/scripts/textual_inversion.py +++ b/scripts/textual_inversion.py @@ -1,9 +1,11 @@ #!/usr/bin/env python # Copyright 2023, Lincoln Stein @lstein - +from ldm.invoke.globals import Globals from ldm.invoke.textual_inversion_training import parse_args, do_textual_inversion_training if __name__ == "__main__": args = parse_args() - do_textual_inversion_training(args) + Globals.root = args.root_dir or Globals.root + kwargs = vars(args) + do_textual_inversion_training(**kwargs) diff --git a/scripts/textual_inversion_fe.py b/scripts/textual_inversion_fe.py index b07b4ede682..79d96a5a437 100755 --- a/scripts/textual_inversion_fe.py +++ b/scripts/textual_inversion_fe.py @@ -4,11 +4,12 @@ import os import sys import curses +import re from ldm.invoke.globals import Globals from omegaconf import OmegaConf from pathlib import Path from typing import List -from argparse import Namespace +import argparse class textualInversionForm(npyscreen.FormMultiPageAction): resolutions = [512, 768, 1024] @@ -22,7 +23,8 @@ def afterEditing(self): def create(self): self.model_names, default = self.get_model_names() - default_token = 'cat-toy' + default_initializer_token = '★' + default_placeholder_token = '' self.model = self.add_widget_intelligent( npyscreen.TitleSelectOne, @@ -31,45 +33,65 @@ def create(self): value=default, max_height=len(self.model_names)+1 ) - self.initializer_token = self.add_widget_intelligent( + self.placeholder_token = self.add_widget_intelligent( npyscreen.TitleText, - name="Initializer Token", - value=default_token, + name="Placeholder token", + value='', ) - self.placeholder_token = self.add_widget_intelligent( + self.nextrely -= 1 + self.nextrelx += 30 + self.prompt_token = self.add_widget_intelligent( + npyscreen.FixedText, + name="Trigger term for use in prompt", + value='', + ) + self.nextrelx -= 30 + self.initializer_token = self.add_widget_intelligent( npyscreen.TitleText, - name="Placeholder Token", - value=f'<{default_token}>' + name="Initializer token", + value=default_initializer_token, + ) + self.learnable_property = self.add_widget_intelligent( + npyscreen.TitleSelectOne, + name="Learnable property", + values=['object','style'], + value=0, + max_height=3, ) self.train_data_dir = self.add_widget_intelligent( npyscreen.TitleFilenameCombo, name='Data Training Directory', select_dir=True, must_exist=True, - value=Path(Globals.root) / 
'training-data' / default_token + value=Path(Globals.root) / 'training-data' / default_placeholder_token ) self.output_dir = self.add_widget_intelligent( npyscreen.TitleFilenameCombo, name='Output Destination Directory', select_dir=True, must_exist=False, - value=Path(Globals.root) / 'embeddings' / default_token + value=Path(Globals.root) / 'embeddings' / default_placeholder_token ) self.resolution = self.add_widget_intelligent( npyscreen.TitleSelectOne, - name='Resolution', + name='Image resolution (pixels)', values = self.resolutions, value=0, scroll_exit = True, - max_height=3 + max_height=4, + ) + self.center_crop = self.add_widget_intelligent( + npyscreen.Checkbox, + name="Center crop images before resizing to resolution", + value=False, ) self.train_batch_size = self.add_widget_intelligent( npyscreen.TitleSlider, name='Batch Size', - out_of=10, + out_of=50, step=1, lowest=1, - value=1 + value=16 ) self.gradient_accumulation_steps = self.add_widget_intelligent( npyscreen.TitleSlider, @@ -97,6 +119,11 @@ def create(self): name="Scale learning rate by number GPUs, steps and batch size", value=True ) + self.enable_xformers_memory_efficient_attention = self.add_widget_intelligent( + npyscreen.Checkbox, + name="Use xformers acceleration", + value=False, + ) self.lr_scheduler = self.add_widget_intelligent( npyscreen.TitleSelectOne, name='Learning rate scheduler', @@ -112,19 +139,22 @@ def create(self): lowest=0, value=0 ) - self.initializer_token.when_value_edited = self.initializer_changed + self.placeholder_token.when_value_edited = self.initializer_changed def initializer_changed(self): - new_value = self.initializer_token.value - self.placeholder_token.value = f'<{new_value}>' - self.train_data_dir.value = Path(Globals.root) / 'training-data' / new_value - self.output_dir.value = Path(Globals.root) / 'embeddings' / new_value + placeholder = self.placeholder_token.value + self.prompt_token.value = f'(Trigger by using <{placeholder}> in your prompts)' + self.train_data_dir.value = Path(Globals.root) / 'training-data' / placeholder + self.output_dir.value = Path(Globals.root) / 'embeddings' / placeholder def on_ok(self): if self.validate_field_values(): self.parentApp.setNextForm(None) self.editing = False - self.run_textual_inversion() + self.parentApp.ti_arguments = self.marshall_arguments() + npyscreen.notify('Launching textual inversion training. 
This will take a while...') + # The module load takes a while, so we do it while the form and message are still up + import ldm.invoke.textual_inversion_training else: self.editing = True @@ -151,36 +181,54 @@ def get_model_names(self)->(List[str],int): defaults = [idx for idx in range(len(model_names)) if 'default' in conf[model_names[idx]]] return (model_names,defaults[0]) - def create_namespace(self): - args = Namespace() + def marshall_arguments(self): + args = dict() # the choices - args.model = self.model_names[self.model.value[0]] - args.resolution = self.resolutions[self.resolution.value[0]] - args.lr_scheduler = self.lr_schedulers[self.lr_scheduler.value[0]] + args.update( + model = self.model_names[self.model.value[0]], + resolution = self.resolutions[self.resolution.value[0]], + lr_scheduler = self.lr_schedulers[self.lr_scheduler.value[0]] + ) # all the strings for attr in ('initializer_token','placeholder_token','train_data_dir','output_dir','scale_lr'): - setattr(args,attr,getattr(self,attr).value) + args[attr] = getattr(self,attr).value # all the integers for attr in ('train_batch_size','gradient_accumulation_steps', 'max_train_steps','lr_warmup_steps'): - setattr(args,attr,int(getattr(self,attr).value)) + args[attr] = int(getattr(self,attr).value) + # the floats (just one) - args.learning_rate = float(self.learning_rate.value) + args.update( + learning_rate = float(self.learning_rate.value) + ) return args - def run_textual_inversion(self): - npyscreen.notify('Launching textual inversion training. This will take a while...') - from ldm.invoke.textual_inversion_training import do_textual_inversion_training, parse_args - args = parse_args() - args.root_dir = Globals.root - do_textual_inversion_training(args) - class MyApplication(npyscreen.NPSAppManaged): + def __init__(self): + super().__init__() + self.ti_arguments=None + def onStart(self): npyscreen.setTheme(npyscreen.Themes.DefaultTheme) self.main = self.addForm('MAIN', textualInversionForm, name='Textual Inversion Settings') if __name__ == '__main__': - TestApp = MyApplication().run() + parser = argparse.ArgumentParser(description='InvokeAI textual inversion training') + parser.add_argument( + '--root_dir','--root-dir', + type=Path, + default=Globals.root, + help='Path to the invokeai runtime directory', + ) + args = parser.parse_args() + Globals.root = args.root_dir + + myapplication = MyApplication() + myapplication.run() + + from ldm.invoke.textual_inversion_training import do_textual_inversion_training + if args := myapplication.ti_arguments: + do_textual_inversion_training(**args) + diff --git a/setup.py b/setup.py index 3324a0184f4..06e53057e3b 100644 --- a/setup.py +++ b/setup.py @@ -85,7 +85,8 @@ def list_files(directory): 'Topic :: Scientific/Engineering :: Image Processing', ], scripts = ['scripts/invoke.py','scripts/configure_invokeai.py', 'scripts/sd-metadata.py', - 'scripts/preload_models.py', 'scripts/images2prompt.py','scripts/merge_embeddings.py' + 'scripts/preload_models.py', 'scripts/images2prompt.py','scripts/merge_embeddings.py', + 'scripts/textual_inversion_fe.py','scripts/textual_inversion.py' ], data_files=FRONTEND_FILES, ) From 690365a82d647a8ea32930cff8c12bf66d17d4b9 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 3 Jan 2023 23:45:20 -0500 Subject: [PATCH 123/199] fix crash in convert_and_import() - This corrects a "local variable referenced before assignment" error in model_manager.convert_and_import() --- ldm/invoke/CLI.py | 2 +- ldm/invoke/model_manager.py | 1 + 2 files changed, 2 
insertions(+), 1 deletion(-) diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index 27986609a83..d347ff93399 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -610,7 +610,7 @@ def optimize_model(ckpt_path:str, gen, opt, completer): print(f'** {basename} is already optimized. Will not overwrite.') return new_config = gen.model_manager.convert_and_import(ckpt_path, diffuser_path) - if write_config_file(opt.conf, gen, basename, new_config, clobber=False): + if new_config and write_config_file(opt.conf, gen, basename, new_config, clobber=False): completer.add_model(basename) def del_config(model_name:str, gen, opt, completer): diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index a217f567f0e..ac39591d276 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -582,6 +582,7 @@ def convert_and_import(self, ckpt_path:Path, diffuser_path:Path)->dict: Convert a legacy ckpt weights file to diffuser model and import into models.yaml. ''' + new_config = None from ldm.invoke.ckpt_to_diffuser import convert_ckpt_to_diffuser import transformers if diffuser_path.exists(): From 13129fd56b129fa8748de7513ddc40e8352526eb Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 3 Jan 2023 23:54:50 -0500 Subject: [PATCH 124/199] potential workaround for no 'state_dict' key error - As reported in https://github.com/huggingface/diffusers/issues/1876 --- ldm/invoke/ckpt_to_diffuser.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ldm/invoke/ckpt_to_diffuser.py b/ldm/invoke/ckpt_to_diffuser.py index 7d8510f3166..f04c68a814e 100644 --- a/ldm/invoke/ckpt_to_diffuser.py +++ b/ldm/invoke/ckpt_to_diffuser.py @@ -798,7 +798,10 @@ def convert_ckpt_to_diffuser(checkpoint_path:str, else: print("global_step key not found in model") global_step = None - checkpoint = checkpoint["state_dict"] + + # sometimes there is a state_dict key and sometimes not + if 'state_dict' in checkpoint: + checkpoint = checkpoint["state_dict"] upcast_attention = False if original_config_file is None: From 8bcf4b7d5f21e77bc1daf58ed03aec8f4e9cd6aa Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Wed, 4 Jan 2023 00:02:45 -0500 Subject: [PATCH 125/199] create TI output dir if needed --- scripts/textual_inversion_fe.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/textual_inversion_fe.py b/scripts/textual_inversion_fe.py index 79d96a5a437..9a8363a500b 100755 --- a/scripts/textual_inversion_fe.py +++ b/scripts/textual_inversion_fe.py @@ -70,7 +70,7 @@ def create(self): name='Output Destination Directory', select_dir=True, must_exist=False, - value=Path(Globals.root) / 'embeddings' / default_placeholder_token + value=Path(Globals.root) / 'trained-checkpoints' / default_placeholder_token ) self.resolution = self.add_widget_intelligent( npyscreen.TitleSelectOne, @@ -230,5 +230,6 @@ def onStart(self): from ldm.invoke.textual_inversion_training import do_textual_inversion_training if args := myapplication.ti_arguments: + os.makedirs(args['output_dir'],exist_ok=True) do_textual_inversion_training(**args) From 2ac2f8120ccec323a97fb08d6dd850eaf2d02d66 Mon Sep 17 00:00:00 2001 From: Wybartel-luxmc <37852506+Wybartel-luxmc@users.noreply.github.com> Date: Wed, 4 Jan 2023 11:25:59 -0800 Subject: [PATCH 126/199] Update environment-lin-cuda.yml (#2159) Fixing line 42 to be the proper order to define the transformers requirement: ~= instead of =~ --- environments-and-requirements/environment-lin-cuda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/environments-and-requirements/environment-lin-cuda.yml b/environments-and-requirements/environment-lin-cuda.yml index f48f2f3110b..82b3a8a3703 100644 --- a/environments-and-requirements/environment-lin-cuda.yml +++ b/environments-and-requirements/environment-lin-cuda.yml @@ -39,7 +39,7 @@ dependencies: - test-tube>=0.7.5 - torch-fidelity==0.3.0 - torchmetrics==0.7.0 - - transformers=~4.25 + - transformers~=4.25 - git+https://github.com/openai/CLIP.git@main#egg=clip - git+https://github.com/Birch-san/k-diffusion.git@mps#egg=k-diffusion - git+https://github.com/invoke-ai/clipseg.git@relaxed-python-requirement#egg=clipseg From 34626f35f2ec57f3f1fd902adb2ebdc5eced0e48 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 4 Jan 2023 14:07:39 -0800 Subject: [PATCH 127/199] diffusers: update sampler-to-scheduler mapping based on https://github.com/huggingface/diffusers/issues/277#issuecomment-1371428672 --- ldm/generate.py | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index 70a887a693a..4267eb1b4b2 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -13,20 +13,13 @@ import traceback import cv2 +import diffusers import numpy as np import skimage import torch import transformers from PIL import Image, ImageOps -from diffusers import HeunDiscreteScheduler from diffusers.pipeline_utils import DiffusionPipeline -from diffusers.schedulers.scheduling_ddim import DDIMScheduler -from diffusers.schedulers.scheduling_dpmsolver_multistep import DPMSolverMultistepScheduler -from diffusers.schedulers.scheduling_euler_ancestral_discrete import EulerAncestralDiscreteScheduler -from diffusers.schedulers.scheduling_euler_discrete import EulerDiscreteScheduler -from diffusers.schedulers.scheduling_ipndm import IPNDMScheduler -from diffusers.schedulers.scheduling_lms_discrete import LMSDiscreteScheduler -from diffusers.schedulers.scheduling_pndm import PNDMScheduler from omegaconf import OmegaConf from pytorch_lightning import seed_everything, logging @@ -1008,19 +1001,21 @@ def _set_sampler_legacy(self): def _set_scheduler(self): default = self.model.scheduler + # See https://github.com/huggingface/diffusers/issues/277#issuecomment-1371428672 scheduler_map = dict( - ddim=DDIMScheduler, - dpmpp_2=DPMSolverMultistepScheduler, - ipndm=IPNDMScheduler, + ddim=diffusers.DDIMScheduler, + dpmpp_2=diffusers.DPMSolverMultistepScheduler, + k_dpm_2=diffusers.KDPM2DiscreteScheduler, + k_dpm_2_a=diffusers.KDPM2AncestralDiscreteScheduler, # DPMSolverMultistepScheduler is technically not `k_` anything, as it is neither # the k-diffusers implementation nor included in EDM (Karras 2022), but we can # provide an alias for compatibility. 
- k_dpmpp_2=DPMSolverMultistepScheduler, - k_euler=EulerDiscreteScheduler, - k_euler_a=EulerAncestralDiscreteScheduler, - k_heun=HeunDiscreteScheduler, - k_lms=LMSDiscreteScheduler, - plms=PNDMScheduler, + k_dpmpp_2=diffusers.DPMSolverMultistepScheduler, + k_euler=diffusers.EulerDiscreteScheduler, + k_euler_a=diffusers.EulerAncestralDiscreteScheduler, + k_heun=diffusers.HeunDiscreteScheduler, + k_lms=diffusers.LMSDiscreteScheduler, + plms=diffusers.PNDMScheduler, ) if self.sampler_name in scheduler_map: From 7d65152a73f8244a6fa3c8df9c3e3c5666506fab Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Wed, 4 Jan 2023 19:00:37 -0500 Subject: [PATCH 128/199] improve user exp for ckt to diffusers conversion - !optimize_models command now operates on an existing ckpt file entry in models.yaml - replaces existing entry, rather than adding a new one - offers to delete the ckpt file after conversion --- ldm/invoke/CLI.py | 54 ++++++++++++++++++++++++++----------- ldm/invoke/model_manager.py | 27 ++++++++++++++----- ldm/invoke/readline.py | 34 +++++++++++++---------- 3 files changed, 78 insertions(+), 37 deletions(-) diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index d347ff93399..ec51b47637d 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -168,7 +168,7 @@ def main_loop(gen, opt): # The readline completer reads history from the .dream_history file located in the # output directory specified at the time of script launch. We do not currently support # changing the history file midstream when the output directory is changed. - completer = get_completer(opt, models=list(model_config.keys())) + completer = get_completer(opt, models=gen.model_manager.list_models()) set_default_output_dir(opt, completer) add_embedding_terms(gen, completer) output_cntr = completer.get_current_history_length()+1 @@ -474,9 +474,9 @@ def do_command(command:str, gen, opt:Args, completer) -> tuple: elif command.startswith('!optimize'): path = shlex.split(command) if len(path) < 2: - print('** please provide a path to a .ckpt file') - elif not os.path.exists(path[1]): - print(f'** {path[1]}: file not found') + print('** please provide an installed model name') + elif not path[1] in gen.model_manager.list_models(): + print(f'** {path[1]}: model not found') else: optimize_model(path[1], gen, opt, completer) completer.add_history(command) @@ -600,18 +600,41 @@ def add_weights_to_config(model_path:str, gen, opt, completer): make_default = input('Make this the default model? [n] ') in ('y','Y') if write_config_file(opt.conf, gen, model_name, new_config, make_default=make_default): - completer.add_model(model_name) + completer.models(gen.model_manager.list_models()) + +def optimize_model(model_name:str, gen, opt, completer): + manager = gen.model_manager + if not (model_info := manager.model_info(model_name)): + print(f'** unknown model: {model_name}') + return + ckpt_path = Path(model_info['weights']) + if not ckpt_path.is_absolute(): + ckpt_path = Path(Globals.root,ckpt_path) -def optimize_model(ckpt_path:str, gen, opt, completer): - ckpt_path = Path(ckpt_path) basename = ckpt_path.stem diffuser_path = Path(Globals.root, 'models','optimized-ckpts',basename) if diffuser_path.exists(): - print(f'** {basename} is already optimized. Will not overwrite.') + print(f'** {model_name} is already optimized. Will not overwrite. 
If this is an error, please remove the directory {diffuser_path} and try again.') + return + + new_config = gen.model_manager.convert_and_import( + ckpt_path, + diffuser_path, + model_name=model_name, + model_description=model_info['description'], + commit_to_conf=opt.conf, + ) + if not new_config: return - new_config = gen.model_manager.convert_and_import(ckpt_path, diffuser_path) - if new_config and write_config_file(opt.conf, gen, basename, new_config, clobber=False): - completer.add_model(basename) + + response = input(f'OK to delete original .ckpt file at ({ckpt_path} ? [n] ') + if response.startswith(('y','Y')): + ckpt_path.unlink(missing_ok=True) + print(f'{ckpt_path} deleted') + + completer.update_models(gen.model_manager.list_models()) + if input(f'Load optimized model {model_name}? [y] ') not in ('n','N'): + gen.set_model(model_name) def del_config(model_name:str, gen, opt, completer): current_model = gen.model_name @@ -621,7 +644,7 @@ def del_config(model_name:str, gen, opt, completer): gen.model_manager.del_model(model_name) gen.model_manager.commit(opt.conf) print(f'** {model_name} deleted') - completer.del_model(model_name) + completer.update_models(gen.model_manager.list_models()) def edit_config(model_name:str, gen, opt, completer): config = gen.model_manager.config @@ -643,15 +666,15 @@ def edit_config(model_name:str, gen, opt, completer): completer.complete_extensions(None) write_config_file(opt.conf, gen, model_name, new_config, clobber=True, make_default=make_default) -def write_config_file(conf_path, gen, model_name, new_config, clobber=False, make_default=False): +def write_config_file(conf_path, gen, model_name, new_config, clobber=False, make_default=False)->bool: current_model = gen.model_name - op = 'modify' if clobber else 'import' + op = 'write' if clobber else 'import' print('\n>> New configuration:') if make_default: new_config['default'] = True print(yaml.dump({model_name:new_config})) - if input(f'OK to {op} [n]? ') not in ('y','Y'): + if input(f'OK to {op} [y]? ').startswith(('n','N')): return False try: @@ -672,7 +695,6 @@ def write_config_file(conf_path, gen, model_name, new_config, clobber=False, mak gen.model_manager.set_default_model(model_name) gen.model_manager.commit(conf_path) - do_switch = input(f'Keep model loaded? 
[y]') if len(do_switch)==0 or do_switch[0] in ('y','Y'): pass diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index ac39591d276..1b8c3e81170 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -16,6 +16,7 @@ import time import traceback import warnings +import shutil from pathlib import Path from typing import Union, Any from ldm.util import download_with_progress_bar @@ -156,6 +157,7 @@ def list_models(self) -> dict: Return a dict of models in the format: { model_name1: {'status': ('active'|'cached'|'not loaded'), 'description': description, + 'format': ('ckpt'|'diffusers'|'vae'), }, model_name2: { etc } Please use model_manager.models() to get all the model names, @@ -184,7 +186,7 @@ def print_models(self) -> None: for name in models: if models[name]['format'] == 'vae': continue - line = f'{name:25s} {models[name]["status"]:>10s} {models[name]["description"]}' + line = f'{name:25s} {models[name]["status"]:>10s} {models[name]["format"]:10s} {models[name]["description"]}' if models[name]['status'] == 'active': line = f'\033[1m{line}\033[0m' print(line) @@ -577,7 +579,13 @@ def autoconvert_weights( self.convert_and_import(ckpt, ckpt_files[ckpt]) self.commit(conf_path) - def convert_and_import(self, ckpt_path:Path, diffuser_path:Path)->dict: + def convert_and_import(self, + ckpt_path:Path, + diffuser_path:Path, + model_name=None, + model_description=None, + commit_to_conf:Path=None, + )->dict: ''' Convert a legacy ckpt weights file to diffuser model and import into models.yaml. @@ -589,25 +597,30 @@ def convert_and_import(self, ckpt_path:Path, diffuser_path:Path)->dict: print(f'ERROR: The path {str(diffuser_path)} already exists. Please move or remove it and try again.') return - print(f'>> {ckpt_path.name}: optimizing (30-60s).') + model_name = model_name or diffuser_path.name + model_description = model_description or 'Optimized version of {model_name}' + print(f'>> {model_name}: optimizing (30-60s).') try: - model_name = diffuser_path.name verbosity =transformers.logging.get_verbosity() transformers.logging.set_verbosity_error() - convert_ckpt_to_diffuser(ckpt_path, diffuser_path) + convert_ckpt_to_diffuser(ckpt_path, diffuser_path,extract_ema=True) transformers.logging.set_verbosity(verbosity) print(f'>> Success. 
Optimized model is now located at {str(diffuser_path)}') print(f'>> Writing new config file entry for {model_name}...',end='') new_config = dict( path=str(diffuser_path), - description=f'Optimized version of {model_name}', + description=model_description, format='diffusers', ) + self.del_model(model_name) self.add_model(model_name, new_config, True) - print('done.') + if commit_to_conf: + self.commit(commit_to_conf) except Exception as e: print(f'** Conversion failed: {str(e)}') traceback.print_exc() + + print('done.') return new_config def del_config(self, model_name:str, gen, opt, completer): diff --git a/ldm/invoke/readline.py b/ldm/invoke/readline.py index 3023d6db971..02fc46cec6d 100644 --- a/ldm/invoke/readline.py +++ b/ldm/invoke/readline.py @@ -67,8 +67,11 @@ '!edit_model', '!del_model', ) +CKPT_MODEL_COMMANDS = ( + '!optimize_model', +) WEIGHT_COMMANDS = ( - '!import_model','!optimize_model', + '!import_model', ) IMG_PATH_COMMANDS = ( '--outdir[=\s]', @@ -91,9 +94,9 @@ text_regexp = '(' + '|'.join(TEXT_PATH_COMMANDS) + ')\s*\S*$' class Completer(object): - def __init__(self, options, models=[]): + def __init__(self, options, models={}): self.options = sorted(options) - self.models = sorted(models) + self.models = models self.seeds = set() self.matches = list() self.default_dir = None @@ -134,6 +137,10 @@ def complete(self, text, state): elif re.match('^'+'|'.join(MODEL_COMMANDS),buffer): self.matches= self._model_completions(text, state) + # looking for a ckpt model + elif re.match('^'+'|'.join(CKPT_MODEL_COMMANDS),buffer): + self.matches= self._model_completions(text, state, ckpt_only=True) + elif re.search(weight_regexp,buffer): self.matches = self._path_completions( text, @@ -242,18 +249,12 @@ def set_line(self,line)->None: self.linebuffer = line readline.redisplay() - def add_model(self,model_name:str)->None: - ''' - add a model name to the completion list - ''' - self.models.append(model_name) - - def del_model(self,model_name:str)->None: + def update_models(self,models:dict)->None: ''' - removes a model name from the completion list + update our list of models ''' - self.models.remove(model_name) - + self.models = models + def _seed_completions(self, text, state): m = re.search('(-S\s?|--seed[=\s]?)(\d*)',text) if m: @@ -294,7 +295,7 @@ def _concept_completions(self, text, state): matches.sort() return matches - def _model_completions(self, text, state): + def _model_completions(self, text, state, ckpt_only=False): m = re.search('(!switch\s+)(\w*)',text) if m: switch = m.groups()[0] @@ -304,6 +305,11 @@ def _model_completions(self, text, state): partial = text matches = list() for s in self.models: + format = self.models[s]['format'] + if format == 'vae': + continue + if ckpt_only and format != 'ckpt': + continue if s.startswith(partial): matches.append(switch+s) matches.sort() From ec981745e7301ad5a007a62c8d600bb5c7cd6e92 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Wed, 4 Jan 2023 19:42:12 -0800 Subject: [PATCH 129/199] web: adapt progress callback to deal with old generator or new diffusers pipeline --- backend/invoke_ai_web_server.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/backend/invoke_ai_web_server.py b/backend/invoke_ai_web_server.py index 3e0301346e0..f41385bad64 100644 --- a/backend/invoke_ai_web_server.py +++ b/backend/invoke_ai_web_server.py @@ -933,9 +933,7 @@ def generate_images( init_img_path = self.get_image_path_from_url(init_img_url) generation_parameters["init_img"] = 
Image.open(init_img_path).convert('RGB') - def image_progress(progress_state: PipelineIntermediateState): - step = progress_state.step - sample = progress_state.latents + def image_progress(sample, step): if self.canceled.is_set(): raise CanceledException @@ -1206,9 +1204,16 @@ def image_done(image, seed, first_seed, attention_maps_image=None): print(generation_parameters) + def diffusers_step_callback_adapter(*cb_args, **kwargs): + if isinstance(cb_args[0], PipelineIntermediateState): + progress_state: PipelineIntermediateState = cb_args[0] + return image_progress(progress_state.latents, progress_state.step) + else: + return image_progress(*cb_args, **kwargs) + self.generate.prompt2image( **generation_parameters, - step_callback=image_progress, + step_callback=diffusers_step_callback_adapter, image_callback=image_done ) From fb178d52c94a5b25e93ed6a5acaf19297885b50c Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 5 Jan 2023 15:10:09 -0500 Subject: [PATCH 130/199] clean-up model_manager code - add_model() verified to work for .ckpt local paths, .ckpt remote URLs, diffusers local paths, and diffusers repo_ids - convert_and_import() verified to work for local and remove .ckpt files --- ldm/invoke/CLI.py | 146 +++++++++++++++++++++++------------- ldm/invoke/model_manager.py | 16 ++-- 2 files changed, 104 insertions(+), 58 deletions(-) diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index ec51b47637d..be1a00311fb 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -463,11 +463,9 @@ def do_command(command:str, gen, opt:Args, completer) -> tuple: elif command.startswith('!import'): path = shlex.split(command) if len(path) < 2: - print('** please provide a path to a .ckpt or .vae model file') - elif not os.path.exists(path[1]): - print(f'** {path[1]}: file not found') + print('** please provide (1) a URL to a .ckpt file to import; (2) a local path to a .ckpt file; or (3) a diffusers repository id in the form stabilityai/stable-diffusion-2-1') else: - add_weights_to_config(path[1], gen, opt, completer) + import_model(path[1], gen, opt, completer) completer.add_history(command) operation = None @@ -548,59 +546,101 @@ def set_default_output_dir(opt:Args, completer:Completer): completer.set_default_dir(opt.outdir) -def add_weights_to_config(model_path:str, gen, opt, completer): - print(f'>> Model import in process. Please enter the values needed to configure this model:') - print() +def import_model(model_path:str, gen, opt, completer): + ''' + model_path can be (1) a URL to a .ckpt file; (2) a local .ckpt file path; or + (3) a huggingface repository id + ''' + model_name = None + + if model_path.startswith('http') or os.path.exists(model_path): + model_name = import_ckpt_model(model_path, gen, opt, completer) + elif re.match('^[\w-]+/[\w-]+$',model_path): + model_name = import_diffuser_model(model_path, gen, opt, completer) + else: + print(f'** {model_path} is neither the path to a .ckpt file nor a diffusers repository id. Can\'t import.') - new_config = {} - new_config['weights'] = model_path - new_config['format'] = 'ckpt' + if not model_name: + return + + if not _verify_load(model_name, gen): + print(f'** model failed to load. Discarding configuration entry') + gen.model_manager.del_model(model_name) + return + + if input('Make this the default model? 
[n] ') in ('y','Y'): + gen.model_manager.set_default_model(model_name) - done = False - while not done: - model_name = input('Short name for this model: ') - if not re.match('^[\w._-]+$',model_name): - print('** model name must contain only words, digits and the characters [._-] **') - else: - done = True - new_config['description'] = input('Description of this model: ') + gen.model_manager.commit(opt.conf) + completer.update_models(gen.model_manager.list_models()) + print(f'>> {model_name} successfully installed') + +def import_diffuser_model(path_or_repo:str, gen, opt, completer)->str: + manager = gen.model_manager + model_name, model_description = _get_model_name_and_desc(manager) + + if not manager.import_diffuser_model( + path_or_repo, + model_name = model_name, + description = model_description): + print(f'** model failed to import') + return None + return model_name + +def import_ckpt_model(path_or_url:str, gen, opt, completer)->str: + manager = gen.model_manager + model_name, model_description = _get_model_name_and_desc(manager) + config_file = None completer.complete_extensions(('.yaml','.yml')) completer.linebuffer = 'configs/stable-diffusion/v1-inference.yaml' - done = False while not done: - new_config['config'] = input('Configuration file for this model: ') - done = os.path.exists(new_config['config']) + config_file = input('Configuration file for this model: ') + done = os.path.exists(config_file) + completer.complete_extensions(None) - done = False - completer.complete_extensions(('.vae.pt','.vae','.ckpt')) - while not done: - vae = input('VAE autoencoder file for this model [None]: ') - if os.path.exists(vae): - new_config['vae'] = vae - done = True - else: - done = len(vae)==0 + if not manager.import_ckpt_model( + path_or_url, + config = config_file, + model_name = model_name, + model_description = model_description + ): + print(f'** model failed to import') + return None - completer.complete_extensions(None) + return model_name - for field in ('width','height'): - done = False - while not done: - try: - completer.linebuffer = '512' - value = int(input(f'Default image {field}: ')) - assert value >= 64 and value <= 2048 - new_config[field] = value - done = True - except: - print('** Please enter a valid integer between 64 and 2048') +def _verify_load(model_name:str, gen)->bool: + print('>> Verifying that new model loads...') + current_model = gen.model_name + if not gen.model_manager.get_model(model_name): + return False + do_switch = input(f'Keep model loaded? [y] ') + if len(do_switch)>0 and do_switch[0] not in ('y','Y'): + gen.set_model(model_name) + else: + print('>> Restoring previous model') + gen.set_model(current_model) + return True - make_default = input('Make this the default model? [n] ') in ('y','Y') +def _get_model_name_and_desc(model_manager): + model_name = None + model_description = None + existing_models = model_manager.list_models() - if write_config_file(opt.conf, gen, model_name, new_config, make_default=make_default): - completer.models(gen.model_manager.list_models()) + done = False + while not done: + model_name = input('Short name for this model: ') + if not re.match('^[\w._-]+$',model_name): + print('** model name must contain only words, digits and the characters [._-] **') + elif model_name in existing_models: + print(f'** the name {model_name} is already in use. 
Pick another.') + else: + done = True + model_description = input('Description for this model: ') + + return model_name, model_description def optimize_model(model_name:str, gen, opt, completer): manager = gen.model_manager @@ -627,15 +667,15 @@ def optimize_model(model_name:str, gen, opt, completer): if not new_config: return - response = input(f'OK to delete original .ckpt file at ({ckpt_path} ? [n] ') - if response.startswith(('y','Y')): - ckpt_path.unlink(missing_ok=True) - print(f'{ckpt_path} deleted') - completer.update_models(gen.model_manager.list_models()) if input(f'Load optimized model {model_name}? [y] ') not in ('n','N'): gen.set_model(model_name) + response = input(f'Delete the original .ckpt file at ({ckpt_path} ? [n] ') + if response.startswith(('y','Y')): + ckpt_path.unlink(missing_ok=True) + print(f'{ckpt_path} deleted') + def del_config(model_name:str, gen, opt, completer): current_model = gen.model_name if model_name == current_model: @@ -646,6 +686,9 @@ def del_config(model_name:str, gen, opt, completer): print(f'** {model_name} deleted') completer.update_models(gen.model_manager.list_models()) + +# NOTE: edit_config() must be rewritten for diffusers +# We should only allow user to change: name, description, default def edit_config(model_name:str, gen, opt, completer): config = gen.model_manager.config @@ -666,6 +709,7 @@ def edit_config(model_name:str, gen, opt, completer): completer.complete_extensions(None) write_config_file(opt.conf, gen, model_name, new_config, clobber=True, make_default=make_default) +# NOTE: delete this whole function def write_config_file(conf_path, gen, model_name, new_config, clobber=False, make_default=False)->bool: current_model = gen.model_name @@ -695,7 +739,7 @@ def write_config_file(conf_path, gen, model_name, new_config, clobber=False, mak gen.model_manager.set_default_model(model_name) gen.model_manager.commit(conf_path) - do_switch = input(f'Keep model loaded? [y]') + do_switch = input(f'Keep model loaded? 
[y] ') if len(do_switch)==0 or do_switch[0] in ('y','Y'): pass else: diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index 1b8c3e81170..467aaac2f60 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -99,7 +99,7 @@ def get_model(self, model_name:str): assert self.current_model,'** FATAL: no current model to restore to' print(f'** restoring {self.current_model}') self.get_model(self.current_model) - return + return None self.current_model = model_name self._push_newest_model(model_name) @@ -150,7 +150,9 @@ def is_legacy(self,model_name:str)->bool: Return true if this is a legacy (.ckpt) model ''' info = self.model_info(model_name) - return info['format']=='ckpt' if info else False + if 'weights' in info and info['weights'].endswith('.ckpt'): + return True + return False def list_models(self) -> dict: ''' @@ -507,7 +509,7 @@ def import_ckpt_model(self, weights:Union[str,Path], config:Union[str,Path]='configs/stable-diffusion/v1-inference.yaml', model_name:str=None, - description:str=None, + model_description:str=None, commit_to_conf:Path=None, )->bool: ''' @@ -532,12 +534,12 @@ def import_ckpt_model(self, if config_path is None or not config_path.exists(): return False - model_name = model_name or Path(basename).stem - description = description or f'imported stable diffusion weights file {model_name}' + model_name = model_name or Path(weights).stem + model_description = model_description or f'imported stable diffusion weights file {model_name}' new_config = dict( weights=str(weights_path), config=str(config_path), - description=description, + description=model_description, format='ckpt', width=512, height=512 @@ -668,7 +670,7 @@ def commit(self,config_file_path:str) -> None: ''' yaml_str = OmegaConf.to_yaml(self.config) if not os.path.isabs(config_file_path): - config_file_path = os.path.normpath(os.path.join(Globals.root,opt.conf)) + config_file_path = os.path.normpath(os.path.join(Globals.root,config_file_path)) tmpfile = os.path.join(os.path.dirname(config_file_path),'new_config.tmp') with open(tmpfile, 'w') as outfile: outfile.write(self.preamble()) From 61d8be1b5597e48394c279b666aa68a9fd41875d Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 5 Jan 2023 20:54:10 -0500 Subject: [PATCH 131/199] handle edge cases for import_model() and convert_model() --- ldm/invoke/CLI.py | 172 +++++++++++++++++++----------------- ldm/invoke/args.py | 7 +- ldm/invoke/model_manager.py | 2 +- ldm/invoke/readline.py | 5 +- 4 files changed, 101 insertions(+), 85 deletions(-) diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index be1a00311fb..eb01b2af78e 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -469,6 +469,18 @@ def do_command(command:str, gen, opt:Args, completer) -> tuple: completer.add_history(command) operation = None + elif command.startswith('!convert'): + path = shlex.split(command) + if len(path) < 2: + print('** please provide the path to a .ckpt or .safetensors model') + elif not os.path.exists(path[1]): + print(f'** {path[1]}: model not found') + else: + optimize_model(path[1], gen, opt, completer) + completer.add_history(command) + operation = None + + elif command.startswith('!optimize'): path = shlex.split(command) if len(path) < 2: @@ -553,7 +565,7 @@ def import_model(model_path:str, gen, opt, completer): ''' model_name = None - if model_path.startswith('http') or os.path.exists(model_path): + if model_path.startswith(('http:','https:','ftp:')) or os.path.exists(model_path): model_name = import_ckpt_model(model_path, gen, 
opt, completer) elif re.match('^[\w-]+/[\w-]+$',model_path): model_name = import_diffuser_model(model_path, gen, opt, completer) @@ -577,7 +589,14 @@ def import_model(model_path:str, gen, opt, completer): def import_diffuser_model(path_or_repo:str, gen, opt, completer)->str: manager = gen.model_manager - model_name, model_description = _get_model_name_and_desc(manager) + default_name = Path(path_or_url).stem + default_description = f'Imported model {default_name}' + model_name, model_description = _get_model_name_and_desc( + manager, + completer, + model_name=default_name, + model_description=default_description + ) if not manager.import_diffuser_model( path_or_repo, @@ -589,7 +608,14 @@ def import_diffuser_model(path_or_repo:str, gen, opt, completer)->str: def import_ckpt_model(path_or_url:str, gen, opt, completer)->str: manager = gen.model_manager - model_name, model_description = _get_model_name_and_desc(manager) + default_name = Path(path_or_url).stem + default_description = f'Imported model {default_name}' + model_name, model_description = _get_model_name_and_desc( + manager, + completer, + model_name=default_name, + model_description=default_description + ) config_file = None completer.complete_extensions(('.yaml','.yml')) @@ -617,51 +643,56 @@ def _verify_load(model_name:str, gen)->bool: if not gen.model_manager.get_model(model_name): return False do_switch = input(f'Keep model loaded? [y] ') - if len(do_switch)>0 and do_switch[0] not in ('y','Y'): + if len(do_switch)==0 or do_switch[0] in ('y','Y'): gen.set_model(model_name) else: print('>> Restoring previous model') gen.set_model(current_model) return True -def _get_model_name_and_desc(model_manager): - model_name = None - model_description = None - existing_models = model_manager.list_models() - - done = False - while not done: - model_name = input('Short name for this model: ') - if not re.match('^[\w._-]+$',model_name): - print('** model name must contain only words, digits and the characters [._-] **') - elif model_name in existing_models: - print(f'** the name {model_name} is already in use. 
Pick another.') - else: - done = True - model_description = input('Description for this model: ') - +def _get_model_name_and_desc(model_manager,completer,model_name:str='',model_description:str=''): + model_name = _get_model_name(model_manager.list_models(),completer,model_name) + completer.linebuffer = model_description + model_description = input(f'Description for this model [{model_description}]: ') or model_description return model_name, model_description -def optimize_model(model_name:str, gen, opt, completer): +def optimize_model(model_name_or_path:str, gen, opt, completer): manager = gen.model_manager - if not (model_info := manager.model_info(model_name)): - print(f'** unknown model: {model_name}') + ckpt_path = None + + if (model_info := manager.model_info(model_name_or_path)): + if 'weights' in model_info: + ckpt_path = Path(model_info['weights']) + model_name = model_name_or_path + model_description = model_info['description'] + else: + printf('** {model_name_or_path} is not a legacy .ckpt weights file') + return + elif os.path.exists(model_name_or_path): + ckpt_path = Path(model_name_or_path) + model_name,model_description = _get_model_name_and_desc( + manager, + completer, + ckpt_path.stem, + f'Converted model {ckpt_path.stem}' + ) + else: + print(f'** {model_name_or_path} is neither an existing model nor the path to a .ckpt file') return - ckpt_path = Path(model_info['weights']) + if not ckpt_path.is_absolute(): ckpt_path = Path(Globals.root,ckpt_path) - basename = ckpt_path.stem - diffuser_path = Path(Globals.root, 'models','optimized-ckpts',basename) + diffuser_path = Path(Globals.root, 'models','optimized-ckpts',model_name) if diffuser_path.exists(): - print(f'** {model_name} is already optimized. Will not overwrite. If this is an error, please remove the directory {diffuser_path} and try again.') + print(f'** {model_name_or_path} is already optimized. Will not overwrite. If this is an error, please remove the directory {diffuser_path} and try again.') return new_config = gen.model_manager.convert_and_import( ckpt_path, diffuser_path, model_name=model_name, - model_description=model_info['description'], + model_description=model_description, commit_to_conf=opt.conf, ) if not new_config: @@ -686,65 +717,46 @@ def del_config(model_name:str, gen, opt, completer): print(f'** {model_name} deleted') completer.update_models(gen.model_manager.list_models()) - -# NOTE: edit_config() must be rewritten for diffusers -# We should only allow user to change: name, description, default def edit_config(model_name:str, gen, opt, completer): - config = gen.model_manager.config + current_model = gen.model_name + if model_name == current_model: + print("** Can't edit the active model. !switch to another model first. 
**") + return - if model_name not in config: + manager = gen.model_manager + if not (info := manager.model_info(model_name)): print(f'** Unknown model {model_name}') return print(f'\n>> Editing model {model_name} from configuration file {opt.conf}') + new_name,new_description = _get_model_name_and_desc(gen.model_manager, + completer, + model_name=model_name, + model_description=info['description'] + ) + info['description'] = new_description + if new_name != model_name: + manager.add_model(new_name,info) + manager.del_model(model_name) + manager.commit(opt.conf) + completer.update_models(manager.list_models()) + print('>> Model successfully updated') + +def _get_model_name(existing_names,completer,default_name:str='')->str: + done = False + completer.linebuffer = default_name + while not done: + model_name = input(f'Short name for this model [{default_name}]: ') + if len(model_name)==0: + model_name = default_name + if not re.match('^[\w._-]+$',model_name): + print('** model name must contain only words, digits and the characters [._-] **') + elif model_name in existing_names: + print(f'** the name {model_name} is already in use. Pick another.') + else: + done = True + return model_name - conf = config[model_name] - new_config = {} - completer.complete_extensions(('.yaml','.yml','.ckpt','.vae.pt')) - for field in ('description', 'weights', 'vae', 'config', 'width', 'height', 'format'): - completer.linebuffer = str(conf[field]) if field in conf else '' - new_value = input(f'{field}: ') - new_config[field] = int(new_value) if field in ('width','height') else new_value - make_default = input('Make this the default model? [n] ') in ('y','Y') - completer.complete_extensions(None) - write_config_file(opt.conf, gen, model_name, new_config, clobber=True, make_default=make_default) - -# NOTE: delete this whole function -def write_config_file(conf_path, gen, model_name, new_config, clobber=False, make_default=False)->bool: - current_model = gen.model_name - - op = 'write' if clobber else 'import' - print('\n>> New configuration:') - if make_default: - new_config['default'] = True - print(yaml.dump({model_name:new_config})) - if input(f'OK to {op} [y]? ').startswith(('n','N')): - return False - - try: - print('>> Verifying that new model loads...') - gen.model_manager.add_model(model_name, new_config, clobber) - assert gen.set_model(model_name) is not None, 'model failed to load' - except AssertionError as e: - traceback.print_exc() - print(f'** aborting **') - try: - gen.model_manager.del_model(model_name) - except Exception: - pass - return False - - if make_default: - print('making this default') - gen.model_manager.set_default_model(model_name) - - gen.model_manager.commit(conf_path) - do_switch = input(f'Keep model loaded? 
[y] ') - if len(do_switch)==0 or do_switch[0] in ('y','Y'): - pass - else: - gen.set_model(current_model) - return True def do_textmask(gen, opt, callback): image_path = opt.prompt diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index 6b21df47a34..7d160e825ec 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -745,8 +745,11 @@ def _create_dream_cmd_parser(self): *Model manipulation* !models -- list models in configs/models.yaml !switch -- switch to model named - !import_model path/to/weights/file.ckpt -- adds a .ckpt model to your config - !optimize_model path/to/weights/file.ckpt -- converts a .ckpt file model a diffusers model + !import_model /path/to/weights/file.ckpt -- adds a .ckpt model to your config + !import_model http://path_to_model.ckpt -- downloads and adds a .ckpt model to your config + !import_model hakurei/waifu-diffusion -- downloads and adds a diffusers model to your config + !optimize_model -- converts a .ckpt model to a diffusers model + !convert_model /path/to/weights/file.ckpt -- converts a .ckpt file path to a diffusers model !edit_model -- edit a model's description !del_model -- delete a model """ diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index 467aaac2f60..e7887ffc2a4 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -693,7 +693,7 @@ def preamble(self) -> str: def _resolve_path(self, source:Union[str,Path], dest_directory:str)->Path: resolved_path = None - if source.startswith('http'): + if source.startswith(('http:','https:','ftp:')): basename = os.path.basename(source) if not os.path.isabs(dest_directory): dest_directory = os.path.join(Globals.root,dest_directory) diff --git a/ldm/invoke/readline.py b/ldm/invoke/readline.py index 02fc46cec6d..f14af0714f5 100644 --- a/ldm/invoke/readline.py +++ b/ldm/invoke/readline.py @@ -24,7 +24,7 @@ readline_available = False IMG_EXTENSIONS = ('.png','.jpg','.jpeg','.PNG','.JPG','.JPEG','.gif','.GIF') -WEIGHT_EXTENSIONS = ('.ckpt','.bae') +WEIGHT_EXTENSIONS = ('.ckpt','.vae','.safetensors') TEXT_EXTENSIONS = ('.txt','.TXT') CONFIG_EXTENSIONS = ('.yaml','.yml') COMMANDS = ( @@ -59,7 +59,7 @@ '--png_compression','-z', '--text_mask','-tm', '!fix','!fetch','!replay','!history','!search','!clear', - '!models','!switch','!import_model','!optimize_model','!edit_model','!del_model', + '!models','!switch','!import_model','!optimize_model','!convert_model','!edit_model','!del_model', '!mask', ) MODEL_COMMANDS = ( @@ -72,6 +72,7 @@ ) WEIGHT_COMMANDS = ( '!import_model', + '!convert_model', ) IMG_PATH_COMMANDS = ( '--outdir[=\s]', From f14dac4ad692927cc023aaf228f3051d027c3e9c Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 5 Jan 2023 21:28:23 -0500 Subject: [PATCH 132/199] add support for safetensor .ckpt files --- ldm/invoke/ckpt_to_diffuser.py | 4 +++- ldm/invoke/model_manager.py | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/ldm/invoke/ckpt_to_diffuser.py b/ldm/invoke/ckpt_to_diffuser.py index f04c68a814e..64fbb0925c7 100644 --- a/ldm/invoke/ckpt_to_diffuser.py +++ b/ldm/invoke/ckpt_to_diffuser.py @@ -19,7 +19,9 @@ import os import re import torch +from pathlib import Path from ldm.invoke.globals import Globals +from safetensors.torch import load_file try: from omegaconf import OmegaConf @@ -790,7 +792,7 @@ def convert_ckpt_to_diffuser(checkpoint_path:str, upcast_attn:bool=False, ): - checkpoint = torch.load(checkpoint_path) + checkpoint = load_file(checkpoint_path) if Path(checkpoint_path).suffix == '.safetensors' else 
torch.load(checkpoint_path) # Sometimes models don't have the global_step item if "global_step" in checkpoint: diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index e7887ffc2a4..6c6659e5c79 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -17,11 +17,13 @@ import traceback import warnings import shutil +from safetensors.torch import load_file from pathlib import Path from typing import Union, Any from ldm.util import download_with_progress_bar import torch +import safetensors import transformers from diffusers import AutoencoderKL, logging as dlogging from omegaconf import OmegaConf @@ -300,7 +302,11 @@ def _load_ckpt_model(self, model_name, mconfig): with open(weights,'rb') as f: weight_bytes = f.read() model_hash = self._cached_sha256(weights, weight_bytes) - sd = torch.load(io.BytesIO(weight_bytes), map_location='cpu') + sd = None + if weights.endswith('.safetensors'): + sd = safetensors.torch.load(weight_bytes) + else: + sd = torch.load(io.BytesIO(weight_bytes), map_location='cpu') del weight_bytes # merged models from auto11 merge board are flat for some reason if 'state_dict' in sd: From b1e98199fc73ac42c4f300b6d4a9c6ec6d9842e3 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 5 Jan 2023 22:11:41 -0500 Subject: [PATCH 133/199] fix name error --- ldm/invoke/CLI.py | 2 +- ldm/invoke/ckpt_to_diffuser.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index eb01b2af78e..c87fd4560c8 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -589,7 +589,7 @@ def import_model(model_path:str, gen, opt, completer): def import_diffuser_model(path_or_repo:str, gen, opt, completer)->str: manager = gen.model_manager - default_name = Path(path_or_url).stem + default_name = Path(path_or_repo).stem default_description = f'Imported model {default_name}' model_name, model_description = _get_model_name_and_desc( manager, diff --git a/ldm/invoke/ckpt_to_diffuser.py b/ldm/invoke/ckpt_to_diffuser.py index 64fbb0925c7..6e8e2194787 100644 --- a/ldm/invoke/ckpt_to_diffuser.py +++ b/ldm/invoke/ckpt_to_diffuser.py @@ -14,6 +14,7 @@ # limitations under the License. # # Adapted for use as a module by Lincoln Stein +# Original file at: https://github.com/huggingface/diffusers/blob/main/scripts/convert_ldm_original_checkpoint_to_diffusers.py """ Conversion script for the LDM checkpoints. """ import os From 282c0ef98a31e86dcc7ac77debd8f8f74b07c16c Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Thu, 5 Jan 2023 22:53:25 -0500 Subject: [PATCH 134/199] code cleanup with pyflake --- ldm/generate.py | 10 +++++----- ldm/invoke/CLI.py | 22 ++++++++-------------- ldm/invoke/args.py | 14 +++++--------- ldm/invoke/model_manager.py | 21 ++++++++++----------- 4 files changed, 28 insertions(+), 39 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index 4267eb1b4b2..314fae9775e 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -545,7 +545,7 @@ def process_image(image,seed): print('**Interrupted** Partial results will be returned.') else: raise KeyboardInterrupt - except RuntimeError as e: + except RuntimeError: print(traceback.format_exc(), file=sys.stderr) print('>> Could not generate image.') @@ -557,7 +557,7 @@ def process_image(image,seed): ) if self._has_cuda(): print( - f'>> Max VRAM used for this generation:', + '>> Max VRAM used for this generation:', '%4.2fG.' 
% (torch.cuda.max_memory_allocated() / 1e9), 'Current VRAM utilization:', '%4.2fG' % (torch.cuda.memory_allocated() / 1e9), @@ -567,7 +567,7 @@ def process_image(image,seed): self.session_peakmem, torch.cuda.max_memory_allocated() ) print( - f'>> Max VRAM used since script start: ', + '>> Max VRAM used since script start: ', '%4.2fG' % (self.session_peakmem / 1e9), ) return results @@ -643,7 +643,7 @@ def apply_postprocessor( try: extend_instructions[direction]=int(pixels) except ValueError: - print(f'** invalid extension instruction. Use ..., as in "top 64 left 128 right 64 bottom 64"') + print('** invalid extension instruction. Use ..., as in "top 64 left 128 right 64 bottom 64"') opt.seed = seed opt.prompt = prompt @@ -691,7 +691,7 @@ def apply_postprocessor( ) elif tool is None: - print(f'* please provide at least one postprocessing option, such as -G or -U') + print('* please provide at least one postprocessing option, such as -G or -U') return None else: print(f'* postprocessing tool {tool} is not yet supported') diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index c87fd4560c8..c88b0927d42 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -2,11 +2,7 @@ import re import sys import shlex -import copy -import warnings -import time import traceback -import yaml from ldm.invoke.globals import Globals from ldm.generate import Generate @@ -16,7 +12,6 @@ from ldm.invoke.pngwriter import PngWriter, retrieve_metadata, write_metadata from ldm.invoke.image_util import make_grid from ldm.invoke.log import write_log -from omegaconf import OmegaConf from pathlib import Path import pyparsing import ldm.invoke @@ -53,8 +48,8 @@ def main(): if not args.conf: if not os.path.exists(os.path.join(Globals.root,'configs','models.yaml')): print(f"\n** Error. The file {os.path.join(Globals.root,'configs','models.yaml')} could not be found.") - print(f'** Please check the location of your invokeai directory and use the --root_dir option to point to the correct path.') - print(f'** This script will now exit.') + print('** Please check the location of your invokeai directory and use the --root_dir option to point to the correct path.') + print('** This script will now exit.') sys.exit(-1) print(f'>> {ldm.invoke.__app_name__} {ldm.invoke.__version__}') @@ -163,7 +158,6 @@ def main_loop(gen, opt): doneAfterInFile = infile is not None path_filter = re.compile(r'[<>:"/\\|?*]') last_results = list() - model_config = OmegaConf.load(opt.conf) # The readline completer reads history from the .dream_history file located in the # output directory specified at the time of script launch. We do not currently support @@ -576,7 +570,7 @@ def import_model(model_path:str, gen, opt, completer): return if not _verify_load(model_name, gen): - print(f'** model failed to load. Discarding configuration entry') + print('** model failed to load. 
Discarding configuration entry') gen.model_manager.del_model(model_name) return @@ -602,7 +596,7 @@ def import_diffuser_model(path_or_repo:str, gen, opt, completer)->str: path_or_repo, model_name = model_name, description = model_description): - print(f'** model failed to import') + print('** model failed to import') return None return model_name @@ -632,7 +626,7 @@ def import_ckpt_model(path_or_url:str, gen, opt, completer)->str: model_name = model_name, model_description = model_description ): - print(f'** model failed to import') + print('** model failed to import') return None return model_name @@ -642,7 +636,7 @@ def _verify_load(model_name:str, gen)->bool: current_model = gen.model_name if not gen.model_manager.get_model(model_name): return False - do_switch = input(f'Keep model loaded? [y] ') + do_switch = input('Keep model loaded? [y] ') if len(do_switch)==0 or do_switch[0] in ('y','Y'): gen.set_model(model_name) else: @@ -666,7 +660,7 @@ def optimize_model(model_name_or_path:str, gen, opt, completer): model_name = model_name_or_path model_description = model_info['description'] else: - printf('** {model_name_or_path} is not a legacy .ckpt weights file') + print(f'** {model_name_or_path} is not a legacy .ckpt weights file') return elif os.path.exists(model_name_or_path): ckpt_path = Path(model_name_or_path) @@ -867,7 +861,7 @@ def prepare_image_metadata( except KeyError as e: print(f'** The filename format contains an unknown key \'{e.args[0]}\'. Will use \'{{prefix}}.{{seed}}.png\' instead') filename = f'{prefix}.{seed}.png' - except IndexError as e: + except IndexError: print(f'** The filename format is broken or complete. Will use \'{{prefix}}.{{seed}}.png\' instead') filename = f'{prefix}.{seed}.png' diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index 7d160e825ec..9ad12c53dbe 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -91,10 +91,6 @@ import re import shlex import sys -import copy -import base64 -import functools -import warnings import ldm.invoke import ldm.invoke.pngwriter @@ -279,7 +275,7 @@ def dream_prompt_str(self,**kwargs): switches.append(f'-I {a["init_img"]}') switches.append(f'-A {a["sampler_name"]}') if a['fit']: - switches.append(f'--fit') + switches.append('--fit') if a['init_mask'] and len(a['init_mask'])>0: switches.append(f'-M {a["init_mask"]}') if a['init_color'] and len(a['init_color'])>0: @@ -287,7 +283,7 @@ def dream_prompt_str(self,**kwargs): if a['strength'] and a['strength']>0: switches.append(f'-f {a["strength"]}') if a['inpaint_replace']: - switches.append(f'--inpaint_replace') + switches.append('--inpaint_replace') if a['text_mask']: switches.append(f'-tm {" ".join([str(u) for u in a["text_mask"]])}') else: @@ -1090,7 +1086,7 @@ def _create_dream_cmd_parser(self): return parser def format_metadata(**kwargs): - print(f'format_metadata() is deprecated. Please use metadata_dumps()') + print('format_metadata() is deprecated. 
Please use metadata_dumps()') return metadata_dumps(kwargs) def metadata_dumps(opt, @@ -1157,7 +1153,7 @@ def metadata_dumps(opt, rfc_dict.pop('strength') if len(seeds)==0 and opt.seed: - seeds=[seed] + seeds=[opt.seed] if opt.grid: images = [] @@ -1228,7 +1224,7 @@ def metadata_loads(metadata) -> list: opt = Args() opt._cmd_switches = Namespace(**image) results.append(opt) - except Exception as e: + except Exception: import sys, traceback print('>> could not read metadata',file=sys.stderr) print(traceback.format_exc(), file=sys.stderr) diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index 6c6659e5c79..73fdc0d74c9 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -16,8 +16,7 @@ import time import traceback import warnings -import shutil -from safetensors.torch import load_file +import safetensors.torch from pathlib import Path from typing import Union, Any from ldm.util import download_with_progress_bar @@ -213,7 +212,7 @@ def add_model(self, model_name:str, model_attributes:dict, clobber:bool=False) - attributes are incorrect or the model name is missing. ''' omega = self.config - assert 'format' in model_attributes, f'missing required field "format"' + assert 'format' in model_attributes, 'missing required field "format"' if model_attributes['format']=='diffusers': assert 'description' in model_attributes, 'required field "description" is missing' assert 'path' in model_attributes or 'repo_id' in model_attributes,'model must have either the "path" or "repo_id" fields defined' @@ -262,7 +261,7 @@ def _load_model(self, model_name:str): # usage statistics toc = time.time() - print(f'>> Model loaded in', '%4.2fs' % (toc - tic)) + print('>> Model loaded in', '%4.2fs' % (toc - tic)) if self._has_cuda(): print( '>> Max VRAM used to load the model:', @@ -312,7 +311,7 @@ def _load_ckpt_model(self, model_name, mconfig): if 'state_dict' in sd: sd = sd['state_dict'] - print(f' | Forcing garbage collection prior to loading new model') + print(' | Forcing garbage collection prior to loading new model') gc.collect() model = instantiate_from_config(omega_config.model) model.load_state_dict(sd, strict=False) @@ -347,7 +346,7 @@ def _load_ckpt_model(self, model_name, mconfig): # usage statistics toc = time.time() - print(f'>> Model loaded in', '%4.2fs' % (toc - tic)) + print('>> Model loaded in', '%4.2fs' % (toc - tic)) if self._has_cuda(): print( @@ -366,9 +365,9 @@ def _load_diffusers_model(self, mconfig): print(f'>> Loading diffusers model from {name_or_path}') if using_fp16: - print(f' | Using faster float16 precision') + print(' | Using faster float16 precision') else: - print(f' | Using more accurate float32 precision') + print(' | Using more accurate float32 precision') # TODO: scan weights maybe? 
pipeline_args: dict[str, Any] = dict( @@ -782,7 +781,7 @@ def _cached_sha256(self,path,data) -> Union[str, bytes]: hash = f.read() return hash - print(f'>> Calculating sha256 hash of weights file') + print('>> Calculating sha256 hash of weights file') tic = time.time() sha = hashlib.sha256() sha.update(data) @@ -809,7 +808,7 @@ def _load_vae(self, vae_config): vae_args.update(torch_dtype=torch.float16) fp_args_list = [{'revision':'fp16'},{}] else: - print(f' | Using more accurate float32 precision') + print(' | Using more accurate float32 precision') fp_args_list = [{}] vae = None @@ -826,7 +825,7 @@ def _load_vae(self, vae_config): vae = AutoencoderKL.from_pretrained(name_or_path, **vae_args, **fp_args) except OSError as e: if str(e).startswith('fp16 is not a valid'): - print(f' | Half-precision version of model not available; fetching full-precision instead') + print(' | Half-precision version of model not available; fetching full-precision instead') else: deferred_error = e if vae: From a8591cd2ad9064920072aea4e098e8862f11968a Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Fri, 6 Jan 2023 14:06:29 -0500 Subject: [PATCH 135/199] improve model setting behavior - If the user enters an invalid model name at startup time, will not try to load it, warn, and use default model - CLI UI enhancement: include currently active model in the command line prompt. --- ldm/generate.py | 11 ++++++++--- ldm/invoke/CLI.py | 36 +++++++++++++++++++----------------- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index 314fae9775e..d899268ec95 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -204,7 +204,12 @@ def __init__( # model caching system for fast switching self.model_manager = ModelManager(mconfig,self.device,self.precision,max_loaded_models=max_loaded_models) - self.model_name = model or self.model_manager.default_model() or FALLBACK_MODEL_NAME + # don't accept invalid models + fallback = self.model_manager.default_model() or FALLBACK_MODEL_NAME + if not self.model_manager.valid_model(model): + print(f'** "{model}" is not a known model name; falling back to {fallback}.') + model = None + self.model_name = model or fallback # for VRAM usage statistics self.session_peakmem = torch.cuda.max_memory_allocated() if self._has_cuda else None @@ -804,7 +809,7 @@ def load_model(self): ''' preload model identified in self.model_name ''' - self.set_model(self.model_name) + return self.set_model(self.model_name) def set_model(self,model_name): """ @@ -817,7 +822,7 @@ def set_model(self,model_name): # the model cache does the loading and offloading cache = self.model_manager if not cache.valid_model(model_name): - print(f'** "{model_name}" is not a known model name. Please check your models.yaml file') + print(f'** "{model_name}" is not a known model name. 
Cannot change.') return self.model cache.print_vram_usage() diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index c88b0927d42..b5872884c9e 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -13,6 +13,7 @@ from ldm.invoke.image_util import make_grid from ldm.invoke.log import write_log from pathlib import Path +from argparse import Namespace import pyparsing import ldm.invoke @@ -107,9 +108,8 @@ def main(): safety_checker=opt.safety_checker, max_loaded_models=opt.max_loaded_models, ) - except (FileNotFoundError, TypeError, AssertionError): - emergency_model_reconfigure(opt) - sys.exit(-1) + except (FileNotFoundError, TypeError, AssertionError) as e: + report_model_error(opt,e) except (IOError, KeyError) as e: print(f'{e}. Aborting.') sys.exit(-1) @@ -120,9 +120,8 @@ def main(): # preload the model try: gen.load_model() - except AssertionError: - emergency_model_reconfigure(opt) - sys.exit(-1) + except AssertionError as e: + report_model_error(opt, e) # try to autoconvert new models # autoimport new .ckpt files @@ -164,7 +163,8 @@ def main_loop(gen, opt): # changing the history file midstream when the output directory is changed. completer = get_completer(opt, models=gen.model_manager.list_models()) set_default_output_dir(opt, completer) - add_embedding_terms(gen, completer) + if gen.model: + add_embedding_terms(gen, completer) output_cntr = completer.get_current_history_length()+1 # os.pathconf is not available on Windows @@ -180,7 +180,7 @@ def main_loop(gen, opt): operation = 'generate' try: - command = get_next_command(infile) + command = get_next_command(infile, gen.model_name) except EOFError: done = infile is None or doneAfterInFile infile = None @@ -897,9 +897,9 @@ def choose_postprocess_name(opt,prefix,seed) -> str: counter += 1 return filename -def get_next_command(infile=None) -> str: # command string +def get_next_command(infile=None, model_name='no model') -> str: # command string if infile is None: - command = input('invoke> ') + command = input(f'({model_name}) invoke> ') else: command = infile.readline() if not command: @@ -1054,13 +1054,15 @@ def write_commands(opt, file_path:str, outfilepath:str): f.write('\n'.join(commands)) print(f'>> File {outfilepath} with commands created') -def emergency_model_reconfigure(opt): - print() - print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') - print(' You appear to have a missing or misconfigured model file(s). ') - print(' The script will now exit and run configure_invokeai.py to help fix the problem.') - print(' After reconfiguration is done, please relaunch invoke.py. ') - print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') +def report_model_error(opt:Namespace, e:Exception): + print('** An error occurred while attempting to initialize the model. **') + print(f'** The error was: {str(e)} **') + print('** This can be caused by a missing or corrupted models file, **') + print('** and can sometimes be fixed by (re)installing the models. **') + response = input('Do you want to run configure_invokeai.py to select and/or reinstall models? 
[Yn] ') + if response.startswith(('n','N')): + return + print('configure_invokeai is launching....\n') # Match arguments that were set on the CLI From 54179e613b321c13be8c36a28af981821ed57870 Mon Sep 17 00:00:00 2001 From: mauwii Date: Fri, 6 Jan 2023 20:29:19 +0100 Subject: [PATCH 136/199] update test-invoke-pip.yml - fix model cache path to point to runwayml/stable-diffusion-v1-5 - remove `skip-sd-weights` from configure_invokeai.py args --- .github/workflows/test-invoke-pip.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-invoke-pip.yml b/.github/workflows/test-invoke-pip.yml index dacee4685c9..a03bf87d0cd 100644 --- a/.github/workflows/test-invoke-pip.yml +++ b/.github/workflows/test-invoke-pip.yml @@ -76,13 +76,13 @@ jobs: mkdir -p ${{ env.INVOKEAI_ROOT }}/configs cp configs/models.yaml.example ${{ env.INVOKEAI_ROOT }}/configs/models.yaml - - name: Use Cached Stable Diffusion Model + - name: Use Cached diffusers-1.5 id: cache-sd-model uses: actions/cache@v3 env: cache-name: huggingface-${{ matrix.stable-diffusion-model }} with: - path: ~/.cache/huggingface + path: ${{ env.INVOKEAI_ROOT }}/models/runwayml/stable-diffusion-v1-5 key: ${{ env.cache-name }} - name: set test prompt to main branch validation @@ -119,7 +119,7 @@ jobs: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }} run: > python3 scripts/configure_invokeai.py - --skip-sd-weights --yes + --yes --full-precision # can't use fp16 weights without a GPU - name: Run the tests From 0c03f6c43cd787e4c87f018d0f7dcb074e16378a Mon Sep 17 00:00:00 2001 From: mauwii Date: Fri, 6 Jan 2023 20:50:09 +0100 Subject: [PATCH 137/199] exclude dev/diffusers from "fail for draft PRs" --- .github/workflows/test-invoke-pip.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-invoke-pip.yml b/.github/workflows/test-invoke-pip.yml index a03bf87d0cd..b4dd3bff8df 100644 --- a/.github/workflows/test-invoke-pip.yml +++ b/.github/workflows/test-invoke-pip.yml @@ -16,7 +16,7 @@ concurrency: jobs: fail_if_pull_request_is_draft: - if: github.event.pull_request.draft == true + if: github.event.pull_request.draft == true && github.head_ref != 'dev/diffusers' runs-on: ubuntu-18.04 steps: - name: Fails in order to indicate that pull request needs to be marked as ready to review and unit tests workflow needs to pass. From 12b454b604bd37c1cee7549a11db2b635e88aaec Mon Sep 17 00:00:00 2001 From: mauwii Date: Fri, 6 Jan 2023 20:59:18 +0100 Subject: [PATCH 138/199] disable "fail on PR jobs" --- .github/workflows/test-invoke-conda.yml | 14 +++++++------- .github/workflows/test-invoke-pip.yml | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml index 87974f358fd..7e9630689d6 100644 --- a/.github/workflows/test-invoke-conda.yml +++ b/.github/workflows/test-invoke-conda.yml @@ -15,15 +15,15 @@ concurrency: cancel-in-progress: true jobs: - fail_if_pull_request_is_draft: - if: github.event.pull_request.draft == true - runs-on: ubuntu-22.04 - steps: - - name: Fails in order to indicate that pull request needs to be marked as ready to review and unit tests workflow needs to pass. - run: exit 1 + # fail_if_pull_request_is_draft: + # if: github.event.pull_request.draft == true + # runs-on: ubuntu-22.04 + # steps: + # - name: Fails in order to indicate that pull request needs to be marked as ready to review and unit tests workflow needs to pass. 
+ # run: exit 1 matrix: - if: github.event.pull_request.draft == false + if: github.event.pull_request.draft == false || github.head_ref == 'dev/diffusers' strategy: matrix: stable-diffusion-model: diff --git a/.github/workflows/test-invoke-pip.yml b/.github/workflows/test-invoke-pip.yml index b4dd3bff8df..8dec7ad53e6 100644 --- a/.github/workflows/test-invoke-pip.yml +++ b/.github/workflows/test-invoke-pip.yml @@ -15,14 +15,14 @@ concurrency: cancel-in-progress: true jobs: - fail_if_pull_request_is_draft: - if: github.event.pull_request.draft == true && github.head_ref != 'dev/diffusers' - runs-on: ubuntu-18.04 - steps: - - name: Fails in order to indicate that pull request needs to be marked as ready to review and unit tests workflow needs to pass. - run: exit 1 + # fail_if_pull_request_is_draft: + # if: github.event.pull_request.draft == true && github.head_ref != 'dev/diffusers' + # runs-on: ubuntu-18.04 + # steps: + # - name: Fails in order to indicate that pull request needs to be marked as ready to review and unit tests workflow needs to pass. + # run: exit 1 matrix: - if: github.event.pull_request.draft == false + if: github.event.pull_request.draft == false || github.head_ref == 'dev/diffusers' strategy: matrix: stable-diffusion-model: From 18b1b8cf414605393779ecabfb29b431dcb054d5 Mon Sep 17 00:00:00 2001 From: mauwii Date: Fri, 6 Jan 2023 21:14:52 +0100 Subject: [PATCH 139/199] re-add `--skip-sd-weights` since no space --- .github/workflows/test-invoke-pip.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-invoke-pip.yml b/.github/workflows/test-invoke-pip.yml index 8dec7ad53e6..412453e43cb 100644 --- a/.github/workflows/test-invoke-pip.yml +++ b/.github/workflows/test-invoke-pip.yml @@ -119,6 +119,7 @@ jobs: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }} run: > python3 scripts/configure_invokeai.py + --skip-sd-weights --yes --full-precision # can't use fp16 weights without a GPU From b5c9d2fa545cb3413597921d83d1de4ec8b439ea Mon Sep 17 00:00:00 2001 From: mauwii Date: Fri, 6 Jan 2023 21:20:07 +0100 Subject: [PATCH 140/199] update workflow environments - include `INVOKE_MODEL_RECONFIGURE: '--yes'` --- .github/workflows/test-invoke-conda.yml | 1 + .github/workflows/test-invoke-pip.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/test-invoke-conda.yml b/.github/workflows/test-invoke-conda.yml index 7e9630689d6..6372efd6f80 100644 --- a/.github/workflows/test-invoke-conda.yml +++ b/.github/workflows/test-invoke-conda.yml @@ -53,6 +53,7 @@ jobs: name: ${{ matrix.environment-yaml }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} env: + INVOKE_MODEL_RECONFIGURE: '--yes' CONDA_ENV_NAME: invokeai INVOKEAI_ROOT: '${{ github.workspace }}/invokeai' PYTHONUNBUFFERED: 1 diff --git a/.github/workflows/test-invoke-pip.yml b/.github/workflows/test-invoke-pip.yml index 412453e43cb..1d01b7a7cb1 100644 --- a/.github/workflows/test-invoke-pip.yml +++ b/.github/workflows/test-invoke-pip.yml @@ -51,6 +51,7 @@ jobs: name: ${{ matrix.requirements-file }} on ${{ matrix.python-version }} runs-on: ${{ matrix.os }} env: + INVOKE_MODEL_RECONFIGURE: '--yes' INVOKEAI_ROOT: '${{ github.workspace }}/invokeai' PYTHONUNBUFFERED: 1 HAVE_SECRETS: ${{ secrets.HUGGINGFACE_TOKEN != '' }} From 52147cc7acecc474acc29d78dded3337d70a8444 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 7 Jan 2023 10:10:21 -0500 Subject: [PATCH 141/199] clean up model load failure handling - Allow CLI to run even when no model is defined or loadable. 
- Inhibit stack trace when model load fails - only show last error - Give user *option* to run configure_invokeai.py when no models successfully load. - Restart invokeai after reconfiguration. --- ldm/generate.py | 8 +++++--- ldm/invoke/CLI.py | 30 ++++++++++++++++++------------ ldm/invoke/model_manager.py | 7 +++---- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index d899268ec95..3cb1f78cb95 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -835,9 +835,11 @@ def set_model(self,model_name): gc.collect() model_data = cache.get_model(model_name) - if model_data is None: # restore previous - model_data = cache.get_model(self.model_name) - model_name = self.model_name # addresses Issue #1547 + assert model_data,'an error occurred while loading the model' +# if model_data is None: +# return +# model_data = cache.get_model(self.model_name) +# model_name = self.model_name # addresses Issue #1547 self.model = model_data['model'] self.width = model_data['width'] diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index b5872884c9e..092ee1f8432 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -559,9 +559,13 @@ def import_model(model_path:str, gen, opt, completer): ''' model_name = None - if model_path.startswith(('http:','https:','ftp:')) or os.path.exists(model_path): + if model_path.startswith(('http:','https:','ftp:')): model_name = import_ckpt_model(model_path, gen, opt, completer) - elif re.match('^[\w-]+/[\w-]+$',model_path): + elif os.path.exists(model_path) and model_path.endswith('.ckpt') and os.path.isfile(model_path): + model_name = import_ckpt_model(model_path, gen, opt, completer) + elif re.match('^[\w.-]+/[\w.-]+$',model_path): + model_name = import_diffuser_model(model_path, gen, opt, completer) + elif os.path.isdir(model_path): model_name = import_diffuser_model(model_path, gen, opt, completer) else: print(f'** {model_path} is neither the path to a .ckpt file nor a diffusers repository id. 
Can\'t import.') @@ -616,7 +620,7 @@ def import_ckpt_model(path_or_url:str, gen, opt, completer)->str: completer.linebuffer = 'configs/stable-diffusion/v1-inference.yaml' done = False while not done: - config_file = input('Configuration file for this model: ') + config_file = input('Configuration file for this model: ').strip() done = os.path.exists(config_file) completer.complete_extensions(None) @@ -647,7 +651,7 @@ def _verify_load(model_name:str, gen)->bool: def _get_model_name_and_desc(model_manager,completer,model_name:str='',model_description:str=''): model_name = _get_model_name(model_manager.list_models(),completer,model_name) completer.linebuffer = model_description - model_description = input(f'Description for this model [{model_description}]: ') or model_description + model_description = input(f'Description for this model [{model_description}]: ').strip() or model_description return model_name, model_description def optimize_model(model_name_or_path:str, gen, opt, completer): @@ -740,7 +744,7 @@ def _get_model_name(existing_names,completer,default_name:str='')->str: done = False completer.linebuffer = default_name while not done: - model_name = input(f'Short name for this model [{default_name}]: ') + model_name = input(f'Short name for this model [{default_name}]: ').strip() if len(model_name)==0: model_name = default_name if not re.match('^[\w._-]+$',model_name): @@ -899,7 +903,7 @@ def choose_postprocess_name(opt,prefix,seed) -> str: def get_next_command(infile=None, model_name='no model') -> str: # command string if infile is None: - command = input(f'({model_name}) invoke> ') + command = input(f'({model_name}) invoke> ').strip() else: command = infile.readline() if not command: @@ -1055,11 +1059,9 @@ def write_commands(opt, file_path:str, outfilepath:str): print(f'>> File {outfilepath} with commands created') def report_model_error(opt:Namespace, e:Exception): - print('** An error occurred while attempting to initialize the model. **') - print(f'** The error was: {str(e)} **') - print('** This can be caused by a missing or corrupted models file, **') - print('** and can sometimes be fixed by (re)installing the models. **') - response = input('Do you want to run configure_invokeai.py to select and/or reinstall models? [Yn] ') + print(f'** An error occurred while attempting to initialize the model: "{str(e)}"') + print('** This can be caused by a missing or corrupted models file, and can sometimes be fixed by (re)installing the models.') + response = input('Do you want to run configure_invokeai.py to select and/or reinstall models? [y] ') if response.startswith(('n','N')): return @@ -1070,7 +1072,7 @@ def report_model_error(opt:Namespace, e:Exception): root_dir = ["--root", opt.root_dir] if opt.root_dir is not None else [] config = ["--config", opt.conf] if opt.conf is not None else [] yes_to_all = os.environ.get('INVOKE_MODEL_RECONFIGURE') - + previous_args = sys.argv sys.argv = [ 'configure_invokeai' ] sys.argv.extend(root_dir) sys.argv.extend(config) @@ -1079,6 +1081,10 @@ def report_model_error(opt:Namespace, e:Exception): import configure_invokeai configure_invokeai.main() + print('** InvokeAI will now restart') + sys.argv = previous_args + main() # would rather do a os.exec(), but doesn't exist? 
+ sys.exit(0) def check_internet()->bool: ''' diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index 73fdc0d74c9..24c99832480 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -96,11 +96,9 @@ def get_model(self, model_name:str): except Exception as e: print(f'** model {model_name} could not be loaded: {str(e)}') - traceback.print_exc() - assert self.current_model,'** FATAL: no current model to restore to' + assert self.current_model,f'no model loaded and no previous model to fall back to' print(f'** restoring {self.current_model}') - self.get_model(self.current_model) - return None + return self.get_model(self.current_model) self.current_model = model_name self._push_newest_model(model_name) @@ -236,6 +234,7 @@ def _load_model(self, model_name:str): """Load and initialize the model from configuration variables passed at object creation time""" if model_name not in self.config: print(f'"{model_name}" is not a known model name. Please check your models.yaml file') + return mconfig = self.config[model_name] From 6be47b559df5392cf4995ff52de719560ed934b5 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 7 Jan 2023 10:52:52 -0500 Subject: [PATCH 142/199] further edge-case handling 1) only one model in models.yaml file, and that model is broken 2) no models in models.yaml 3) models.yaml doesn't exist at all --- ldm/generate.py | 24 +++++++++++++++--------- ldm/invoke/CLI.py | 12 +++++++++--- ldm/invoke/model_manager.py | 21 +++++++-------------- 3 files changed, 31 insertions(+), 26 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index 3cb1f78cb95..6e0a49380f7 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -819,11 +819,15 @@ def set_model(self,model_name): if self.model_name == model_name and self.model is not None: return self.model + previous_model_name = self.model_name + # the model cache does the loading and offloading cache = self.model_manager if not cache.valid_model(model_name): - print(f'** "{model_name}" is not a known model name. Cannot change.') - return self.model + raise KeyError(f'** "{model_name}" is not a known model name. Cannot change.') + #if not cache.valid_model(model_name): + #print(f'** "{model_name}" is not a known model name. 
Cannot change.') + #return self.model cache.print_vram_usage() @@ -833,13 +837,15 @@ def set_model(self,model_name): self.sampler = None self.generators = {} gc.collect() - - model_data = cache.get_model(model_name) - assert model_data,'an error occurred while loading the model' -# if model_data is None: -# return -# model_data = cache.get_model(self.model_name) -# model_name = self.model_name # addresses Issue #1547 + try: + model_data = cache.get_model(model_name) + except Exception as e: + print(f'** model {model_name} could not be loaded: {str(e)}') + assert previous_model_name,'no previous model to restore' + print(f'** trying to reload previous model') + model_data = cache.get_model(previous_model_name) # load previous + assert model_data,e + model_name = previous_model_name self.model = model_data['model'] self.width = model_data['width'] diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index 092ee1f8432..2f1a824ed81 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -120,7 +120,7 @@ def main(): # preload the model try: gen.load_model() - except AssertionError as e: + except Exception as e: report_model_error(opt, e) # try to autoconvert new models @@ -444,8 +444,14 @@ def do_command(command:str, gen, opt:Args, completer) -> tuple: elif command.startswith('!switch'): model_name = command.replace('!switch ','',1) - gen.set_model(model_name) - add_embedding_terms(gen, completer) + try: + gen.set_model(model_name) + add_embedding_terms(gen, completer) + except AssertionError as e: + report_model_error(opt,e) + except KeyError as e: + print(str(e)) + pass completer.add_history(command) operation = None diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index 24c99832480..7d0ea7a5f2d 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -85,20 +85,13 @@ def get_model(self, model_name:str): hash = self.models[model_name]['hash'] else: # we're about to load a new model, so potentially offload the least recently used one - try: - requested_model, width, height, hash = self._load_model(model_name) - self.models[model_name] = { - 'model': requested_model, - 'width': width, - 'height': height, - 'hash': hash, - } - - except Exception as e: - print(f'** model {model_name} could not be loaded: {str(e)}') - assert self.current_model,f'no model loaded and no previous model to fall back to' - print(f'** restoring {self.current_model}') - return self.get_model(self.current_model) + requested_model, width, height, hash = self._load_model(model_name) + self.models[model_name] = { + 'model': requested_model, + 'width': width, + 'height': height, + 'hash': hash, + } self.current_model = model_name self._push_newest_model(model_name) From b0935981093615dc35fa7c82e593f03af25c2743 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Sat, 7 Jan 2023 11:08:08 -0500 Subject: [PATCH 143/199] fix incorrect model status listing - "cached" was not being returned from list_models() - normalize handling of exceptions during model loading: - Passing an invalid model name to generate.set_model() will return a KeyError - All other exceptions are returned as the appropriate Exception --- ldm/generate.py | 15 +++++++++------ ldm/invoke/CLI.py | 7 ++++--- ldm/invoke/model_manager.py | 2 +- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index 6e0a49380f7..06608eafb0d 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -815,6 +815,10 @@ def set_model(self,model_name): """ Given the name of a model defined in models.yaml, 
will load and initialize it and return the model object. Previously-used models will be cached. + + If the passed model_name is invalid, raises a KeyError. + If the model fails to load for some reason, will attempt to load the previously- + loaded model (if any). If that fallback fails, will raise an AssertionError """ if self.model_name == model_name and self.model is not None: return self.model @@ -824,10 +828,7 @@ def set_model(self,model_name): # the model cache does the loading and offloading cache = self.model_manager if not cache.valid_model(model_name): - raise KeyError(f'** "{model_name}" is not a known model name. Cannot change.') - #if not cache.valid_model(model_name): - #print(f'** "{model_name}" is not a known model name. Cannot change.') - #return self.model + raise KeyError('** "{model_name}" is not a known model name. Cannot change.') cache.print_vram_usage() @@ -841,10 +842,12 @@ def set_model(self,model_name): model_data = cache.get_model(model_name) except Exception as e: print(f'** model {model_name} could not be loaded: {str(e)}') - assert previous_model_name,'no previous model to restore' + if previous_model_name is None: + raise e print(f'** trying to reload previous model') model_data = cache.get_model(previous_model_name) # load previous - assert model_data,e + if model_data is None: + raise e model_name = previous_model_name self.model = model_data['model'] diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index 2f1a824ed81..1cb59e51e3c 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -120,6 +120,8 @@ def main(): # preload the model try: gen.load_model() + except KeyError as e: + pass except Exception as e: report_model_error(opt, e) @@ -447,11 +449,10 @@ def do_command(command:str, gen, opt:Args, completer) -> tuple: try: gen.set_model(model_name) add_embedding_terms(gen, completer) - except AssertionError as e: - report_model_error(opt,e) except KeyError as e: print(str(e)) - pass + except Exception as e: + report_model_error(opt,e) completer.add_history(command) operation = None diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index 7d0ea7a5f2d..9152bd29b13 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -167,7 +167,7 @@ def list_models(self) -> dict: models[name] = dict( description = stanza.get('description',None), format = 'vae' if 'VAE/default' in config else format, - status = 'active' if self.current_model == name else 'cached' if name is self.models else 'not loaded', + status = 'active' if self.current_model == name else 'cached' if name in self.models else 'not loaded', ) return models From 11425b14b5590f7cba6bae0bd2062eaeab140677 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 7 Jan 2023 15:22:15 -0800 Subject: [PATCH 144/199] CI: do download weights (if not already cached) --- .github/workflows/test-invoke-pip.yml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/.github/workflows/test-invoke-pip.yml b/.github/workflows/test-invoke-pip.yml index 1d01b7a7cb1..c3fcd99241d 100644 --- a/.github/workflows/test-invoke-pip.yml +++ b/.github/workflows/test-invoke-pip.yml @@ -72,11 +72,6 @@ jobs: echo "INVOKEAI_ROOT=${{ github.workspace }}/invokeai" >> ${{ matrix.github-env }} echo "INVOKEAI_OUTDIR=${{ github.workspace }}/invokeai/outputs" >> ${{ matrix.github-env }} - - name: create models.yaml from example - run: | - mkdir -p ${{ env.INVOKEAI_ROOT }}/configs - cp configs/models.yaml.example ${{ env.INVOKEAI_ROOT 
}}/configs/models.yaml - - name: Use Cached diffusers-1.5 id: cache-sd-model uses: actions/cache@v3 @@ -119,8 +114,7 @@ jobs: env: HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGINGFACE_TOKEN }} run: > - python3 scripts/configure_invokeai.py - --skip-sd-weights + configure_invokeai.py --yes --full-precision # can't use fp16 weights without a GPU From 785c36603a2373ce7bf677919292ea491e5ea644 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 7 Jan 2023 15:49:35 -0800 Subject: [PATCH 145/199] diffusers: fix scheduler loading in offline mode --- ldm/generate.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index 06608eafb0d..a5511fcc206 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -205,7 +205,7 @@ def __init__( # model caching system for fast switching self.model_manager = ModelManager(mconfig,self.device,self.precision,max_loaded_models=max_loaded_models) # don't accept invalid models - fallback = self.model_manager.default_model() or FALLBACK_MODEL_NAME + fallback = self.model_manager.default_model() or FALLBACK_MODEL_NAME if not self.model_manager.valid_model(model): print(f'** "{model}" is not a known model name; falling back to {fallback}.') model = None @@ -1037,10 +1037,7 @@ def _set_scheduler(self): if self.sampler_name in scheduler_map: sampler_class = scheduler_map[self.sampler_name] msg = f'>> Setting Sampler to {self.sampler_name} ({sampler_class.__name__})' - self.sampler = sampler_class.from_pretrained( - self.model_manager.model_name_or_path(self.model_name), - subfolder="scheduler" - ) + self.sampler = sampler_class.from_config(self.model.scheduler.config) else: msg = (f'>> Unsupported Sampler: {self.sampler_name} ' f'Defaulting to {default}') From 697279afa45a1754975011860711692d4934b0e7 Mon Sep 17 00:00:00 2001 From: Kevin Turner <83819+keturn@users.noreply.github.com> Date: Sat, 7 Jan 2023 16:42:50 -0800 Subject: [PATCH 146/199] CI: fix model name (no longer has `diffusers-` prefix) --- .github/workflows/test-invoke-pip.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-invoke-pip.yml b/.github/workflows/test-invoke-pip.yml index c3fcd99241d..34c90f10c54 100644 --- a/.github/workflows/test-invoke-pip.yml +++ b/.github/workflows/test-invoke-pip.yml @@ -26,7 +26,7 @@ jobs: strategy: matrix: stable-diffusion-model: - - diffusers-1.5 + - stable-diffusion-1.5 requirements-file: - requirements-lin-cuda.txt - requirements-lin-amd.txt @@ -78,7 +78,10 @@ jobs: env: cache-name: huggingface-${{ matrix.stable-diffusion-model }} with: - path: ${{ env.INVOKEAI_ROOT }}/models/runwayml/stable-diffusion-v1-5 + path: | + ${{ env.INVOKEAI_ROOT }}/models/runwayml + ${{ env.INVOKEAI_ROOT }}/models/stabilityai + ${{ env.INVOKEAI_ROOT }}/models/CompVis key: ${{ env.cache-name }} - name: set test prompt to main branch validation From d9e41f17f54370dbf20f95a5f29fd463e548b546 Mon Sep 17 00:00:00 2001 From: mickr777 <115216705+mickr777@users.noreply.github.com> Date: Mon, 9 Jan 2023 13:07:58 +1100 Subject: [PATCH 147/199] Update txt2img2img.py (#2256) --- ldm/invoke/ckpt_generator/txt2img2img.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ldm/invoke/ckpt_generator/txt2img2img.py b/ldm/invoke/ckpt_generator/txt2img2img.py index 8c650f9ebda..167debb98ea 100644 --- a/ldm/invoke/ckpt_generator/txt2img2img.py +++ b/ldm/invoke/ckpt_generator/txt2img2img.py @@ -12,7 +12,7 @@ from ldm.models.diffusion.shared_invokeai_diffusion import 
InvokeAIDiffuserComponent from PIL import Image -class CkptTxt2Img2Img(Generator): +class CkptTxt2Img2Img(CkptGenerator): def __init__(self, model, precision): super().__init__(model, precision) self.init_latent = None # for get_noise() From da12663509a64a15f0f4918cc1a274429594612a Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 9 Jan 2023 11:56:19 -0500 Subject: [PATCH 148/199] fixes to share models with HuggingFace cache system - If HF_HOME environment variable is defined, then all huggingface models are stored in that directory following the standard conventions. - For seamless interoperability, set HF_HOME to ~/.cache/huggingface - If HF_HOME not defined, then models are stored in ~/invokeai/models. This is equivalent to setting HF_HOME to ~/invokeai/models A future commit will add a migration mechanism so that this change doesn't break previous installs. --- ldm/generate.py | 1 + ldm/invoke/CLI.py | 21 ++++++++++++--------- ldm/invoke/args.py | 3 ++- ldm/invoke/globals.py | 22 +++++++++++++++++++--- ldm/invoke/model_manager.py | 6 +++--- ldm/invoke/textual_inversion_training.py | 4 ---- ldm/modules/encoders/modules.py | 6 +++--- scripts/configure_invokeai.py | 7 ++++--- scripts/textual_inversion.py | 4 ++-- scripts/textual_inversion_fe.py | 4 ++-- 10 files changed, 48 insertions(+), 30 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index a5511fcc206..8081ffc89dd 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -206,6 +206,7 @@ def __init__( self.model_manager = ModelManager(mconfig,self.device,self.precision,max_loaded_models=max_loaded_models) # don't accept invalid models fallback = self.model_manager.default_model() or FALLBACK_MODEL_NAME + model = model or fallback if not self.model_manager.valid_model(model): print(f'** "{model}" is not a known model name; falling back to {fallback}.') model = None diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index 1cb59e51e3c..1db1b4f6927 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -659,7 +659,8 @@ def _get_model_name_and_desc(model_manager,completer,model_name:str='',model_des model_name = _get_model_name(model_manager.list_models(),completer,model_name) completer.linebuffer = model_description model_description = input(f'Description for this model [{model_description}]: ').strip() or model_description - return model_name, model_description + default = input(f'Make this model the default? [n] ').startswith(('y','Y')) + return model_name, model_description, default def optimize_model(model_name_or_path:str, gen, opt, completer): manager = gen.model_manager @@ -724,9 +725,9 @@ def del_config(model_name:str, gen, opt, completer): def edit_config(model_name:str, gen, opt, completer): current_model = gen.model_name - if model_name == current_model: - print("** Can't edit the active model. !switch to another model first. **") - return +# if model_name == current_model: +# print("** Can't edit the active model. !switch to another model first. 
**") +# return manager = gen.model_manager if not (info := manager.model_info(model_name)): @@ -734,15 +735,17 @@ def edit_config(model_name:str, gen, opt, completer): return print(f'\n>> Editing model {model_name} from configuration file {opt.conf}') - new_name,new_description = _get_model_name_and_desc(gen.model_manager, - completer, - model_name=model_name, - model_description=info['description'] + new_name,new_description,default = _get_model_name_and_desc(gen.model_manager, + completer, + model_name=model_name, + model_description=info['description'] ) info['description'] = new_description if new_name != model_name: manager.add_model(new_name,info) manager.del_model(model_name) + if default: + manager.set_default_model(new_name) manager.commit(opt.conf) completer.update_models(manager.list_models()) print('>> Model successfully updated') @@ -756,7 +759,7 @@ def _get_model_name(existing_names,completer,default_name:str='')->str: model_name = default_name if not re.match('^[\w._-]+$',model_name): print('** model name must contain only words, digits and the characters [._-] **') - elif model_name in existing_names: + elif model_name != default_name and model_name in existing_names: print(f'** the name {model_name} is already in use. Pick another.') else: done = True diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index 9ad12c53dbe..400d1f720d0 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -97,6 +97,7 @@ from ldm.invoke.globals import Globals from ldm.invoke.prompt_parser import split_weighted_subprompts from argparse import Namespace +from pathlib import Path APP_ID = ldm.invoke.__app_id__ APP_NAME = ldm.invoke.__app_name__ @@ -183,7 +184,7 @@ def parse_args(self): sys.exit(0) print('* Initializing, be patient...') - Globals.root = os.path.abspath(switches.root_dir or Globals.root) + Globals.root = Path(os.path.abspath(switches.root_dir or Globals.root)) Globals.try_patchmatch = switches.patchmatch # now use root directory to find the init file diff --git a/ldm/invoke/globals.py b/ldm/invoke/globals.py index 7c5b765cbc3..c67dbac145c 100644 --- a/ldm/invoke/globals.py +++ b/ldm/invoke/globals.py @@ -15,6 +15,7 @@ import os.path as osp from pathlib import Path from argparse import Namespace +from typing import Union Globals = Namespace() @@ -42,12 +43,27 @@ # The CLI will test connectivity at startup time. Globals.internet_available = True -def global_config_dir()->str: +def global_config_dir()->Path: return Path(Globals.root, Globals.config_dir) -def global_models_dir()->str: +def global_models_dir()->Path: return Path(Globals.root, Globals.models_dir) -def global_autoscan_dir()->str: +def global_autoscan_dir()->Path: return Path(Globals.root, Globals.autoscan_dir) +def global_set_root(root_dir:Union[str,Path]): + Globals.root = root_dir + +def global_cache_dir(subdir:Union[str,Path]='')->Path: + ''' + Returns Path to the model cache directory. 
If a subdirectory + is provided, it will be appended to the end of the path, allowing + for huggingface-style conventions: + global_cache_dir('diffusers') + global_cache_dir('transformers') + ''' + if (home := os.environ.get('HF_HOME')): + return Path(home,subdir) + else: + return Path(Globals.root,'models',subdir) diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index 9152bd29b13..a9db776b03c 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -30,7 +30,7 @@ from picklescan.scanner import scan_file_path from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline -from ldm.invoke.globals import Globals, global_models_dir, global_autoscan_dir +from ldm.invoke.globals import Globals, global_models_dir, global_autoscan_dir, global_cache_dir from ldm.util import instantiate_from_config, ask_user DEFAULT_MAX_MODELS=2 @@ -370,7 +370,7 @@ def _load_diffusers_model(self, mconfig): vae = self._load_vae(mconfig['vae']) pipeline_args.update(vae=vae) if not isinstance(name_or_path,Path): - pipeline_args.update(cache_dir=os.path.join(Globals.root,'models',name_or_path)) + pipeline_args.update(cache_dir=global_cache_dir('diffusers')) if using_fp16: pipeline_args.update(torch_dtype=torch.float16) fp_args_list = [{'revision':'fp16'},{}] @@ -791,7 +791,7 @@ def _load_vae(self, vae_config): using_fp16 = self.precision == 'float16' vae_args.update( - cache_dir=os.path.join(Globals.root,'models',name_or_path), + cache_dir=global_cache_dir('diffusers'), local_files_only=not Globals.internet_available, ) diff --git a/ldm/invoke/textual_inversion_training.py b/ldm/invoke/textual_inversion_training.py index 34e83718d6c..7615353ecb6 100644 --- a/ldm/invoke/textual_inversion_training.py +++ b/ldm/invoke/textual_inversion_training.py @@ -518,10 +518,6 @@ def do_textual_inversion_training( pretrained_model_name_or_path = model_conf.get('repo_id',None) or Path(model_conf.get('path')) assert pretrained_model_name_or_path, f"models.yaml error: neither 'repo_id' nor 'path' is defined for {model}" pipeline_args = dict() - if not isinstance(pretrained_model_name_or_path,Path): - pipeline_args.update( - cache_dir=os.path.join(Globals.root,'models',pretrained_model_name_or_path) - ) # Load tokenizer if tokenizer_name: diff --git a/ldm/modules/encoders/modules.py b/ldm/modules/encoders/modules.py index e119bdef5ad..f4706bcb3f4 100644 --- a/ldm/modules/encoders/modules.py +++ b/ldm/modules/encoders/modules.py @@ -10,7 +10,7 @@ from transformers import CLIPTokenizer, CLIPTextModel import kornia from ldm.invoke.devices import choose_torch_device -from ldm.invoke.globals import Globals +from ldm.invoke.globals import Globals, global_cache_dir #from ldm.modules.textual_inversion_manager import TextualInversionManager from ldm.modules.x_transformer import ( @@ -109,7 +109,7 @@ def __init__( BertTokenizerFast, ) - cache = os.path.join(Globals.root,'models/bert-base-uncased') + cache = global_cache_dir('hub') try: self.tokenizer = BertTokenizerFast.from_pretrained( 'bert-base-uncased', @@ -249,7 +249,7 @@ def __init__( transformer:Optional[CLIPTextModel]=None, ): super().__init__() - cache = os.path.join(Globals.root,'models',version) + cache = global_cache_dir('hub') self.tokenizer = tokenizer or CLIPTokenizer.from_pretrained( version, cache_dir=cache, diff --git a/scripts/configure_invokeai.py b/scripts/configure_invokeai.py index c3a583aff2c..64ae1e356df 100755 --- a/scripts/configure_invokeai.py +++ b/scripts/configure_invokeai.py @@ -32,7 +32,7 @@ from tqdm 
import tqdm from transformers import CLIPTokenizer, CLIPTextModel -from ldm.invoke.globals import Globals +from ldm.invoke.globals import Globals, global_cache_dir from ldm.invoke.readline import generic_completer warnings.filterwarnings('ignore') @@ -368,6 +368,7 @@ def _download_diffusion_weights(mconfig:DictConfig, access_token:str, precision: path = download_from_hf( model_class, repo_id, + cache_subdir='diffusers', safety_checker=None, **extra_args, ) @@ -535,9 +536,9 @@ def download_bert(): print('...success',file=sys.stderr) #--------------------------------------------- -def download_from_hf(model_class:object, model_name:str, **kwargs): +def download_from_hf(model_class:object, model_name:str, cache_subdir:Path=Path('hub'), **kwargs): print('',file=sys.stderr) # to prevent tqdm from overwriting - path = os.path.join(Globals.root,Model_dir,model_name) + path = global_cache_dir(cache_subdir) model = model_class.from_pretrained(model_name, cache_dir=path, resume_download=True, diff --git a/scripts/textual_inversion.py b/scripts/textual_inversion.py index 24ce18a0077..fb176a5eecc 100755 --- a/scripts/textual_inversion.py +++ b/scripts/textual_inversion.py @@ -1,11 +1,11 @@ #!/usr/bin/env python # Copyright 2023, Lincoln Stein @lstein -from ldm.invoke.globals import Globals +from ldm.invoke.globals import Globals, set_root from ldm.invoke.textual_inversion_training import parse_args, do_textual_inversion_training if __name__ == "__main__": args = parse_args() - Globals.root = args.root_dir or Globals.root + set_root(args.root_dir or Globals.root) kwargs = vars(args) do_textual_inversion_training(**kwargs) diff --git a/scripts/textual_inversion_fe.py b/scripts/textual_inversion_fe.py index 9a8363a500b..187ac089ba6 100755 --- a/scripts/textual_inversion_fe.py +++ b/scripts/textual_inversion_fe.py @@ -5,7 +5,7 @@ import sys import curses import re -from ldm.invoke.globals import Globals +from ldm.invoke.globals import Globals, set_root from omegaconf import OmegaConf from pathlib import Path from typing import List @@ -223,7 +223,7 @@ def onStart(self): help='Path to the invokeai runtime directory', ) args = parser.parse_args() - Globals.root = args.root_dir + set_root(args.root_dir) myapplication = MyApplication() myapplication.run() From 5aeba074317fe3b3ee8fe991d24717fb62db52d8 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 9 Jan 2023 12:04:34 -0500 Subject: [PATCH 149/199] feat - make model storage compatible with hugging face caching system This commit alters the InvokeAI model directory to be compatible with hugging face, making it easier to share diffusers (and other models) across different programs. - If the HF_HOME environment variable is not set, then models are cached in ~/invokeai/models in a format that is identical to the HuggingFace cache. - If HF_HOME is set, then models are cached wherever HF_HOME points. - To enable sharing with other HuggingFace library clients, set HF_HOME to ~/.cache/huggingface to set the default cache location or to ~/invokeai/models to have huggingface cache inside InvokeAI. 
--- ldm/generate.py | 1 - ldm/invoke/CLI.py | 21 +++++++++------------ ldm/invoke/args.py | 3 +-- ldm/invoke/globals.py | 22 +++------------------- ldm/invoke/model_manager.py | 6 +++--- ldm/invoke/textual_inversion_training.py | 4 ++++ ldm/modules/encoders/modules.py | 6 +++--- scripts/configure_invokeai.py | 7 +++---- scripts/textual_inversion.py | 4 ++-- scripts/textual_inversion_fe.py | 4 ++-- 10 files changed, 30 insertions(+), 48 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index 8081ffc89dd..a5511fcc206 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -206,7 +206,6 @@ def __init__( self.model_manager = ModelManager(mconfig,self.device,self.precision,max_loaded_models=max_loaded_models) # don't accept invalid models fallback = self.model_manager.default_model() or FALLBACK_MODEL_NAME - model = model or fallback if not self.model_manager.valid_model(model): print(f'** "{model}" is not a known model name; falling back to {fallback}.') model = None diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index 1db1b4f6927..1cb59e51e3c 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -659,8 +659,7 @@ def _get_model_name_and_desc(model_manager,completer,model_name:str='',model_des model_name = _get_model_name(model_manager.list_models(),completer,model_name) completer.linebuffer = model_description model_description = input(f'Description for this model [{model_description}]: ').strip() or model_description - default = input(f'Make this model the default? [n] ').startswith(('y','Y')) - return model_name, model_description, default + return model_name, model_description def optimize_model(model_name_or_path:str, gen, opt, completer): manager = gen.model_manager @@ -725,9 +724,9 @@ def del_config(model_name:str, gen, opt, completer): def edit_config(model_name:str, gen, opt, completer): current_model = gen.model_name -# if model_name == current_model: -# print("** Can't edit the active model. !switch to another model first. **") -# return + if model_name == current_model: + print("** Can't edit the active model. !switch to another model first. **") + return manager = gen.model_manager if not (info := manager.model_info(model_name)): @@ -735,17 +734,15 @@ def edit_config(model_name:str, gen, opt, completer): return print(f'\n>> Editing model {model_name} from configuration file {opt.conf}') - new_name,new_description,default = _get_model_name_and_desc(gen.model_manager, - completer, - model_name=model_name, - model_description=info['description'] + new_name,new_description = _get_model_name_and_desc(gen.model_manager, + completer, + model_name=model_name, + model_description=info['description'] ) info['description'] = new_description if new_name != model_name: manager.add_model(new_name,info) manager.del_model(model_name) - if default: - manager.set_default_model(new_name) manager.commit(opt.conf) completer.update_models(manager.list_models()) print('>> Model successfully updated') @@ -759,7 +756,7 @@ def _get_model_name(existing_names,completer,default_name:str='')->str: model_name = default_name if not re.match('^[\w._-]+$',model_name): print('** model name must contain only words, digits and the characters [._-] **') - elif model_name != default_name and model_name in existing_names: + elif model_name in existing_names: print(f'** the name {model_name} is already in use. 
Pick another.') else: done = True diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index 400d1f720d0..9ad12c53dbe 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -97,7 +97,6 @@ from ldm.invoke.globals import Globals from ldm.invoke.prompt_parser import split_weighted_subprompts from argparse import Namespace -from pathlib import Path APP_ID = ldm.invoke.__app_id__ APP_NAME = ldm.invoke.__app_name__ @@ -184,7 +183,7 @@ def parse_args(self): sys.exit(0) print('* Initializing, be patient...') - Globals.root = Path(os.path.abspath(switches.root_dir or Globals.root)) + Globals.root = os.path.abspath(switches.root_dir or Globals.root) Globals.try_patchmatch = switches.patchmatch # now use root directory to find the init file diff --git a/ldm/invoke/globals.py b/ldm/invoke/globals.py index c67dbac145c..7c5b765cbc3 100644 --- a/ldm/invoke/globals.py +++ b/ldm/invoke/globals.py @@ -15,7 +15,6 @@ import os.path as osp from pathlib import Path from argparse import Namespace -from typing import Union Globals = Namespace() @@ -43,27 +42,12 @@ # The CLI will test connectivity at startup time. Globals.internet_available = True -def global_config_dir()->Path: +def global_config_dir()->str: return Path(Globals.root, Globals.config_dir) -def global_models_dir()->Path: +def global_models_dir()->str: return Path(Globals.root, Globals.models_dir) -def global_autoscan_dir()->Path: +def global_autoscan_dir()->str: return Path(Globals.root, Globals.autoscan_dir) -def global_set_root(root_dir:Union[str,Path]): - Globals.root = root_dir - -def global_cache_dir(subdir:Union[str,Path]='')->Path: - ''' - Returns Path to the model cache directory. If a subdirectory - is provided, it will be appended to the end of the path, allowing - for huggingface-style conventions: - global_cache_dir('diffusers') - global_cache_dir('transformers') - ''' - if (home := os.environ.get('HF_HOME')): - return Path(home,subdir) - else: - return Path(Globals.root,'models',subdir) diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index a9db776b03c..9152bd29b13 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -30,7 +30,7 @@ from picklescan.scanner import scan_file_path from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline -from ldm.invoke.globals import Globals, global_models_dir, global_autoscan_dir, global_cache_dir +from ldm.invoke.globals import Globals, global_models_dir, global_autoscan_dir from ldm.util import instantiate_from_config, ask_user DEFAULT_MAX_MODELS=2 @@ -370,7 +370,7 @@ def _load_diffusers_model(self, mconfig): vae = self._load_vae(mconfig['vae']) pipeline_args.update(vae=vae) if not isinstance(name_or_path,Path): - pipeline_args.update(cache_dir=global_cache_dir('diffusers')) + pipeline_args.update(cache_dir=os.path.join(Globals.root,'models',name_or_path)) if using_fp16: pipeline_args.update(torch_dtype=torch.float16) fp_args_list = [{'revision':'fp16'},{}] @@ -791,7 +791,7 @@ def _load_vae(self, vae_config): using_fp16 = self.precision == 'float16' vae_args.update( - cache_dir=global_cache_dir('diffusers'), + cache_dir=os.path.join(Globals.root,'models',name_or_path), local_files_only=not Globals.internet_available, ) diff --git a/ldm/invoke/textual_inversion_training.py b/ldm/invoke/textual_inversion_training.py index 7615353ecb6..34e83718d6c 100644 --- a/ldm/invoke/textual_inversion_training.py +++ b/ldm/invoke/textual_inversion_training.py @@ -518,6 +518,10 @@ def do_textual_inversion_training( 
pretrained_model_name_or_path = model_conf.get('repo_id',None) or Path(model_conf.get('path')) assert pretrained_model_name_or_path, f"models.yaml error: neither 'repo_id' nor 'path' is defined for {model}" pipeline_args = dict() + if not isinstance(pretrained_model_name_or_path,Path): + pipeline_args.update( + cache_dir=os.path.join(Globals.root,'models',pretrained_model_name_or_path) + ) # Load tokenizer if tokenizer_name: diff --git a/ldm/modules/encoders/modules.py b/ldm/modules/encoders/modules.py index f4706bcb3f4..e119bdef5ad 100644 --- a/ldm/modules/encoders/modules.py +++ b/ldm/modules/encoders/modules.py @@ -10,7 +10,7 @@ from transformers import CLIPTokenizer, CLIPTextModel import kornia from ldm.invoke.devices import choose_torch_device -from ldm.invoke.globals import Globals, global_cache_dir +from ldm.invoke.globals import Globals #from ldm.modules.textual_inversion_manager import TextualInversionManager from ldm.modules.x_transformer import ( @@ -109,7 +109,7 @@ def __init__( BertTokenizerFast, ) - cache = global_cache_dir('hub') + cache = os.path.join(Globals.root,'models/bert-base-uncased') try: self.tokenizer = BertTokenizerFast.from_pretrained( 'bert-base-uncased', @@ -249,7 +249,7 @@ def __init__( transformer:Optional[CLIPTextModel]=None, ): super().__init__() - cache = global_cache_dir('hub') + cache = os.path.join(Globals.root,'models',version) self.tokenizer = tokenizer or CLIPTokenizer.from_pretrained( version, cache_dir=cache, diff --git a/scripts/configure_invokeai.py b/scripts/configure_invokeai.py index 64ae1e356df..c3a583aff2c 100755 --- a/scripts/configure_invokeai.py +++ b/scripts/configure_invokeai.py @@ -32,7 +32,7 @@ from tqdm import tqdm from transformers import CLIPTokenizer, CLIPTextModel -from ldm.invoke.globals import Globals, global_cache_dir +from ldm.invoke.globals import Globals from ldm.invoke.readline import generic_completer warnings.filterwarnings('ignore') @@ -368,7 +368,6 @@ def _download_diffusion_weights(mconfig:DictConfig, access_token:str, precision: path = download_from_hf( model_class, repo_id, - cache_subdir='diffusers', safety_checker=None, **extra_args, ) @@ -536,9 +535,9 @@ def download_bert(): print('...success',file=sys.stderr) #--------------------------------------------- -def download_from_hf(model_class:object, model_name:str, cache_subdir:Path=Path('hub'), **kwargs): +def download_from_hf(model_class:object, model_name:str, **kwargs): print('',file=sys.stderr) # to prevent tqdm from overwriting - path = global_cache_dir(cache_subdir) + path = os.path.join(Globals.root,Model_dir,model_name) model = model_class.from_pretrained(model_name, cache_dir=path, resume_download=True, diff --git a/scripts/textual_inversion.py b/scripts/textual_inversion.py index fb176a5eecc..24ce18a0077 100755 --- a/scripts/textual_inversion.py +++ b/scripts/textual_inversion.py @@ -1,11 +1,11 @@ #!/usr/bin/env python # Copyright 2023, Lincoln Stein @lstein -from ldm.invoke.globals import Globals, set_root +from ldm.invoke.globals import Globals from ldm.invoke.textual_inversion_training import parse_args, do_textual_inversion_training if __name__ == "__main__": args = parse_args() - set_root(args.root_dir or Globals.root) + Globals.root = args.root_dir or Globals.root kwargs = vars(args) do_textual_inversion_training(**kwargs) diff --git a/scripts/textual_inversion_fe.py b/scripts/textual_inversion_fe.py index 187ac089ba6..9a8363a500b 100755 --- a/scripts/textual_inversion_fe.py +++ b/scripts/textual_inversion_fe.py @@ -5,7 +5,7 @@ import sys import 
curses import re -from ldm.invoke.globals import Globals, set_root +from ldm.invoke.globals import Globals from omegaconf import OmegaConf from pathlib import Path from typing import List @@ -223,7 +223,7 @@ def onStart(self): help='Path to the invokeai runtime directory', ) args = parser.parse_args() - set_root(args.root_dir) + Globals.root = args.root_dir myapplication = MyApplication() myapplication.run() From a46d1750b51a4d2669d72c32901e2f16f8966855 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 9 Jan 2023 12:17:31 -0500 Subject: [PATCH 150/199] fixes to share models with HuggingFace cache system - If HF_HOME environment variable is defined, then all huggingface models are stored in that directory following the standard conventions. - For seamless interoperability, set HF_HOME to ~/.cache/huggingface - If HF_HOME not defined, then models are stored in ~/invokeai/models. This is equivalent to setting HF_HOME to ~/invokeai/models A future commit will add a migration mechanism so that this change doesn't break previous installs. --- ldm/generate.py | 1 + ldm/invoke/CLI.py | 21 ++++++++++++--------- ldm/invoke/args.py | 3 ++- ldm/invoke/ckpt_generator/inpaint.py | 2 +- ldm/invoke/globals.py | 22 +++++++++++++++++++--- ldm/invoke/model_manager.py | 6 +++--- ldm/invoke/textual_inversion_training.py | 4 ---- ldm/modules/encoders/modules.py | 6 +++--- scripts/configure_invokeai.py | 7 ++++--- scripts/textual_inversion.py | 4 ++-- scripts/textual_inversion_fe.py | 4 ++-- 11 files changed, 49 insertions(+), 31 deletions(-) diff --git a/ldm/generate.py b/ldm/generate.py index a5511fcc206..8081ffc89dd 100644 --- a/ldm/generate.py +++ b/ldm/generate.py @@ -206,6 +206,7 @@ def __init__( self.model_manager = ModelManager(mconfig,self.device,self.precision,max_loaded_models=max_loaded_models) # don't accept invalid models fallback = self.model_manager.default_model() or FALLBACK_MODEL_NAME + model = model or fallback if not self.model_manager.valid_model(model): print(f'** "{model}" is not a known model name; falling back to {fallback}.') model = None diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index 1cb59e51e3c..1db1b4f6927 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -659,7 +659,8 @@ def _get_model_name_and_desc(model_manager,completer,model_name:str='',model_des model_name = _get_model_name(model_manager.list_models(),completer,model_name) completer.linebuffer = model_description model_description = input(f'Description for this model [{model_description}]: ').strip() or model_description - return model_name, model_description + default = input(f'Make this model the default? [n] ').startswith(('y','Y')) + return model_name, model_description, default def optimize_model(model_name_or_path:str, gen, opt, completer): manager = gen.model_manager @@ -724,9 +725,9 @@ def del_config(model_name:str, gen, opt, completer): def edit_config(model_name:str, gen, opt, completer): current_model = gen.model_name - if model_name == current_model: - print("** Can't edit the active model. !switch to another model first. **") - return +# if model_name == current_model: +# print("** Can't edit the active model. !switch to another model first. 
**") +# return manager = gen.model_manager if not (info := manager.model_info(model_name)): @@ -734,15 +735,17 @@ def edit_config(model_name:str, gen, opt, completer): return print(f'\n>> Editing model {model_name} from configuration file {opt.conf}') - new_name,new_description = _get_model_name_and_desc(gen.model_manager, - completer, - model_name=model_name, - model_description=info['description'] + new_name,new_description,default = _get_model_name_and_desc(gen.model_manager, + completer, + model_name=model_name, + model_description=info['description'] ) info['description'] = new_description if new_name != model_name: manager.add_model(new_name,info) manager.del_model(model_name) + if default: + manager.set_default_model(new_name) manager.commit(opt.conf) completer.update_models(manager.list_models()) print('>> Model successfully updated') @@ -756,7 +759,7 @@ def _get_model_name(existing_names,completer,default_name:str='')->str: model_name = default_name if not re.match('^[\w._-]+$',model_name): print('** model name must contain only words, digits and the characters [._-] **') - elif model_name in existing_names: + elif model_name != default_name and model_name in existing_names: print(f'** the name {model_name} is already in use. Pick another.') else: done = True diff --git a/ldm/invoke/args.py b/ldm/invoke/args.py index 9ad12c53dbe..400d1f720d0 100644 --- a/ldm/invoke/args.py +++ b/ldm/invoke/args.py @@ -97,6 +97,7 @@ from ldm.invoke.globals import Globals from ldm.invoke.prompt_parser import split_weighted_subprompts from argparse import Namespace +from pathlib import Path APP_ID = ldm.invoke.__app_id__ APP_NAME = ldm.invoke.__app_name__ @@ -183,7 +184,7 @@ def parse_args(self): sys.exit(0) print('* Initializing, be patient...') - Globals.root = os.path.abspath(switches.root_dir or Globals.root) + Globals.root = Path(os.path.abspath(switches.root_dir or Globals.root)) Globals.try_patchmatch = switches.patchmatch # now use root directory to find the init file diff --git a/ldm/invoke/ckpt_generator/inpaint.py b/ldm/invoke/ckpt_generator/inpaint.py index ac99430a12d..3b965b0ee36 100644 --- a/ldm/invoke/ckpt_generator/inpaint.py +++ b/ldm/invoke/ckpt_generator/inpaint.py @@ -27,7 +27,7 @@ def infill_methods()->list[str]: methods.append('tile') return methods -class Inpaint(CkptImg2Img): +class CkptInpaint(CkptImg2Img): def __init__(self, model, precision): self.init_latent = None self.pil_image = None diff --git a/ldm/invoke/globals.py b/ldm/invoke/globals.py index 7c5b765cbc3..c67dbac145c 100644 --- a/ldm/invoke/globals.py +++ b/ldm/invoke/globals.py @@ -15,6 +15,7 @@ import os.path as osp from pathlib import Path from argparse import Namespace +from typing import Union Globals = Namespace() @@ -42,12 +43,27 @@ # The CLI will test connectivity at startup time. Globals.internet_available = True -def global_config_dir()->str: +def global_config_dir()->Path: return Path(Globals.root, Globals.config_dir) -def global_models_dir()->str: +def global_models_dir()->Path: return Path(Globals.root, Globals.models_dir) -def global_autoscan_dir()->str: +def global_autoscan_dir()->Path: return Path(Globals.root, Globals.autoscan_dir) +def global_set_root(root_dir:Union[str,Path]): + Globals.root = root_dir + +def global_cache_dir(subdir:Union[str,Path]='')->Path: + ''' + Returns Path to the model cache directory. 
If a subdirectory + is provided, it will be appended to the end of the path, allowing + for huggingface-style conventions: + global_cache_dir('diffusers') + global_cache_dir('transformers') + ''' + if (home := os.environ.get('HF_HOME')): + return Path(home,subdir) + else: + return Path(Globals.root,'models',subdir) diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index 9152bd29b13..a9db776b03c 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -30,7 +30,7 @@ from picklescan.scanner import scan_file_path from ldm.invoke.generator.diffusers_pipeline import StableDiffusionGeneratorPipeline -from ldm.invoke.globals import Globals, global_models_dir, global_autoscan_dir +from ldm.invoke.globals import Globals, global_models_dir, global_autoscan_dir, global_cache_dir from ldm.util import instantiate_from_config, ask_user DEFAULT_MAX_MODELS=2 @@ -370,7 +370,7 @@ def _load_diffusers_model(self, mconfig): vae = self._load_vae(mconfig['vae']) pipeline_args.update(vae=vae) if not isinstance(name_or_path,Path): - pipeline_args.update(cache_dir=os.path.join(Globals.root,'models',name_or_path)) + pipeline_args.update(cache_dir=global_cache_dir('diffusers')) if using_fp16: pipeline_args.update(torch_dtype=torch.float16) fp_args_list = [{'revision':'fp16'},{}] @@ -791,7 +791,7 @@ def _load_vae(self, vae_config): using_fp16 = self.precision == 'float16' vae_args.update( - cache_dir=os.path.join(Globals.root,'models',name_or_path), + cache_dir=global_cache_dir('diffusers'), local_files_only=not Globals.internet_available, ) diff --git a/ldm/invoke/textual_inversion_training.py b/ldm/invoke/textual_inversion_training.py index 34e83718d6c..7615353ecb6 100644 --- a/ldm/invoke/textual_inversion_training.py +++ b/ldm/invoke/textual_inversion_training.py @@ -518,10 +518,6 @@ def do_textual_inversion_training( pretrained_model_name_or_path = model_conf.get('repo_id',None) or Path(model_conf.get('path')) assert pretrained_model_name_or_path, f"models.yaml error: neither 'repo_id' nor 'path' is defined for {model}" pipeline_args = dict() - if not isinstance(pretrained_model_name_or_path,Path): - pipeline_args.update( - cache_dir=os.path.join(Globals.root,'models',pretrained_model_name_or_path) - ) # Load tokenizer if tokenizer_name: diff --git a/ldm/modules/encoders/modules.py b/ldm/modules/encoders/modules.py index e119bdef5ad..f4706bcb3f4 100644 --- a/ldm/modules/encoders/modules.py +++ b/ldm/modules/encoders/modules.py @@ -10,7 +10,7 @@ from transformers import CLIPTokenizer, CLIPTextModel import kornia from ldm.invoke.devices import choose_torch_device -from ldm.invoke.globals import Globals +from ldm.invoke.globals import Globals, global_cache_dir #from ldm.modules.textual_inversion_manager import TextualInversionManager from ldm.modules.x_transformer import ( @@ -109,7 +109,7 @@ def __init__( BertTokenizerFast, ) - cache = os.path.join(Globals.root,'models/bert-base-uncased') + cache = global_cache_dir('hub') try: self.tokenizer = BertTokenizerFast.from_pretrained( 'bert-base-uncased', @@ -249,7 +249,7 @@ def __init__( transformer:Optional[CLIPTextModel]=None, ): super().__init__() - cache = os.path.join(Globals.root,'models',version) + cache = global_cache_dir('hub') self.tokenizer = tokenizer or CLIPTokenizer.from_pretrained( version, cache_dir=cache, diff --git a/scripts/configure_invokeai.py b/scripts/configure_invokeai.py index c3a583aff2c..64ae1e356df 100755 --- a/scripts/configure_invokeai.py +++ b/scripts/configure_invokeai.py @@ -32,7 +32,7 @@ from tqdm 
import tqdm from transformers import CLIPTokenizer, CLIPTextModel -from ldm.invoke.globals import Globals +from ldm.invoke.globals import Globals, global_cache_dir from ldm.invoke.readline import generic_completer warnings.filterwarnings('ignore') @@ -368,6 +368,7 @@ def _download_diffusion_weights(mconfig:DictConfig, access_token:str, precision: path = download_from_hf( model_class, repo_id, + cache_subdir='diffusers', safety_checker=None, **extra_args, ) @@ -535,9 +536,9 @@ def download_bert(): print('...success',file=sys.stderr) #--------------------------------------------- -def download_from_hf(model_class:object, model_name:str, **kwargs): +def download_from_hf(model_class:object, model_name:str, cache_subdir:Path=Path('hub'), **kwargs): print('',file=sys.stderr) # to prevent tqdm from overwriting - path = os.path.join(Globals.root,Model_dir,model_name) + path = global_cache_dir(cache_subdir) model = model_class.from_pretrained(model_name, cache_dir=path, resume_download=True, diff --git a/scripts/textual_inversion.py b/scripts/textual_inversion.py index 24ce18a0077..fb176a5eecc 100755 --- a/scripts/textual_inversion.py +++ b/scripts/textual_inversion.py @@ -1,11 +1,11 @@ #!/usr/bin/env python # Copyright 2023, Lincoln Stein @lstein -from ldm.invoke.globals import Globals +from ldm.invoke.globals import Globals, set_root from ldm.invoke.textual_inversion_training import parse_args, do_textual_inversion_training if __name__ == "__main__": args = parse_args() - Globals.root = args.root_dir or Globals.root + set_root(args.root_dir or Globals.root) kwargs = vars(args) do_textual_inversion_training(**kwargs) diff --git a/scripts/textual_inversion_fe.py b/scripts/textual_inversion_fe.py index 9a8363a500b..187ac089ba6 100755 --- a/scripts/textual_inversion_fe.py +++ b/scripts/textual_inversion_fe.py @@ -5,7 +5,7 @@ import sys import curses import re -from ldm.invoke.globals import Globals +from ldm.invoke.globals import Globals, set_root from omegaconf import OmegaConf from pathlib import Path from typing import List @@ -223,7 +223,7 @@ def onStart(self): help='Path to the invokeai runtime directory', ) args = parser.parse_args() - Globals.root = args.root_dir + set_root(args.root_dir) myapplication = MyApplication() myapplication.run() From 30b74c4c6ba98a2614c9bffe6b8d06eb2ca54934 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 9 Jan 2023 12:19:02 -0500 Subject: [PATCH 151/199] fix error "no attribute CkptInpaint" --- ldm/invoke/ckpt_generator/inpaint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ldm/invoke/ckpt_generator/inpaint.py b/ldm/invoke/ckpt_generator/inpaint.py index ac99430a12d..3b965b0ee36 100644 --- a/ldm/invoke/ckpt_generator/inpaint.py +++ b/ldm/invoke/ckpt_generator/inpaint.py @@ -27,7 +27,7 @@ def infill_methods()->list[str]: methods.append('tile') return methods -class Inpaint(CkptImg2Img): +class CkptInpaint(CkptImg2Img): def __init__(self, model, precision): self.init_latent = None self.pil_image = None From 81ded862cab10854583f3ca95ce0aaf0885c9696 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 9 Jan 2023 14:11:35 -0500 Subject: [PATCH 152/199] model_manager.list_models() returns entire model config stanza+status --- ldm/invoke/model_manager.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index 9152bd29b13..d9cc9c6f744 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -162,14 +162,12 @@ def list_models(self) -> 
dict: models = {} for name in self.config: stanza = self.config[name] - format = stanza.get('format','diffusers') + format = stanza.get('format','ckpt') config = stanza.get('config','no config') - models[name] = dict( - description = stanza.get('description',None), - format = 'vae' if 'VAE/default' in config else format, - status = 'active' if self.current_model == name else 'cached' if name in self.models else 'not loaded', - ) - + models[name]=dict() + models[name].update(stanza) + models[name]['format'] = 'vae' if 'VAE/default' in config else format + models[name]['status'] = 'active' if self.current_model == name else 'cached' if name in self.models else 'not loaded' return models def print_models(self) -> None: From e728b4625c88c0ced5268827eb9013f9000b46cd Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Tue, 10 Jan 2023 09:49:21 +1300 Subject: [PATCH 153/199] Initial Draft - Model Manager Diffusers --- frontend/src/app/invokeai.d.ts | 11 + frontend/src/app/socketio/actions.ts | 6 +- .../components/ModelManager/AddModel.tsx | 1 + .../ModelManager/DiffusersModelEdit.tsx | 210 ++++++++++++++++++ .../components/ModelManager/ModelEdit.tsx | 2 + .../components/ModelManager/ModelList.tsx | 76 +++++-- .../ModelManager/ModelManagerModal.tsx | 17 +- .../components/ModelManager/SearchModels.tsx | 1 + ldm/invoke/model_manager.py | 51 ++++- 9 files changed, 344 insertions(+), 31 deletions(-) create mode 100644 frontend/src/features/system/components/ModelManager/DiffusersModelEdit.tsx diff --git a/frontend/src/app/invokeai.d.ts b/frontend/src/app/invokeai.d.ts index d5ee6adbbc8..8938e7265cb 100644 --- a/frontend/src/app/invokeai.d.ts +++ b/frontend/src/app/invokeai.d.ts @@ -170,6 +170,8 @@ export declare type Model = { width?: number; height?: number; default?: boolean; + format?: string; + repo_id?: string; }; export declare type ModelList = Record; @@ -188,6 +190,15 @@ export declare type InvokeModelConfigProps = { width: number | undefined; height: number | undefined; default: boolean | undefined; + format: string | undefined; +}; + +export declare type InvokeDiffusersModelConfigProps = { + name: string | undefined; + description: string | undefined; + repo_id: string | undefined; + vae: string | undefined; + default: boolean | undefined; }; /** diff --git a/frontend/src/app/socketio/actions.ts b/frontend/src/app/socketio/actions.ts index e07b12ebf4f..60ea433ac33 100644 --- a/frontend/src/app/socketio/actions.ts +++ b/frontend/src/app/socketio/actions.ts @@ -32,9 +32,9 @@ export const requestSystemConfig = createAction( export const searchForModels = createAction('socketio/searchForModels'); -export const addNewModel = createAction( - 'socketio/addNewModel' -); +export const addNewModel = createAction< + InvokeAI.InvokeModelConfigProps | InvokeAI.InvokeDiffusersModelConfigProps +>('socketio/addNewModel'); export const deleteModel = createAction('socketio/deleteModel'); diff --git a/frontend/src/features/system/components/ModelManager/AddModel.tsx b/frontend/src/features/system/components/ModelManager/AddModel.tsx index bc08fa08b6f..dac2e2cc34d 100644 --- a/frontend/src/features/system/components/ModelManager/AddModel.tsx +++ b/frontend/src/features/system/components/ModelManager/AddModel.tsx @@ -65,6 +65,7 @@ export default function AddModel() { vae: '', width: 512, height: 512, + format: 'ckpt', default: false, }; diff --git a/frontend/src/features/system/components/ModelManager/DiffusersModelEdit.tsx 
b/frontend/src/features/system/components/ModelManager/DiffusersModelEdit.tsx new file mode 100644 index 00000000000..f9de0faa691 --- /dev/null +++ b/frontend/src/features/system/components/ModelManager/DiffusersModelEdit.tsx @@ -0,0 +1,210 @@ +import { createSelector } from '@reduxjs/toolkit'; + +import React, { useEffect, useState } from 'react'; +import IAIInput from 'common/components/IAIInput'; +import IAIButton from 'common/components/IAIButton'; + +import { useAppDispatch, useAppSelector } from 'app/storeHooks'; +import { systemSelector } from 'features/system/store/systemSelectors'; + +import { + Flex, + FormControl, + FormErrorMessage, + FormHelperText, + FormLabel, + Text, + VStack, +} from '@chakra-ui/react'; + +import { Field, Formik } from 'formik'; +import { useTranslation } from 'react-i18next'; +import { addNewModel } from 'app/socketio/actions'; + +import _ from 'lodash'; + +import type { RootState } from 'app/store'; +import type { InvokeDiffusersModelConfigProps } from 'app/invokeai'; + +const selector = createSelector( + [systemSelector], + (system) => { + const { openModel, model_list } = system; + return { + model_list, + openModel, + }; + }, + { + memoizeOptions: { + resultEqualityCheck: _.isEqual, + }, + } +); + +export default function DiffusersModelEdit() { + const { openModel, model_list } = useAppSelector(selector); + const isProcessing = useAppSelector( + (state: RootState) => state.system.isProcessing + ); + + const dispatch = useAppDispatch(); + + const { t } = useTranslation(); + + const [editModelFormValues, setEditModelFormValues] = + useState({ + name: '', + description: '', + repo_id: '', + vae: '', + default: false, + }); + + useEffect(() => { + if (openModel) { + const retrievedModel = _.pickBy(model_list, (val, key) => { + return _.isEqual(key, openModel); + }); + setEditModelFormValues({ + name: openModel, + description: retrievedModel[openModel]?.description, + repo_id: retrievedModel[openModel]?.repo_id, + vae: retrievedModel[openModel]?.vae + ? retrievedModel[openModel]?.vae + : '', + default: retrievedModel[openModel]?.default, + }); + } + }, [model_list, openModel]); + + const editModelFormSubmitHandler = ( + values: InvokeDiffusersModelConfigProps + ) => { + dispatch(addNewModel(values)); + }; + + return openModel ? ( + + + + {openModel} + + + + + {({ handleSubmit, errors, touched }) => ( +
+ + {/* Description */} + + + {t('modelmanager:description')} + + + + {!!errors.description && touched.description ? ( + {errors.description} + ) : ( + + {t('modelmanager:descriptionValidationMsg')} + + )} + + + + {/* Repo ID */} + + + {t('modelmanager:modelLocation')} + + + + {!!errors.repo_id && touched.repo_id ? ( + {errors.repo_id} + ) : ( + + {t('modelmanager:modelLocationValidationMsg')} + + )} + + + + {/* VAE */} + + + {t('modelmanager:vaeLocation')} + + + + {!!errors.vae && touched.vae ? ( + {errors.vae} + ) : ( + + {t('modelmanager:vaeLocationValidationMsg')} + + )} + + + + + {t('modelmanager:updateModel')} + + +
+ )} +
+
+
+ ) : ( + + + Pick A Model To Edit + + + ); +} diff --git a/frontend/src/features/system/components/ModelManager/ModelEdit.tsx b/frontend/src/features/system/components/ModelManager/ModelEdit.tsx index fcb8d701ba6..5d816e170e6 100644 --- a/frontend/src/features/system/components/ModelManager/ModelEdit.tsx +++ b/frontend/src/features/system/components/ModelManager/ModelEdit.tsx @@ -68,6 +68,7 @@ export default function ModelEdit() { width: 512, height: 512, default: false, + format: 'ckpt', }); useEffect(() => { @@ -84,6 +85,7 @@ export default function ModelEdit() { width: retrievedModel[openModel]?.width, height: retrievedModel[openModel]?.height, default: retrievedModel[openModel]?.default, + format: 'ckpt', }); } }, [model_list, openModel]); diff --git a/frontend/src/features/system/components/ModelManager/ModelList.tsx b/frontend/src/features/system/components/ModelManager/ModelList.tsx index f5b2419100c..fa607bb503f 100644 --- a/frontend/src/features/system/components/ModelManager/ModelList.tsx +++ b/frontend/src/features/system/components/ModelManager/ModelList.tsx @@ -1,5 +1,5 @@ import { useState } from 'react'; -import { Flex, Text } from '@chakra-ui/react'; +import { Box, Flex, Text } from '@chakra-ui/react'; import { createSelector } from '@reduxjs/toolkit'; import IAIInput from 'common/components/IAIInput'; @@ -22,17 +22,12 @@ const modelListSelector = createSelector( return { name: key, ...model }; }); - const activeModel = models.find((model) => model.status === 'active'); - - return { - models, - activeModel: activeModel, - }; + return models; } ); const ModelList = () => { - const { models } = useAppSelector(modelListSelector); + const models = useAppSelector(modelListSelector); const [searchText, setSearchText] = useState(''); @@ -43,7 +38,8 @@ const ModelList = () => { }, 400); const renderModelListItems = () => { - const modelListItemsToRender: ReactNode[] = []; + const ckptModelListItemsToRender: ReactNode[] = []; + const diffusersModelListItemsToRender: ReactNode[] = []; const filteredModelListItemsToRender: ReactNode[] = []; models.forEach((model, i) => { @@ -57,19 +53,59 @@ const ModelList = () => { /> ); } - modelListItemsToRender.push( - - ); + if (model.format !== 'diffusers') { + ckptModelListItemsToRender.push( + + ); + } else { + diffusersModelListItemsToRender.push( + + ); + } }); - return searchText !== '' - ? filteredModelListItemsToRender - : modelListItemsToRender; + return searchText !== '' ? 
( + filteredModelListItemsToRender + ) : ( + + + + Checkpoint Models + + {ckptModelListItemsToRender} + + + + Diffusers Models + + {diffusersModelListItemsToRender} + + + ); }; return ( diff --git a/frontend/src/features/system/components/ModelManager/ModelManagerModal.tsx b/frontend/src/features/system/components/ModelManager/ModelManagerModal.tsx index 8fecf121982..25d3305863a 100644 --- a/frontend/src/features/system/components/ModelManager/ModelManagerModal.tsx +++ b/frontend/src/features/system/components/ModelManager/ModelManagerModal.tsx @@ -14,6 +14,9 @@ import ModelEdit from './ModelEdit'; import ModelList from './ModelList'; import type { ReactElement } from 'react'; +import { useAppSelector } from 'app/storeHooks'; +import { RootState } from 'app/store'; +import DiffusersModelEdit from './DiffusersModelEdit'; type ModelManagerModalProps = { children: ReactElement; @@ -28,6 +31,14 @@ export default function ModelManagerModal({ onClose: onModelManagerModalClose, } = useDisclosure(); + const model_list = useAppSelector( + (state: RootState) => state.system.model_list + ); + + const openModel = useAppSelector( + (state: RootState) => state.system.openModel + ); + const { t } = useTranslation(); return ( @@ -50,7 +61,11 @@ export default function ModelManagerModal({ columnGap={'2rem'} > - + {openModel && model_list[openModel]['format'] === 'diffusers' ? ( + + ) : ( + + )} diff --git a/frontend/src/features/system/components/ModelManager/SearchModels.tsx b/frontend/src/features/system/components/ModelManager/SearchModels.tsx index f5a67220be0..ab129bf19f4 100644 --- a/frontend/src/features/system/components/ModelManager/SearchModels.tsx +++ b/frontend/src/features/system/components/ModelManager/SearchModels.tsx @@ -178,6 +178,7 @@ export default function SearchModels() { width: 512, height: 512, default: false, + format: 'ckpt', }; dispatch(addNewModel(modelFormat)); }); diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index d9cc9c6f744..34854099532 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -162,12 +162,49 @@ def list_models(self) -> dict: models = {} for name in self.config: stanza = self.config[name] - format = stanza.get('format','ckpt') - config = stanza.get('config','no config') - models[name]=dict() - models[name].update(stanza) - models[name]['format'] = 'vae' if 'VAE/default' in config else format - models[name]['status'] = 'active' if self.current_model == name else 'cached' if name in self.models else 'not loaded' + models[name] = dict() + + format = stanza.get('format','ckpt') # Determine Format + + # Common Attribs + description = stanza.get('description', None) + if self.current_model == name: + status = 'active' + elif name in self.models: + status = 'cached' + else: + status = 'not loaded' + + # Checkpoint Config Parse + if format == 'ckpt': + config = stanza.get('config', None) + weights = stanza.get('weights', None) + vae = stanza.get('vae', None) + width = stanza.get('width', 512) + height = stanza.get('height', 512) + models[name].update( + description = description, + format = format, + config = config, + weights = weights, + vae = vae, + width = width, + height = height, + status = status + ) + + # Diffusers Config Parse + if format == 'diffusers': + repo_id = stanza.get('repo_id', None) + vae = stanza.get('vae', None)['repo_id'] + models[name].update( + description = description, + format = format, + repo_id = repo_id, + vae = vae, + status = status + ) + return models def print_models(self) -> None: @@ 
-667,7 +704,7 @@ def commit(self,config_file_path:str) -> None: if not os.path.isabs(config_file_path): config_file_path = os.path.normpath(os.path.join(Globals.root,config_file_path)) tmpfile = os.path.join(os.path.dirname(config_file_path),'new_config.tmp') - with open(tmpfile, 'w') as outfile: + with open(tmpfile, 'w', encoding="utf-8") as outfile: outfile.write(self.preamble()) outfile.write(yaml_str) os.replace(tmpfile,config_file_path) From a17f9014abb9dc3d90626785853278a20ed17fd3 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 9 Jan 2023 20:56:15 +0000 Subject: [PATCH 154/199] added hash function to diffusers --- ldm/invoke/model_manager.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index a9db776b03c..998603e592d 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -352,7 +352,6 @@ def _load_ckpt_model(self, model_name, mconfig): def _load_diffusers_model(self, mconfig): name_or_path = self.model_name_or_path(mconfig) - model_hash = 'FIXME' using_fp16 = self.precision == 'float16' print(f'>> Loading diffusers model from {name_or_path}') @@ -402,6 +401,8 @@ def _load_diffusers_model(self, mconfig): pipeline.to(self.device) + model_hash = self._hash_diffuser(name_or_path) + # square images??? width = pipeline.unet.config.sample_size * pipeline.vae_scale_factor height = width @@ -762,6 +763,36 @@ def _push_newest_model(self,model_name:str) -> None: def _has_cuda(self) -> bool: return self.device.type == 'cuda' + def _diffuser_sha256(self,name_or_path:Union[str, Path])->Union[str,bytes]: + path = None + if isinstance(name_or_path,Path): + path = name_or_path + else: + owner,repo = name_or_path.split('/') + path = Path(global_cache_dir('diffusers') / f'models--{owner}--{repo}') + if not path.exists(): + return None + hashpath = path / 'checksum.sha256' + if hashpath.exists() and path.stat().st_mtime <= hashpath.stat().st_mtime: + with open(hashpath) as f: + hash = f.read() + return hash + print('>> Calculating sha256 hash of models files') + tic = time.time() + sha = hashlib.sha256() + count = 0 + for root, dirs, files in os.walk(path): + for name in files: + count += 1 + with open(os.path.join(root,name),'rb') as f: + sha.update(f.read()) + hash = sha.hexdigest() + toc = time.time() + print(f'>> sha256 = {hash} ({count} files hashed in','%4.2fs)' % (toc - tic)) + with open(hashpath,'w') as f: + f.write(hash) + return hash + def _cached_sha256(self,path,data) -> Union[str, bytes]: dirname = os.path.dirname(path) basename = os.path.basename(path) From 1637cbdea678f7c02d388d118ee5dc8bf7e4b62b Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Mon, 9 Jan 2023 16:21:12 -0500 Subject: [PATCH 155/199] implement sha256 hashes on diffusers models --- ldm/invoke/model_manager.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index 998603e592d..74863ba0cd3 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -401,7 +401,7 @@ def _load_diffusers_model(self, mconfig): pipeline.to(self.device) - model_hash = self._hash_diffuser(name_or_path) + model_hash = self._diffuser_sha256(name_or_path) # square images??? 
width = pipeline.unet.config.sample_size * pipeline.vae_scale_factor @@ -777,18 +777,18 @@ def _diffuser_sha256(self,name_or_path:Union[str, Path])->Union[str,bytes]: with open(hashpath) as f: hash = f.read() return hash - print('>> Calculating sha256 hash of models files') + print(' | Calculating sha256 hash of model files') tic = time.time() sha = hashlib.sha256() count = 0 - for root, dirs, files in os.walk(path): + for root, dirs, files in os.walk(path, followlinks=False): for name in files: count += 1 with open(os.path.join(root,name),'rb') as f: sha.update(f.read()) hash = sha.hexdigest() toc = time.time() - print(f'>> sha256 = {hash} ({count} files hashed in','%4.2fs)' % (toc - tic)) + print(f' | sha256 = {hash} ({count} files hashed in','%4.2fs)' % (toc - tic)) with open(hashpath,'w') as f: f.write(hash) return hash @@ -804,7 +804,7 @@ def _cached_sha256(self,path,data) -> Union[str, bytes]: hash = f.read() return hash - print('>> Calculating sha256 hash of weights file') + print(' | Calculating sha256 hash of weights file') tic = time.time() sha = hashlib.sha256() sha.update(data) From 1cd93a714e62f8dd178d911bc99f1af2d35d0999 Mon Sep 17 00:00:00 2001 From: blessedcoolant <54517381+blessedcoolant@users.noreply.github.com> Date: Tue, 10 Jan 2023 12:50:46 +1300 Subject: [PATCH 156/199] Add Model Manager Support for Diffusers --- frontend/public/locales/common/en.json | 1 + .../public/locales/modelmanager/en-US.json | 1 + frontend/public/locales/modelmanager/en.json | 1 + frontend/src/app/invokeai.d.ts | 5 +- .../features/options/store/optionsSlice.ts | 9 + .../ModelManager/AddCheckpointModel.tsx | 328 ++++++++++++++++ .../ModelManager/AddDiffusersModel.tsx | 205 ++++++++++ .../components/ModelManager/AddModel.tsx | 359 +++--------------- ...{ModelEdit.tsx => CheckpointModelEdit.tsx} | 2 +- .../ModelManager/DiffusersModelEdit.tsx | 25 +- .../components/ModelManager/ModelList.tsx | 2 +- .../ModelManager/ModelManagerModal.tsx | 15 +- 12 files changed, 626 insertions(+), 327 deletions(-) create mode 100644 frontend/src/features/system/components/ModelManager/AddCheckpointModel.tsx create mode 100644 frontend/src/features/system/components/ModelManager/AddDiffusersModel.tsx rename frontend/src/features/system/components/ModelManager/{ModelEdit.tsx => CheckpointModelEdit.tsx} (99%) diff --git a/frontend/public/locales/common/en.json b/frontend/public/locales/common/en.json index aed5882710d..b67799e7895 100644 --- a/frontend/public/locales/common/en.json +++ b/frontend/public/locales/common/en.json @@ -34,6 +34,7 @@ "upload": "Upload", "close": "Close", "load": "Load", + "back": "Back", "statusConnected": "Connected", "statusDisconnected": "Disconnected", "statusError": "Error", diff --git a/frontend/public/locales/modelmanager/en-US.json b/frontend/public/locales/modelmanager/en-US.json index aa824590afa..30bb6004490 100644 --- a/frontend/public/locales/modelmanager/en-US.json +++ b/frontend/public/locales/modelmanager/en-US.json @@ -16,6 +16,7 @@ "config": "Config", "configValidationMsg": "Path to the config file of your model.", "modelLocation": "Model Location", + "repo_id": "Repo ID", "modelLocationValidationMsg": "Path to where your model is located.", "vaeLocation": "VAE Location", "vaeLocationValidationMsg": "Path to where your VAE is located.", diff --git a/frontend/public/locales/modelmanager/en.json b/frontend/public/locales/modelmanager/en.json index 820a87f8498..f57d237d11a 100644 --- a/frontend/public/locales/modelmanager/en.json +++ 
b/frontend/public/locales/modelmanager/en.json @@ -16,6 +16,7 @@ "config": "Config", "configValidationMsg": "Path to the config file of your model.", "modelLocation": "Model Location", + "repo_id": "Repo ID", "modelLocationValidationMsg": "Path to where your model is located.", "vaeLocation": "VAE Location", "vaeLocationValidationMsg": "Path to where your VAE is located.", diff --git a/frontend/src/app/invokeai.d.ts b/frontend/src/app/invokeai.d.ts index 8938e7265cb..d3b027b58c4 100644 --- a/frontend/src/app/invokeai.d.ts +++ b/frontend/src/app/invokeai.d.ts @@ -197,8 +197,11 @@ export declare type InvokeDiffusersModelConfigProps = { name: string | undefined; description: string | undefined; repo_id: string | undefined; - vae: string | undefined; default: boolean | undefined; + format: string | undefined; + vae: { + repo_id: string | undefined; + }; }; /** diff --git a/frontend/src/features/options/store/optionsSlice.ts b/frontend/src/features/options/store/optionsSlice.ts index 6ef16050128..9e38e78f579 100644 --- a/frontend/src/features/options/store/optionsSlice.ts +++ b/frontend/src/features/options/store/optionsSlice.ts @@ -57,6 +57,7 @@ export interface OptionsState { width: number; shouldUseCanvasBetaLayout: boolean; shouldShowExistingModelsInSearch: boolean; + addNewModelUIOption: 'ckpt' | 'diffusers' | null; } const initialOptionsState: OptionsState = { @@ -105,6 +106,7 @@ const initialOptionsState: OptionsState = { width: 512, shouldUseCanvasBetaLayout: false, shouldShowExistingModelsInSearch: false, + addNewModelUIOption: null, }; const initialState: OptionsState = initialOptionsState; @@ -412,6 +414,12 @@ export const optionsSlice = createSlice({ ) => { state.shouldShowExistingModelsInSearch = action.payload; }, + setAddNewModelUIOption: ( + state, + action: PayloadAction<'ckpt' | 'diffusers' | null> + ) => { + state.addNewModelUIOption = action.payload; + }, }, }); @@ -469,6 +477,7 @@ export const { setWidth, setShouldUseCanvasBetaLayout, setShouldShowExistingModelsInSearch, + setAddNewModelUIOption, } = optionsSlice.actions; export default optionsSlice.reducer; diff --git a/frontend/src/features/system/components/ModelManager/AddCheckpointModel.tsx b/frontend/src/features/system/components/ModelManager/AddCheckpointModel.tsx new file mode 100644 index 00000000000..7b996900690 --- /dev/null +++ b/frontend/src/features/system/components/ModelManager/AddCheckpointModel.tsx @@ -0,0 +1,328 @@ +import { + FormControl, + FormErrorMessage, + FormHelperText, + FormLabel, + HStack, + Text, + VStack, +} from '@chakra-ui/react'; + +import React from 'react'; +import IAIInput from 'common/components/IAIInput'; +import IAINumberInput from 'common/components/IAINumberInput'; +import IAICheckbox from 'common/components/IAICheckbox'; +import IAIButton from 'common/components/IAIButton'; + +import SearchModels from './SearchModels'; + +import { addNewModel } from 'app/socketio/actions'; + +import { useAppDispatch, useAppSelector } from 'app/storeHooks'; + +import { Field, Formik } from 'formik'; +import { useTranslation } from 'react-i18next'; + +import type { FieldInputProps, FormikProps } from 'formik'; +import type { RootState } from 'app/store'; +import type { InvokeModelConfigProps } from 'app/invokeai'; +import { setAddNewModelUIOption } from 'features/options/store/optionsSlice'; +import IAIIconButton from 'common/components/IAIIconButton'; +import { BiArrowBack } from 'react-icons/bi'; + +const MIN_MODEL_SIZE = 64; +const MAX_MODEL_SIZE = 2048; + +export default function 
AddCheckpointModel() { + const dispatch = useAppDispatch(); + const { t } = useTranslation(); + + const isProcessing = useAppSelector( + (state: RootState) => state.system.isProcessing + ); + + function hasWhiteSpace(s: string) { + return /\\s/g.test(s); + } + + function baseValidation(value: string) { + let error; + if (hasWhiteSpace(value)) error = t('modelmanager:cannotUseSpaces'); + return error; + } + + const addModelFormValues: InvokeModelConfigProps = { + name: '', + description: '', + config: 'configs/stable-diffusion/v1-inference.yaml', + weights: '', + vae: '', + width: 512, + height: 512, + format: 'ckpt', + default: false, + }; + + const addModelFormSubmitHandler = (values: InvokeModelConfigProps) => { + dispatch(addNewModel(values)); + dispatch(setAddNewModelUIOption(null)); + }; + + const [addManually, setAddmanually] = React.useState(false); + + return ( + <> + dispatch(setAddNewModelUIOption(null))} + width="max-content" + position="absolute" + zIndex={1} + size="sm" + right={12} + top={3} + icon={} + /> + + + setAddmanually(!addManually)} + /> + + {addManually && ( + + {({ handleSubmit, errors, touched }) => ( +
+ + + {t('modelmanager:manual')} + + {/* Name */} + + + {t('modelmanager:name')} + + + + {!!errors.name && touched.name ? ( + {errors.name} + ) : ( + + {t('modelmanager:nameValidationMsg')} + + )} + + + + {/* Description */} + + + {t('modelmanager:description')} + + + + {!!errors.description && touched.description ? ( + {errors.description} + ) : ( + + {t('modelmanager:descriptionValidationMsg')} + + )} + + + + {/* Config */} + + + {t('modelmanager:config')} + + + + {!!errors.config && touched.config ? ( + {errors.config} + ) : ( + + {t('modelmanager:configValidationMsg')} + + )} + + + + {/* Weights */} + + + {t('modelmanager:modelLocation')} + + + + {!!errors.weights && touched.weights ? ( + {errors.weights} + ) : ( + + {t('modelmanager:modelLocationValidationMsg')} + + )} + + + + {/* VAE */} + + + {t('modelmanager:vaeLocation')} + + + + {!!errors.vae && touched.vae ? ( + {errors.vae} + ) : ( + + {t('modelmanager:vaeLocationValidationMsg')} + + )} + + + + + {/* Width */} + + + {t('modelmanager:width')} + + + + {({ + field, + form, + }: { + field: FieldInputProps; + form: FormikProps; + }) => ( + + form.setFieldValue(field.name, Number(value)) + } + /> + )} + + + {!!errors.width && touched.width ? ( + {errors.width} + ) : ( + + {t('modelmanager:widthValidationMsg')} + + )} + + + + {/* Height */} + + + {t('modelmanager:height')} + + + + {({ + field, + form, + }: { + field: FieldInputProps; + form: FormikProps; + }) => ( + + form.setFieldValue(field.name, Number(value)) + } + /> + )} + + + {!!errors.height && touched.height ? ( + {errors.height} + ) : ( + + {t('modelmanager:heightValidationMsg')} + + )} + + + + + + {t('modelmanager:addModel')} + + +
+ )} +
+ )} + + ); +} diff --git a/frontend/src/features/system/components/ModelManager/AddDiffusersModel.tsx b/frontend/src/features/system/components/ModelManager/AddDiffusersModel.tsx new file mode 100644 index 00000000000..ee7940e5df3 --- /dev/null +++ b/frontend/src/features/system/components/ModelManager/AddDiffusersModel.tsx @@ -0,0 +1,205 @@ +import { + Flex, + FormControl, + FormErrorMessage, + FormHelperText, + FormLabel, + VStack, +} from '@chakra-ui/react'; +import { useAppDispatch, useAppSelector } from 'app/storeHooks'; +import IAIButton from 'common/components/IAIButton'; +import IAIIconButton from 'common/components/IAIIconButton'; +import IAIInput from 'common/components/IAIInput'; +import { setAddNewModelUIOption } from 'features/options/store/optionsSlice'; +import { Field, Formik } from 'formik'; +import React from 'react'; +import { useTranslation } from 'react-i18next'; +import { BiArrowBack } from 'react-icons/bi'; + +import type { RootState } from 'app/store'; +import { InvokeDiffusersModelConfigProps } from 'app/invokeai'; +import { addNewModel } from 'app/socketio/actions'; + +export default function AddDiffusersModel() { + const dispatch = useAppDispatch(); + const { t } = useTranslation(); + + const isProcessing = useAppSelector( + (state: RootState) => state.system.isProcessing + ); + + function hasWhiteSpace(s: string) { + return /\\s/g.test(s); + } + + function baseValidation(value: string) { + let error; + if (hasWhiteSpace(value)) error = t('modelmanager:cannotUseSpaces'); + return error; + } + + const addModelFormValues: InvokeDiffusersModelConfigProps = { + name: '', + description: '', + repo_id: '', + format: 'diffusers', + default: false, + vae: { + repo_id: '', + }, + }; + + const addModelFormSubmitHandler = ( + values: InvokeDiffusersModelConfigProps + ) => { + let diffusersModelToAdd = values; + + if (values.vae.repo_id == '') { + diffusersModelToAdd = { + ...diffusersModelToAdd, + vae: { repo_id: values.repo_id + '/vae' }, + }; + } + + dispatch(addNewModel(diffusersModelToAdd)); + dispatch(setAddNewModelUIOption(null)); + }; + + return ( + + dispatch(setAddNewModelUIOption(null))} + width="max-content" + position="absolute" + zIndex={1} + size="sm" + right={12} + top={3} + icon={} + /> + + {({ handleSubmit, errors, touched }) => ( +
+          [diffusers form: FormControls for name, description, repo_id (labelled
+           t('modelmanager:modelLocation') / t('modelmanager:repo_id')) and
+           vae.repo_id (labelled t('modelmanager:vaeLocation')), each wiring a
+           Formik <Field> to an IAIInput and rendering FormErrorMessage when
+           errors.<field> && touched.<field>, otherwise the matching
+           t('modelmanager:...ValidationMsg') helper text; the form closes with a
+           submit IAIButton labelled t('modelmanager:addModel')]
+ )} +
+
+ ); +} diff --git a/frontend/src/features/system/components/ModelManager/AddModel.tsx b/frontend/src/features/system/components/ModelManager/AddModel.tsx index dac2e2cc34d..3447f3a1a0e 100644 --- a/frontend/src/features/system/components/ModelManager/AddModel.tsx +++ b/frontend/src/features/system/components/ModelManager/AddModel.tsx @@ -1,10 +1,5 @@ import { Flex, - FormControl, - FormErrorMessage, - FormHelperText, - FormLabel, - HStack, Modal, ModalBody, ModalCloseButton, @@ -13,73 +8,63 @@ import { ModalOverlay, Text, useDisclosure, - VStack, } from '@chakra-ui/react'; import React from 'react'; -import IAIInput from 'common/components/IAIInput'; -import IAINumberInput from 'common/components/IAINumberInput'; -import IAICheckbox from 'common/components/IAICheckbox'; import IAIButton from 'common/components/IAIButton'; -import SearchModels from './SearchModels'; - -import { addNewModel } from 'app/socketio/actions'; - -import { useAppDispatch, useAppSelector } from 'app/storeHooks'; import { FaPlus } from 'react-icons/fa'; -import { Field, Formik } from 'formik'; + import { useTranslation } from 'react-i18next'; +import { useAppDispatch, useAppSelector } from 'app/storeHooks'; -import type { FieldInputProps, FormikProps } from 'formik'; import type { RootState } from 'app/store'; -import type { InvokeModelConfigProps } from 'app/invokeai'; - -const MIN_MODEL_SIZE = 64; -const MAX_MODEL_SIZE = 2048; +import { setAddNewModelUIOption } from 'features/options/store/optionsSlice'; +import AddCheckpointModel from './AddCheckpointModel'; +import AddDiffusersModel from './AddDiffusersModel'; + +function AddModelBox({ + text, + onClick, +}: { + text: string; + onClick?: () => void; +}) { + return ( + + {text} + + ); +} export default function AddModel() { const { isOpen, onOpen, onClose } = useDisclosure(); - const dispatch = useAppDispatch(); - const { t } = useTranslation(); - const isProcessing = useAppSelector( - (state: RootState) => state.system.isProcessing + const addNewModelUIOption = useAppSelector( + (state: RootState) => state.options.addNewModelUIOption ); - function hasWhiteSpace(s: string) { - return /\\s/g.test(s); - } - - function baseValidation(value: string) { - let error; - if (hasWhiteSpace(value)) error = t('modelmanager:cannotUseSpaces'); - return error; - } - - const addModelFormValues: InvokeModelConfigProps = { - name: '', - description: '', - config: 'configs/stable-diffusion/v1-inference.yaml', - weights: '', - vae: '', - width: 512, - height: 512, - format: 'ckpt', - default: false, - }; + const dispatch = useAppDispatch(); - const addModelFormSubmitHandler = (values: InvokeModelConfigProps) => { - dispatch(addNewModel(values)); - onClose(); - }; + const { t } = useTranslation(); const addModelModalClose = () => { onClose(); }; - const [addManually, setAddmanually] = React.useState(false); - return ( <> - + {t('modelmanager:addNewModel')} - + - - setAddmanually(!addManually)} - /> - - {addManually && ( - - {({ handleSubmit, errors, touched }) => ( -
-              [previous inline manual-entry form, structurally identical to the
-               new AddCheckpointModel form above: FormControls for name,
-               description, config, weights, vae, width and height, each with its
-               FormErrorMessage / FormHelperText pair, plus the submit IAIButton]
- )} -
+            {addNewModelUIOption == null && (
+              [two <AddModelBox> tiles whose onClick handlers dispatch
+               setAddNewModelUIOption('ckpt') and setAddNewModelUIOption('diffusers')
+               respectively]
+            )}
+            {addNewModelUIOption == 'ckpt' && <AddCheckpointModel />}
+            {addNewModelUIOption == 'diffusers' && <AddDiffusersModel />}
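
The refactored AddModel modal above no longer hosts a form of its own: it reads
addNewModelUIOption from the options slice, shows the two AddModelBox tiles while
that value is null, and otherwise renders AddCheckpointModel or AddDiffusersModel,
each of which dispatches setAddNewModelUIOption(null) to go back. The options
slice itself is not part of this patch, so the snippet below is only a minimal
sketch of how that field and reducer could be declared; the AddNewModelType alias
and the OptionsState shape are assumptions inferred from how the components use
the action, not the project's actual slice.

// Minimal sketch only; the real optionsSlice lives elsewhere in the frontend
// and is not shown in this patch.
import { createSlice, PayloadAction } from '@reduxjs/toolkit';

type AddNewModelType = 'ckpt' | 'diffusers' | null; // assumed alias

interface OptionsState {
  addNewModelUIOption: AddNewModelType; // assumed slice shape
}

const initialState: OptionsState = {
  addNewModelUIOption: null, // null shows the two AddModelBox choices
};

const optionsSlice = createSlice({
  name: 'options',
  initialState,
  reducers: {
    // AddModelBox clicks store 'ckpt' or 'diffusers'; the back buttons and the
    // submit handlers dispatch setAddNewModelUIOption(null) to return to the chooser.
    setAddNewModelUIOption: (state, action: PayloadAction<AddNewModelType>) => {
      state.addNewModelUIOption = action.payload;
    },
  },
});

export const { setAddNewModelUIOption } = optionsSlice.actions;
export default optionsSlice.reducer;

Keeping the selection in the store rather than in local component state is what
lets AddCheckpointModel and AddDiffusersModel dismiss their own view (on submit
or via the back arrow) without the modal passing callbacks down.
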
diff --git a/frontend/src/features/system/components/ModelManager/ModelEdit.tsx b/frontend/src/features/system/components/ModelManager/CheckpointModelEdit.tsx similarity index 99% rename from frontend/src/features/system/components/ModelManager/ModelEdit.tsx rename to frontend/src/features/system/components/ModelManager/CheckpointModelEdit.tsx index 5d816e170e6..8999f71281b 100644 --- a/frontend/src/features/system/components/ModelManager/ModelEdit.tsx +++ b/frontend/src/features/system/components/ModelManager/CheckpointModelEdit.tsx @@ -48,7 +48,7 @@ const selector = createSelector( const MIN_MODEL_SIZE = 64; const MAX_MODEL_SIZE = 2048; -export default function ModelEdit() { +export default function CheckpointModelEdit() { const { openModel, model_list } = useAppSelector(selector); const isProcessing = useAppSelector( (state: RootState) => state.system.isProcessing diff --git a/frontend/src/features/system/components/ModelManager/DiffusersModelEdit.tsx b/frontend/src/features/system/components/ModelManager/DiffusersModelEdit.tsx index f9de0faa691..6dc0d7fbbcb 100644 --- a/frontend/src/features/system/components/ModelManager/DiffusersModelEdit.tsx +++ b/frontend/src/features/system/components/ModelManager/DiffusersModelEdit.tsx @@ -57,8 +57,9 @@ export default function DiffusersModelEdit() { name: '', description: '', repo_id: '', - vae: '', + vae: { repo_id: '' }, default: false, + format: 'diffusers', }); useEffect(() => { @@ -66,14 +67,18 @@ export default function DiffusersModelEdit() { const retrievedModel = _.pickBy(model_list, (val, key) => { return _.isEqual(key, openModel); }); + setEditModelFormValues({ name: openModel, description: retrievedModel[openModel]?.description, repo_id: retrievedModel[openModel]?.repo_id, - vae: retrievedModel[openModel]?.vae - ? retrievedModel[openModel]?.vae - : '', + vae: { + repo_id: retrievedModel[openModel]?.vae + ? retrievedModel[openModel]?.vae + : '', + }, default: retrievedModel[openModel]?.default, + format: 'diffusers', }); } }, [model_list, openModel]); @@ -158,20 +163,22 @@ export default function DiffusersModelEdit() { {/* VAE */} - + {t('modelmanager:vaeLocation')} - {!!errors.vae && touched.vae ? ( - {errors.vae} + {!!errors.vae?.repo_id && touched.vae?.repo_id ? 
( + {errors.vae?.repo_id} ) : ( {t('modelmanager:vaeLocationValidationMsg')} diff --git a/frontend/src/features/system/components/ModelManager/ModelList.tsx b/frontend/src/features/system/components/ModelManager/ModelList.tsx index fa607bb503f..f335e4b23ce 100644 --- a/frontend/src/features/system/components/ModelManager/ModelList.tsx +++ b/frontend/src/features/system/components/ModelManager/ModelList.tsx @@ -43,7 +43,7 @@ const ModelList = () => { const filteredModelListItemsToRender: ReactNode[] = []; models.forEach((model, i) => { - if (model.name.startsWith(searchText)) { + if (model.name.toLowerCase().startsWith(searchText.toLowerCase())) { filteredModelListItemsToRender.push( - + {t('modelmanager:modelManager')} ) : ( - + )} From ee03e984f62d89a29dc2d551be7a3caeed9ace16 Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 10 Jan 2023 03:02:27 +0000 Subject: [PATCH 157/199] fix various problems with model manager - in cli import functions, fix not enough values to unpack from _get_name_and_desc() - fix crash when using old-style vae: value with new-style diffuser --- ldm/invoke/CLI.py | 10 +++++++--- ldm/invoke/model_manager.py | 4 +++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/ldm/invoke/CLI.py b/ldm/invoke/CLI.py index 1db1b4f6927..bddc691ee00 100644 --- a/ldm/invoke/CLI.py +++ b/ldm/invoke/CLI.py @@ -596,7 +596,7 @@ def import_diffuser_model(path_or_repo:str, gen, opt, completer)->str: manager = gen.model_manager default_name = Path(path_or_repo).stem default_description = f'Imported model {default_name}' - model_name, model_description = _get_model_name_and_desc( + model_name, model_description, default = _get_model_name_and_desc( manager, completer, model_name=default_name, @@ -609,13 +609,15 @@ def import_diffuser_model(path_or_repo:str, gen, opt, completer)->str: description = model_description): print('** model failed to import') return None + if default: + manager.set_default_model(model_name) return model_name def import_ckpt_model(path_or_url:str, gen, opt, completer)->str: manager = gen.model_manager default_name = Path(path_or_url).stem default_description = f'Imported model {default_name}' - model_name, model_description = _get_model_name_and_desc( + model_name, model_description, default = _get_model_name_and_desc( manager, completer, model_name=default_name, @@ -640,6 +642,8 @@ def import_ckpt_model(path_or_url:str, gen, opt, completer)->str: print('** model failed to import') return None + if default: + manager.set_model_default(model_name) return model_name def _verify_load(model_name:str, gen)->bool: @@ -676,7 +680,7 @@ def optimize_model(model_name_or_path:str, gen, opt, completer): return elif os.path.exists(model_name_or_path): ckpt_path = Path(model_name_or_path) - model_name,model_description = _get_model_name_and_desc( + model_name,model_description, default = _get_model_name_and_desc( manager, completer, ckpt_path.stem, diff --git a/ldm/invoke/model_manager.py b/ldm/invoke/model_manager.py index 442bc848f86..212d71df39d 100644 --- a/ldm/invoke/model_manager.py +++ b/ldm/invoke/model_manager.py @@ -196,7 +196,9 @@ def list_models(self) -> dict: # Diffusers Config Parse if format == 'diffusers': repo_id = stanza.get('repo_id', None) - vae = stanza.get('vae', None)['repo_id'] + vae = None + if 'vae' in stanza: + vae = stanza['vae'].get('repo_id',None) models[name].update( description = description, format = format, From 7f867ab2d7315e12fabe5aa6b74c9e1ffa4462eb Mon Sep 17 00:00:00 2001 From: Lincoln Stein Date: Tue, 10 Jan 2023 03:09:23 
+0000
Subject: [PATCH 158/199] rebuild frontend

---
 ...y-5c5a479d.js => index-legacy-474a75fe.js} |  8 +-
 .../{index.ec2d89c6.js => index.1b59e83a.js}  | 98 +++++++++----------
 frontend/dist/index.html                      |  4 +-
 3 files changed, 55 insertions(+), 55 deletions(-)
 rename frontend/dist/assets/{index-legacy-5c5a479d.js => index-legacy-474a75fe.js} (78%)
 rename frontend/dist/assets/{index.ec2d89c6.js => index.1b59e83a.js} (65%)
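
One detail from AddDiffusersModel.tsx earlier in this series is worth isolating:
when the optional VAE field is left blank, the submit handler points the VAE at
the vae/ subfolder of the main repo_id before dispatching addNewModel. The sketch
below restates that rule as a standalone helper; withDefaultVae is a hypothetical
name and the local DiffusersModelConfig interface merely mirrors the fields of
InvokeDiffusersModelConfigProps, so only the repo_id + '/vae' convention itself
comes from the patch.

// Hypothetical helper restating the defaulting rule from addModelFormSubmitHandler.
interface DiffusersModelConfig {
  name: string;
  description: string;
  repo_id: string;
  format: 'diffusers';
  default: boolean;
  vae: { repo_id: string };
}

function withDefaultVae(model: DiffusersModelConfig): DiffusersModelConfig {
  // An empty VAE field means "use the vae/ subfolder of the model's own repo".
  if (model.vae.repo_id === '') {
    return { ...model, vae: { repo_id: `${model.repo_id}/vae` } };
  }
  return model;
}

// Illustrative values only: a blank VAE resolves to '<repo_id>/vae'.
const submitted = withDefaultVae({
  name: 'my-diffusers-model',
  description: 'example entry',
  repo_id: 'some-org/some-model',
  format: 'diffusers',
  default: false,
  vae: { repo_id: '' },
});
console.log(submitted.vae.repo_id); // some-org/some-model/vae

This mirrors the server-side change in commit 157, where list_models() in
model_manager.py now only reads vae['repo_id'] when a vae stanza is present.
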