diff --git a/examples/textual_inversion/README.md b/examples/textual_inversion/README.md
index 0a2723f0982f..982141049b1c 100644
--- a/examples/textual_inversion/README.md
+++ b/examples/textual_inversion/README.md
@@ -94,6 +94,12 @@ to a number larger than one, *e.g.*:
 --num_vectors 5
 ```
 
+**CPU**: If you run on a 4th Gen Intel Xeon (or later) CPU, using IPEX together with bf16 gives a significant speedup.
+Add `--mixed_precision="bf16"` and `--use_ipex` to the training command and install the following package:
+```
+pip install intel-extension-for-pytorch
+```
+
 The saved textual inversion vectors will then be larger in size compared to the default case.
 
 ### Inference
diff --git a/examples/textual_inversion/textual_inversion.py b/examples/textual_inversion/textual_inversion.py
index 50bcc992064d..1bb49f29af96 100644
--- a/examples/textual_inversion/textual_inversion.py
+++ b/examples/textual_inversion/textual_inversion.py
@@ -341,7 +341,15 @@ def parse_args():
         help=(
             "Whether to use mixed precision. Choose"
             "between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >= 1.10."
-            "and an Nvidia Ampere GPU."
+            " and a 4th Gen Intel Xeon (or later) CPU or an Nvidia Ampere GPU."
+        ),
+    )
+    parser.add_argument(
+        "--use_ipex",
+        action="store_true",
+        help=(
+            "Whether or not to use IPEX (Intel Extension for PyTorch) to accelerate training. "
+            "Requires a 3rd Gen Intel Xeon (or later) CPU."
+        ),
     )
     parser.add_argument(
@@ -779,6 +787,13 @@ def main():
     unet.to(accelerator.device, dtype=weight_dtype)
     vae.to(accelerator.device, dtype=weight_dtype)
 
+    if args.use_ipex:
+        import intel_extension_for_pytorch as ipex
+
+        # Optimize the frozen unet and vae with IPEX for faster execution on Intel CPUs.
+        unet = ipex.optimize(unet, dtype=weight_dtype)
+        vae = ipex.optimize(vae, dtype=weight_dtype)
+
     # We need to recalculate our total training steps as the size of the training dataloader may have changed.
     num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
     if overrode_max_train_steps:
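After installing the optional dependency, a quick sanity check that the module the patch imports is actually available can save a failed run later. This is a one-liner sketch; `intel_extension_for_pytorch` is the package name from the diff above.

```bash
# Confirm IPEX is importable and print its version before launching training.
python -c "import intel_extension_for_pytorch as ipex; print(ipex.__version__)"
```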
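Putting the README note into practice, a full invocation would look roughly like the sketch below. Only `--mixed_precision="bf16"` and `--use_ipex` come from this patch; the model name, data directory, and hyperparameters are illustrative placeholders patterned on the example command elsewhere in this README.

```bash
export MODEL_NAME="runwayml/stable-diffusion-v1-5"   # illustrative model choice
export DATA_DIR="./cat"                              # illustrative training images

accelerate launch textual_inversion.py \
  --pretrained_model_name_or_path=$MODEL_NAME \
  --train_data_dir=$DATA_DIR \
  --learnable_property="object" \
  --placeholder_token="<cat-toy>" \
  --initializer_token="toy" \
  --resolution=512 \
  --train_batch_size=1 \
  --max_train_steps=3000 \
  --learning_rate=5.0e-04 \
  --output_dir="textual_inversion_cat" \
  --mixed_precision="bf16" \
  --use_ipex
```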