diff --git a/examples/textual_inversion/README.md b/examples/textual_inversion/README.md
index 0a2723f0982f..982141049b1c 100644
--- a/examples/textual_inversion/README.md
+++ b/examples/textual_inversion/README.md
@@ -94,6 +94,12 @@ to a number larger than one, *e.g.*:
 --num_vectors 5
 ```
 
+**CPU**: If you run on a 4th Gen Intel Xeon (or later) CPU, using IPEX together with bf16 gives a significant speedup.
+Add `--mixed_precision="bf16"` and `--use_ipex` to the training command and install the following package:
+```
+pip install intel-extension-for-pytorch
+```
+
 The saved textual inversion vectors will then be larger in size compared to the default case.
 
 ### Inference
diff --git a/examples/textual_inversion/textual_inversion.py b/examples/textual_inversion/textual_inversion.py
index 50bcc992064d..1bb49f29af96 100644
--- a/examples/textual_inversion/textual_inversion.py
+++ b/examples/textual_inversion/textual_inversion.py
@@ -341,7 +341,15 @@ def parse_args():
         help=(
             "Whether to use mixed precision. Choose"
             "between fp16 and bf16 (bfloat16). Bf16 requires PyTorch >= 1.10."
-            "and an Nvidia Ampere GPU."
+            " and a 4th Gen Intel Xeon (or later) CPU or an Nvidia Ampere GPU."
+        ),
+    )
+    parser.add_argument(
+        "--use_ipex",
+        action="store_true",
+        help=(
+            "Whether or not to use IPEX (Intel Extension for PyTorch) to accelerate training. "
+            "Requires a 3rd Gen Intel Xeon (or later) CPU."
+        ),
     )
     parser.add_argument(
@@ -779,6 +787,13 @@ def main():
     unet.to(accelerator.device, dtype=weight_dtype)
     vae.to(accelerator.device, dtype=weight_dtype)
 
+    if args.use_ipex:
+        import intel_extension_for_pytorch as ipex
+
+        # Optimize the frozen unet and vae with IPEX for faster execution on Intel CPUs.
+        unet = ipex.optimize(unet, dtype=weight_dtype)
+        vae = ipex.optimize(vae, dtype=weight_dtype)
+
     # We need to recalculate our total training steps as the size of the training dataloader may have changed.
     num_update_steps_per_epoch = math.ceil(len(train_dataloader) / args.gradient_accumulation_steps)
     if overrode_max_train_steps:
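After installing the optional dependency, a quick sanity check that the module the patch imports is actually available can save a failed run later. This is a one-liner sketch; `intel_extension_for_pytorch` is the package name from the diff above.

```bash
# Confirm IPEX is importable and print its version before launching training.
python -c "import intel_extension_for_pytorch as ipex; print(ipex.__version__)"
```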
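Putting the README note into practice, a full invocation would look roughly like the sketch below. Only `--mixed_precision="bf16"` and `--use_ipex` come from this patch; the model name, data directory, and hyperparameters are illustrative placeholders patterned on the example command elsewhere in this README.

```bash
export MODEL_NAME="runwayml/stable-diffusion-v1-5"   # illustrative model choice
export DATA_DIR="./cat"                              # illustrative training images

accelerate launch textual_inversion.py \
  --pretrained_model_name_or_path=$MODEL_NAME \
  --train_data_dir=$DATA_DIR \
  --learnable_property="object" \
  --placeholder_token="<cat-toy>" \
  --initializer_token="toy" \
  --resolution=512 \
  --train_batch_size=1 \
  --max_train_steps=3000 \
  --learning_rate=5.0e-04 \
  --output_dir="textual_inversion_cat" \
  --mixed_precision="bf16" \
  --use_ipex
```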