fix: huber_schedule exponential not working on sd3_train.py
kohya-ss committed Dec 1, 2024
1 parent a5a27fe commit 14f642f
Showing 2 changed files with 4 additions and 6 deletions.
library/train_util.py (1 addition, 1 deletion)

@@ -5875,7 +5875,7 @@ def get_huber_threshold(args, timesteps: torch.Tensor, noise_scheduler) -> torch.Tensor:
         alpha = -math.log(args.huber_c) / noise_scheduler.config.num_train_timesteps
         result = torch.exp(-alpha * timesteps) * args.huber_scale
     elif args.huber_schedule == "snr":
-        if noise_scheduler is None or not hasattr(noise_scheduler, "alphas_cumprod"):
+        if not hasattr(noise_scheduler, "alphas_cumprod"):
             raise NotImplementedError("Huber schedule 'snr' is not supported with the current model.")
         alphas_cumprod = torch.index_select(noise_scheduler.alphas_cumprod, 0, timesteps.cpu())
         sigmas = ((1.0 - alphas_cumprod) / alphas_cumprod) ** 0.5
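A note on what the simplified guard does: after this commit, sd3_train.py no longer passes None as the scheduler (see the second file below), so a plain hasattr check is enough to decide whether the 'snr' schedule is usable. Below is a minimal standalone sketch of that guard and the sigma computation; DDPMLikeScheduler, FlowLikeScheduler, and snr_sigmas are illustrative stand-ins, not names from this repository.

```python
import torch


class DDPMLikeScheduler:
    # DDPM-style schedulers expose alphas_cumprod (values here are illustrative)
    alphas_cumprod = torch.linspace(0.9999, 0.0001, 1000)


class FlowLikeScheduler:
    # flow-matching schedulers have no alphas_cumprod attribute
    pass


def snr_sigmas(timesteps: torch.Tensor, noise_scheduler) -> torch.Tensor:
    # mirrors the hasattr-only guard and the sigma formula from the hunk above
    if not hasattr(noise_scheduler, "alphas_cumprod"):
        raise NotImplementedError("Huber schedule 'snr' is not supported with the current model.")
    alphas_cumprod = torch.index_select(noise_scheduler.alphas_cumprod, 0, timesteps.cpu())
    return ((1.0 - alphas_cumprod) / alphas_cumprod) ** 0.5  # 1 / snr per timestep


t = torch.tensor([0, 500, 999])
print(snr_sigmas(t, DDPMLikeScheduler()))  # works: alphas_cumprod is available

try:
    snr_sigmas(t, FlowLikeScheduler())     # raises a clear NotImplementedError
except NotImplementedError as e:
    print(e)
```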
sd3_train.py (3 additions, 5 deletions)

@@ -675,8 +675,8 @@ def grad_hook(parameter: torch.Tensor):
     progress_bar = tqdm(range(args.max_train_steps), smoothing=0, disable=not accelerator.is_local_main_process, desc="steps")
     global_step = 0

-    # noise_scheduler = sd3_train_utils.FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=3.0)
-    # noise_scheduler_copy = copy.deepcopy(noise_scheduler)
+    # only used to get timesteps, etc. TODO manage timesteps etc. separately
+    dummy_scheduler = sd3_train_utils.FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=3.0)

     if accelerator.is_main_process:
         init_kwargs = {}

@@ -844,9 +844,7 @@ def grad_hook(parameter: torch.Tensor):
         # 1,
         # )
         # calculate loss
-        loss = train_util.conditional_loss(
-            args, model_pred.float(), target.float(), timesteps, "none", None
-        )
+        loss = train_util.conditional_loss(args, model_pred.float(), target.float(), timesteps, "none", dummy_scheduler)
         if args.masked_loss or ("alpha_masks" in batch and batch["alpha_masks"] is not None):
             loss = apply_masked_loss(loss, batch)
         loss = loss.mean([1, 2, 3])
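The call-site change above is what actually fixes the reported bug: with None as the scheduler argument, the "exponential" schedule has no config.num_train_timesteps to read, while any scheduler-like object that carries it (such as the FlowMatchEulerDiscreteScheduler now created as dummy_scheduler) is sufficient. Below is a small self-contained repro under that assumption; DummyScheduler and exponential_threshold are illustrative, not code from the repository.

```python
import math

import torch


class _Config:
    num_train_timesteps = 1000


class DummyScheduler:
    # stand-in for the FlowMatchEulerDiscreteScheduler used as dummy_scheduler above
    config = _Config()


def exponential_threshold(timesteps, noise_scheduler, huber_c=0.1, huber_scale=1.0):
    # same expression as the "exponential" branch of get_huber_threshold
    alpha = -math.log(huber_c) / noise_scheduler.config.num_train_timesteps
    return torch.exp(-alpha * timesteps) * huber_scale


timesteps = torch.tensor([10, 500, 990])

try:
    exponential_threshold(timesteps, None)   # pre-fix behavior: sd3_train.py passed None
except AttributeError as e:
    print("fails with None:", e)             # 'NoneType' object has no attribute 'config'

print(exponential_threshold(timesteps, DummyScheduler()))  # works with the dummy scheduler
```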
