From 2d65f470d54986f2df542cb8332e31113c04112d Mon Sep 17 00:00:00 2001
From: NanoCode012
Date: Fri, 2 Feb 2024 03:55:05 +0900
Subject: [PATCH] fix(model): apply gate fp32 only for mixtral (#1241)

* fix(model): apply gate fp32 only for mixtral

* Update src/axolotl/utils/models.py

* fix gate layer check

---------

Co-authored-by: Wing Lian
---
 src/axolotl/utils/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/axolotl/utils/models.py b/src/axolotl/utils/models.py
index 224e3a258b..52a81ea2c0 100644
--- a/src/axolotl/utils/models.py
+++ b/src/axolotl/utils/models.py
@@ -676,7 +676,7 @@ def load_model(
     if not cfg.fsdp:
         # FSDP doesn't like mixed Float and BFloat16
         for name, module in model.named_modules():
-            if any(m in name for m in ["norm", "gate"]):
+            if "norm" in name or name.endswith(".gate"):
                 module.to(torch.float32)
             if model_config.model_type == "btlm":
                 # don't upcast lm_head for btlm
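Note on the fix: the old check used substring matching, so any module whose name merely contains "gate" was upcast to float32, including large Llama-style gate_proj Linear layers, not just Mixtral's small MoE router gate. The new check limits the upcast to modules whose name ends in ".gate". A minimal standalone sketch of the two predicates follows; the module names are illustrative of typical Llama/Mixtral layouts, not taken from the patch itself:

# Sketch of the layer-name check before and after this patch.
# The names below are hypothetical examples of common module paths.

names = [
    "model.layers.0.input_layernorm",        # RMSNorm: should be fp32
    "model.layers.0.block_sparse_moe.gate",  # Mixtral MoE router: should be fp32
    "model.layers.0.mlp.gate_proj",          # Llama MLP projection: should stay in bf16
]

def old_check(name: str) -> bool:
    # Pre-patch: substring match, so "gate_proj" is also upcast.
    return any(m in name for m in ["norm", "gate"])

def new_check(name: str) -> bool:
    # Post-patch: only modules literally named ".gate" (the MoE router) are upcast.
    return "norm" in name or name.endswith(".gate")

for name in names:
    print(f"{name}: old={old_check(name)} new={new_check(name)}")

# old_check wrongly returns True for mlp.gate_proj, upcasting a large Linear
# layer to float32; new_check upcasts only the norms and the router gate.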