From 82e867769026432dc2ce0082a435679f2abe0d66 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Fri, 29 Nov 2024 16:44:43 -0800 Subject: [PATCH] Make LaProp weight decay match typical PyTorch 'decoupled' behaviour where it's scaled by LR --- timm/optim/laprop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/timm/optim/laprop.py b/timm/optim/laprop.py index cdb30f587..a17c81e6a 100644 --- a/timm/optim/laprop.py +++ b/timm/optim/laprop.py @@ -116,6 +116,6 @@ def step(self, closure=None): p.add_(exp_avg, alpha=-step_size) if group['weight_decay'] != 0: - p.add_(p, alpha=-group['weight_decay']) + p.add_(p, alpha=-(group['lr'] * group['weight_decay'])) return loss \ No newline at end of file