From 66ed2ce008729747ed9abc3beed992152fab2c71 Mon Sep 17 00:00:00 2001
From: Vadim Markovtsev
Date: Fri, 7 Jul 2023 21:45:38 +0200
Subject: [PATCH] Optimize the gradient step

- Avoid extra memory allocations
- 9x faster if compiled under Inductor
---
 sophia.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/sophia.py b/sophia.py
index 5c5e3f9..59b9576 100644
--- a/sophia.py
+++ b/sophia.py
@@ -191,12 +191,11 @@ def _single_tensor_sophiag(params: List[Tensor],
             step = step_t
             step_size = lr
             step_size_neg = step_size.neg()
-
-            ratio = (exp_avg.abs() / (rho * bs * hess + 1e-15)).clamp(None,1)
-            param.addcmul_(exp_avg.sign(), ratio, value=step_size_neg)
         else:
             step = step_t.item()
-            step_size_neg = - lr
-
-            ratio = (exp_avg.abs() / (rho * bs * hess + 1e-15)).clamp(None,1)
-            param.addcmul_(exp_avg.sign(), ratio, value=step_size_neg)
\ No newline at end of file
+            step_size_neg = - lr
+
+        ratio = rho * bs * hess + 1e-15
+        torch.div(exp_avg, ratio, out=ratio)
+        torch.clamp_(ratio, -1, 1)
+        param.add_(ratio, alpha=step_size_neg)
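
--
Note (not part of the patch): the rewrite is behavior-preserving because, for a
strictly positive denominator d, sign(m) * clamp(|m| / d, max=1) equals
clamp(m / d, -1, 1), and rho * bs * hess + 1e-15 is strictly positive whenever
hess >= 0. Below is a minimal sketch checking that equivalence; the tensor
shape and the rho/bs values are illustrative assumptions, not taken from the
repository:

    import torch

    # Illustrative inputs; the shape and the rho/bs values are assumptions.
    torch.manual_seed(0)
    exp_avg = torch.randn(1024)
    hess = torch.rand(1024)  # nonnegative, as required for a positive denominator
    rho, bs = 0.04, 480

    # Old update term: sign(m) * clamp(|m| / d, max=1), which allocates
    # temporaries for abs(), the division, clamp(), and sign().
    old = (exp_avg.abs() / (rho * bs * hess + 1e-15)).clamp(None, 1) \
        * exp_avg.sign()

    # New update term: clamp(m / d, -1, 1). The denominator buffer is
    # reused via out=, and the clamp runs in place, so no extra tensors
    # are allocated beyond `ratio` itself.
    ratio = rho * bs * hess + 1e-15
    torch.div(exp_avg, ratio, out=ratio)
    torch.clamp_(ratio, -1, 1)

    assert torch.allclose(old, ratio)

Folding sign() and abs() into a symmetric clamp is also what lets the hoisted
code live after the if/else: both branches now only compute step_size_neg, and
the single update path is friendlier to fusion by a compiler such as Inductor.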