diff --git a/colossalai/legacy/engine/_base_engine.py b/colossalai/legacy/engine/_base_engine.py
index a4e90b218489..9a1a2dc325a3 100644
--- a/colossalai/legacy/engine/_base_engine.py
+++ b/colossalai/legacy/engine/_base_engine.py
@@ -157,7 +157,7 @@ def step(self):
         """Execute parameter update
         """
         self._all_reduce_gradients()
-        self.optimizer.clip_grad_norm(self.model, self._clip_grad_norm)
+        self.optimizer.clip_grad_by_norm(self._clip_grad_norm)
         return self.optimizer.step()

     def backward(self, loss: Tensor):
diff --git a/colossalai/legacy/engine/gradient_accumulation/_gradient_accumulation.py b/colossalai/legacy/engine/gradient_accumulation/_gradient_accumulation.py
index 1f91286a6656..c2270dc53a50 100644
--- a/colossalai/legacy/engine/gradient_accumulation/_gradient_accumulation.py
+++ b/colossalai/legacy/engine/gradient_accumulation/_gradient_accumulation.py
@@ -74,7 +74,7 @@ def clip_grad_norm(self, model: nn.Module, max_norm: float) -> None:
         if self.accumulate_step < self.accumulate_size:
             pass
         else:
-            self.optim.clip_grad_norm(model, max_norm)
+            self.optim.clip_grad_by_norm(max_norm)

     def backward(self, loss: Tensor) -> None:
         """Execute backward pass.
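
The sketch below is not ColossalAI code; it is a minimal stand-in (the class name SimpleOptimizerWrapper and its methods are illustrative assumptions) showing the call-site pattern the diff moves to: a clip_grad_by_norm(max_norm)-style method can gather parameters from the wrapped optimizer's own param groups, so the caller no longer has to pass the model, unlike the old clip_grad_norm(model, max_norm) signature.

# Minimal sketch, assuming a plain PyTorch optimizer; SimpleOptimizerWrapper is a
# hypothetical stand-in for an optimizer wrapper, not ColossalAI's actual class.
import torch
import torch.nn as nn


class SimpleOptimizerWrapper:
    """Toy wrapper exposing a clip_grad_by_norm-style method."""

    def __init__(self, optim: torch.optim.Optimizer):
        self.optim = optim

    def clip_grad_by_norm(self, max_norm: float, norm_type: float = 2.0) -> torch.Tensor:
        # Collect parameters from the optimizer's own param groups, so no model
        # handle is needed at the call site (mirroring the change in the diff).
        params = [p for group in self.optim.param_groups for p in group["params"]]
        return nn.utils.clip_grad_norm_(params, max_norm, norm_type=norm_type)

    def step(self) -> None:
        self.optim.step()


if __name__ == "__main__":
    model = nn.Linear(4, 2)
    wrapper = SimpleOptimizerWrapper(torch.optim.SGD(model.parameters(), lr=0.1))
    loss = model(torch.randn(8, 4)).sum()
    loss.backward()
    wrapper.clip_grad_by_norm(1.0)  # new-style call: max_norm only, no model argument
    wrapper.step()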