
Commit

clip gradients as in paper and also make sure not to do gradient sync until last step
lucidrains committed May 25, 2024
1 parent 422efba commit 936210c
Showing 2 changed files with 9 additions and 3 deletions.
10 changes: 8 additions & 2 deletions alphafold3_pytorch/trainer.py
@@ -158,15 +158,21 @@ def __call__(
         steps = 0
 
         while steps < self.num_train_steps:
-            for _ in range(self.grad_accum_every):
+
+            for grad_accum_step in range(self.grad_accum_every):
+                is_accumulating = grad_accum_step < (self.grad_accum_every - 1)
+
                 inputs = next(dl)
 
-                loss = self.model(**inputs)
+                with self.fabric.no_backward_sync(self.model, enabled = is_accumulating):
+                    loss = self.model(**inputs)
 
                 self.fabric.backward(loss / self.grad_accum_every)
 
             print(f'loss: {loss.item():.3f}')
 
+            self.fabric.clip_gradients(self.model, self.optimizer, max_norm = self.clip_grad_norm)
+
             self.optimizer.step()
 
             if self.is_main:
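For context, below is a minimal, self-contained sketch of the Lightning Fabric pattern this commit adopts: skip gradient synchronization across ranks on every accumulation step except the last via fabric.no_backward_sync, then clip the accumulated gradients with fabric.clip_gradients just before the optimizer step. The toy model, data, and the values of grad_accum_every and clip_grad_norm are illustrative stand-ins rather than the repository's Trainer; following Fabric's documented usage, the backward call is placed inside the no_backward_sync block.

import torch
from torch import nn
from lightning.fabric import Fabric

fabric = Fabric(accelerator = 'cpu', devices = 1)
fabric.launch()

model = nn.Linear(16, 1)                                      # stand-in for the actual model
optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)
model, optimizer = fabric.setup(model, optimizer)

grad_accum_every = 4                                          # illustrative values, not repo defaults
clip_grad_norm = 10.0

for grad_accum_step in range(grad_accum_every):
    # only sync gradients across processes on the final accumulation step
    is_accumulating = grad_accum_step < (grad_accum_every - 1)

    inputs = torch.randn(8, 16)
    target = torch.randn(8, 1)

    with fabric.no_backward_sync(model, enabled = is_accumulating):
        loss = nn.functional.mse_loss(model(inputs), target)
        fabric.backward(loss / grad_accum_every)

# clip the accumulated gradients once, right before stepping the optimizer
fabric.clip_gradients(model, optimizer, max_norm = clip_grad_norm)

optimizer.step()
optimizer.zero_grad()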
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "alphafold3-pytorch"
-version = "0.0.39"
+version = "0.0.40"
 description = "Alphafold 3 - Pytorch"
 authors = [
     { name = "Phil Wang", email = "[email protected]" }
