Merge pull request #114 from zhang-haojie/main
Fix pytorch lightning training
maxin-cn authored Aug 22, 2024
2 parents 18fc772 + 8041536 commit 1b74d1e
Showing 2 changed files with 4 additions and 4 deletions.
train_pl.py (2 additions, 2 deletions)
@@ -121,7 +121,7 @@ def on_save_checkpoint(self, checkpoint):
         epoch = self.trainer.current_epoch
         step = self.trainer.global_step
         checkpoint = {
-            "model": self.model.module.state_dict(),
+            "model": self.model.state_dict(),
             "ema": self.ema.state_dict(),
         }
         torch.save(checkpoint, f"{checkpoint_dir}/epoch{epoch}-step{step}.ckpt")
@@ -221,7 +221,7 @@ def main(args):
     # Trainer
     trainer = Trainer(
         accelerator="gpu",
-        devices=[3], # Specify GPU ids
+        # devices=[3], # Specify GPU ids
         strategy="auto",
         max_epochs=num_train_epochs,
         logger=tb_logger,
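The `.module` indirection removed above only exists when the inner network is wrapped in `torch.nn.parallel.DistributedDataParallel` by hand; under PyTorch Lightning the LightningModule holds the plain `nn.Module`, so its weights are read with `state_dict()` directly. A minimal sketch of the checkpoint hook after the fix (class and attribute names such as `LitDiffusion` and `self.checkpoint_dir` are illustrative, not the repository's exact code):

import os
from copy import deepcopy

import torch
import pytorch_lightning as pl


class LitDiffusion(pl.LightningModule):  # hypothetical name, for illustration only
    def __init__(self, model: torch.nn.Module, checkpoint_dir: str):
        super().__init__()
        self.model = model              # plain nn.Module, no DDP wrapper -> no .module attribute
        self.ema = deepcopy(model)      # EMA copy, assumed to be updated elsewhere in training
        self.checkpoint_dir = checkpoint_dir

    def on_save_checkpoint(self, checkpoint):
        epoch = self.trainer.current_epoch
        step = self.trainer.global_step
        ckpt = {
            "model": self.model.state_dict(),  # fixed: state_dict() called on the module itself
            "ema": self.ema.state_dict(),
        }
        os.makedirs(self.checkpoint_dir, exist_ok=True)
        torch.save(ckpt, f"{self.checkpoint_dir}/epoch{epoch}-step{step}.ckpt")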
train_with_img_pl.py (2 additions, 2 deletions)
@@ -131,7 +131,7 @@ def on_save_checkpoint(self, checkpoint):
         epoch = self.trainer.current_epoch
         step = self.trainer.global_step
         checkpoint = {
-            "model": self.model.module.state_dict(),
+            "model": self.model.state_dict(),
             "ema": self.ema.state_dict(),
         }
         torch.save(checkpoint, f"{checkpoint_dir}/epoch{epoch}-step{step}.ckpt")
@@ -231,7 +231,7 @@ def main(args):
     # Trainer
     trainer = Trainer(
         accelerator="gpu",
-        devices=[3], # Specify GPU ids
+        # devices=[3], # Specify GPU ids
         strategy="auto",
         max_epochs=num_train_epochs,
         logger=tb_logger,
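Commenting out `devices=[3]` stops pinning training to a single hard-coded GPU id and lets Lightning fall back to its default device selection. A sketch of the resulting Trainer setup; the `devices="auto"` shown here and the placeholder values for `num_train_epochs` and `tb_logger` are assumptions for illustration, not part of the commit:

from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger

num_train_epochs = 100                  # placeholder value for illustration
tb_logger = TensorBoardLogger("logs/")  # placeholder logger setup

trainer = Trainer(
    accelerator="gpu",
    devices="auto",                     # let Lightning pick available GPUs instead of a fixed id
    strategy="auto",
    max_epochs=num_train_epochs,
    logger=tb_logger,
)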
