Skip to content

Commit

Permalink
Use rank from tracked process group
Browse files Browse the repository at this point in the history
  • Loading branch information
jonb377 committed Oct 12, 2023
1 parent 273ef2f commit ad02d56
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion torch_xla/experimental/distributed_checkpoint/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def _release_oldest_checkpoints(self):
Delete oldest checkpoints until the number of tracked checkpoints is below
self.max_to_keep. This operation is only executed on the rank 0 process.
"""
if dist.get_rank() == 0 and self.max_to_keep > 0:
if dist.get_rank(self.pg) == 0 and self.max_to_keep > 0:
while len(self._tracked_chkpts) > self.max_to_keep:
oldest_chkpt = self._tracked_chkpts.popleft()
self._delete_chkpt_at_step(oldest_chkpt.step)
Expand Down

0 comments on commit ad02d56

Please sign in to comment.