Skip to content
This repository has been archived by the owner on Jan 31, 2024. It is now read-only.

[WIP] Better exit file handling #118

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions ipi/engine/simulation.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,12 +311,11 @@ def run(self):
#info(" # MD diagnostics: V: %10.5e Kcv: %10.5e Ecns: %10.5e" %
# (self.properties["potential"], self.properties["kinetic_cv"], self.properties["conserved"] ) )

if os.path.exists("EXIT"):
info(" # EXIT file detected! Bye bye!", verbosity.low)
break

if (self.ttime > 0) and (time.time() - simtime > self.ttime):
info(" # Wall clock time expired! Bye bye!", verbosity.low)
break
# Also check for exit files here. This can have shorter latency
# than the regular check and can also stop at a specific step.
# TODO: This does not continue smoothly from the RESTART when EXIT
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Were these "non-smooth continuations" the possible remaining issues that you wanted to test further?

# is found, but EXIT_step seems fine.
softexit.check_exit_file()
softexit.check_exit_file("EXIT_{:d}".format(self.step))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should there also be a timeout check?


self.rollback = False
23 changes: 15 additions & 8 deletions ipi/utils/softexit.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ def trigger(self, message=""):

sys.exit()


def start(self, timeout=0.0):
"""Starts the softexit monitoring loop.

Expand All @@ -121,6 +120,19 @@ def start(self, timeout=0.0):
self._thread.start()
self.register_thread(self._thread, self._doloop)

def check_exit_file(self, fn_exit="EXIT"):
    """Trigger an early exit if the exit file `fn_exit` exists.

    The file is deleted before the exit is triggered. This method may be
    invoked from more than one place (the softexit monitoring loop and the
    simulation loop both call it), so the file can disappear between the
    existence check and the removal; that case is treated as "already
    handled by another caller" rather than as an error.

    Args:
        fn_exit: Path of the exit file to look for. Defaults to "EXIT".

    Raises:
        OSError: If the file exists but cannot be removed for any reason
            other than it having vanished in the meantime.
    """

    # Local import: only needed here to classify a failed removal.
    import errno

    if not os.path.exists(fn_exit):
        return

    try:
        os.remove(fn_exit)
    except OSError as err:
        # ENOENT means someone else removed the file after our existence
        # check; whoever removed it is responsible for triggering the exit.
        if err.errno != errno.ENOENT:
            raise
        return

    self.trigger(" @SOFTEXIT: Exit file detected: {:s}".format(fn_exit))

def check_timeout(self):
    """Request a soft exit once the wall clock deadline has passed.

    `self.timeout` is compared against the current `time.time()` value.
    A non-positive timeout disables the check entirely.
    """

    deadline = self.timeout

    # A timeout of zero (or less) means no wall clock limit is active.
    if not deadline > 0:
        return

    if deadline < time.time():
        self.trigger(" @SOFTEXIT: Maximum wallclock time elapsed.")

def _kill_handler(self, signal, frame):
"""Deals with handling a kill call gracefully.

Expand Down Expand Up @@ -149,13 +161,8 @@ def _softexit_monitor(self):

while self._doloop[0]:
time.sleep(SOFTEXITLATENCY)
if os.path.exists("EXIT"):
self.trigger(" @SOFTEXIT: EXIT file detected.")
break

if (self.timeout>0 and self.timeout<time.time()):
self.trigger(" @SOFTEXIT: Maximum wallclock time elapsed.")
break
self.check_exit_file()
self.check_timeout()


softexit = Softexit()