Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Render a root-cause exception for dependency and join errors #3717

Open
wants to merge 33 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
ce429bb
Print a root-cause exception for DependencyError
benclifford Dec 4, 2024
f1eaa9b
Merge branch 'master' into benc-dependency-error-rendering
benclifford Jan 8, 2025
c3e0cd8
Merge remote-tracking branch 'origin/master' into benc-dependency-err…
benclifford Jan 11, 2025
d81eea0
WIP, passes tests
benclifford Jan 11, 2025
c0866ee
add indication of other causes
benclifford Jan 11, 2025
e5131d7
a couple of type-driven refactors
benclifford Jan 11, 2025
e4f0059
fix a traversal bug
benclifford Jan 11, 2025
0d70406
tidy up exception text a bit
benclifford Jan 11, 2025
8f4825f
use type based detection of task record
benclifford Jan 11, 2025
f6654b0
bugfix use of generated task id string
benclifford Jan 11, 2025
3dab4ea
fiddle docstring
benclifford Jan 11, 2025
784d4fe
flesh out TODO for PR
benclifford Jan 11, 2025
e673d38
task ID is not optional - a datafuture is always from a parsl task
benclifford Jan 11, 2025
b73d745
render data future differently
benclifford Jan 11, 2025
2421957
messing with types and docstrings of DataFuture
benclifford Jan 11, 2025
69ce905
more docstring fiddling
benclifford Jan 11, 2025
274f2ce
remove an accidentally introduced comment on merging from 2020
benclifford Jan 11, 2025
e975fdc
remove field that is unused
benclifford Jan 11, 2025
8a7b5d8
remove an if typechecking, and fix resultant circular import
benclifford Jan 11, 2025
007bfa2
remove another bad field
benclifford Jan 11, 2025
97d3693
this should be a str, but I need to test where it appears to see if t…
benclifford Jan 11, 2025
8e2b488
another repr that probably should be str
benclifford Jan 11, 2025
70fd038
another repr
benclifford Jan 11, 2025
7987c24
another repr - see #2025
benclifford Jan 11, 2025
83552cb
exception text should not describe what it thinks is being done with …
benclifford Jan 11, 2025
6558b69
bugfix
benclifford Jan 12, 2025
76cb0ca
isort
benclifford Jan 12, 2025
d9d9f2a
Merge branch 'master' into benc-dependency-error-rendering
benclifford Jan 13, 2025
066fce0
Merge remote-tracking branch 'origin/master' into benc-dependency-err…
benclifford Jan 14, 2025
991d01e
Merge remote-tracking branch 'refs/remotes/origin/benc-dependency-err…
benclifford Jan 14, 2025
4c027d4
Merge remote-tracking branch 'origin/master' into benc-dependency-err…
benclifford Jan 14, 2025
faf4795
revert import changes
benclifford Jan 14, 2025
e269929
Add tests that __cause__ is populated from the right exception
benclifford Jan 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 60 additions & 18 deletions parsl/dataflow/errors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Optional, Sequence, Tuple
from typing import List, Sequence, Tuple

from parsl.errors import ParslError

Expand Down Expand Up @@ -29,35 +29,77 @@ def __str__(self) -> str:
return self.reason


class DependencyError(DataFlowException):
"""Error raised if an app cannot run because there was an error
in a dependency.
class PropagatedException(DataFlowException):
    """Error raised if an app fails because there was an error
    in a related task. This is intended to be subclassed for
    dependency and join_app errors.

    On construction, one representative chain of failures is followed
    through nested PropagatedExceptions until an exception is reached that
    is not itself a PropagatedException (or that has no recorded
    dependencies). That exception is stored as ``__cause__`` and the
    descriptions met along the way are used by ``__str__``.

    Args:
         - dependent_exceptions_tids: List of exceptions and brief descriptions
           for dependencies which failed. The description might be a task ID or
           the repr of a non-AppFuture.
         - task_id: Task ID of the task that failed because of the dependency error
         - failure_description: Keyword-only. Text placed at the start of the
           message produced by ``__str__`` (for example "Dependency failure").
    """

    def __init__(self,
                 dependent_exceptions_tids: Sequence[Tuple[BaseException, str]],
                 task_id: int,
                 *,
                 failure_description: str) -> None:
        self.dependent_exceptions_tids = dependent_exceptions_tids
        self.task_id = task_id
        self._failure_description = failure_description

        (cause, cause_sequence) = self._find_any_root_cause()
        # With no recorded dependencies the search returns this exception
        # itself; do not make an exception its own __cause__.
        if cause is not self:
            self.__cause__ = cause
        self._cause_sequence = cause_sequence

    def __str__(self) -> str:
        summary = f"{self._failure_description} for task {self.task_id}."
        if not self._cause_sequence:
            # Nothing to render after "via", so say so explicitly rather
            # than emitting a dangling sentence.
            return f"{summary} No failed dependencies were recorded"
        sequence_text = " <- ".join(self._cause_sequence)
        return f"{summary} The representative cause is via {sequence_text}"

    def _find_any_root_cause(self) -> Tuple[BaseException, List[str]]:
        """Looks recursively through self.dependent_exceptions_tids to find
        an exception that caused this propagated error, that is not itself
        a propagated error.

        Only the first entry at each level is followed, so the result is a
        representative cause rather than every cause.

        Returns:
             - a tuple of the exception found and the list of descriptions
               visited on the way to it, outermost first. If this exception
               has no recorded dependencies, the exception returned is self
               and the list is empty.
        """
        e: BaseException = self
        dep_ids: List[str] = []
        while isinstance(e, PropagatedException) and len(e.dependent_exceptions_tids) >= 1:
            first_exception, id_txt = e.dependent_exceptions_tids[0]
            assert isinstance(id_txt, str)
            # if there are several causes for this exception, label that
            # there are more so that we know that the representative fail
            # sequence is not the full story.
            if len(e.dependent_exceptions_tids) > 1:
                id_txt += " (+ others)"
            dep_ids.append(id_txt)
            e = first_exception
        return e, dep_ids


class DependencyError(PropagatedException):
    """Error raised when an app cannot run because one or more of its
    dependencies failed. Each failed dependency contributes one exception,
    and DependencyError gathers all of them.

    Args:
         - dependent_exceptions_tids: List of exceptions and brief descriptions
           for dependencies which failed. The description might be a task ID or
           the repr of a non-AppFuture.
         - task_id: Task ID of the task that failed because of the dependency error
    """

    def __init__(
        self,
        dependent_exceptions_tids: Sequence[Tuple[BaseException, str]],
        task_id: int,
    ) -> None:
        # The description is the leading text of the rendered message.
        super().__init__(
            dependent_exceptions_tids,
            task_id,
            failure_description="Dependency failure",
        )

class JoinError(DataFlowException):

class JoinError(PropagatedException):
    """Error raised if apps joining into a join_app raise exceptions.
    There can be several exceptions (one from each joining app),
    and JoinError collects them all together.

    Args:
         - dependent_exceptions_tids: List of exceptions and brief descriptions
           for the joining apps which failed.
         - task_id: Task ID of the join_app task that failed
    """

    def __init__(self, dependent_exceptions_tids: Sequence[Tuple[BaseException, str]], task_id: int) -> None:
        # Message rendering and __cause__ handling are inherited from
        # PropagatedException; only the leading description differs.
        super().__init__(dependent_exceptions_tids, task_id,
                         failure_description="Join failure")
6 changes: 6 additions & 0 deletions parsl/tests/test_python_apps/test_fail.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@ def test_fail_sequence_first():
assert isinstance(t_final.exception().dependent_exceptions_tids[0][0], DependencyError)
assert t_final.exception().dependent_exceptions_tids[0][1].startswith("task ")

assert hasattr(t_final.exception(),'__cause__')
assert t_final.exception().__cause__ == t1.exception()


def test_fail_sequence_middle():
t1 = random_fail(fail_prob=0)
Expand All @@ -50,3 +53,6 @@ def test_fail_sequence_middle():

assert len(t_final.exception().dependent_exceptions_tids) == 1
assert isinstance(t_final.exception().dependent_exceptions_tids[0][0], ManufacturedTestFailure)

assert hasattr(t_final.exception(),'__cause__')
assert t_final.exception().__cause__ == t2.exception()
Loading