Skip to content

Commit

Permalink
Merge pull request #700 from opensafely-core/stale-codelist-not-inter…
Browse files Browse the repository at this point in the history
…nal-error

Add StatusCode.STALE_CODELISTS
  • Loading branch information
bloodearnest authored Jan 3, 2024
2 parents 6484812 + a23e343 commit af5b785
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 16 deletions.
4 changes: 3 additions & 1 deletion DEVELOPERS.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ graph TD
CREATED --> WAITING_ON_REBOOT
CREATED --> WAITING_DB_MAINTENANCE
CREATED --> WAITING_PAUSED
CREATED --> STALE_CODELISTS
CREATED --> SUCCEEDED
WAITING_ON_DEPENDENCIES --> WAITING_ON_WORKERS
WAITING_ON_DEPENDENCIES --> WAITING_ON_REBOOT
WAITING_ON_DEPENDENCIES --> WAITING_DB_MAINTENANCE
Expand Down Expand Up @@ -86,7 +88,7 @@ graph TD
classDef blocking fill:#ffdf75,color:#7d661c,stroke-width:2px,stroke:#997d23;
class LEGEND_BLOCKED,WAITING_ON_WORKERS,WAITING_ON_REBOOT,WAITING_PAUSED,WAITING_DB_MAINTENANCE blocking
class LEGEND_ERROR,INTERNAL_ERROR,UNMATCHED_PATTERNS,DEPENDENCY_FAILED,NONZERO_EXIT error
class LEGEND_ERROR,INTERNAL_ERROR,UNMATCHED_PATTERNS,DEPENDENCY_FAILED,NONZERO_EXIT,STALE_CODELISTS error
```

Expand Down
26 changes: 18 additions & 8 deletions jobrunner/create_or_update_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ class JobRequestError(Exception):
pass


class StaleCodelistError(JobRequestError):
pass


class NothingToDoError(JobRequestError):
pass

Expand All @@ -62,10 +66,10 @@ def create_or_update_jobs(job_request):
JobRequestError,
) as e:
log.info(f"JobRequest failed:\n{e}")
create_failed_job(job_request, e)
create_job_from_exception(job_request, e)
except Exception:
log.exception("Uncaught error while creating jobs")
create_failed_job(job_request, JobRequestError("Internal error"))
create_job_from_exception(job_request, JobRequestError("Internal error"))
else:
if job_request.cancelled_actions:
log.debug("Cancelling actions: %s", job_request.cancelled_actions)
Expand Down Expand Up @@ -312,12 +316,12 @@ def assert_codelists_ok(job_request, new_jobs):
# Codelists are out of date; fail the entire job request if any job
# requires database access
if job.requires_db:
raise JobRequestError(
raise StaleCodelistError(
f"Codelists are out of date (required by action {job.action})"
)


def create_failed_job(job_request, exception):
def create_job_from_exception(job_request, exception):
"""
Sometimes we want to say to the job-server (and the user): your JobRequest
was broken so we weren't able to create any jobs for it. But the only way
Expand All @@ -328,19 +332,25 @@ def create_failed_job(job_request, exception):
This is a bit of a hack, but it keeps the sync protocol simple.
"""
action = "__error__"
error = exception
state = State.FAILED
status_message = str(exception)

# Special case for the NothingToDoError which we treat as a success
if isinstance(exception, NothingToDoError):
state = State.SUCCEEDED
code = StatusCode.SUCCEEDED
status_message = "All actions have already run"
action = job_request.requested_actions[0]
error = None
# StaleCodelistError is a failure but not an INTERNAL_ERROR
elif isinstance(exception, StaleCodelistError):
code = StatusCode.STALE_CODELISTS
else:
state = State.FAILED
code = StatusCode.INTERNAL_ERROR
# include exception name in message to aid debugging
status_message = f"{type(exception).__name__}: {exception}"
action = "__error__"
error = exception

now = time.time()
job = Job(
job_request_id=job_request.id,
Expand Down
2 changes: 2 additions & 0 deletions jobrunner/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ class StatusCode(Enum):
UNMATCHED_PATTERNS = "unmatched_patterns"
INTERNAL_ERROR = "internal_error"
KILLED_BY_ADMIN = "killed_by_admin"
STALE_CODELISTS = "stale_codelists"

@property
def is_final_code(self):
Expand All @@ -93,6 +94,7 @@ def __lt__(self, other):
StatusCode.UNMATCHED_PATTERNS,
StatusCode.INTERNAL_ERROR,
StatusCode.KILLED_BY_ADMIN,
StatusCode.STALE_CODELISTS,
]


Expand Down
34 changes: 27 additions & 7 deletions tests/test_create_or_update_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from jobrunner.create_or_update_jobs import (
JobRequestError,
NothingToDoError,
create_failed_job,
StaleCodelistError,
create_job_from_exception,
create_jobs,
create_or_update_jobs,
validate_job_request,
Expand Down Expand Up @@ -305,10 +306,10 @@ def create_jobs_with_project_file(job_request, project_file):
return create_jobs(job_request)


def test_create_failed_job(db):
def test_create_job_from_exception(db):
job_request = job_request_factory_raw()

create_failed_job(job_request, Exception("test"))
create_job_from_exception(job_request, Exception("test"))

job = find_one(Job, job_request_id=job_request.id)

Expand All @@ -329,15 +330,15 @@ def test_create_failed_job(db):
assert spans[1].events[0].attributes["exception.message"] == "test"


def test_create_failed_job_nothing_to_do(db):
def test_create_job_from_exception_nothing_to_do(db):
job_request = job_request_factory_raw()

create_failed_job(job_request, NothingToDoError())
create_job_from_exception(job_request, NothingToDoError("nothing to do"))
job = find_one(Job, job_request_id=job_request.id)

assert job.state == State.SUCCEEDED
assert job.status_code == StatusCode.SUCCEEDED
assert job.status_message == "All actions have already run"
assert job.status_message == "nothing to do"
assert job.action == job_request.requested_actions[0]

spans = get_trace("jobs")
Expand All @@ -348,6 +349,25 @@ def test_create_failed_job_nothing_to_do(db):
assert spans[1].status.status_code == trace.StatusCode.UNSET


def test_create_job_from_exception_stale_codelist(db):
job_request = job_request_factory_raw()

create_job_from_exception(job_request, StaleCodelistError("stale"))
job = find_one(Job, job_request_id=job_request.id)

assert job.state == State.FAILED
assert job.status_code == StatusCode.STALE_CODELISTS
assert job.status_message == "stale"
assert job.action == "__error__"

spans = get_trace("jobs")

assert spans[0].name == "STALE_CODELISTS"
assert spans[0].status.status_code == trace.StatusCode.ERROR
assert spans[1].name == "JOB"
assert spans[1].status.status_code == trace.StatusCode.ERROR


@pytest.mark.parametrize(
"requested_action,expect_error",
[("generate_cohort", True), ("analyse_data", True), ("standalone_action", False)],
Expand All @@ -369,7 +389,7 @@ def test_create_or_update_jobs_with_out_of_date_codelists(
# The error reports the action that needed the up-to-date codelists, even if that
# wasn't the action explicitly requested
with pytest.raises(
JobRequestError,
StaleCodelistError,
match=re.escape(
"Codelists are out of date (required by action generate_cohort)"
),
Expand Down

0 comments on commit af5b785

Please sign in to comment.