From ed73c3e10efd0d2f9e0cc3e6aff38ed3ef589898 Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Tue, 7 Nov 2023 14:28:20 +0000 Subject: [PATCH 1/3] Add a codelists_ok field to JobRequest --- jobrunner/cli/local_run.py | 1 + jobrunner/models.py | 1 + jobrunner/sync.py | 1 + tests/factories.py | 1 + tests/lib/test_log_utils.py | 1 + tests/test_create_or_update_jobs.py | 4 ++++ tests/test_integration.py | 4 ++++ tests/test_sync.py | 4 ++++ 8 files changed, 17 insertions(+) diff --git a/jobrunner/cli/local_run.py b/jobrunner/cli/local_run.py index f8e555a6..fd0c3f29 100644 --- a/jobrunner/cli/local_run.py +++ b/jobrunner/cli/local_run.py @@ -448,6 +448,7 @@ def create_job_request_and_jobs(project_dir, actions, force_run_dependencies): requested_actions=actions, cancelled_actions=[], workspace=project_dir.name, + codelists_ok=True, database_name="dummy", force_run_dependencies=force_run_dependencies, branch="", diff --git a/jobrunner/models.py b/jobrunner/models.py index 2d54837a..84076b96 100644 --- a/jobrunner/models.py +++ b/jobrunner/models.py @@ -101,6 +101,7 @@ class JobRequest: requested_actions: list cancelled_actions: list workspace: str + codelists_ok: bool database_name: str force_run_dependencies: bool = False branch: str = None diff --git a/jobrunner/sync.py b/jobrunner/sync.py index 9599b2b7..f0a89439 100644 --- a/jobrunner/sync.py +++ b/jobrunner/sync.py @@ -126,6 +126,7 @@ def job_request_from_remote_format(job_request): requested_actions=job_request["requested_actions"], cancelled_actions=job_request["cancelled_actions"], workspace=job_request["workspace"]["name"], + codelists_ok=job_request["codelists_ok"], # Transitional code while we move the location of the `database_name` attribute database_name=( job_request["database_name"] diff --git a/tests/factories.py b/tests/factories.py index dd8f2853..73c98ac5 100644 --- a/tests/factories.py +++ b/tests/factories.py @@ -19,6 +19,7 @@ "requested_actions": ["action"], "cancelled_actions": [], "workspace": "workspace", + "codelists_ok": True, "database_name": "default", "original": { "created_by": "testuser", diff --git a/tests/lib/test_log_utils.py b/tests/lib/test_log_utils.py index 253129ba..bde3f820 100644 --- a/tests/lib/test_log_utils.py +++ b/tests/lib/test_log_utils.py @@ -24,6 +24,7 @@ commit="commit", requested_actions=["action"], cancelled_actions=[], + codelists_ok=True, database_name="dummy", ) diff --git a/tests/test_create_or_update_jobs.py b/tests/test_create_or_update_jobs.py index 0f4261a6..c52b9839 100644 --- a/tests/test_create_or_update_jobs.py +++ b/tests/test_create_or_update_jobs.py @@ -36,6 +36,7 @@ def test_create_or_update_jobs(tmp_work_dir, db): requested_actions=["generate_cohort"], cancelled_actions=[], workspace="1", + codelists_ok=True, database_name="dummy", original=dict( created_by="user", @@ -77,6 +78,7 @@ def test_create_or_update_jobs_with_git_error(tmp_work_dir): requested_actions=["generate_cohort"], cancelled_actions=[], workspace="1", + codelists_ok=True, database_name="dummy", original=dict( created_by="user", @@ -245,6 +247,7 @@ def test_validate_job_request(params, exc_msg, monkeypatch): requested_actions=["generate_cohort"], cancelled_actions=[], workspace="1", + codelists_ok=True, database_name="default", # note db from from job-server is 'default' original=dict( created_by="user", @@ -271,6 +274,7 @@ def make_job_request(action=None, actions=None, **kwargs): repo_url="https://example.com/repo.git", commit="abcdef0123456789", workspace="1", + codelists_ok=True, database_name="default", requested_actions=actions, cancelled_actions=[], diff --git a/tests/test_integration.py b/tests/test_integration.py index d52a4b1d..9e698bb0 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -70,6 +70,7 @@ def test_integration_with_cohortextractor( "repo": str(test_repo.path), "branch": "HEAD", }, + "codelists_ok": True, "database_name": "dummy", "sha": test_repo.commit, "created_by": "user", @@ -121,6 +122,7 @@ def test_integration_with_cohortextractor( "repo": str(test_repo.path), "branch": "HEAD", }, + "codelists_ok": True, "database_name": "dummy", "sha": test_repo.commit, "created_by": "user", @@ -244,6 +246,7 @@ def test_integration_with_databuilder( "repo": str(test_repo.path), "branch": "HEAD", }, + "codelists_ok": True, "database_name": "dummy", "sha": test_repo.commit, "created_by": "user", @@ -292,6 +295,7 @@ def test_integration_with_databuilder( "repo": str(test_repo.path), "branch": "HEAD", }, + "codelists_ok": True, "database_name": "dummy", "sha": test_repo.commit, "created_by": "user", diff --git a/tests/test_sync.py b/tests/test_sync.py index dad49444..a4a15de4 100644 --- a/tests/test_sync.py +++ b/tests/test_sync.py @@ -19,6 +19,7 @@ def test_job_request_from_remote_format(): "cancelled_actions": ["analyse"], "force_run_dependencies": True, "sha": "abcdef", + "codelists_ok": True, "created_by": "user", "project": "project", "orgs": ["org"], @@ -29,6 +30,7 @@ def test_job_request_from_remote_format(): commit="abcdef", branch="master", workspace="testing", + codelists_ok=True, database_name="default", requested_actions=["generate_cohort"], cancelled_actions=["analyse"], @@ -52,6 +54,7 @@ def test_job_request_from_remote_format_database_name_fallback(): "cancelled_actions": ["analyse"], "force_run_dependencies": True, "sha": "abcdef", + "codelists_ok": True, "created_by": "user", "project": "project", "orgs": ["org"], @@ -62,6 +65,7 @@ def test_job_request_from_remote_format_database_name_fallback(): commit="abcdef", branch="master", workspace="testing", + codelists_ok=True, database_name="default", requested_actions=["generate_cohort"], cancelled_actions=["analyse"], From 6b6ea0c6f090f4c685815fc655cdc2113f23da4f Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Tue, 7 Nov 2023 14:29:22 +0000 Subject: [PATCH 2/3] Check for database actions and raise exception if codelists out of date --- jobrunner/create_or_update_jobs.py | 15 +++++++++++++++ tests/test_create_or_update_jobs.py | 24 ++++++++++++++++++++++++ 2 files changed, 39 insertions(+) diff --git a/jobrunner/create_or_update_jobs.py b/jobrunner/create_or_update_jobs.py index 07a77b80..9be0da80 100644 --- a/jobrunner/create_or_update_jobs.py +++ b/jobrunner/create_or_update_jobs.py @@ -90,6 +90,11 @@ def create_jobs(job_request): new_jobs = get_new_jobs_to_run(job_request, pipeline_config, latest_jobs) assert_new_jobs_created(job_request, new_jobs, latest_jobs) resolve_reusable_action_references(new_jobs) + + # check for database actions in the new jobs, and raise an exception if + # codelists are out of date + assert_codelists_ok(job_request, new_jobs) + # There is a delay between getting the current jobs (which we fetch from # the database and the disk) and inserting our new jobs below. This means # the state of the world may have changed in the meantime. Why is this OK? @@ -299,6 +304,16 @@ def assert_new_jobs_created(job_request, new_jobs, current_jobs): raise Exception(f"Unexpected job states after scheduling: {current_job_states}") +def assert_codelists_ok(job_request, new_jobs): + if job_request.codelists_ok: + return True + for job in new_jobs: + # Codelists are out of date; fail the entire job request if any job + # requires database access + if job.requires_db: + raise JobRequestError("Codelists are out of date") + + def create_failed_job(job_request, exception): """ Sometimes we want to say to the job-server (and the user): your JobRequest diff --git a/tests/test_create_or_update_jobs.py b/tests/test_create_or_update_jobs.py index c52b9839..b7cca5bf 100644 --- a/tests/test_create_or_update_jobs.py +++ b/tests/test_create_or_update_jobs.py @@ -345,3 +345,27 @@ def test_create_failed_job_nothing_to_do(db): assert spans[0].status.status_code == trace.StatusCode.UNSET assert spans[1].name == "JOB" assert spans[1].status.status_code == trace.StatusCode.UNSET + + +@pytest.mark.parametrize( + "requested_action,expect_error", + [("generate_cohort", True), ("analyse_data", True), ("standalone_action", False)], +) +def test_create_or_update_jobs_with_out_of_date_codelists( + tmp_work_dir, requested_action, expect_error +): + project = TEST_PROJECT + ( + """ + standalone_action: + run: python:latest analysis/do_something.py + outputs: + moderately_sensitive: + something: done.txt +""" + ) + job_request = make_job_request(action=requested_action, codelists_ok=False) + if expect_error: + with pytest.raises(JobRequestError, match="Codelists are out of date"): + create_jobs_with_project_file(job_request, project) + else: + assert create_jobs_with_project_file(job_request, project) == 1 From c0e75e6742933b5f62827f3d664e6eb6d575371f Mon Sep 17 00:00:00 2001 From: Becky Smith Date: Thu, 9 Nov 2023 11:54:28 +0000 Subject: [PATCH 3/3] Report the action that required up to date codelists --- jobrunner/create_or_update_jobs.py | 4 +++- tests/test_create_or_update_jobs.py | 10 +++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/jobrunner/create_or_update_jobs.py b/jobrunner/create_or_update_jobs.py index 9be0da80..2f62a27f 100644 --- a/jobrunner/create_or_update_jobs.py +++ b/jobrunner/create_or_update_jobs.py @@ -311,7 +311,9 @@ def assert_codelists_ok(job_request, new_jobs): # Codelists are out of date; fail the entire job request if any job # requires database access if job.requires_db: - raise JobRequestError("Codelists are out of date") + raise JobRequestError( + f"Codelists are out of date (required by action {job.action})" + ) def create_failed_job(job_request, exception): diff --git a/tests/test_create_or_update_jobs.py b/tests/test_create_or_update_jobs.py index b7cca5bf..8b67943f 100644 --- a/tests/test_create_or_update_jobs.py +++ b/tests/test_create_or_update_jobs.py @@ -1,3 +1,4 @@ +import re import uuid from pathlib import Path from unittest import mock @@ -365,7 +366,14 @@ def test_create_or_update_jobs_with_out_of_date_codelists( ) job_request = make_job_request(action=requested_action, codelists_ok=False) if expect_error: - with pytest.raises(JobRequestError, match="Codelists are out of date"): + # The error reports the action that needed the up-to-date codelists, even if that + # wasn't the action explicitly requested + with pytest.raises( + JobRequestError, + match=re.escape( + "Codelists are out of date (required by action generate_cohort)" + ), + ): create_jobs_with_project_file(job_request, project) else: assert create_jobs_with_project_file(job_request, project) == 1