Skip to content

Commit

Permalink
Merge pull request #226 from opensafely-core/check-upstream
Browse files Browse the repository at this point in the history
Add a new codelists check-upstream command
  • Loading branch information
rebkwok authored Oct 30, 2023
2 parents 4cde364 + f5e20d8 commit 55bd4b8
Show file tree
Hide file tree
Showing 2 changed files with 190 additions and 12 deletions.
85 changes: 77 additions & 8 deletions opensafely/codelists.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from opensafely._vendor import requests


DESCRIPTION = "Commands for interacting with https://codelists.opensafely.org/"
OPENCODELISTS_BASE_URL = "https://www.opencodelists.org"
DESCRIPTION = f"Commands for interacting with {OPENCODELISTS_BASE_URL}"

CODELISTS_DIR = "codelists"
CODELISTS_FILE = "codelists.txt"
Expand Down Expand Up @@ -40,10 +41,18 @@ def show_help(**kwargs):
parser_check = subparsers.add_parser(
"check",
help=(
f"Check that codelists on disk match the specification at "
f"{CODELISTS_DIR}/{CODELISTS_FILE}"
"Check that codelists on disk match the specification at "
f"{CODELISTS_DIR}/{CODELISTS_FILE} and are up-to-date with "
"upstream versions"
),
)

parser_check_upstream = subparsers.add_parser(
"check-upstream",
help=("Check codelists are up to date with upstream versions"),
)
parser_check_upstream.set_defaults(function=check_upstream)

parser_check.set_defaults(function=check)


Expand Down Expand Up @@ -89,11 +98,64 @@ def update(codelists_dir=None):
return True


def check():
codelists_dir = Path.cwd() / CODELISTS_DIR
def get_codelists_dir(codelists_dir=None):
codelists_dir = codelists_dir or Path.cwd() / CODELISTS_DIR
if not codelists_dir.exists():
print(f"No '{CODELISTS_DIR}' directory present so nothing to check")
return
return codelists_dir


def check_upstream(codelists_dir=None):
"""
Check currently downloaded codelists against current OpenCodelists data.
This runs after the local checks in `check()`, but can also run as a standalone
command.
"""
codelists_dir = get_codelists_dir(codelists_dir)
if codelists_dir is None:
return True
codelists_file = codelists_dir / CODELISTS_FILE
if not codelists_file.exists():
exit_with_error(f"No file found at '{codelists_file}'")
manifest_file = codelists_dir / MANIFEST_FILE
if not manifest_file.exists():
exit_with_prompt(f"No file found at '{manifest_file}'.")
post_data = {
"codelists": codelists_file.read_text(),
"manifest": manifest_file.read_text(),
}
url = f"{OPENCODELISTS_BASE_URL}/api/v1/check/"
response = requests.post(url, post_data).json()
status = response["status"]

if status == "error":
# The OpenCodelists check endpoint returns an error in the response data if it
# encounters an invalid user, organisation or codelist in the codelists.txt file, or
# if any codelists in codelists.csv don't match the expected pattern. These should all
# be fixable by `opensafely codelists update`.
if "error" in response["data"]:
exit_with_prompt(
f"Error checking upstream codelists: {response['data']['error']}\n"
)
if response["data"]["added"] or response["data"]["removed"]:
exit_with_prompt(
"Codelists have been added or removed\n\n"
"For details, run:\n\n opensafely codelists check\n"
)
changed = "\n ".join(response["data"]["changed"])
exit_with_prompt(
f"Some codelists are out of date\nCodelists affected:\n {changed}\n"
)
print("Codelists OK")
return True


def check():
codelists_dir = get_codelists_dir()
if codelists_dir is None:
return True

codelists = parse_codelist_file(codelists_dir)
manifest_file = codelists_dir / MANIFEST_FILE
if not manifest_file.exists():
Expand Down Expand Up @@ -146,8 +208,15 @@ def check():
"A CSV file seems to have been modified since it was downloaded:\n"
"{}\n".format("\n".join(modified))
)
print("Codelists OK")
return True

try:
return check_upstream(codelists_dir)
except requests.exceptions.ConnectionError:
print(
f"Local codelists OK; could not contact {OPENCODELISTS_BASE_URL} for upstream check,"
"try again later"
)
return True


def make_temporary_manifest(codelists_dir):
Expand Down Expand Up @@ -197,7 +266,7 @@ def parse_codelist_file(codelists_dir):
)
codelist_versions[line_without_version] = line_version

url = f"https://codelists.opensafely.org/codelist/{line}/"
url = f"{OPENCODELISTS_BASE_URL}/codelist/{line}/"
filename = "-".join(tokens[:-1]) + ".csv"
codelists.append(
Codelist(
Expand Down
117 changes: 113 additions & 4 deletions tests/test_codelists.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import shutil
from pathlib import Path
from urllib.parse import parse_qs

import pytest
from requests_mock import mocker
Expand All @@ -16,6 +17,18 @@
mocker._original_send = requests.Session.send


@pytest.fixture
def mock_check(requests_mock):
def _mock(response):
mocked = requests_mock.post(
"https://www.opencodelists.org/api/v1/check/",
json=response,
)
return mocked

return _mock


def test_codelists_update(tmp_path, requests_mock):
codelist_dir = tmp_path / "codelists"
codelist_dir.mkdir()
Expand All @@ -26,12 +39,12 @@ def test_codelists_update(tmp_path, requests_mock):
)
os.chdir(tmp_path)
requests_mock.get(
"https://codelists.opensafely.org/"
"https://www.opencodelists.org/"
"codelist/project123/codelist456/version2/download.csv",
text="foo",
)
requests_mock.get(
"https://codelists.opensafely.org/"
"https://www.opencodelists.org/"
"codelist/user/user123/codelist098/version1/download.csv",
text="bar",
)
Expand All @@ -53,9 +66,19 @@ def codelists_path(tmp_path):
yield tmp_path


def test_codelists_check(codelists_path):
def test_codelists_check(mock_check, codelists_path):
mock_check(response={"status": "ok"})
os.chdir(codelists_path)
assert codelists.check()


def test_codelists_check_passes_if_opencodelists_is_down(requests_mock, codelists_path):
requests_mock.post(
"https://www.opencodelists.org/api/v1/check/",
exc=requests.exceptions.ConnectionError,
)
os.chdir(codelists_path)
codelists.check()
assert codelists.check()


def test_codelists_check_fail_if_list_updated(codelists_path):
Expand Down Expand Up @@ -94,6 +117,12 @@ def test_codelists_check_fail_if_invalid_manifest_file(codelists_path):
codelists.check()


def test_codelists_parse_fail_if_no_codelists_dir(tmp_path):
os.chdir(tmp_path)
with pytest.raises(SystemExit):
codelists.parse_codelist_file(tmp_path)


def test_codelists_parse_fail_if_different_versions_of_same_list(codelists_path):
with open(codelists_path / "codelists/codelists.txt", "a") as f:
f.write("\nsomeproject/somelist/someversion")
Expand All @@ -112,9 +141,89 @@ def test_codelists_parse_fail_if_duplicate_lines(codelists_path):
codelists.parse_codelist_file(codelists_path / "codelists")


def test_codelists_parse_fail_if_bad_codelist(codelists_path):
with open(codelists_path / "codelists/codelists.txt", "a") as f:
f.write("\nsomeproject/somelist/")
os.chdir(codelists_path)
with pytest.raises(SystemExit):
codelists.parse_codelist_file(codelists_path / "codelists")


def test_codelists_parse_fail_if_codelist_file_does_not_exist(codelists_path):
(codelists_path / "codelists/codelists.txt").unlink()
os.chdir(codelists_path)
with pytest.raises(SystemExit):
codelists.parse_codelist_file(codelists_path / "codelists")


def test_codelists_parse_pass_if_different_lists_have_same_version(codelists_path):
with open(codelists_path / "codelists/codelists.txt", "a") as f:
f.write("\nsomeproject/somelist/someversion")
f.write("\nsomeuser/anotherproject/somelist/someversion")
os.chdir(codelists_path)
assert codelists.parse_codelist_file(codelists_path / "codelists")


def test_codelists_check_upstream_passes_if_no_codelists_dir(tmp_path):
os.chdir(tmp_path)
assert codelists.check_upstream()


def test_codelists_check_upstream_fails_if_no_codelists_file(tmp_path):
os.chdir(tmp_path)
(tmp_path / "codelists").mkdir()
with pytest.raises(SystemExit):
codelists.check_upstream()


def test_codelists_check_upstream_fails_if_no_manifest_file(tmp_path, mock_check):
mock_check(response={"status": "ok"})
os.chdir(tmp_path)
codelists_dir = tmp_path / "codelists"
codelists_dir.mkdir()
(codelists_dir / "codelists.txt").touch()
with pytest.raises(SystemExit):
codelists.check_upstream()
(codelists_dir / "codelists.json").touch()
assert codelists.check_upstream()


def test_codelists_check_upstream(codelists_path, mock_check):
mocked = mock_check(response={"status": "ok"})
os.chdir(codelists_path)
assert codelists.check_upstream()

# assert content sent to opencodelists
assert mocked.called_once
parsed_request = parse_qs(mocked.last_request.text)
assert list(parsed_request.keys()) == ["codelists", "manifest"]

assert (
parsed_request["codelists"][0]
== (codelists_path / "codelists/codelists.txt").read_text()
)
assert (
parsed_request["manifest"][0]
== (codelists_path / "codelists/codelists.json").read_text()
)


def test_codelists_check_upstream_with_error(codelists_path, mock_check):
mock_check(response={"status": "error", "data": {"error": "Any unknown error"}})
with pytest.raises(SystemExit):
codelists.check_upstream(codelists_path / "codelists")


@pytest.mark.parametrize(
"response",
[
{"added": ["org/foo/123"], "removed": [], "changed": []},
{"added": [], "removed": ["org/foo/123"], "changed": []},
{"added": [], "removed": [], "changed": ["org/foo/123"]},
{"added": ["org/bar/123"], "removed": [], "changed": ["org/foo/123"]},
],
)
def test_codelists_check_upstream_with_changes(codelists_path, mock_check, response):
mock_check(response={"status": "error", "data": response})
with pytest.raises(SystemExit):
codelists.check_upstream(codelists_path / "codelists")

0 comments on commit 55bd4b8

Please sign in to comment.