diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 81b730343..ec469a02b 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -143,30 +143,6 @@ jobs: timeout-minutes: 30 steps: - uses: actions/checkout@v3 - - - uses: docker/setup-buildx-action@v3 - # pre-build and cache the postgres container as installing python3 takes a while, doesn't push - - name: Cache Docker layers - uses: actions/cache@v3 - with: - path: /tmp/.buildx-cache - key: ${{ runner.os }}-buildx-${{ github.sha }} - restore-keys: | - ${{ runner.os }}-buildx- - - uses: docker/build-push-action@v5 - with: - context: . - file: docker/postgres/Dockerfile - cache-from: type=local,src=/tmp/.buildx-cache - cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max - - # Temp fix - # https://github.com/docker/build-push-action/issues/252 - # https://github.com/moby/buildkit/issues/1896 - name: Move cache - run: | - rm -rf /tmp/.buildx-cache - mv /tmp/.buildx-cache-new /tmp/.buildx-cache - - name: Init Python uses: actions/setup-python@v4 with: @@ -175,7 +151,7 @@ jobs: - name: Install Python dependencies run: | - pip install pixl_core/ pixl_ehr/[test] + pip install pytest-pixl/ pixl_core/ pixl_ehr/[test] - name: Run tests working-directory: pixl_ehr/tests @@ -187,7 +163,6 @@ jobs: timeout-minutes: 10 steps: - uses: actions/checkout@v3 - - name: Init Python uses: actions/setup-python@v4 with: @@ -209,25 +184,21 @@ jobs: timeout-minutes: 30 steps: - uses: actions/checkout@v3 - - name: Prune docker volumes - # seems like we're getting an error from ftp-server exiting with zero status code - # this is a workaround that resolves it locally 🤞 - run: | - docker volume prune --force - - uses: docker/setup-buildx-action@v3 - # pre-build and cache the postgres container as installing python3 takes a while, doesn't push + # pre-build and cache the orthanc-anon image: installing python3 takes a while, doesn't push - name: Cache Docker layers - uses: actions/cache@v3 + uses: actions/cache@v4 with: path: /tmp/.buildx-cache - key: ${{ runner.os }}-buildx-${{ github.sha }} + key: ${{ runner.os }}-buildx-${{ github.ref }}-${{ github.sha }} restore-keys: | + ${{ runner.os }}-buildx-${{ github.ref }}- ${{ runner.os }}-buildx- + save-always: true - uses: docker/build-push-action@v5 with: context: . - file: docker/postgres/Dockerfile + file: docker/orthanc-anon/Dockerfile cache-from: type=local,src=/tmp/.buildx-cache cache-to: type=local,dest=/tmp/.buildx-cache-new,mode=max - # Temp fix @@ -244,6 +215,14 @@ jobs: python-version: 3.10.6 cache: "pip" + - name: Install Python dependencies + # The -e option is required here due to the way the + # code determines the export directory. See issue #318. 
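A quick way to see what `-e` actually changes, before the install step below — a sketch assuming the `pixl_cli` package under `cli/` is the one issue #318 concerns:

```python
# Illustrative probe only. With a regular `pip install cli/` this prints a
# path under site-packages; with `pip install -e cli/` it prints a path inside
# the repo checkout, so code that derives directories relative to the source
# tree (as the export-directory logic reportedly does) keeps working.
import pixl_cli

print(pixl_cli.__file__)
```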
+ run: | + pip install -e pixl_core/ + pip install -e cli/[test] + pip install -e pytest-pixl/ + - name: Build test services working-directory: test run: | @@ -257,3 +236,19 @@ jobs: working-directory: test run: | ./run-system-test.sh + echo FINISHED SYSTEM TEST SCRIPT + + - name: Dump ehr-api docker logs for debugging + if: ${{ failure() }} + run: | + docker logs -t system-test-ehr-api-1 2>&1 + + - name: Dump orthanc-anon docker logs for debugging + if: ${{ failure() }} + run: | + docker logs -t system-test-orthanc-anon-1 2>&1 + + - name: Dump imaging-api docker logs for debugging + if: ${{ failure() }} + run: | + docker logs -t system-test-imaging-api-1 2>&1 diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index 528ae1d0b..78fcfbc1d 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -125,7 +125,10 @@ def extract_radiology_reports(parquet_dir: Path) -> None: success_code = 200 if response.status_code != success_code: - msg = f"Failed to run export-patient-data due to: {response.text}" + msg = ( + f"Failed to run export-patient-data due to: " + f"error code {response.status_code} {response.text}" + ) raise RuntimeError(msg) diff --git a/docker-compose.yml b/docker-compose.yml index 7b184df9b..65420a398 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -153,6 +153,9 @@ services: - ${PWD}/orthanc/orthanc-anon/config:/run/secrets:ro networks: - pixl-net + # needed for same reason as ehr-api + extra_hosts: + - "host.docker.internal:host-gateway" depends_on: postgres: condition: service_healthy @@ -253,6 +256,10 @@ services: retries: 5 networks: - pixl-net + # needed for testing under GHA (linux), so this container + # can reach the test FTP server running on the docker host + extra_hosts: + - "host.docker.internal:host-gateway" volumes: - ${PWD}/exports:/run/exports diff --git a/docker/orthanc-anon/Dockerfile b/docker/orthanc-anon/Dockerfile index 8aa3e7cea..4628fbe86 100644 --- a/docker/orthanc-anon/Dockerfile +++ b/docker/orthanc-anon/Dockerfile @@ -13,6 +13,39 @@ # limitations under the License. FROM osimis/orthanc:22.9.0-full-stable SHELL ["/bin/bash", "-o", "pipefail", "-e", "-u", "-x", "-c"] +# need some packages for building python 3.9.13 +RUN export DEBIAN_FRONTEND=noninteractive \ + && apt update \ + && apt install --yes --no-install-recommends \ + build-essential \ + libbz2-dev \ + libffi-dev \ + libgdbm-dev \ + libncurses5-dev \ + libnss3-dev \ + libreadline-dev \ + libsqlite3-dev \ + libssl-dev \ + zlib1g-dev \ + && apt-get autoremove \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Default for Debian Bullseye (11) is python 3.9.2, which has a bug in ftplib that is +# affecting us. +# See https://bugs.python.org/issue43285 for more details. 
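To make the version sensitivity concrete — a sketch based on my reading of bpo-43285 (the attribute below is the one the fix introduced; treat the details as an assumption, not documentation):

```python
# Illustrative probe, no network access needed. Interpreters carrying the
# bpo-43285 fix expose this attribute (default False, i.e. ignore the address
# the server advertises in its PASV reply and reuse the control-connection
# host). On an unfixed 3.9.2 the attribute is absent and the client connects
# to whatever address the server reports, which can fail behind docker NAT.
import ftplib

ftp = ftplib.FTP()
print(getattr(ftp, "trust_server_pasv_ipv4_address", "no bpo-43285 fix"))
```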
It only happens in the system +# test, when docker is using NAT to implement host.docker.internal +# Install 3.9.13 alongside the existing version (removing the original python beforehand +# seems to break orthanc) +ADD https://www.python.org/ftp/python/3.9.13/Python-3.9.13.tgz /python3913/ +RUN cd /python3913 \ + && tar -xvf Python-3.9.13.tgz \ + && cd Python-3.9.13 \ + && ./configure --enable-optimizations \ + && make -j `nproc` \ + && make install \ + && make clean \ + && rm -fr /python3913 # Install requirements before copying modules COPY ./pixl_core/pyproject.toml ./pixl_core/pyproject.toml diff --git a/orthanc/orthanc-anon/plugin/pixl.py b/orthanc/orthanc-anon/plugin/pixl.py index 23c077190..9aa9b0ffc 100644 --- a/orthanc/orthanc-anon/plugin/pixl.py +++ b/orthanc/orthanc-anon/plugin/pixl.py @@ -72,6 +72,9 @@ def AzureAccessToken(): return response.json()["access_token"] +TIMER = None + + def AzureDICOMTokenRefresh(): """ Refresh Azure DICOM token diff --git a/pixl_core/src/core/exports.py b/pixl_core/src/core/exports.py index 20bcaf0b4..f02682004 100644 --- a/pixl_core/src/core/exports.py +++ b/pixl_core/src/core/exports.py @@ -80,9 +80,10 @@ def copy_to_exports(self, input_omop_dir: pathlib.Path) -> str: """ public_input = input_omop_dir / "public" + logging.info("Copying public parquet files from %s to %s", public_input, self.public_output) + # Make directory for exports if they don't exist ParquetExport._mkdir(self.public_output) - logger.info("Copying public parquet files from %s to %s", public_input, self.public_output) # Copy extract files, overwriting if it exists shutil.copytree(public_input, self.public_output, dirs_exist_ok=True) @@ -96,6 +97,7 @@ def export_radiology(self, export_df: pd.DataFrame) -> pathlib.Path: """Export radiology reports to parquet file""" self._mkdir(self.radiology_output) parquet_file = self.radiology_output / "radiology.parquet" + logging.info("Exporting radiology to %s", self.radiology_output) export_df.to_parquet(parquet_file) # We are not responsible for making the "latest" symlink, see `copy_to_exports`. 
# This avoids the confusion caused by EHR API (which calls export_radiology) having a diff --git a/pixl_core/src/core/upload.py b/pixl_core/src/core/upload.py index f0bbe2687..a8c59f730 100644 --- a/pixl_core/src/core/upload.py +++ b/pixl_core/src/core/upload.py @@ -76,10 +76,11 @@ def upload_dicom_image(zip_content: BinaryIO, pseudo_anon_id: str) -> None: # Store the file using a binary handler try: + logger.info("Running command %s", command) ftp.storbinary(command, zip_content) except ftplib.all_errors as ftp_error: ftp.quit() - error_msg = "Failed to run STOR command '%s': '%s'" + error_msg = f"Failed to run STOR command : {command}" raise ConnectionError(error_msg, command, ftp_error) from ftp_error # Close the FTP connection @@ -159,8 +160,8 @@ def _connect_to_ftp() -> FTP_TLS: ftp.login(ftp_user, ftp_password) ftp.prot_p() except ftplib.all_errors as ftp_error: - error_msg = "Failed to connect to FTPS server: '%s'" - raise ConnectionError(error_msg, ftp_error) from ftp_error + error_msg = f"Failed to connect to FTPS server: {ftp_user}@{ftp_host}:{ftp_port}" + raise ConnectionError(error_msg) from ftp_error return ftp diff --git a/pixl_core/tests/conftest.py b/pixl_core/tests/conftest.py index b9579756e..585f36df7 100644 --- a/pixl_core/tests/conftest.py +++ b/pixl_core/tests/conftest.py @@ -16,17 +16,20 @@ import datetime import os import pathlib -import subprocess +import shlex from pathlib import Path from typing import TYPE_CHECKING, BinaryIO import pytest from core.db.models import Base, Extract, Image from core.exports import ParquetExport +from pytest_pixl.helpers import run_subprocess +from pytest_pixl.plugin import FtpHostAddress from sqlalchemy import Engine, create_engine from sqlalchemy.orm import Session, sessionmaker if TYPE_CHECKING: + import subprocess from collections.abc import Generator pytest_plugins = "pytest_pixl" @@ -47,25 +50,19 @@ @pytest.fixture(scope="package") def run_containers() -> subprocess.CompletedProcess[bytes]: """Run docker containers for tests which require them.""" - subprocess.run( - b"docker compose down --volumes", - check=True, - cwd=TEST_DIR, - shell=True, # noqa: S602 + run_subprocess( + shlex.split("docker compose down --volumes"), + TEST_DIR, timeout=60, ) - yield subprocess.run( - b"docker compose up --build --wait", - check=True, - cwd=TEST_DIR, - shell=True, # noqa: S602 + yield run_subprocess( + shlex.split("docker compose up --build --wait"), + TEST_DIR, timeout=60, ) - subprocess.run( - b"docker compose down --volumes", - check=True, - cwd=TEST_DIR, - shell=True, # noqa: S602 + run_subprocess( + shlex.split("docker compose down --volumes"), + TEST_DIR, timeout=60, ) @@ -79,6 +76,12 @@ def ftps_home_dir(ftps_server) -> Generator[Path, None, None]: return Path(ftps_server.home_dir) +@pytest.fixture(scope="session") +def ftp_host_address(): + """Run FTP on localhost - no docker containers need to access it""" + return FtpHostAddress.LOCALHOST + + @pytest.fixture() def test_zip_content() -> BinaryIO: """Directory containing the test data for uploading to the ftp server.""" diff --git a/pixl_dcmd/tests/test_main.py b/pixl_dcmd/tests/test_main.py index 15834fcaf..aba26cf7b 100644 --- a/pixl_dcmd/tests/test_main.py +++ b/pixl_dcmd/tests/test_main.py @@ -14,7 +14,6 @@ from __future__ import annotations import pathlib -import subprocess import nibabel import numpy as np @@ -23,6 +22,7 @@ import sqlalchemy import yaml from pydicom.data import get_testdata_file +from pytest_pixl.helpers import run_subprocess from core.db.models import Image 
from pixl_dcmd.main import ( @@ -109,15 +109,14 @@ def test_can_nifti_convert_post_anonymisation( # Convert the anonymised DICOMs to NIFTI with dcm2niix anon_nifti_dir = tmp_path / "nifti_anon" anon_nifti_dir.mkdir() - subprocess.run( + run_subprocess( ["dcm2niix", "-f", "anon", "-o", str(anon_nifti_dir), str(anon_dicom_dir)], - check=True, ) # Convert the pre-anonymisation DICOMs to NIFTI with dcm2niix ident_nifti_dir = tmp_path / "nifti_ident" ident_nifti_dir.mkdir() - subprocess.run( + run_subprocess( [ "dcm2niix", "-f", @@ -126,7 +125,6 @@ def test_can_nifti_convert_post_anonymisation( str(ident_nifti_dir), str(directory_of_mri_dicoms), ], - check=True, ) # Confirm that the shape, orientation and contents of the pre- and diff --git a/pixl_ehr/pyproject.toml b/pixl_ehr/pyproject.toml index 1193c20ba..d6166a550 100644 --- a/pixl_ehr/pyproject.toml +++ b/pixl_ehr/pyproject.toml @@ -25,6 +25,7 @@ dependencies = [ test = [ "pytest==7.4.2", "pytest-asyncio==0.21.1", + "pytest-pixl", "httpx==0.24.*", ] dev = [ diff --git a/pixl_ehr/tests/conftest.py b/pixl_ehr/tests/conftest.py index 10e2c7c91..f6abe460f 100644 --- a/pixl_ehr/tests/conftest.py +++ b/pixl_ehr/tests/conftest.py @@ -14,10 +14,15 @@ from __future__ import annotations import os -import subprocess +import shlex from pathlib import Path +from typing import TYPE_CHECKING import pytest +from pytest_pixl.helpers import run_subprocess + +if TYPE_CHECKING: + import subprocess # configure environmental variables os.environ["LOG_LEVEL"] = "DEBUG" @@ -41,17 +46,13 @@ @pytest.fixture(scope="package", autouse=True) def run_containers() -> subprocess.CompletedProcess[bytes]: """Run docker containers for tests which require them.""" - yield subprocess.run( - b"docker compose up --build --wait", - check=True, - cwd=TEST_DIR, - shell=True, # noqa: S602 + yield run_subprocess( + shlex.split("docker compose up --build --wait"), + TEST_DIR, timeout=120, ) - subprocess.run( - b"docker compose down --volumes", - check=True, - cwd=TEST_DIR, - shell=True, # noqa: S602 + run_subprocess( + shlex.split("docker compose down --volumes"), + TEST_DIR, timeout=60, ) diff --git a/pytest-pixl/src/pytest_pixl/ftpserver.py b/pytest-pixl/src/pytest_pixl/ftpserver.py index df9ca0953..d586b1095 100644 --- a/pytest-pixl/src/pytest_pixl/ftpserver.py +++ b/pytest-pixl/src/pytest_pixl/ftpserver.py @@ -12,13 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. """A ligthweight FTPS server supporting implicit SSL for use in PIXL tests.""" - +import logging from pathlib import Path from decouple import config from pyftpdlib.authorizers import DummyAuthorizer from pyftpdlib.handlers import TLS_FTPHandler -from pyftpdlib.servers import FTPServer +from pyftpdlib.servers import ThreadedFTPServer + +logger = logging.getLogger(__name__) +logger.setLevel("INFO") # User permission # from https://pyftpdlib.readthedocs.io/en/latest/api.html#pyftpdlib.authorizers.DummyAuthorizer.add_user @@ -73,19 +76,20 @@ class PixlFTPServer: :type home_dir: str """ - def __init__(self, home_dir: str) -> None: + def __init__(self, home_root: Path, host_address: str) -> None: """ Initialise the FTPS server. Sets the hostname, port, username and password from the corresponding environment variables. - :param home_dir: The home directory for the FTP server. This is the directory where the - user will be placed after login. + :param home_root: The directory where the user's home directory will be created. 
+ The home dir is the directory where the user will be placed after login. """ - self.host = config("FTP_HOST", default="127.0.0.1") + self.host = host_address self.port = int(config("FTP_PORT", default=20021)) self.user_name = config("FTP_USER_NAME", default="pixl_user") self.user_password = config("FTP_USER_PASSWORD", default="longpassword") - self.home_dir = home_dir + self.home_dir: Path = home_root / self.user_name + self.home_dir.mkdir() self.certfile = Path(__file__).parents[1] / "resources" / "ssl" / "localhost.crt" self.keyfile = Path(__file__).parents[1] / "resources" / "ssl" / "localhost.key" @@ -103,7 +107,7 @@ def _add_user(self) -> None: will be a directory on the local filesystem! """ self.authorizer.add_user( - self.user_name, self.user_password, self.home_dir, perm=USER_PERMISSIONS + self.user_name, self.user_password, str(self.home_dir), perm=USER_PERMISSIONS ) def _setup_TLS_handler(self) -> None: @@ -119,11 +123,12 @@ def _check_ssl_files(self) -> None: assert certfile_path.exists(), f"Could not find cerfile at {certfile_path.absolute()}" assert keyfile_path.exists(), f"Could not find cerfile at {keyfile_path.absolute()}" - def _create_server(self) -> FTPServer: + def _create_server(self) -> ThreadedFTPServer: """ Creates the FTPS server and returns it. The server can be started with the `serve_forever` method. """ address = (self.host, self.port) - self.server = FTPServer(address, self.handler) + logger.info("Starting FTP server on %s:%d", self.host, self.port) + self.server = ThreadedFTPServer(address, self.handler) return self.server diff --git a/pytest-pixl/src/pytest_pixl/helpers.py b/pytest-pixl/src/pytest_pixl/helpers.py new file mode 100644 index 000000000..42519ebe2 --- /dev/null +++ b/pytest-pixl/src/pytest_pixl/helpers.py @@ -0,0 +1,92 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Testing utilities""" +from __future__ import annotations + +import logging +import subprocess +from time import sleep +from typing import TYPE_CHECKING, Callable, Optional + +if TYPE_CHECKING: + from pathlib import Path + +logger = logging.getLogger(__name__) + + +def run_subprocess( + cmd: list[str], working_dir: Optional[Path] = None, *, shell=False, timeout=360 +) -> subprocess.CompletedProcess[bytes]: + """ + Run a command but capture the stderr and stdout better than the CalledProcessError + string representation does + """ + logger.info("Running command %s", cmd) + try: + cp = subprocess.run( + cmd, + check=True, + cwd=working_dir, + shell=shell, # noqa: S603 input is trusted + timeout=timeout, + capture_output=True, + ) + except subprocess.CalledProcessError as exception: + logger.error("*** exception occurred running: '%s'", cmd) # noqa: TRY400 will raise anyway + logger.error("*** stdout:\n%s", exception.stdout.decode()) # noqa: TRY400 + logger.error("*** stderr:\n%s", exception.stderr.decode()) # noqa: TRY400 + raise + else: + logger.info("Success, returncode = %s", cp.returncode) + logger.info("stdout =\n%s", cp.stdout.decode()) + logger.info("stderr =\n%s", cp.stderr.decode()) + return cp + + +def wait_for_condition( + test_condition: Callable[..., bool], + *, + seconds_max: int = 1, + seconds_interval: int = 1, + progress_string_fn: Optional[Callable[..., str]] = None, +) -> None: + """ + Repeatedly test for a condition for the specified amount of time. + :param test_condition: the condition to test for. The name of this method is used in + the logging output so recommended to name it well. + :param seconds_max: maximum seconds to wait for condition to be true + :param seconds_interval: time to sleep in between attempts + :param progress_string_fn: callable to generate a status string (eg. partial success) that + will be part of the log message at each attempt + :raises AssertionError: if the condition doesn't occur during the specified period + """ + for seconds in range(0, seconds_max, seconds_interval): + success = test_condition() + # must evaluate progress string *after* condition has been tested so it is most up to date + progress_str = ": " + progress_string_fn() if progress_string_fn is not None else "" + if success: + logger.info("Achieved condition '%s' %s", test_condition.__name__, progress_str) + return + logger.info( + "Waiting for condition '%s' (%s seconds out of %s) %s", + test_condition.__name__, + seconds, + seconds_max, + progress_str, + ) + sleep(seconds_interval) + err_str = ( + f"Condition {test_condition.__name__} was not achieved even after {seconds_max} seconds" + ) + raise AssertionError(err_str) diff --git a/pytest-pixl/src/pytest_pixl/plugin.py b/pytest-pixl/src/pytest_pixl/plugin.py index b0e9a97b4..aaecdb3b5 100644 --- a/pytest-pixl/src/pytest_pixl/plugin.py +++ b/pytest-pixl/src/pytest_pixl/plugin.py @@ -12,25 +12,54 @@ # See the License for the specific language governing permissions and # limitations under the License. """Pytest fixtures.""" - -import os +import sys import threading from collections.abc import Generator +from enum import Enum import pytest from pytest_pixl.ftpserver import PixlFTPServer -@pytest.fixture(scope="module") -def ftps_server(tmp_path_factory) -> Generator[PixlFTPServer, None, None]: +class FtpHostAddress(Enum): + """ + Docker on Linux is such that if we bind the FTP server to localhost, traffic from container + to host can't get through. Instead you must bind to the docker host interface. 
+ + However, some tests initiate FTP connections from pytest (running on host) rather than + containers. These require the server to be bound to localhost. So the test itself needs + to tell us where to bind (using a fixture called ftp_host_address) + + Binding to all interfaces (0.0.0.0) would work, but is not ideal as you're opening up + the test FTP server to any passersby. + + On macOS, you can just bind to localhost and traffic from containers or localhost gets through. + Haven't tested Windows, am assuming the macOS way works. + """ + + LOCALHOST = 1 + DOCKERHOST = 2 + + def to_host_ip_address(self): + """Convert the test's requirement into a platform-dependent host IP address""" + if self == FtpHostAddress.DOCKERHOST and sys.platform == "linux": + return "172.17.0.1" + return "127.0.0.1" + + +@pytest.fixture(scope="session") +def ftps_server( + tmp_path_factory, ftp_host_address: FtpHostAddress +) -> Generator[PixlFTPServer, None, None]: """ Spins up an FTPS server in a separate process for testing. Configuration is controlled by the FTP_* environment variables. """ - tmp_home_dir = tmp_path_factory.mktemp("ftps_server") / os.environ["FTP_USER_NAME"] - tmp_home_dir.mkdir() - ftps_server = PixlFTPServer(home_dir=str(tmp_home_dir)) + tmp_home_dir_root = tmp_path_factory.mktemp("ftps_server") + ftps_server = PixlFTPServer( + home_root=tmp_home_dir_root, host_address=ftp_host_address.to_host_ip_address() + ) thread = threading.Thread(target=ftps_server.server.serve_forever) thread.start() yield ftps_server diff --git a/ruff.toml b/ruff.toml index 15ae37d7e..b80ac34e9 100644 --- a/ruff.toml +++ b/ruff.toml @@ -48,7 +48,7 @@ mccabe.max-complexity = 18 exclude=["scripts"] [extend-per-file-ignores] -"**/tests/*" = ["PLR2004"] # Magic value used in comparison +"**/test*/*" = ["PLR2004"] # Magic value used in comparison "hasher/tests/*" = ["ARG001"] # unused function argument "env.py" = ["INP001", "E402", "ERA001"] "alembic/versions/*" = ["D103", "INP001"] \ No newline at end of file diff --git a/test/.env b/test/.env index 3dadc3eed..b77a670fd 100644 --- a/test/.env +++ b/test/.env @@ -30,7 +30,7 @@ PIXL_EHR_API_PORT=7006 PIXL_IMAGING_API_PORT=7007 RABBITMQ_PORT=7008 RABBITMQ_ADMIN_PORT=7009 -FTP_PORT=21 +FTP_PORT=20021 # PIXL Orthanc raw instance ORTHANC_RAW_USERNAME=orthanc_raw_username @@ -63,6 +63,6 @@ RABBITMQ_USERNAME=rabbitmq_username RABBITMQ_PASSWORD=rabbitmq_password # FTP server -FTP_HOST=ftp-server -FTP_USER_NAME=ftp_username +FTP_HOST=host.docker.internal +FTP_USER_NAME=pixl_user FTP_USER_PASSWORD=longpassword diff --git a/test/README.md b/test/README.md index 450a06f2f..0ee56ab9e 100644 --- a/test/README.md +++ b/test/README.md @@ -18,6 +18,22 @@ You can run the system test with: ./run-system-test.sh ``` +Or to do all the setup but not run any tests: +```bash +./run-system-test.sh setup +``` + +You can then develop and run tests repeatedly with `pytest` or through your IDE. +But you are responsible for knowing +when to re-run the setup if something it depends on has changed. +Currently, the postgres container doesn't get properly set/reset by the tests so you may have +to re-run setup if you want to re-run certain tests. 
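For example, a typical iteration loop after the one-off setup might look like this (the `-k` filter is illustrative — adjust to the tests you are working on):

```bash
./run-system-test.sh setup
# iterate on tests without re-running setup each time
pytest --verbose --log-cli-level INFO system_test.py
pytest -k ftps   # e.g. just the FTPS upload tests
```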
+ +Run the following to teardown: +```bash +./run-system-test.sh teardown +``` + ## File organisation ### PIXL configuration diff --git a/test/conftest.py b/test/conftest.py new file mode 100644 index 000000000..e1309391f --- /dev/null +++ b/test/conftest.py @@ -0,0 +1,72 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""System/E2E test setup""" +from pathlib import Path + +import pytest +from pytest_pixl.helpers import run_subprocess +from pytest_pixl.plugin import FtpHostAddress +from utils import wait_for_stable_orthanc_anon + +pytest_plugins = "pytest_pixl" + + +@pytest.fixture() +def host_export_root_dir(): + """Intermediate export dir as seen from the host""" + return Path(__file__).parents[1] / "exports" + + +TEST_DIR = Path(__file__).parent +RESOURCES_DIR = TEST_DIR / "resources" +RESOURCES_OMOP_DIR = RESOURCES_DIR / "omop" + + +@pytest.fixture(scope="session") +def _setup_pixl_cli(ftps_server) -> None: + """Run pixl populate/start. Cleanup intermediate export dir on exit.""" + # CLI calls need to have CWD = test dir so they can find the pixl_config.yml file + run_subprocess(["pixl", "populate", str(RESOURCES_OMOP_DIR.absolute())], TEST_DIR) + run_subprocess(["pixl", "start"], TEST_DIR) + # poll here for two minutes to check for imaging to be processed, printing progress + wait_for_stable_orthanc_anon(121, 5) + yield + run_subprocess( + [ + "docker", + "exec", + "system-test-ehr-api-1", + "rm", + "-r", + "/run/exports/test-extract-uclh-omop-cdm/", + ], + TEST_DIR, + ) + + +@pytest.fixture(scope="session") +def ftp_host_address(): + """Run FTP on docker host - docker containers do need to access it""" + return FtpHostAddress.DOCKERHOST + + +@pytest.fixture(scope="session") +def _extract_radiology_reports(_setup_pixl_cli) -> None: + """ + run pixl extract-radiology-reports. No subsequent wait is needed, because this API call + is synchronous (whether that is itself wise is another matter). 
+ """ + run_subprocess( + ["pixl", "extract-radiology-reports", str(RESOURCES_OMOP_DIR.absolute())], TEST_DIR + ) diff --git a/test/docker-compose.yml b/test/docker-compose.yml index b8ff98416..32a91e356 100644 --- a/test/docker-compose.yml +++ b/test/docker-compose.yml @@ -74,26 +74,3 @@ services: retries: 5 networks: pixl-net: - - ftp-server: - container_name: system-test-ftp-server - build: - context: ./dummy-services/ftp-server - ports: - - "127.0.0.1:20021:21" - - "21000-21010:21000-21010" - volumes: - # Mount for uploaded data - - "./dummy-services/ftp-server/mounts/data/:/home/${FTP_USER_NAME}/" - environment: - ADDRESS: ${FTP_HOST} - USERS: ${FTP_USER_NAME}|${FTP_USER_PASSWORD}|/home/${FTP_USER_NAME} - TLS_KEY: /etc/ssl/private/localhost.key - TLS_CERT: /etc/ssl/private/localhost.crt - healthcheck: - test: ping localhost:21 -w 2 - interval: 10s - timeout: 5s - retries: 5 - networks: - pixl-net: diff --git a/test/dummy-services/ftp-server/Dockerfile b/test/dummy-services/ftp-server/Dockerfile deleted file mode 100644 index dcb3c0617..000000000 --- a/test/dummy-services/ftp-server/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -FROM delfer/alpine-ftp-server - -COPY ssl/ /etc/ssl/private/ - -SHELL ["/bin/sh", "-c"] -RUN echo $'# Attempting to enable FTPS.\n\ -ssl_enable=YES\n\ -ssl_sslv2=YES\n\ -ssl_sslv3=YES\n\ -implicit_ssl=YES\n\ -require_ssl_reuse=NO\n\ -xferlog_std_format=YES\n\ -log_ftp_protocol=YES\n\ -vsftpd_log_file=/var/log/vsftpd.log\n\ -syslog_enable=NO' >> /etc/vsftpd/vsftpd.conf diff --git a/test/dummy-services/ftp-server/ssl/localhost.crt b/test/dummy-services/ftp-server/ssl/localhost.crt deleted file mode 100644 index e4ae63bd2..000000000 --- a/test/dummy-services/ftp-server/ssl/localhost.crt +++ /dev/null @@ -1,19 +0,0 @@ ------BEGIN CERTIFICATE----- -MIIDDzCCAfegAwIBAgIUAp2Lf0PMph7l0wbBBawd8gT+esEwDQYJKoZIhvcNAQEL -BQAwFDESMBAGA1UEAwwJbG9jYWxob3N0MB4XDTI0MDExNTE4MTY0NFoXDTI0MDIx -NDE4MTY0NFowFDESMBAGA1UEAwwJbG9jYWxob3N0MIIBIjANBgkqhkiG9w0BAQEF -AAOCAQ8AMIIBCgKCAQEAw2CKWkHi4iiPVXhxiHXkwFsYPRidLgSPc872NXGvnSA4 -XyjBs1mo2DV8GZdafAYcpB1vbmSXaIp8l6K/kXu4Xg+PX0H1e815/hh0F4BuGfIh -RunYMSJDnAQrW14pfJb+xmJeCaMPG6F83eOm+p672Emoy+ny2cAlXApV/yuFsUd+ -hq2aGhX2QkN3LD/Q3qOcRqt1I2+kzWgQs3NffEm+Rm22RnIlc3utiAZ84AnKDabe -seMeqXq7UGh8qIpL17aaWmOHouUzncpmf7Qd3nZejDiebbYoULFQt7y4qbaFJHbx -kZT1zxJdgh/nBVakYwTeMmyIWDUx/MRvIiG1sC0ujwIDAQABo1kwVzAUBgNVHREE -DTALgglsb2NhbGhvc3QwCwYDVR0PBAQDAgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMB -MB0GA1UdDgQWBBRL0Yu9JdkI1LPobvtjaXDf+wjPkjANBgkqhkiG9w0BAQsFAAOC -AQEArFruYj8CADxeHD1nRyCm7KIL1Z8jQFpw+GbV34eM583Q5lokJW7bfamqvdDj -46Mb77U/q6OA8si+HVQ5AOUoIM6qQkWitM8PQT5Unf8juHOaAnKJS+jebQccm9B0 -ZvRsXE900Z6dyxPUjKO+RUxloeyoSjqU6Sss9VzLudr2zXC1Hz76D38ZtoUYgXlA -amqkJ6TS5f5YXvMmY3ofeApxmVYniCJWxzhlvxyED6egKKeyaFVLfR/4UfaWSm1S -8UADyVLjGjhEvXBOEiwNQUz8/p6HePuE1bXP3vRM/nveWPzRXDceIisniJ2Jfrqn -NKY9u2LIiGYON4dL9Lfh8xzdMg== ------END CERTIFICATE----- diff --git 
a/test/dummy-services/ftp-server/ssl/localhost.key b/test/dummy-services/ftp-server/ssl/localhost.key deleted file mode 100644 index 4351caa7a..000000000 --- a/test/dummy-services/ftp-server/ssl/localhost.key +++ /dev/null @@ -1,28 +0,0 @@ ------BEGIN PRIVATE KEY----- -MIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQDDYIpaQeLiKI9V -eHGIdeTAWxg9GJ0uBI9zzvY1ca+dIDhfKMGzWajYNXwZl1p8BhykHW9uZJdoinyX -or+Re7heD49fQfV7zXn+GHQXgG4Z8iFG6dgxIkOcBCtbXil8lv7GYl4Jow8boXzd -46b6nrvYSajL6fLZwCVcClX/K4WxR36GrZoaFfZCQ3csP9Deo5xGq3Ujb6TNaBCz -c198Sb5GbbZGciVze62IBnzgCcoNpt6x4x6pertQaHyoikvXtppaY4ei5TOdymZ/ -tB3edl6MOJ5ttihQsVC3vLiptoUkdvGRlPXPEl2CH+cFVqRjBN4ybIhYNTH8xG8i -IbWwLS6PAgMBAAECggEADeRK4hEGOftqpEf3QUh/VoKuTgXL2Jj1aZAWfIVUAxw7 -4ImW1ZLzSPBAndc1AAA1QCFhqhvtPL8quGNs3ezJ6/ziPlfZGw+TSX22n3HVIkjP -lJRi+AIOLu/dn1ZUz9QyqFYX2/U5SUcpsuM8HQd1UXMnVTG8v+kuyYUTSUdFxG2H -gkKNn3D60EdKrOjtYQGMk18frwFs0e2tBu4GSkLqKE/07O6JYM6BxO6UBrIzKuOM -ckGSNt+e+MuSvcfY011y6oOmzCnxuRLr8y5FFFIJItXzBYopgV7WQlClMhn1YOAo -4JHSgKFig7UaViQRKdP4TCet70MRiOg3RkAcNn47xQKBgQD08nOS660TLcSuIbYk -pvWZVkuRYgJ4+GVKdsQqTNwgCNxRzWePktxER2+X4Vm3y77TR1JByPIrBPIUzqT7 -t4hEe1Uxq9ia3PIO5QBo3aYWkRuD2Zr7jJoz/kt/v8EQxqwSlOPsgsxS1B2/JeXU -G1Id5N39ZtlA1EsfYLBQYlf0MwKBgQDMMXkTS5p49oxpkR69OW66YUKmk1GVdSd0 -8SIbVvjVnUD4FksJphDHo3CPPZQyrx6f/x27yhUJ1JpCdLIkVoR6/ipPMxUsVT3r -pXUqJLowguV6avKjOlojWrUpRUZg9H5K3zTh8BEqXU7M6QX/ipPS1mhY18twmwFF -qHrrlYngNQKBgQCW+GxFbJ7DCF2F2d9SndkYBjkTRS4y8x4zFBp52998mxl4Dbq7 -og/CwajFGq7aemiF/hpz0293FlhCfM7xGkCRvNZYhAs//9ftWDW1bI92rz8fN9qv -Ggzc6OoNtdlABsN9vGjyl/dpQbWH38rUvXYSWMJ98YsLywz2LPjewo5lQwKBgQCh -TkPTVlpFTJ9HlfiuivxKCCNa6+37H314CDVlW6NfPMbiNNo6WRHQY/C7d23nTCfp -ROP8QXu1NFpYPU+tHRuy/a32uufzCbPMv/x6umDLidw2hN4AzEwAudt7KtqaJQrP -otxnz/n/eY6SmVK/uH1mhTIRXQe9gPXUTXAtQwiUMQKBgFMYe/HNVNbR90e6D8Av -DoWzcwaYdl2jrE+E7LtIUED8kSpBQyZ5jt/UxoUnlWA4c4lzzblNk5WEJUNQBghf -bpBXvv1emWp10snQM/KdFCCIINY3OMnoWlfsnvTyn8mkxEAeJM0xzqEUMQcks3rM -sUzjMP2hWu67y1Hy03QKq+Lf ------END PRIVATE KEY----- diff --git a/test/run-system-test.sh b/test/run-system-test.sh index d26123276..6fc16038d 100755 --- a/test/run-system-test.sh +++ b/test/run-system-test.sh @@ -17,44 +17,36 @@ BIN_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) PACKAGE_DIR="${BIN_DIR%/*}" cd "${PACKAGE_DIR}/test" -docker compose --env-file .env -p system-test down --volumes -# -# Note: cannot run as single docker compose command due to different build contexts -docker compose --env-file .env -p system-test up --wait -d --build --remove-orphans -# Warning: Requires to be run from the project root -(cd .. 
&& \ - docker compose --env-file test/.env -p system-test up --wait -d --build) - -./scripts/insert_test_data.sh - -# Install pixl_cli and test dependencies -pip install -e "${PACKAGE_DIR}/pixl_core" && \ - pip install -e "${PACKAGE_DIR}/cli" && \ - pip install -e "${PACKAGE_DIR}/pytest-pixl" - - -pixl populate "${PACKAGE_DIR}/test/resources/omop" -pixl start - -# need to wait until the DICOM image is "stable" so poll for 2 minutes to check -./scripts/check_entry_in_orthanc_anon_for_2_min.py -./scripts/check_entry_in_pixl_anon.sh - -# test export and upload -pixl extract-radiology-reports "${PACKAGE_DIR}/test/resources/omop" -./scripts/check_radiology_parquet.py \ - ../exports/test-extract-uclh-omop-cdm/latest/radiology/radiology.parquet -./scripts/check_ftps_upload.py - - -ls -laR ../exports/ -docker exec system-test-ehr-api-1 rm -r /run/exports/test-extract-uclh-omop-cdm/ - -# This checks that orthanc-raw acknowledges the configured maximum storage size -./scripts/check_max_storage_in_orthanc_raw.sh - -# Run this last because it will force out original test images from orthanc-raw -./scripts/check_max_storage_in_orthanc_raw.py +setup () { + docker compose --env-file .env -p system-test down --volumes + # + # Note: cannot run as single docker compose command due to different build contexts + docker compose --env-file .env -p system-test up --wait -d --build --remove-orphans + # Warning: Requires to be run from the project root + (cd "${PACKAGE_DIR}" && \ + docker compose --env-file test/.env -p system-test up --wait -d --build) + + ./scripts/insert_test_data.sh +} + +teardown () { + (cd "${PACKAGE_DIR}" && \ + docker compose -f docker-compose.yml -f test/docker-compose.yml -p system-test down --volumes) +} + +# Allow user to perform just setup so that pytest may be run repeatedly without +# redoing the setup again and again. This means that pytest must now be responsible +# for clearing up anything it creates (export temp dir?) +subcmd=${1:-""} +if [ "$subcmd" = "setup" ]; then + setup +elif [ "$subcmd" = "teardown" ]; then + teardown +else + setup + pytest --verbose --log-cli-level INFO + echo FINISHED PYTEST COMMAND + teardown + echo SYSTEM TEST SUCCESSFUL +fi -cd "${PACKAGE_DIR}" -docker compose -f docker-compose.yml -f test/docker-compose.yml -p system-test down --volumes diff --git a/test/scripts/check_entry_in_orthanc_anon_for_2_min.py b/test/scripts/check_entry_in_orthanc_anon_for_2_min.py deleted file mode 100755 index b753acd19..000000000 --- a/test/scripts/check_entry_in_orthanc_anon_for_2_min.py +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -After pixl has run this script will query the orthanc-anon REST API to check -that the correct number of instances have been received. 
- -Because we have to wait for a stable study, poll for 2 minutes -""" - -import json -import shlex -import subprocess -from time import sleep - -SECONDS_WAIT = 5 - -# Check for two minutes that the instances have been received in orthanc anon -instances = [] -for seconds in range(0, 121, SECONDS_WAIT): - instances_cmd = shlex.split('docker exec system-test-orthanc-anon-1 curl -u "orthanc_anon_username:orthanc_anon_password" http://orthanc-anon:8042/instances') - instances_output = subprocess.run(instances_cmd, capture_output=True, check=True, text=True) - instances = json.loads(instances_output.stdout) - print(f"Waited for {seconds} seconds, orthanc-anon instances: {instances}") - if len(instances) == 2: - break - sleep(SECONDS_WAIT) - -assert len(instances) == 2 diff --git a/test/scripts/check_entry_in_pixl_anon.sh b/test/scripts/check_entry_in_pixl_anon.sh deleted file mode 100755 index 540e7d443..000000000 --- a/test/scripts/check_entry_in_pixl_anon.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -euxo pipefail - -_sql_command="select * from emap_data.ehr_anon" -_result=$(docker exec system-test-postgres-1 /bin/bash -c \ - "PGPASSWORD=pixl_db_password psql -U pixl_db_username -d pixl -c \"$_sql_command\"") -echo "$_result" | grep -q "2 row" diff --git a/test/scripts/check_ftps_upload.py b/test/scripts/check_ftps_upload.py deleted file mode 100755 index d3846b125..000000000 --- a/test/scripts/check_ftps_upload.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from pathlib import Path -from shutil import rmtree -from time import sleep - -MOUNTED_DATA_DIR = Path(__file__).parents[1] / "dummy-services" / "ftp-server" / "mounts" / "data" -print(f"mounted data dir: {MOUNTED_DATA_DIR}") - -project_slug = "test-extract-uclh-omop-cdm" -extract_time_slug = "2023-12-07t14-08-58" - -expected_output_dir = MOUNTED_DATA_DIR / project_slug -expected_public_parquet_dir = expected_output_dir / extract_time_slug / "parquet" -print(f"expected output dir: {expected_output_dir}") -print(f"expected parquet files dir: {expected_public_parquet_dir}") - -SECONDS_WAIT = 5 - -zip_files = [] -for seconds in range(0, 121, SECONDS_WAIT): - # Test whether DICOM images have been uploaded - zip_files = list(expected_output_dir.glob("*.zip")) - print(f"Waited for {seconds} seconds. glob_list: {zip_files}") - if len(zip_files) == 2: - break - sleep(SECONDS_WAIT) - -# We expect 2 DICOM image studies to be uploaded -assert len(zip_files) == 2 - -# check the copied files -assert expected_public_parquet_dir.exists() -assert (expected_public_parquet_dir / "omop" / "public" / "PROCEDURE_OCCURRENCE.parquet").exists() -assert (expected_public_parquet_dir / "radiology" / "radiology.parquet").exists() - -# Clean up; only happens if the assertion passes -rmtree(expected_output_dir, ignore_errors=True) diff --git a/test/scripts/check_max_storage_in_orthanc_raw.py b/test/scripts/check_max_storage_in_orthanc_raw.py deleted file mode 100755 index 8bb0cb7fb..000000000 --- a/test/scripts/check_max_storage_in_orthanc_raw.py +++ /dev/null @@ -1,95 +0,0 @@ -#!/usr/bin/env python3 - -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -We need to test that orthanc-raw is enforcing the configured maximum storage -limit. This script will upload sufficient instances to orthanc-raw that the -maximum storage capacity will be reached and then confirm that the original test -images are no longer stored. - -Polling to allow for orthanc processing time. 
-""" - -from pathlib import Path -import tempfile -from time import sleep - -from decouple import config - -import requests - -from pytest_pixl.dicom import write_volume - -SECONDS_WAIT = 5 - -raw_instances_url = "http://localhost:{0}/instances".format( - config("ORTHANC_RAW_WEB_PORT") -) - -# Check we have the 2 instances expected from insert_test_data.sh -for seconds in range(0, 121, SECONDS_WAIT): - original_instances = requests.get( - raw_instances_url, - auth=(config("ORTHANC_RAW_USERNAME"), config("ORTHANC_RAW_PASSWORD")), - ).json() - print(f"Waited for {seconds} seconds, orthanc-raw instances: {original_instances}") - if len(original_instances) == 2: - break - sleep(SECONDS_WAIT) -assert len(original_instances) == 2 - -# Upload enough new instances to exceed the configured max storage of orthanc-raw -with tempfile.TemporaryDirectory() as temp_dir: - write_volume(temp_dir + "/dcm{slice:03d}.dcm") - n_dcm = 0 - for dcm in Path(temp_dir).glob("*.dcm"): - # We use data= rather than files= in this request because orthanc does not return JSON - # when files= is used. - upload_response = requests.post( - raw_instances_url, - auth=(config("ORTHANC_RAW_USERNAME"), config("ORTHANC_RAW_PASSWORD")), - data=open(dcm, "rb") - ) - if upload_response.status_code != requests.codes.ok: - # orthanc will eventually refuse more instances becuase the test - # exam we're using exceeds the max storage - if upload_response.json()["OrthancError"] == "The file storage is full": - # This is what we're looking for when storage limit reached - break - else: - # Something else happened preventing the upload - raise(RuntimeError(f"Failed to upload {dcm} to orthanc-raw")) - n_dcm += 1 - print(f"Uploaded {n_dcm} new instances") - -# Check the instances in orthanc-raw to see if the original instances have been removed -for seconds in range(0, 121, SECONDS_WAIT): - new_instances = requests.get( - raw_instances_url, - auth=(config("ORTHANC_RAW_USERNAME"), config("ORTHANC_RAW_PASSWORD")), - ).json() - print( - "Waited for {seconds} seconds, orthanc-raw contains {n_instances} instances".format( - seconds=seconds, n_instances=len(new_instances) - ) - ) - if any([instance in new_instances for instance in original_instances]): - sleep(SECONDS_WAIT) - else: - print("Original instances have been removed from orthanc-raw") - break - -assert not any([instance in new_instances for instance in original_instances]) diff --git a/test/scripts/check_max_storage_in_orthanc_raw.sh b/test/scripts/check_max_storage_in_orthanc_raw.sh deleted file mode 100755 index 66ccc75ea..000000000 --- a/test/scripts/check_max_storage_in_orthanc_raw.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-set -euxo pipefail - -# This could be much improved by having more realistic test data some of -# which actually was persisted -source ./.env -docker logs system-test-orthanc-raw-1 2>&1 | grep "At most ${ORTHANC_RAW_MAXIMUM_STORAGE_SIZE}MB will be used for the storage area" - diff --git a/test/scripts/check_radiology_parquet.py b/test/scripts/check_radiology_parquet.py deleted file mode 100755 index bce84895d..000000000 --- a/test/scripts/check_radiology_parquet.py +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/env python -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import logging -from pathlib import Path - -import pandas as pd -import sys - -logger = logging.getLogger(__name__) - -expected_parquet_file = Path(sys.argv[1]) -exported_data = pd.read_parquet(expected_parquet_file) - -print(exported_data.head()) - -parquet_header_names = ["image_identifier", "procedure_occurrence_id", "image_report"] - -# the fake DEID service adds this string to the end to confirm it has been through it -DE_ID_SUFFIX = '**DE-IDENTIFIED**' - -expected_rows = 2 -assert exported_data.shape[0] == expected_rows - -assert exported_data.loc[0].procedure_occurrence_id == 4 -assert exported_data.loc[0].image_report == 'this is a radiology report 1' + DE_ID_SUFFIX - -# blake2b-256 hash of string ('987654321' + 'AA12345601') with key = 'test_key' -assert exported_data.loc[0].image_identifier == 'a971b114b9133c81c03fb88c6a958f7d95eb1387f04c17ad7ff9ba7cf684c392' - -assert exported_data.loc[1].procedure_occurrence_id == 5 -assert exported_data.loc[1].image_report == 'this is a radiology report 2' + DE_ID_SUFFIX - -# blake2b-256 hash of string ('987654321' + 'AA12345605') with key = 'test_key' -assert exported_data.loc[1].image_identifier == 'f71b228fa97d6c87db751e0bb35605fd9d4c1274834be4bc4bb0923ab8029b2a' - -# Files must not be owned by root - they'll be hard to delete and we shouldn't be running our -# containers as root anyway. -file_stats = expected_parquet_file.stat() -try: - assert file_stats.st_uid != 0 - assert file_stats.st_gid != 0 -except AssertionError: - logger.exception("Known bug: files should not be owned by root") diff --git a/test/system_test.py b/test/system_test.py new file mode 100644 index 000000000..22369242a --- /dev/null +++ b/test/system_test.py @@ -0,0 +1,104 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Replacement for the 'interesting' bits of the system/E2E test""" +import logging +from pathlib import Path + +import pytest +from pytest_pixl.helpers import run_subprocess, wait_for_condition + +pytest_plugins = "pytest_pixl" + + +class TestFtpsUpload: + """tests adapted from ./scripts/check_ftps_upload.py""" + + @pytest.fixture(scope="class", autouse=True) + def _setup(self, ftps_server) -> None: + """Shared test data for the two different kinds of FTP upload test""" + TestFtpsUpload.ftp_home_dir = ftps_server.home_dir + logging.info("ftp home dir: %s", TestFtpsUpload.ftp_home_dir) + + TestFtpsUpload.project_slug = "test-extract-uclh-omop-cdm" + TestFtpsUpload.extract_time_slug = "2023-12-07t14-08-58" + + TestFtpsUpload.expected_output_dir = ( + TestFtpsUpload.ftp_home_dir / TestFtpsUpload.project_slug + ) + TestFtpsUpload.expected_public_parquet_dir = ( + TestFtpsUpload.expected_output_dir / TestFtpsUpload.extract_time_slug / "parquet" + ) + logging.info("expected output dir: %s", TestFtpsUpload.expected_output_dir) + logging.info("expected parquet files dir: %s", TestFtpsUpload.expected_public_parquet_dir) + # No cleanup of ftp uploads needed because it's in a temp dir + + @pytest.mark.usefixtures("_extract_radiology_reports") + def test_ftps_parquet_upload(self): + """The copied parquet files""" + assert TestFtpsUpload.expected_public_parquet_dir.exists() + + assert ( + TestFtpsUpload.expected_public_parquet_dir + / "omop" + / "public" + / "PROCEDURE_OCCURRENCE.parquet" + ).exists() + assert ( + TestFtpsUpload.expected_public_parquet_dir / "radiology" / "radiology.parquet" + ).exists() + + @pytest.mark.usefixtures("_extract_radiology_reports") + def test_ftps_dicom_upload(self): + """Test whether DICOM images have been uploaded""" + zip_files = [] + + def zip_file_list() -> str: + return f"zip files found: {zip_files}" + + def two_zip_files_present() -> bool: + nonlocal zip_files + zip_files = list(TestFtpsUpload.expected_output_dir.glob("*.zip")) + # We expect 2 DICOM image studies to be uploaded + return len(zip_files) == 2 + + wait_for_condition( + two_zip_files_present, + seconds_max=121, + seconds_interval=5, + progress_string_fn=zip_file_list, + ) + + +@pytest.mark.usefixtures("_setup_pixl_cli") +def test_ehr_anon_entries(): + """Check data has reached ehr_anon.""" + + def exists_two_rows() -> bool: + # This was converted from old shell script - better to check more than just row count? + sql_command = "select * from emap_data.ehr_anon" + cp = run_subprocess( + [ + "docker", + "exec", + "system-test-postgres-1", + "/bin/bash", + "-c", + f'PGPASSWORD=pixl_db_password psql -U pixl_db_username -d pixl -c "{sql_command}"', + ], + Path.cwd(), + ) + return cp.stdout.decode().find("(2 rows)") != -1 + + # We already waited in _setup_pixl_cli, so should be true immediately. + wait_for_condition(exists_two_rows) diff --git a/test/test_radiology_parquet.py b/test/test_radiology_parquet.py new file mode 100644 index 000000000..84f722269 --- /dev/null +++ b/test/test_radiology_parquet.py @@ -0,0 +1,78 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Check validity of radiology export""" +import logging +from pathlib import Path + +import pandas as pd +import pytest + +logger = logging.getLogger(__name__) + + +@pytest.mark.usefixtures("_extract_radiology_reports") +def test_radiology_parquet(host_export_root_dir: Path): + """ + From: + scripts/test_radiology_parquet.py \ + ../exports/test-extract-uclh-omop-cdm/latest/radiology/radiology.parquet + Test contents of radiology report parquet file in the export location + """ + expected_radiology_parquet_file = ( + host_export_root_dir + / "test-extract-uclh-omop-cdm" + / "latest" + / "radiology" + / "radiology.parquet" + ) + + exported_data = pd.read_parquet(expected_radiology_parquet_file) + + logger.warning(exported_data.head()) + + parquet_header_names = ["image_identifier", "procedure_occurrence_id", "image_report"] + assert (exported_data.columns == parquet_header_names).all() + + # the fake DEID service adds this string to the end to confirm it has been through it + DE_ID_SUFFIX = "**DE-IDENTIFIED**" + + expected_rows = 2 + assert exported_data.shape[0] == expected_rows + + assert exported_data.loc[0].procedure_occurrence_id == 4 + assert exported_data.loc[0].image_report == "this is a radiology report 1" + DE_ID_SUFFIX + + # blake2b-256 hash of string ('987654321' + 'AA12345601') with key = 'test_key' + assert ( + exported_data.loc[0].image_identifier + == "a971b114b9133c81c03fb88c6a958f7d95eb1387f04c17ad7ff9ba7cf684c392" + ) + + assert exported_data.loc[1].procedure_occurrence_id == 5 + assert exported_data.loc[1].image_report == "this is a radiology report 2" + DE_ID_SUFFIX + + # blake2b-256 hash of string ('987654321' + 'AA12345605') with key = 'test_key' + assert ( + exported_data.loc[1].image_identifier + == "f71b228fa97d6c87db751e0bb35605fd9d4c1274834be4bc4bb0923ab8029b2a" + ) + + # Files must not be owned by root - they'll be hard to delete and we shouldn't be running our + # containers as root anyway. + file_stats = expected_radiology_parquet_file.stat() + try: + assert file_stats.st_uid != 0 + assert file_stats.st_gid != 0 + except AssertionError: + logger.exception("Known bug: files should not be owned by root") diff --git a/test/utils.py b/test/utils.py new file mode 100644 index 000000000..70d0a74ee --- /dev/null +++ b/test/utils.py @@ -0,0 +1,50 @@ +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Utilities for the system test""" +import json +import shlex +import subprocess + +from pytest_pixl.helpers import wait_for_condition + + +def wait_for_stable_orthanc_anon(seconds_max, seconds_interval) -> None: + """ + Query the orthanc-anon REST API to check that the correct number of instances + have been received. + If they haven't within the time limit, raise a TimeoutError + """ + instances = [] + + def are_two_instances() -> bool: + nonlocal instances + instances_cmd = shlex.split( + "docker exec system-test-orthanc-anon-1 " + 'curl -u "orthanc_anon_username:orthanc_anon_password" ' + "http://orthanc-anon:8042/instances" + ) + instances_output = subprocess.run(instances_cmd, capture_output=True, check=True, text=True) # noqa: S603 + instances = json.loads(instances_output.stdout) + return len(instances) == 2 + + def list_instances() -> str: + return f"orthanc-anon instances: {instances}" + + wait_for_condition( + are_two_instances, + seconds_max=seconds_max, + seconds_interval=seconds_interval, + progress_string_fn=list_instances, + )
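A usage sketch for these helpers, assuming the system-test containers are already up (fixture name as defined in test/conftest.py; the test body and polling bounds are illustrative, not part of this PR):

```python
# Hypothetical test module: poll orthanc-anon, then poll the FTPS upload dir.
from pytest_pixl.helpers import wait_for_condition
from utils import wait_for_stable_orthanc_anon


def test_instances_arrive_then_uploads_exist(ftps_server):
    # raises AssertionError if 2 instances never appear within ~2 minutes
    wait_for_stable_orthanc_anon(seconds_max=121, seconds_interval=5)

    def upload_dir_nonempty() -> bool:
        return any(ftps_server.home_dir.iterdir())

    wait_for_condition(upload_dir_nonempty, seconds_max=30, seconds_interval=5)
```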