diff --git a/.env.sample b/.env.sample index 19d922e4d..23ce281d8 100644 --- a/.env.sample +++ b/.env.sample @@ -43,16 +43,16 @@ ORTHANC_RAW_MAXIMUM_STORAGE_SIZE= // MB # PIXL Orthanc anon instance ORTHANC_ANON_USERNAME= -ORTHANC_ANON_PASSWORD= +ORTHANC_ANON_PASSWORD= ORTHANC_ANON_AE_TITLE= ORTHANC_ANON_HTTP_TIMEOUT=60 -ORTHANC_AUTOROUTE_ANON_TO_AZURE=false +ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT=false ENABLE_DICOM_WEB=true STUDY_TIME_OFFSET= SALT_VALUE=PIXL -# UCVNAQR DICOM node information +# UCVNAQR DICOM node information VNAQR_AE_TITLE= VNAQR_DICOM_PORT= VNAQR_IP_ADDR= @@ -83,3 +83,9 @@ RABBITMQ_PASSWORD= # PACS extraction API PIXL_DICOM_TRANSFER_TIMEOUT=240 PIXL_QUERY_TIMEOUT=10 + +# FTP server +FTP_HOST= +FTP_USER_NAME= +FTP_USER_PASSWORD= +FTP_PORT= diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a5dd3b952..3bf9f9a28 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -211,6 +211,11 @@ jobs: timeout-minutes: 30 steps: - uses: actions/checkout@v3 + - name: Prune docker volumes + # seems like we're getting an error from ftp-server exiting with zero status code + # this is a workaround that resolves it locally 🤞 + run: | + docker volume prune --force - uses: docker/setup-buildx-action@v3 # pre-build and cache the postgres container as installing python3 takes a while, doesn't push diff --git a/.gitignore b/.gitignore index e8f43ec66..ed0409bdb 100644 --- a/.gitignore +++ b/.gitignore @@ -148,4 +148,4 @@ dmypy.json # project specific files /exports/ **/test_producer.csv -pixl_core/tests/ftp-server/mounts/data/* \ No newline at end of file +/test/dummy-services/ftp-server/mounts/data/* diff --git a/cli/pyproject.toml b/cli/pyproject.toml index fbc33dd7b..3c7f01d03 100644 --- a/cli/pyproject.toml +++ b/cli/pyproject.toml @@ -6,7 +6,7 @@ authors = [ ] description = "PIXL command line interface" readme = "README.md" -requires-python = "<3.11" +requires-python = "<3.12" classifiers = [ "Programming Language :: Python :: 
3" ] diff --git a/docker-compose.yml b/docker-compose.yml index 13c3a9f5b..a73f686d0 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -55,6 +55,12 @@ x-pixl-db: &pixl-db PIXL_DB_PASSWORD: ${PIXL_DB_PASSWORD} PIXL_DB_NAME: ${PIXL_DB_NAME} +x-ftp-host: &ftp-host + FTP_HOST: ${FTP_HOST} + FTP_USER_NAME: ${FTP_USER_NAME} + FTP_USER_PASSWORD: ${FTP_USER_PASSWORD} + FTP_PORT: ${FTP_PORT} + x-logs-volume: &logs-volume type: volume source: logs @@ -110,12 +116,12 @@ services: <<: *build-args-common command: /run/secrets environment: - <<: [*proxy-common, *pixl-common-env] + <<: [*proxy-common, *pixl-common-env, *ftp-host] ORTHANC_NAME: "PIXL: Anon" ORTHANC_USERNAME: ${ORTHANC_ANON_USERNAME} ORTHANC_PASSWORD: ${ORTHANC_ANON_PASSWORD} ORTHANC_ANON_AE_TITLE: ${ORTHANC_ANON_AE_TITLE} - ORTHANC_AUTOROUTE_ANON_TO_AZURE: ${ORTHANC_AUTOROUTE_ANON_TO_AZURE} + ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT: ${ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT} ORTHANC_RAW_AE_TITLE: ${ORTHANC_RAW_AE_TITLE} ORTHANC_RAW_DICOM_PORT: "4242" ORTHANC_RAW_HOSTNAME: "orthanc-raw" @@ -223,7 +229,7 @@ services: args: <<: *build-args-common environment: - <<: [*pixl-db, *emap-db, *proxy-common, *pixl-common-env, *pixl-rabbit-mq] + <<: [*pixl-db, *emap-db, *proxy-common, *pixl-common-env, *pixl-rabbit-mq, *ftp-host] AZURE_CLIENT_ID: ${PIXL_EHR_API_AZ_CLIENT_ID} AZURE_CLIENT_SECRET: ${PIXL_EHR_API_AZ_CLIENT_SECRET} AZURE_TENANT_ID: ${PIXL_EHR_API_AZ_TENANT_ID} diff --git a/docs/services/ftp-server.md b/docs/services/ftp-server.md new file mode 100644 index 000000000..8b3a31216 --- /dev/null +++ b/docs/services/ftp-server.md @@ -0,0 +1,26 @@ +# FTPS server + +Currently, we can only upload files to the Data Safe Haven (DSH) through an +[FTPS](https://en.wikipedia.org/wiki/FTPS) connection. + +The [`core.upload`](../../pixl_core/src/core/upload.py) module implements functionality to upload +DICOM tags and parquet files to an **FTPS server**. 
This requires the following environment +variables to be set: + +- `FTP_HOST`: URL to the FTPS server +- `FTP_PORT`: port on which the FTPS server is listening +- `FTP_USER_NAME`: name of user with access to the FTPS server +- `FTP_USER_PASSWORD`: password for the authorised user + +We provide mock values for these for the unit tests (see +[`./tests/conftest.py`](./tests/conftest.py)). When running in production, these should be defined +in the `.env` file (see [the example](../.env.sample)). + +For the `pixl_core` unit tests and the system test, we spin up an FTPS server with a Docker +container, defined in [`test/dummy-services/ftp-server`](../../test/dummy-services/ftp-server/) and +set the necessary environment variables in [`.env.test`](../../test/.env.test). + +## FTPS test server + +We provide a Docker container to spin up a test FTPS server. The documentation for this can be found +in [`test/README.md`](../../test/README.md). diff --git a/orthanc/orthanc-anon/plugin/pixl.py b/orthanc/orthanc-anon/plugin/pixl.py index 6168427d7..cefb9f3b2 100644 --- a/orthanc/orthanc-anon/plugin/pixl.py +++ b/orthanc/orthanc-anon/plugin/pixl.py @@ -28,6 +28,7 @@ from io import BytesIO from pathlib import Path from time import sleep +from typing import TYPE_CHECKING import requests import yaml @@ -38,14 +39,18 @@ import orthanc import pixl_dcmd +if TYPE_CHECKING: + from typing import Any + ORTHANC_USERNAME = config("ORTHANC_USERNAME") ORTHANC_PASSWORD = config("ORTHANC_PASSWORD") ORTHANC_URL = "http://localhost:8042" +logger = logging.getLogger(__name__) + def AzureAccessToken(): """ - Send payload to oath2/token url and return the response """ @@ -69,7 +74,6 @@ def AzureAccessToken(): def AzureDICOMTokenRefresh(): """ - Refresh Azure DICOM token If this fails then wait 30s and try again If successful then access_token can be used in @@ -141,7 +145,7 @@ def SendViaStow(resourceId): payload = {"Resources": [resourceId], "Synchronous": False} - logging.info("Payload: %s", 
payload) + logger.info("Payload: %s", payload) try: requests.post( @@ -160,51 +164,80 @@ def SendViaFTPS(resourceId: str) -> None: Makes a POST API call to upload the resource to an FTPS server using orthanc credentials as authorisation """ - # Query orthanc-anon for the study + msg = f"Sending {resourceId} via FTPS" + logging.info(msg) + # Download zip archive of the DICOM resource query = f"{ORTHANC_URL}/studies/{resourceId}/archive" - try: - response_study = requests.get(query, auth=(ORTHANC_USERNAME, ORTHANC_PASSWORD), timeout=10) - success_code = 200 - if response_study.status_code != success_code: - msg = "Could not download archive of resource '%s'" - raise RuntimeError(msg, resourceId) - except requests.exceptions.RequestException: - orthanc.LogError(f"Failed to query'{resourceId}'") + fail_msg = "Could not download archive of resource '%s'" + response_study = _query(resourceId, query, fail_msg) # get the zip content zip_content = response_study.content - logging.info("Downloaded data for resource %s", resourceId) + logger.info("Downloaded data for resource %s", resourceId) - upload.upload_dicom_image(zip_content, resourceId) + upload.upload_dicom_image(BytesIO(zip_content), _get_patient_id(resourceId)) + logger.info("Uploaded data to FTPS for resource %s", resourceId) -def ShouldAutoRoute(): +def _get_patient_id(resourceId: str) -> str: + """ + Queries the Orthanc instance to get the PatientID for a given resource. + When anonymisation has been applied, the PatientID is the pseudo-anonymised ID. 
""" - Checks whether ORTHANC_AUTOROUTE_ANON_TO_AZURE environment variable is + query = f"{ORTHANC_URL}/studies/{resourceId}" + fail_msg = "Could not query study for resource '%s'" + + response_study = _query(resourceId, query, fail_msg) + json_response = json.loads(response_study.content.decode()) + return str(json_response["PatientMainDicomTags"]["PatientID"]) + + +def _query(resourceId: str, query: str, fail_msg: str) -> requests.Response: + try: + response = requests.get(query, auth=(ORTHANC_USERNAME, ORTHANC_PASSWORD), timeout=10) + success_code = 200 + if response.status_code != success_code: + raise RuntimeError(fail_msg, resourceId) + except requests.exceptions.RequestException as request_exception: + orthanc.LogError(f"Failed to query'{resourceId}'") + raise SystemExit from request_exception + else: + return response + + +def ShouldAutoRoute() -> bool: + """ + Checks whether ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT environment variable is set to true or false """ - return os.environ.get("ORTHANC_AUTOROUTE_ANON_TO_AZURE", "false").lower() == "true" + logger.debug("Checking value of autoroute") + return os.environ.get("ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT", "false").lower() == "true" + +def _azure_available() -> bool: + # Check if AZ_DICOM_ENDPOINT_CLIENT_ID is set + return config("AZ_DICOM_ENDPOINT_CLIENT_ID", default="") != "" -def OnChange(changeType, level, resource) -> None: # noqa: ARG001 + +def OnChange(changeType, level, resource): # noqa: ARG001 """ Three ChangeTypes included in this function: - - If a study if stable and if ShouldAutoRoute returns true - then SendViaStow is called + - If a study is stable and if ShouldAutoRoute returns true + then SendViaFTPS is called - If orthanc has started then message added to Orthanc LogWarning and AzureDICOMTokenRefresh called - If orthanc has stopped and TIMER is not none then message added to Orthanc LogWarning and TIMER cancelled - """ if not ShouldAutoRoute(): return if changeType == 
orthanc.ChangeType.STABLE_STUDY and ShouldAutoRoute(): - print("Stable study: %s" % resource) # noqa: T201 - SendViaStow(resource) + msg = f"Stable study: {resource}" + logger.info(msg) + SendViaFTPS(resource) - if changeType == orthanc.ChangeType.ORTHANC_STARTED: + if changeType == orthanc.ChangeType.ORTHANC_STARTED and _azure_available(): orthanc.LogWarning("Starting the scheduler") AzureDICOMTokenRefresh() elif changeType == orthanc.ChangeType.ORTHANC_STOPPED: @@ -213,13 +246,13 @@ def OnChange(changeType, level, resource) -> None: # noqa: ARG001 TIMER.cancel() -def OnHeartBeat(output, uri, **request): # noqa: ARG001 +def OnHeartBeat(output, uri, **request) -> Any: # noqa: ARG001 """Extends the REST API by registering a new route in the REST API""" orthanc.LogWarning("OK") output.AnswerBuffer("OK\n", "text/plain") -def ReceivedInstanceCallback(receivedDicom, origin): +def ReceivedInstanceCallback(receivedDicom: bytes, origin: str) -> Any: """Modifies a DICOM instance received by Orthanc and applies anonymisation.""" if origin == orthanc.InstanceOrigin.REST_API: orthanc.LogWarning("DICOM instance received from the REST API") diff --git a/pixl_core/README.md b/pixl_core/README.md index b8b513535..8e477b6d1 100644 --- a/pixl_core/README.md +++ b/pixl_core/README.md @@ -6,13 +6,14 @@ upstream services. 
Specifically, it defines: -- The [Token buffer](#token-buffer) for rate limiting requests to the upstream services -- The [RabbitMQ queue](#patient-queue) implementation shared by the EHR and Imaging APIs -- The PIXL `postgres` internal database for storing exported images and extracts from the messages - processed by the CLI driver -- The [`ParquetExport`](./src/core/exports.py) class for exporting OMOP and EMAP extracts to parquet files -- Handling of [uploads over FTPS](./src/core/upload.py), used to transfer images and parquet files - to the DSH (Data Safe Haven) +- The [Token buffer](#token-buffer) for rate limiting requests to the upstream services +- The [RabbitMQ queue](#patient-queue) implementation shared by the EHR and Imaging APIs +- The PIXL `postgres` internal database for storing exported images and extracts from the messages + processed by the CLI driver +- The [`ParquetExport`](./src/core/exports.py) class for exporting OMOP and EMAP extracts to + parquet files +- Handling of [uploads over FTPS](./src/core/upload.py), used to transfer images and parquet files + to the DSH (Data Safe Haven) ## Installation @@ -33,9 +34,9 @@ The token buffer is needed to limit the download rate for images from PAX/VNA. C suggests that a rate limit of five images per second should be sufficient, however that may have to be altered dynamically through command line interaction. -The current implementation of the token buffer uses the [token bucket implementation from -Falconry](https://github.com/falconry/token-bucket/). Furthermore, the token buffer is not set up as -a service as it is only needed for the image download rate. +The current implementation of the token buffer uses the +[token bucket implementation from Falconry](https://github.com/falconry/token-bucket/). Furthermore, +the token buffer is not set up as a service as it is only needed for the image download rate. ## Patient queue @@ -45,7 +46,8 @@ different PIXL services. Currently, we define two queues: 1. 
`pacs` for downloading and de-identifying images 2. `ehr` for downloading and de-identifying the EHR data -The image anonymisation will be triggered automatically once the image has been downloaded to the raw Orthanc server. +The image anonymisation will be triggered automatically once the image has been downloaded to the +raw Orthanc server. ### RabbitMQ @@ -54,8 +56,8 @@ RabbitMQ is used for the queue implementation. The client of choice for RabbitMQ at this point in time is [pika](https://pika.readthedocs.io/en/stable/), which provides both a synchronous and asynchronous way of transferring messages. The former is geared towards high data throughput whereas the latter -is geared towards stability. The asynchronous mode of transferring messages is a lot more complex -as it is based on the [asyncio event loop](https://docs.python.org/3/library/asyncio-eventloop.html). +is geared towards stability. The asynchronous mode of transferring messages is a lot more complex as +it is based on the [asyncio event loop](https://docs.python.org/3/library/asyncio-eventloop.html). ### OMOP ES files @@ -85,3 +87,17 @@ for convenience `latest` is a symlink to the most recent extract. │ └── public └── latest -> all_extracts/2023-12-13t16-22-40 ``` + +## Uploading to an FTPS server + +The `core.upload` module implements functionality to upload DICOM tags and parquet files to an +**FTPS server**. This requires the following environment variables to be set: + +- `FTP_HOST`: URL to the FTPS server +- `FTP_PORT`: port on which the FTPS server is listening +- `FTP_USER_NAME`: name of user with access to the FTPS server +- `FTP_USER_PASSWORD`: password for the authorised user + +We provide mock values for these for the unit tests (see +[`./tests/conftest.py`](./tests/conftest.py)). When running in production, these should be defined +in the `.env` file (see [the example](../.env.sample)). 
diff --git a/pixl_core/src/core/upload.py b/pixl_core/src/core/upload.py index 411f7dbfd..ab1d6bf3b 100644 --- a/pixl_core/src/core/upload.py +++ b/pixl_core/src/core/upload.py @@ -68,7 +68,12 @@ def upload_dicom_image(zip_content: BinaryIO, pseudo_anon_id: str) -> None: logger.debug("Running %s", command) # Store the file using a binary handler - ftp.storbinary(command, zip_content) + try: + ftp.storbinary(command, zip_content) + except ftplib.all_errors as ftp_error: + ftp.quit() + error_msg = "Failed to run STOR command '%s': '%s'" + raise ConnectionError(error_msg, command, ftp_error) from ftp_error # Close the FTP connection ftp.quit() @@ -82,13 +87,17 @@ def _connect_to_ftp() -> FTP_TLS: ftp_host = os.environ["FTP_HOST"] ftp_port = os.environ["FTP_PORT"] # FTPS usually uses port 21 ftp_user = os.environ["FTP_USER_NAME"] - ftp_password = os.environ["FTP_USER_PASS"] + ftp_password = os.environ["FTP_USER_PASSWORD"] # Connect to the server and login - ftp = ImplicitFtpTls() - ftp.connect(ftp_host, int(ftp_port)) - ftp.login(ftp_user, ftp_password) - ftp.prot_p() + try: + ftp = ImplicitFtpTls() + ftp.connect(ftp_host, int(ftp_port)) + ftp.login(ftp_user, ftp_password) + ftp.prot_p() + except ftplib.all_errors as ftp_error: + error_msg = "Failed to connect to FTPS server: '%s'" + raise ConnectionError(error_msg, ftp_error) from ftp_error return ftp diff --git a/pixl_core/tests/conftest.py b/pixl_core/tests/conftest.py index 76465c3c7..5d96492c4 100644 --- a/pixl_core/tests/conftest.py +++ b/pixl_core/tests/conftest.py @@ -31,10 +31,13 @@ os.environ["RABBITMQ_PORT"] = "25672" os.environ["FTP_HOST"] = "localhost" os.environ["FTP_USER_NAME"] = "pixl" -os.environ["FTP_USER_PASS"] = "longpassword" # noqa: S105 Hardcoding password +os.environ["FTP_USER_PASSWORD"] = "longpassword" # noqa: S105 Hardcoding password os.environ["FTP_PORT"] = "20021" TEST_DIR = Path(__file__).parent +MOUNTED_DATA_DIR = ( + Path(__file__).parents[2] / "test" / "dummy-services" / "ftp-server" / 
"mounts" / "data" +) STUDY_DATE = datetime.date.fromisoformat("2023-01-01") @@ -71,10 +74,8 @@ def mounted_data() -> Path: The mounted data directory for the ftp server. This will contain the data after successful upload. """ - yield TEST_DIR / "ftp-server" / "mounts" / "data" - sub_dirs = [ - f.path for f in os.scandir(TEST_DIR / "ftp-server" / "mounts" / "data") if f.is_dir() - ] + yield MOUNTED_DATA_DIR + sub_dirs = [f.path for f in os.scandir(MOUNTED_DATA_DIR) if f.is_dir()] # Tear down the directory after tests for sub_dir in sub_dirs: shutil.rmtree(sub_dir, ignore_errors=True) diff --git a/pixl_core/tests/docker-compose.yml b/pixl_core/tests/docker-compose.yml index 0e577a5bd..f14217904 100644 --- a/pixl_core/tests/docker-compose.yml +++ b/pixl_core/tests/docker-compose.yml @@ -32,16 +32,14 @@ services: retries: 5 ftp-server: container_name: test-ftp-server - build: - context: ftp-server + build: + context: ../../test/dummy-services/ftp-server ports: - "20021:21" - "21000-21010:21000-21010" volumes: # Mount for uploaded data - - "./ftp-server/mounts/data/:/home/pixl/" - # Mount SSL keys for TLS - - "./ftp-server/mounts/ssl/:/etc/ssl/private/" + - "../../test/dummy-services/ftp-server/mounts/data/:/home/pixl/" environment: ADDRESS: "localhost" USERS: pixl|longpassword|/home/pixl diff --git a/pixl_ehr/README.md b/pixl_ehr/README.md index 5d95d9b48..d149fac94 100644 --- a/pixl_ehr/README.md +++ b/pixl_ehr/README.md @@ -44,8 +44,8 @@ and the processing tests with ``` To test the availability of a CogStack instance, we mock up a *FastAPI* server which simply takes in -some input text and returns the text with a fixed suffix. The configuration of this mock instance is -defined in [`test/dummy-services`](/test/dummy-services/). +some input text and returns the text with a fixed suffix. The mocking is handled by a *pytest* fixture in +`test_processing.py()` (`_mock_requests`). 
## Usage diff --git a/test/.env.test b/test/.env.test index c7b265ad5..6ac095d2c 100644 --- a/test/.env.test +++ b/test/.env.test @@ -29,6 +29,7 @@ PIXL_EHR_API_PORT=7006 PIXL_PACS_API_PORT=7007 RABBITMQ_PORT=7008 RABBITMQ_ADMIN_PORT=7009 +FTP_PORT=21 # PIXL Orthanc raw instance ORTHANC_RAW_USERNAME=orthanc_raw_username @@ -43,7 +44,7 @@ ORTHANC_ANON_PASSWORD=orthanc_anon_password ORTHANC_ANON_AE_TITLE=ORTHANCANON ORTHANC_ANON_HTTP_TIMEOUT=60 ENABLE_DICOM_WEB=true -ORTHANC_AUTOROUTE_ANON_TO_AZURE=false +ORTHANC_AUTOROUTE_ANON_TO_ENDPOINT=true PIXL_DICOM_TRANSFER_TIMEOUT=240 STUDY_TIME_OFFSET=0 @@ -59,3 +60,8 @@ PIXL_EHR_COGSTACK_REDACT_URL=http://cogstack-api:8000/redact # RabbitMQ RABBITMQ_USERNAME=rabbitmq_username RABBITMQ_PASSWORD=rabbitmq_password + +# FTP server +FTP_HOST=ftp-server +FTP_USER_NAME=ftp_username +FTP_USER_PASSWORD=longpassword diff --git a/test/README.md b/test/README.md index 57a359b17..f524ed1d7 100644 --- a/test/README.md +++ b/test/README.md @@ -1,15 +1,16 @@ # PIXL System Tests -This directory contains a system/integration test that runs locally and aims to -test the essential functionality of the full PIXL system. +This directory contains a system/integration test that runs locally and aims to test the essential +functionality of the full PIXL system. -**Given** a DICOM image in an Orthanc instance (mocked vendor -neutral archive, VNA) and a single patient with the same identifier in a -postgres instance (mocked EMAP database, star schema). -**When** a message containing the patient and study identifier is added to the -queue and the consumers started. -**Then** a row in the "anon" EMAP data instance of the PIXL postgres instance exists -and the DICOM study exists in the "anon" PIXL Orthanc instance. +**Given** a DICOM image in an Orthanc instance (mocked vendor neutral archive, VNA) and a single +patient with the same identifier in a postgres instance (mocked EMAP database, star schema). 
+ +**When** a message containing the patient and study identifier is added to the queue and the +consumers started. + +**Then** a row in the "anon" EMAP data instance of the PIXL postgres instance exists and the DICOM +study exists in the "anon" PIXL Orthanc instance. You can run the system test with: @@ -36,11 +37,36 @@ A test `pixl_config.yml` file is provided to run the PIXL pipeline. `./dummy-services` contains a Python script and Dockerfile to mock the CogStack service, used for de-identification of the radiology reports in the EHR API. +#### FTP server + +We spin up a test FTP server from the Docker container defined in `./dummy-services/ftp-server/`. +The Docker container inherits from +[`delfer/alpine-ftp-server`](https://github.com/delfer/docker-alpine-ftp-server) and uses `vsftpd` +as the FTP client. The Docker container requires the following environment variables to be defined: + +- `ADDRESS`: external address to which clients can connect for passive ports +- `USERS`: space and `|` separated list of usernames, passwords, home directories and groups: + - format `name1|password1|[folder1][|uid1][|gid1] name2|password2|[folder2][|uid2][|gid2]` + - the values in `[]` are optional +- `TLS_KEY`: keyfile for the TLS certificate +- `TLS_CERT`: TLS certificate + +> [!warning] The `ADDRESS` should match the `FTP_HOST` environment variable defined in `.env.test`, +> otherwise FTP commands such as `STOR` or `dir` run from other Docker containers in the network +> (such as `orthanc-anon`) will fail. _Note: connecting and logging into the FTP server might still +> work, as the address name is only checked for protected operations such as listing and transferring +> files._ + +**Volume**: to check successful uploads, we mount a local data directory to `/home/${FTP_USER_NAME}/` + +**SSL certificates**: the SSL certificate files are defined in `test/dummy-services/ftp-server/ssl` +and are copied into `/etc/ssl/private` when building the Docker container. 
+ ### Resources -- `./resources/` provides 2 mock DICOM images used to populate the mock VNA. -- `./resources/omop` contains mock public and private Parquet files used to populate the message - queues and extract the radiology reports +- `./resources/` provides 2 mock DICOM images used to populate the mock VNA. +- `./resources/omop` contains mock public and private Parquet files used to populate the message + queues and extract the radiology reports ### VNA config diff --git a/test/docker-compose.yml b/test/docker-compose.yml index 50cc47f24..b8ff98416 100644 --- a/test/docker-compose.yml +++ b/test/docker-compose.yml @@ -14,66 +14,86 @@ version: "3.8" volumes: - vna-qr-data: + vna-qr-data: networks: - pixl-net: + pixl-net: services: + vna-qr: + image: osimis/orthanc:22.9.0-full-stable + environment: + ORTHANC_NAME: ${VNAQR_AE_TITLE} + ORTHANC_USERNAME: "orthanc" + ORTHANC_PASSWORD: "orthanc" + ORTHANC_AE_TITLE: "VNAQR" + RAW_AE_TITLE: ${ORTHANC_RAW_AE_TITLE} + RAW_DICOM_PORT: "4242" + RAW_IP_ADDR: "orthanc-raw" # aka. hostname + ports: + - "4243:4242" + - "8043:8042" + volumes: + - ${PWD}/vna_config/:/run/secrets:ro + healthcheck: + test: ["CMD", "curl", "-f", "-u", "orthanc:orthanc", "http://vna-qr:8042/patients"] + interval: 10s + timeout: 30s + retries: 5 + networks: + pixl-net: - vna-qr: - image: osimis/orthanc:22.9.0-full-stable - environment: - ORTHANC_NAME: ${VNAQR_AE_TITLE} - ORTHANC_USERNAME: "orthanc" - ORTHANC_PASSWORD: "orthanc" - ORTHANC_AE_TITLE: "VNAQR" - RAW_AE_TITLE: ${ORTHANC_RAW_AE_TITLE} - RAW_DICOM_PORT: "4242" - RAW_IP_ADDR: "orthanc-raw" # aka. hostname - ports: - - "4243:4242" - - "8043:8042" - volumes: - - ${PWD}/vna_config/:/run/secrets:ro - healthcheck: - test: [ "CMD", "curl", "-f", "-u" , "orthanc:orthanc", "http://vna-qr:8042/patients" ] - interval: 10s - timeout: 30s - retries: 5 - networks: - pixl-net: - - star: - container_name: system-test-fake-star-db - build: - context: .. 
- dockerfile: test/Dockerfile.fake_emap_star - args: - N_TABLE_ROWS: 2 - environment: - POSTGRES_USER: ${EMAP_UDS_USER} - POSTGRES_PASSWORD: ${EMAP_UDS_PASSWORD} - healthcheck: - test: ["CMD", "pg_isready", "-U", "postgres"] - interval: 10s - timeout: 30s - retries: 5 - ports: - - "5432:5432" - networks: - pixl-net: - - cogstack-api: - container_name: system-test-cogstack - build: - context: ./dummy-services/ - healthcheck: - test: [ "CMD", "curl", "-f", "http://localhost:8000/heart-beat" ] - interval: 10s - timeout: 30s - retries: 5 - networks: - pixl-net: + star: + container_name: system-test-fake-star-db + build: + context: .. + dockerfile: test/Dockerfile.fake_emap_star + args: + N_TABLE_ROWS: 2 + environment: + POSTGRES_USER: ${EMAP_UDS_USER} + POSTGRES_PASSWORD: ${EMAP_UDS_PASSWORD} + healthcheck: + test: ["CMD", "pg_isready", "-U", "postgres"] + interval: 10s + timeout: 30s + retries: 5 + ports: + - "5432:5432" + networks: + pixl-net: + cogstack-api: + container_name: system-test-cogstack + build: + context: ./dummy-services/cogstack/ + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8000/heart-beat"] + interval: 10s + timeout: 30s + retries: 5 + networks: + pixl-net: + ftp-server: + container_name: system-test-ftp-server + build: + context: ./dummy-services/ftp-server + ports: + - "127.0.0.1:20021:21" + - "21000-21010:21000-21010" + volumes: + # Mount for uploaded data + - "./dummy-services/ftp-server/mounts/data/:/home/${FTP_USER_NAME}/" + environment: + ADDRESS: ${FTP_HOST} + USERS: ${FTP_USER_NAME}|${FTP_USER_PASSWORD}|/home/${FTP_USER_NAME} + TLS_KEY: /etc/ssl/private/localhost.key + TLS_CERT: /etc/ssl/private/localhost.crt + healthcheck: + test: ping localhost:21 -w 2 + interval: 10s + timeout: 5s + retries: 5 + networks: + pixl-net: diff --git a/test/dummy-services/Dockerfile b/test/dummy-services/cogstack/Dockerfile similarity index 100% rename from test/dummy-services/Dockerfile rename to test/dummy-services/cogstack/Dockerfile diff 
--git a/test/dummy-services/cogstack.py b/test/dummy-services/cogstack/cogstack.py similarity index 100% rename from test/dummy-services/cogstack.py rename to test/dummy-services/cogstack/cogstack.py diff --git a/test/dummy-services/pyproject.toml b/test/dummy-services/cogstack/pyproject.toml similarity index 100% rename from test/dummy-services/pyproject.toml rename to test/dummy-services/cogstack/pyproject.toml diff --git a/pixl_core/tests/ftp-server/Dockerfile b/test/dummy-services/ftp-server/Dockerfile similarity index 81% rename from pixl_core/tests/ftp-server/Dockerfile rename to test/dummy-services/ftp-server/Dockerfile index deea7891f..dcb3c0617 100644 --- a/pixl_core/tests/ftp-server/Dockerfile +++ b/test/dummy-services/ftp-server/Dockerfile @@ -13,10 +13,16 @@ # limitations under the License. FROM delfer/alpine-ftp-server +COPY ssl/ /etc/ssl/private/ + SHELL ["/bin/sh", "-c"] RUN echo $'# Attempting to enable FTPS.\n\ ssl_enable=YES\n\ ssl_sslv2=YES\n\ ssl_sslv3=YES\n\ implicit_ssl=YES\n\ -require_ssl_reuse=NO' >> /etc/vsftpd/vsftpd.conf \ No newline at end of file +require_ssl_reuse=NO\n\ +xferlog_std_format=YES\n\ +log_ftp_protocol=YES\n\ +vsftpd_log_file=/var/log/vsftpd.log\n\ +syslog_enable=NO' >> /etc/vsftpd/vsftpd.conf diff --git a/pixl_core/tests/ftp-server/mounts/data/.gitkeep b/test/dummy-services/ftp-server/mounts/data/.gitkeep similarity index 100% rename from pixl_core/tests/ftp-server/mounts/data/.gitkeep rename to test/dummy-services/ftp-server/mounts/data/.gitkeep diff --git a/pixl_core/tests/ftp-server/mounts/ssl/localhost.crt b/test/dummy-services/ftp-server/ssl/localhost.crt similarity index 100% rename from pixl_core/tests/ftp-server/mounts/ssl/localhost.crt rename to test/dummy-services/ftp-server/ssl/localhost.crt diff --git a/pixl_core/tests/ftp-server/mounts/ssl/localhost.key b/test/dummy-services/ftp-server/ssl/localhost.key similarity index 100% rename from pixl_core/tests/ftp-server/mounts/ssl/localhost.key rename to 
test/dummy-services/ftp-server/ssl/localhost.key diff --git a/test/run-system-test.sh b/test/run-system-test.sh index 7a65e963f..bff6eb14c 100755 --- a/test/run-system-test.sh +++ b/test/run-system-test.sh @@ -30,19 +30,21 @@ docker compose --env-file .env.test -p system-test up --wait -d --build --remove pip install -e "${PACKAGE_DIR}/pixl_core" && pip install -e "${PACKAGE_DIR}/cli" pixl populate "${PACKAGE_DIR}/test/resources/omop" pixl start -sleep 65 # need to wait until the DICOM image is "stable" = 60s -./scripts/check_entry_in_pixl_anon.sh -./scripts/check_entry_in_orthanc_anon.py + ./scripts/check_max_storage_in_orthanc_raw.sh +# need to wait until the DICOM image is "stable" so poll for 2 minutes to check +./scripts/check_entry_in_orthanc_anon_for_2_min.py +./scripts/check_entry_in_pixl_anon.sh +# test export and upload pixl extract-radiology-reports "${PACKAGE_DIR}/test/resources/omop" - ./scripts/check_radiology_parquet.py \ ../exports/test-extract-uclh-omop-cdm/latest/radiology/radiology.parquet +./scripts/check_ftps_upload.py + ls -laR ../exports/ docker exec system-test-ehr-api-1 rm -r /run/exports/test-extract-uclh-omop-cdm/ cd "${PACKAGE_DIR}" docker compose -f docker-compose.yml -f test/docker-compose.yml -p system-test down --volumes - diff --git a/test/scripts/check_entry_in_orthanc_anon.py b/test/scripts/check_entry_in_orthanc_anon_for_2_min.py similarity index 55% rename from test/scripts/check_entry_in_orthanc_anon.py rename to test/scripts/check_entry_in_orthanc_anon_for_2_min.py index 13d43243a..b753acd19 100755 --- a/test/scripts/check_entry_in_orthanc_anon.py +++ b/test/scripts/check_entry_in_orthanc_anon_for_2_min.py @@ -17,15 +17,26 @@ """ After pixl has run this script will query the orthanc-anon REST API to check that the correct number of instances have been received. 
+ +Because we have to wait for a stable study, poll for 2 minutes """ import json import shlex import subprocess +from time import sleep + +SECONDS_WAIT = 5 +# Check for two minutes that the instances have been received in orthanc anon +instances = [] +for seconds in range(0, 121, SECONDS_WAIT): + instances_cmd = shlex.split('docker exec system-test-orthanc-anon-1 curl -u "orthanc_anon_username:orthanc_anon_password" http://orthanc-anon:8042/instances') + instances_output = subprocess.run(instances_cmd, capture_output=True, check=True, text=True) + instances = json.loads(instances_output.stdout) + print(f"Waited for {seconds} seconds, orthanc-anon instances: {instances}") + if len(instances) == 2: + break + sleep(SECONDS_WAIT) -instances_cmd = shlex.split('docker exec system-test-orthanc-anon-1 curl -u "orthanc_anon_username:orthanc_anon_password" http://orthanc-anon:8042/instances') -instances_output = subprocess.run(instances_cmd, capture_output=True, check=True, text=True) -instances = json.loads(instances_output.stdout) -print("orthanc-anon instances:", instances) assert len(instances) == 2 diff --git a/test/scripts/check_ftps_upload.py b/test/scripts/check_ftps_upload.py new file mode 100755 index 000000000..e81a5182f --- /dev/null +++ b/test/scripts/check_ftps_upload.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +# Copyright (c) University College London Hospitals NHS Foundation Trust +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import subprocess +from pathlib import Path +from shutil import rmtree +from time import sleep + +PARQUET_PATH = Path(__file__).parents[1] / "resources" / "omop" +print(f"parquet path: {PARQUET_PATH}") +MOUNTED_DATA_DIR = Path(__file__).parents[1] / "dummy-services" / "ftp-server" / "mounts" / "data" +print(f"mounted data dir: {MOUNTED_DATA_DIR}") + +project_name = "test-extract-uclh-omop-cdm" +print(f"project name: {project_name}") +expected_output_dir = MOUNTED_DATA_DIR / project_name +print(f"expected output dir: {expected_output_dir}") + +SECONDS_WAIT = 5 + +glob_list = [] +for seconds in range(0, 121, SECONDS_WAIT): +    # Test whether DICOM images have been uploaded +    glob_list = list(expected_output_dir.glob("*.zip")) +    print(f"Waited for {seconds} seconds. glob_list: {glob_list}") +    if len(glob_list) == 2: +        break +    sleep(SECONDS_WAIT) + +# Check for expected number of uploaded files and clean up, even if the assertion fails +try: +    # We expect 2 DICOM image studies to be uploaded +    assert len(glob_list) == 2 +    # TODO: check parquet files upload before deleting +finally: +    # Do we want to always remove the files if it failed? It may help debugging not to. +    rmtree(expected_output_dir, ignore_errors=True)