diff --git a/.env.sample b/.env.sample index d24aa3de8..276a0ed0f 100644 --- a/.env.sample +++ b/.env.sample @@ -1,11 +1,14 @@ ENV=dev DEBUG=True +PIXL_DICOM_TRANSFER_TIMEOUT=120 +PIXL_QUERY_TIMEOUT=120 # PIXL PostgreSQL instance PIXL_DB_HOST=postgres PIXL_DB_NAME=pixl PIXL_DB_USER=pixl PIXL_DB_PASSWORD= +SKIP_ALEMBIC=false # EMAP UDS EMAP_UDS_HOST= @@ -26,6 +29,15 @@ PIXL_EHR_API_PORT= RABBITMQ_PORT= RABBITMQ_ADMIN_PORT= PIXL_IMAGING_API_PORT= +FTP_PORT= + +# PIXL EHR API +PIXL_EHR_API_HOST=localhost +PIXL_EHR_API_RATE=1 + +# PIXL Imaging API +PIXL_IMAGING_API_HOST=localhost +PIXL_IMAGING_API_RATE=1 # Hasher API HASHER_API_AZ_CLIENT_ID= @@ -77,6 +89,7 @@ PIXL_EHR_API_AZ_STORAGE_CONTAINER_NAME= PIXL_EHR_COGSTACK_REDACT_URL= # RABBIT MQ queue. UI available at localhost:$RABBITMQ_ADMIN_PORT +RABBITMQ_HOST=localhost RABBITMQ_USERNAME= RABBITMQ_PASSWORD= @@ -88,4 +101,3 @@ PIXL_QUERY_TIMEOUT=10 FTP_HOST= FTP_USER_NAME= FTP_USER_PASSWORD= -FTP_PORT= diff --git a/README.md b/README.md index a9938270c..74bacf14c 100644 --- a/README.md +++ b/README.md @@ -146,10 +146,10 @@ This is one of `dev|test|staging|prod` and referred to as `` in the ### 2. Initialise environment configuration -Create a local `.env` and `pixl_config.yml` file in the _PIXL_ directory: +Create a local `.env` file in the _PIXL_ directory: ```bash -cp .env.sample .env && cp pixl_config.yml.sample pixl_config.yml +cp .env.sample .env ``` Add the missing configuration values to the new files: diff --git a/cli/README.md b/cli/README.md index 3e6d5b57a..deb22fe55 100644 --- a/cli/README.md +++ b/cli/README.md @@ -9,11 +9,6 @@ stopped cleanly. `PIXL CLI` requires Python version 3.10. -The CLI requires a `pixl_config.yml` file in the current working directory. A -[sample file](../pixl_config.yml.sample) is provided in the root of the repository. If you want to -run locally during development, we recommend running `pixl` from the [`./tests/`](./tests/) -directory, which contains a mock `pixl_config.yml` file. 
- Running the tests requires [docker](https://docs.docker.com/get-docker/) to be installed. ## Installation @@ -40,6 +35,39 @@ See the commands and subcommands with ```bash pixl --help ``` +### Configuration + +The `rabbitmq` and `postgres` services are configured by setting the following environment variables +(example values shown): + +```sh +RABBITMQ_HOST=localhost +RABBITMQ_PORT=7008 +RABBITMQ_USERNAME=rabbitmq_username +RABBITMQ_PASSWORD=rabbitmq_password + +POSTGRES_HOST=localhost +POSTGRES_PORT=7001 +PIXL_DB_USER=pixl_db_username +PIXL_DB_PASSWORD=pixl_db_password +PIXL_DB_NAME=pixl +``` + +The `rabbitmq` queues for the `ehr` and `imaging` APIs are configured by setting: + +```sh +PIXL_EHR_API_HOST=localhost +PIXL_EHR_API_PORT=7006 +PIXL_EHR_API_RATE=1 + +PIXL_IMAGING_API_HOST=localhost +PIXL_IMAGING_API_PORT=7007 +PIXL_IMAGING_API_RATE=1 +``` + +where the `*_RATE` variables set the default querying rate for the message queues. + +### Running the pipeline Populate queue for Imaging and EHR extraction @@ -113,10 +141,9 @@ pip install -e ../pixl_core/ -e .[test] ### Running tests -The CLI tests require a running instance of the `rabbitmq` service, for which we provide a -`docker-compose` [file](./tests/docker-compose.yml). Spinning up the service and running `pytest` -can be done by running +Tests can be run with `pytest` from the `tests` directory. ```bash -./tests/run-tests.sh +cd tests +pytest ``` diff --git a/cli/src/pixl_cli/_config.py b/cli/src/pixl_cli/_config.py index 6f4fdb9ad..456cfa58c 100644 --- a/cli/src/pixl_cli/_config.py +++ b/cli/src/pixl_cli/_config.py @@ -12,21 +12,70 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-"""Configuration of CLI from config file.""" +"""Configuration of CLI from environment variables.""" + from pathlib import Path -import yaml +from decouple import Config, RepositoryEmpty, RepositoryEnv + +env_file = Path.cwd() / ".env" +config = Config(RepositoryEnv(env_file)) if env_file.exists() else Config(RepositoryEmpty()) + +SERVICE_SETTINGS = { + "rabbitmq": { + "host": config("RABBITMQ_HOST"), + "port": int(config("RABBITMQ_PORT")), + "username": config("RABBITMQ_USERNAME"), + "password": config("RABBITMQ_PASSWORD"), + }, + "postgres": { + "host": config("POSTGRES_HOST"), + "port": int(config("POSTGRES_PORT")), + "username": config("PIXL_DB_USER"), + "password": config("PIXL_DB_PASSWORD"), + "database": config("PIXL_DB_NAME"), + }, +} # type: dict + + +class APIConfig: + """API Configuration""" + + def __init__(self, host: str, port: int, default_rate: float = 1) -> None: + """Initialise the APIConfig class""" + self.host = host + self.port = port + self.default_rate = default_rate + + @property + def base_url(self) -> str: + """Return the base url for the API""" + return f"http://{self.host}:{self.port}" + +API_CONFIGS = { + "ehr_api": APIConfig( + host=config("PIXL_EHR_API_HOST"), + port=int(config("PIXL_EHR_API_PORT")), + default_rate=float(config("PIXL_EHR_API_RATE", default=1)), + ), + "imaging_api": APIConfig( + host=config("PIXL_IMAGING_API_HOST"), + port=int(config("PIXL_IMAGING_API_PORT")), + default_rate=float(config("PIXL_IMAGING_API_RATE", default=1)), + ), +} -def _load_config(filename: str = "pixl_config.yml") -> dict: - """CLI configuration generated from a .yaml file""" - if not Path(filename).exists(): - msg = f"Failed to find {filename}. 
It must be present in the current working directory" - raise FileNotFoundError(msg) - with Path(filename).open() as config_file: - config_dict = yaml.safe_load(config_file) - return dict(config_dict) +def api_config_for_queue(queue_name: str) -> APIConfig: + """Configuration for an API associated with a queue""" + api_name = f"{queue_name}_api" + if api_name not in API_CONFIGS: + msg = ( + f"Cannot update the rate for {queue_name}. {api_name} was" + f" not specified in the configuration" + ) + raise ValueError(msg) -cli_config = _load_config() + return API_CONFIGS[api_name] diff --git a/cli/src/pixl_cli/_database.py b/cli/src/pixl_cli/_database.py index b935af77b..2c2a52f45 100644 --- a/cli/src/pixl_cli/_database.py +++ b/cli/src/pixl_cli/_database.py @@ -19,9 +19,9 @@ from sqlalchemy import URL, create_engine from sqlalchemy.orm import Session, sessionmaker -from pixl_cli._config import cli_config +from pixl_cli._config import SERVICE_SETTINGS -connection_config = cli_config["postgres"] +connection_config = SERVICE_SETTINGS["postgres"] url = URL.create( drivername="postgresql+psycopg2", diff --git a/cli/src/pixl_cli/_io.py b/cli/src/pixl_cli/_io.py index b9a368d52..8d928692c 100644 --- a/cli/src/pixl_cli/_io.py +++ b/cli/src/pixl_cli/_io.py @@ -47,19 +47,23 @@ def messages_from_state_file(filepath: Path) -> list[Message]: return [deserialise(line) for line in filepath.open().readlines() if string_is_non_empty(line)] -def config_from_log_file(parquet_path: Path) -> tuple[str, datetime]: - log_file = parquet_path / "extract_summary.json" +def project_info(resources_path: Path) -> tuple[str, datetime]: + """ + Get the project name and extract timestamp from the extract summary log file. 
+ :param resources_path: path to the input resources + """ + log_file = resources_path / "extract_summary.json" logs = json.load(log_file.open()) project_name = logs["settings"]["cdm_source_name"] omop_es_timestamp = datetime.fromisoformat(logs["datetime"]) return project_name, omop_es_timestamp -def copy_parquet_return_logfile_fields(parquet_path: Path) -> tuple[str, datetime]: +def copy_parquet_return_logfile_fields(resources_path: Path) -> tuple[str, datetime]: """Copy public parquet file to extracts directory, and return fields from logfile""" - project_name, omop_es_timestamp = config_from_log_file(parquet_path) + project_name, omop_es_timestamp = project_info(resources_path) extract = ParquetExport(project_name, omop_es_timestamp, HOST_EXPORT_ROOT_DIR) - project_name_slug = extract.copy_to_exports(parquet_path) + project_name_slug = extract.copy_to_exports(resources_path) return project_name_slug, omop_es_timestamp diff --git a/cli/src/pixl_cli/main.py b/cli/src/pixl_cli/main.py index 78fcfbc1d..74e75cc3b 100644 --- a/cli/src/pixl_cli/main.py +++ b/cli/src/pixl_cli/main.py @@ -25,13 +25,13 @@ from core.patient_queue.producer import PixlProducer from core.patient_queue.subscriber import PixlBlockingConsumer -from pixl_cli._config import cli_config +from pixl_cli._config import SERVICE_SETTINGS, api_config_for_queue from pixl_cli._database import filter_exported_or_add_to_db from pixl_cli._io import ( - config_from_log_file, copy_parquet_return_logfile_fields, messages_from_parquet, messages_from_state_file, + project_info, ) from pixl_cli._logging import logger, set_log_level from pixl_cli._utils import clear_file, remove_file_if_it_exists @@ -98,7 +98,7 @@ def populate(parquet_dir: Path, *, restart: bool, queues: str) -> None: sorted_messages = filter_exported_or_add_to_db( sorted_messages, messages[0].project_name ) - with PixlProducer(queue_name=queue, **cli_config["rabbitmq"]) as producer: + with PixlProducer(queue_name=queue, 
**SERVICE_SETTINGS["rabbitmq"]) as producer: producer.publish(sorted_messages) @@ -113,7 +113,7 @@ def extract_radiology_reports(parquet_dir: Path) -> None: PARQUET_DIR: Directory containing the extract_summary.json log file defining which extract to export radiology reports for. """ - project_name, omop_es_datetime = config_from_log_file(parquet_dir) + project_name, omop_es_datetime = project_info(parquet_dir) # Call the EHR API api_config = api_config_for_queue("ehr") @@ -143,10 +143,9 @@ def extract_radiology_reports(parquet_dir: Path) -> None: "--rate", type=float, default=None, - help="Rate at which to process items from a queue (in items per second)." - "If None then will use the default rate defined in the config file", + help="Rate at which to process items from a queue (in items per second).", ) -def start(queues: str, rate: Optional[int]) -> None: +def start(queues: str, rate: Optional[float]) -> None: """Start consumers for a set of queues""" if rate == 0: msg = "Cannot start extract with a rate of 0. Must be >0" @@ -186,10 +185,7 @@ def _update_extract_rate(queue_name: str, rate: Optional[float]) -> None: if rate is None: if api_config.default_rate is None: - msg = ( - "Cannot update the rate for %s. No default rate was specified.", - queue_name, - ) + msg = f"Cannot update the rate for {queue_name}. No valid rate was specified." raise ValueError(msg) rate = float(api_config.default_rate) logger.info(f"Using the default extract rate of {rate}/second") @@ -299,7 +295,7 @@ def consume_all_messages_and_save_csv_file(queue_name: str, timeout_in_seconds: f"{timeout_in_seconds} seconds" ) - with PixlBlockingConsumer(queue_name=queue_name, **cli_config["rabbitmq"]) as consumer: + with PixlBlockingConsumer(queue_name=queue_name, **SERVICE_SETTINGS["rabbitmq"]) as consumer: state_filepath = state_filepath_for_queue(queue_name) if consumer.message_count > 0: logger.info("Found messages in the queue. 
Clearing the state file") @@ -325,51 +321,3 @@ def inform_user_that_queue_will_be_populated_from(path: Path) -> None: # noqa: f"state files should be ignored, or delete this file to ignore. Press " f"Ctrl-C to exit and any key to continue" ) - - -class APIConfig: - """ - Class to represent the configuration for an API - - Attributes - ---------- - host : str - Hostname for the API - port : int - Port for the API - default_rate : int - Default rate for the API - - Methods - ------- - base_url() - Return the base url for the API - - """ - - def __init__(self, kwargs: dict) -> None: - """Initialise the APIConfig class""" - self.host: Optional[str] = None - self.port: Optional[int] = None - self.default_rate: Optional[int] = None - - self.__dict__.update(kwargs) - - @property - def base_url(self) -> str: - """Return the base url for the API""" - return f"http://{self.host}:{self.port}" - - -def api_config_for_queue(queue_name: str) -> APIConfig: - """Configuration for an API associated with a queue""" - config_key = f"{queue_name}_api" - - if config_key not in cli_config: - msg = ( - f"Cannot update the rate for {queue_name}. 
{config_key} was" - f" not specified in the configuration" - ) - raise ValueError(msg) - - return APIConfig(cli_config[config_key]) diff --git a/cli/tests/conftest.py b/cli/tests/conftest.py index f8d0f69d8..c97401690 100644 --- a/cli/tests/conftest.py +++ b/cli/tests/conftest.py @@ -14,6 +14,7 @@ """CLI testing fixtures.""" from __future__ import annotations +import os import pathlib import pytest @@ -21,6 +22,26 @@ from sqlalchemy import Engine, create_engine from sqlalchemy.orm import Session, sessionmaker +# Set the necessary environment variables +os.environ["PIXL_EHR_API_HOST"] = "localhost" +os.environ["PIXL_EHR_API_RATE"] = "1" +os.environ["PIXL_EHR_API_PORT"] = "7006" + +os.environ["PIXL_IMAGING_API_HOST"] = "localhost" +os.environ["PIXL_IMAGING_API_RATE"] = "1" +os.environ["PIXL_IMAGING_API_PORT"] = "7007" + +os.environ["RABBITMQ_HOST"] = "localhost" +os.environ["RABBITMQ_USERNAME"] = "rabbitmq_username" +os.environ["RABBITMQ_PASSWORD"] = "rabbitmq_password" # noqa: S105 +os.environ["RABBITMQ_PORT"] = "7008" + +os.environ["PIXL_DB_USER"] = "pixl_db_username" +os.environ["PIXL_DB_PASSWORD"] = "pixl_db_password" # noqa: S105 +os.environ["POSTGRES_HOST"] = "localhost" +os.environ["POSTGRES_PORT"] = "7001" +os.environ["PIXL_DB_NAME"] = "pixl" + @pytest.fixture(autouse=True) def export_dir(tmp_path_factory: pytest.TempPathFactory) -> pathlib.Path: diff --git a/cli/tests/pixl_config.yml b/cli/tests/pixl_config.yml deleted file mode 100644 index d93eadd1e..000000000 --- a/cli/tests/pixl_config.yml +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -rabbitmq: - host: localhost - port: 5672 - username: guest - password: guest -ehr_api: - host: localhost - port: 9000 - default_rate: 5 # ~queries per second -imaging_api: - default_rate: 5 # queries per second -postgres: - host: host - port: 5432 - username: username - password: password - database: database diff --git a/pixl_config.yml.sample b/pixl_config.yml.sample deleted file mode 100644 index 7d3e1febd..000000000 --- a/pixl_config.yml.sample +++ /dev/null @@ -1,19 +0,0 @@ -rabbitmq: - host: - port: - username: - password: -ehr_api: - host: - port: - default_rate: 5 # ~queries per second -imaging_api: - host: - port: - default_rate: 0.1 # queries per second -postgres: - host: - port: - username: - password: - database: diff --git a/test/.env b/test/.env index b77a670fd..6edcf534c 100644 --- a/test/.env +++ b/test/.env @@ -32,6 +32,15 @@ RABBITMQ_PORT=7008 RABBITMQ_ADMIN_PORT=7009 FTP_PORT=20021 +# PIXL EHR API +POSTGRES_HOST=localhost +PIXL_EHR_API_HOST=localhost +PIXL_EHR_API_RATE=1 + +# PIXL Imaging API +PIXL_IMAGING_API_HOST=localhost +PIXL_IMAGING_API_RATE=1 + # PIXL Orthanc raw instance ORTHANC_RAW_USERNAME=orthanc_raw_username ORTHANC_RAW_PASSWORD=orthanc_raw_password @@ -59,6 +68,7 @@ VNAQR_MODALITY=UCVNAQR PIXL_EHR_COGSTACK_REDACT_URL=http://cogstack-api:8000/redact # RabbitMQ +RABBITMQ_HOST=localhost RABBITMQ_USERNAME=rabbitmq_username RABBITMQ_PASSWORD=rabbitmq_password diff --git a/test/README.md b/test/README.md index 0ee56ab9e..0e3ad87e7 100644 --- a/test/README.md +++ b/test/README.md @@ -36,10 +36,6 @@ Run the following to teardown: ## 
File organisation -### PIXL configuration - -A test `pixl_config.yml` file is provided to run the PIXL pipeline. - ### Docker compose `./docker-compose.yml` contains the docker compose configuration for the system test. diff --git a/test/pixl_config.yml b/test/pixl_config.yml deleted file mode 100644 index d185ccac0..000000000 --- a/test/pixl_config.yml +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright (c) University College London Hospitals NHS Foundation Trust -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -rabbitmq: - host: localhost - port: 7008 - username: rabbitmq_username - password: rabbitmq_password -ehr_api: - host: localhost - port: 7006 - default_rate: 1 -imaging_api: - host: localhost - port: 7007 - default_rate: 1 -postgres: - host: localhost - port: 7001 - username: pixl_db_username - password: pixl_db_password - database: pixl