diff --git a/codecov.yml b/codecov.yml new file mode 100644 index 00000000..fe3c1531 --- /dev/null +++ b/codecov.yml @@ -0,0 +1,4 @@ +coverage: + ignore: + - nipoppy/ # legacy code + - nipoppy_cli/docs/ # documentation diff --git a/nipoppy_cli/docs/source/conf.py b/nipoppy_cli/docs/source/conf.py index 962e6955..0206399d 100644 --- a/nipoppy_cli/docs/source/conf.py +++ b/nipoppy_cli/docs/source/conf.py @@ -4,6 +4,9 @@ https://www.sphinx-doc.org/en/master/usage/configuration.html """ +# for substitutions +from nipoppy.layout import DEFAULT_LAYOUT_INFO + # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information @@ -56,7 +59,12 @@ # -- MyST configuration ------------------------------------------------------- -myst_enable_extensions = ["fieldlist"] +myst_enable_extensions = ["fieldlist", "substitution"] + +myst_substitutions = { + "fpath_manifest": f"`{DEFAULT_LAYOUT_INFO.fpath_manifest}`", + "fpath_config": f"`{DEFAULT_LAYOUT_INFO.fpath_config}`", +} # -- Autodoc/AutoAPI configuration ---------------------------------------------------- diff --git a/nipoppy_cli/docs/source/quickstart.md b/nipoppy_cli/docs/source/quickstart.md index 56e5b1b3..eda517b2 100644 --- a/nipoppy_cli/docs/source/quickstart.md +++ b/nipoppy_cli/docs/source/quickstart.md @@ -35,7 +35,7 @@ The [`nipoppy init`](#cli_reference/init) command copies examples of these files (customizing-config)= ### Customizing the configuration file -The configuration file at `/proc/global_configs.json` contains general information about a dataset (e.g. name, visit and session names) and configurations for running processing pipelines (e.g., pipeline version and runtime parameters). +The configuration file at {{fpath_config}} contains general information about a dataset (e.g. name, visit and session names) and configurations for running processing pipelines (e.g., pipeline version and runtime parameters). 
The example config file contains configurations for all BIDS conversion and image processing software that are supported out-of-the-box by Nipoppy. You should replace the placeholder strings/substrings (e.g. ``) by more appropriate values for your dataset. See dropdown below (lines that should be changed are highlighted). @@ -45,7 +45,7 @@ You can also delete (or not) any configuration for a software/version that you d --- class: dropdown --- -Here is the default content of `/proc/global_configs.json`: +Here is the default content of {{fpath_config}}: ```{literalinclude} ../../nipoppy/data/examples/sample_global_configs.json --- linenos: True @@ -63,7 +63,7 @@ See the {ref}`schema reference ` for more information about each ### Generating the manifest file -The manifest file at `/tabular/manifest.csv` contains *ground truth* information about the participants and visits/sessions available for a dataset. +The manifest file at {{fpath_manifest}} contains *ground truth* information about the participants and visits/sessions available for a dataset. There must be only **one row** per unique participant/visit combination. 
diff --git a/nipoppy_cli/nipoppy/cli/parser.py b/nipoppy_cli/nipoppy/cli/parser.py index 2a8f4912..36c65ff6 100644 --- a/nipoppy_cli/nipoppy/cli/parser.py +++ b/nipoppy_cli/nipoppy/cli/parser.py @@ -4,6 +4,7 @@ from argparse import ArgumentParser, HelpFormatter, _ActionsContainer, _SubParsersAction from pathlib import Path +from nipoppy.layout import DEFAULT_LAYOUT_INFO from nipoppy.utils import ( BIDS_SESSION_PREFIX, BIDS_SUBJECT_PREFIX, @@ -72,13 +73,13 @@ def add_args_pipeline(parser: _ActionsContainer) -> _ActionsContainer: "--pipeline", type=str, required=True, - help="Pipeline name.", + help="Pipeline name, as written in the config file.", ) parser.add_argument( "--pipeline-version", type=str, required=False, - help="Pipeline version.", + help="Pipeline version, as written in the config file.", ) return parser @@ -178,7 +179,7 @@ def add_subparser_doughnut( action="store_true", help=( "Set all statuses to False in newly added records" - " (regardless of what is on disk)." + " (regardless of what is on disk). May be useful to reduce runtime." ), ) parser.add_argument( @@ -197,7 +198,11 @@ def add_subparser_dicom_reorg( formatter_class: type[HelpFormatter] = HelpFormatter, ) -> ArgumentParser: """Add subparser for reorg command.""" - description = "(Re)organize raw DICOM files." # TODO give paths in layout model + description = ( + "(Re)organize raw (DICOM) files, from the raw DICOM directory " + f"({DEFAULT_LAYOUT_INFO.dpath_raw_dicom}) to the organized " + f"sourcedata directory ({DEFAULT_LAYOUT_INFO.dpath_sourcedata})." 
+ ) parser = subparsers.add_parser( COMMAND_DICOM_REORG, description=description, @@ -209,7 +214,16 @@ def add_subparser_dicom_reorg( parser.add_argument( "--copy-files", action="store_true", - help=("Copy files when reorganizing (default: create symlinks)."), + help="Copy files when reorganizing (default: create symlinks).", + ) + parser.add_argument( + "--check-dicoms", + action="store_true", + help=( + "Read DICOM file headers when reorganizing and check if they have the " + '"DERIVED" image type (which can be problematic for some BIDS ' + "converters). The paths to the derived DICOMs will be written to the log." + ), ) return parser @@ -232,7 +246,7 @@ def add_subparser_bids_conversion( "--pipeline-step", type=str, required=False, - help="Pipeline step.", + help="Pipeline step, as written in the config file.", ) parser = add_args_participant_and_session(parser) parser = add_arg_simulate(parser) diff --git a/nipoppy_cli/nipoppy/config/main.py b/nipoppy_cli/nipoppy/config/main.py index 007667b0..646e4940 100644 --- a/nipoppy_cli/nipoppy/config/main.py +++ b/nipoppy_cli/nipoppy/config/main.py @@ -10,7 +10,15 @@ from nipoppy.config.container import ModelWithContainerConfig from nipoppy.config.pipeline import PipelineConfig -from nipoppy.utils import StrOrPathLike, check_session, load_json +from nipoppy.layout import DEFAULT_LAYOUT_INFO +from nipoppy.tabular.dicom_dir_map import DicomDirMap +from nipoppy.utils import ( + BIDS_SESSION_PREFIX, + StrOrPathLike, + check_session, + check_session_strict, + load_json, +) class Config(ModelWithContainerConfig): @@ -19,12 +27,33 @@ class Config(ModelWithContainerConfig): DATASET_NAME: str = Field(description="Name of the dataset") VISITS: list[str] = Field(description="List of visits available in the study") SESSIONS: Optional[list[str]] = Field( - default=None, + default=None, # will be a list after validation description=( - "List of sessions available in the study" + "List of BIDS-compliant sessions available in the 
study" + f', prefixed with "{BIDS_SESSION_PREFIX}"' " (inferred from VISITS if not given)" ), ) + DICOM_DIR_MAP_FILE: Optional[Path] = Field( + default=None, + description=( + "Path to a CSV file mapping participant IDs to DICOM directories" + ", to be used in the DICOM reorg step. Note: this field and " + "DICOM_DIR_PARTICIPANT_FIRST cannot both be specified" + f'. The CSV should have three columns: "{DicomDirMap.col_participant_id}"' + f', "{DicomDirMap.col_session}"' + f', and "{DicomDirMap.col_participant_dicom_dir}"' + ), + ) + DICOM_DIR_PARTICIPANT_FIRST: Optional[bool] = Field( + default=None, + description=( + "Whether subdirectories under the raw dicom directory (default: " + f"{DEFAULT_LAYOUT_INFO.dpath_raw_dicom}) follow the pattern " + "<PARTICIPANT>/<SESSION> (default) or <SESSION>/<PARTICIPANT>. Note: " + "this field and DICOM_DIR_MAP_FILE cannot both be specified" + ), + ) BIDS: dict[str, dict[str, dict[str, PipelineConfig]]] = Field( + default={}, description="Configurations for BIDS converters, if any" ) @@ -34,6 +63,27 @@ class Config(ModelWithContainerConfig): model_config = ConfigDict(extra="allow") + def _check_sessions_have_prefix(self) -> Self: + """Check that sessions have the BIDS prefix.""" + for session in self.SESSIONS: + check_session_strict(session) + return self + + def _check_dicom_dir_options(self) -> Self: + """Check that only one DICOM directory mapping option is given.""" + if ( + self.DICOM_DIR_MAP_FILE is not None + and self.DICOM_DIR_PARTICIPANT_FIRST is not None + ): + raise ValueError( + "Cannot specify both DICOM_DIR_MAP_FILE and DICOM_DIR_PARTICIPANT_FIRST" + ) + # otherwise set the default if needed + elif self.DICOM_DIR_PARTICIPANT_FIRST is None: + self.DICOM_DIR_PARTICIPANT_FIRST = True + + return self + def _check_no_duplicate_pipeline(self) -> Self: """Check that BIDS and PROC_PIPELINES do not have common pipelines.""" bids_pipelines = set(self.BIDS.keys()) @@ -43,6 +93,7 @@ def _check_no_duplicate_pipeline(self) -> Self: "Cannot have the same pipeline under
BIDS and PROC_PIPELINES" f", got {bids_pipelines} and {proc_pipelines}" ) + return self def _propagate_container_config(self) -> Self: """Propagate the container config to all pipelines.""" @@ -82,6 +133,8 @@ def check_input(cls, data: Any): @model_validator(mode="after") def validate_and_process(self) -> Self: """Validate and process the configuration.""" + self._check_sessions_have_prefix() + self._check_dicom_dir_options() self._check_no_duplicate_pipeline() self._propagate_container_config() return self diff --git a/nipoppy_cli/nipoppy/layout.py b/nipoppy_cli/nipoppy/layout.py index 05e13b23..13d8ce28 100644 --- a/nipoppy_cli/nipoppy/layout.py +++ b/nipoppy_cli/nipoppy/layout.py @@ -309,3 +309,7 @@ def get_dpath_bids_db( pipeline_name, pipeline_version, participant=participant, session=session ) return self.dpath_bids_db / dname + + +# for printing defaults in docs +DEFAULT_LAYOUT_INFO = DatasetLayout(dpath_root="") diff --git a/nipoppy_cli/nipoppy/logger.py b/nipoppy_cli/nipoppy/logger.py index 8285107f..91b93466 100644 --- a/nipoppy_cli/nipoppy/logger.py +++ b/nipoppy_cli/nipoppy/logger.py @@ -4,6 +4,7 @@ from pathlib import Path from typing import Optional +from rich.console import Console from rich.logging import RichHandler from nipoppy.utils import StrOrPathLike @@ -13,16 +14,32 @@ FORMAT_FILE = "%(asctime)s %(levelname)-7s %(message)s" -def get_logger(name: Optional[str] = None, level: int = logging.INFO) -> logging.Logger: +def get_logger( + name: Optional[str] = "nipoppy", level: int = logging.INFO +) -> logging.Logger: """Create/get a logger with rich formatting.""" - logging.basicConfig( - level=level, - format=FORMAT_RICH, - datefmt=DATE_FORMAT, - handlers=[RichHandler(show_time=False, markup=True, rich_tracebacks=True)], - force=True, + # create logger + logger = logging.getLogger(name=name) + logger.setLevel(level) + + # stream WARNING and above to stderr with rich formatting + stderr_handler = RichHandler( + console=Console(stderr=True), 
show_time=False, markup=True, rich_tracebacks=True + ) + stderr_handler.addFilter(lambda record: record.levelno >= logging.WARNING) + logger.addHandler(stderr_handler) + + # stream levels below WARNING to stdout with rich formatting + stdout_handler = RichHandler( + console=Console(stderr=False), + show_time=False, + markup=True, + rich_tracebacks=True, ) - return logging.getLogger(name=name) + stdout_handler.addFilter(lambda record: record.levelno < logging.WARNING) + logger.addHandler(stdout_handler) + + return logger def add_logfile(logger: logging.Logger, fpath_log: StrOrPathLike) -> None: diff --git a/nipoppy_cli/nipoppy/tabular/base.py b/nipoppy_cli/nipoppy/tabular/base.py index 2e931c2a..a0f6aeba 100644 --- a/nipoppy_cli/nipoppy/tabular/base.py +++ b/nipoppy_cli/nipoppy/tabular/base.py @@ -19,12 +19,12 @@ class BaseTabularModel(BaseModel): Helper class for validating tabular data. Subclasses should define fields and their types, - and optionally override the _validate_fields() method. + and optionally override the _validate_before_fields() method. """ @model_validator(mode="before") @classmethod - def validate_input(cls, data: Any): + def validate_before(cls, data: Any): """Validate the raw input.""" if isinstance(data, dict): # generic validation @@ -52,12 +52,12 @@ def validate_input(cls, data: Any): # model-specific validation # to be overridden in subclass if needed - data = cls.validate_fields(data) + data = cls._validate_before_fields(data) return data @classmethod - def validate_fields(cls, data: dict): + def _validate_before_fields(cls, data: dict): """Validate model-specific fields. 
To be overridden in subclass if needed.""" return data diff --git a/nipoppy_cli/nipoppy/tabular/dicom_dir_map.py b/nipoppy_cli/nipoppy/tabular/dicom_dir_map.py new file mode 100644 index 00000000..14d450fc --- /dev/null +++ b/nipoppy_cli/nipoppy/tabular/dicom_dir_map.py @@ -0,0 +1,140 @@ +"""Classes for the DICOM directory mapping.""" + +from __future__ import annotations + +from pathlib import Path + +from pydantic import Field, model_validator +from typing_extensions import Self + +from nipoppy.layout import DEFAULT_LAYOUT_INFO +from nipoppy.tabular.base import BaseTabular, BaseTabularModel +from nipoppy.tabular.manifest import Manifest +from nipoppy.utils import ( + BIDS_SESSION_PREFIX, + BIDS_SUBJECT_PREFIX, + FIELD_DESCRIPTION_MAP, +) + + +class DicomDirMapModel(BaseTabularModel): + """Model for file mapping participant IDs to DICOM directories.""" + + participant_id: str = Field( + title="Participant ID", description=FIELD_DESCRIPTION_MAP["participant_id"] + ) + session: str = Field(description=FIELD_DESCRIPTION_MAP["session"]) + participant_dicom_dir: str = Field( + title="Participant's raw DICOM directory", + description=( + "Path to the participant's raw DICOM directory, relative to the dataset's " + f"raw DICOM directory (default: {DEFAULT_LAYOUT_INFO.dpath_raw_dicom})" + ), + ) + + @model_validator(mode="after") + def validate_after(self) -> Self: + """Validate participant_id and session fields.""" + if self.participant_id.startswith(BIDS_SUBJECT_PREFIX): + raise ValueError( + f'Participant ID should not start with "{BIDS_SUBJECT_PREFIX}"' + f", got {self.participant_id}" + ) + if not self.session.startswith(BIDS_SESSION_PREFIX): + raise ValueError( + f'Session should start with "{BIDS_SESSION_PREFIX}"' + f", got {self.session}" + ) + return self + + +class DicomDirMap(BaseTabular): + """ + A dataset's DICOM directory mapping. + + This mapping is used during DICOM reorganization and doughnut generation.
+ """ + + # column names + col_participant_id = "participant_id" + col_session = "session" + col_participant_dicom_dir = "participant_dicom_dir" + + index_cols = [col_participant_id, col_session] + + # set the model + model = DicomDirMapModel + + _metadata = BaseTabular._metadata + [ + "col_participant_id", + "col_session", + "col_participant_dicom_dir", + "index_cols", + "model", + ] + + @classmethod + def load_or_generate( + cls, + manifest: Manifest, + fpath_dicom_dir_map: str | Path | None, + participant_first: bool, + validate: bool = True, + ) -> Self: + """Load or generate a DicomDirMap instance. + + Parameters + ---------- + manifest : :class:`nipoppy.tabular.manifest.Manifest` + Manifest for generating the mapping (not used if ``fpath_dicom_dir_map`` + is not ``None``). + fpath_dicom_dir_map : str | Path | None + Path to a custom DICOM directory mapping file. If ``None``, + the DICOM directory mapping will be generated from the manifest. + participant_first : bool + Whether the generated mapping uses ``<PARTICIPANT>/<SESSION>`` order + (True) or ``<SESSION>/<PARTICIPANT>`` (False).
Not used if + ``fpath_dicom_dir_map`` is not ``None`` + validate : bool, optional + Whether to validate (through Pydantic) the created object, + by default ``True`` + + Returns + ------- + :class:`nipoppy.tabular.dicom_dir_map.DicomDirMap` + """ + # if there is a custom dicom_dir_map, use it + if fpath_dicom_dir_map is not None: + return cls.load(Path(fpath_dicom_dir_map), validate=validate) + + # else depends on participant_first or not + else: + data_dicom_dir_map = [] + for participant, session in manifest.get_participants_sessions(): + if participant_first: + participant_dicom_dir = f"{participant}/{session}" + else: + participant_dicom_dir = f"{session}/{participant}" + data_dicom_dir_map.append( + { + cls.col_participant_id: participant, + cls.col_session: session, + cls.col_participant_dicom_dir: participant_dicom_dir, + } + ) + dicom_dir_map = cls(data=data_dicom_dir_map) + if validate: + dicom_dir_map.validate() + return dicom_dir_map + + def get_dicom_dir(self, participant: str, session: str) -> str: + """Return the participant's raw DICOM directory for a given session.
+ + Parameters + ---------- + participant : str + Participant ID, without the BIDS prefix + session : str + Session, with the BIDS prefix + """ + return self.set_index(self.index_cols).loc[participant, session].item() diff --git a/nipoppy_cli/nipoppy/tabular/doughnut.py b/nipoppy_cli/nipoppy/tabular/doughnut.py index 928898d1..3add59e6 100644 --- a/nipoppy_cli/nipoppy/tabular/doughnut.py +++ b/nipoppy_cli/nipoppy/tabular/doughnut.py @@ -10,6 +10,7 @@ from typing_extensions import Self from nipoppy.logger import get_logger +from nipoppy.tabular.dicom_dir_map import DicomDirMap from nipoppy.tabular.manifest import Manifest, ManifestModel from nipoppy.utils import ( FIELD_DESCRIPTION_MAP, @@ -149,29 +150,25 @@ def get_bidsified_participants_sessions( def generate_doughnut( manifest: Manifest, + dicom_dir_map: DicomDirMap, dpath_downloaded: Optional[StrOrPathLike] = None, dpath_organized: Optional[StrOrPathLike] = None, dpath_bidsified: Optional[StrOrPathLike] = None, empty=False, logger: Optional[logging.Logger] = None, - # TODO allow custom map from participant_id to participant_dicom_dir ) -> Doughnut: """Generate a doughnut object.""" def check_status( dpath: Optional[StrOrPathLike], - participant_dname: str, - session: str, - session_first=False, + dname_subdirectory: StrOrPathLike, ): + dname_subdirectory = Path(dname_subdirectory) if dpath is None: status = False else: dpath = Path(dpath) - if session_first: - dpath_participant = dpath / session / participant_dname - else: - dpath_participant = dpath / participant_dname / session + dpath_participant = dpath / dname_subdirectory if dpath_participant.exists(): status = next(dpath_participant.iterdir(), None) is not None else: @@ -193,7 +190,9 @@ def check_status( session = manifest_record[manifest.col_session] # get DICOM dir - participant_dicom_dir = participant + participant_dicom_dir = dicom_dir_map.get_dicom_dir( + participant=participant, session=session + ) # get DICOM and BIDS IDs dicom_id = 
participant_id_to_dicom_id(participant) @@ -205,22 +204,14 @@ def check_status( status_bidsified = False else: status_downloaded = check_status( - dpath_downloaded, - participant_dicom_dir, - session, - session_first=True, + dpath=dpath_downloaded, + dname_subdirectory=participant_dicom_dir, ) status_organized = check_status( - dpath_organized, - dicom_id, - session, - session_first=True, + dpath=dpath_organized, dname_subdirectory=Path(dicom_id, session) ) status_bidsified = check_status( - dpath_bidsified, - bids_id, - session, - session_first=False, + dpath=dpath_bidsified, dname_subdirectory=Path(bids_id, session) ) doughnut_records.append( @@ -246,6 +237,7 @@ def check_status( def update_doughnut( doughnut: Doughnut, manifest: Manifest, + dicom_dir_map: DicomDirMap, dpath_downloaded: Optional[StrOrPathLike] = None, dpath_organized: Optional[StrOrPathLike] = None, dpath_bidsified: Optional[StrOrPathLike] = None, @@ -267,6 +259,7 @@ def update_doughnut( updated_doughnut = doughnut.concatenate( generate_doughnut( manifest=manifest_subset, + dicom_dir_map=dicom_dir_map, dpath_downloaded=dpath_downloaded, dpath_organized=dpath_organized, dpath_bidsified=dpath_bidsified, diff --git a/nipoppy_cli/nipoppy/tabular/manifest.py b/nipoppy_cli/nipoppy/tabular/manifest.py index 65e89a8f..cb8b6b13 100644 --- a/nipoppy_cli/nipoppy/tabular/manifest.py +++ b/nipoppy_cli/nipoppy/tabular/manifest.py @@ -5,11 +5,15 @@ from typing import Optional import pandas as pd -from pydantic import ConfigDict, Field +from pydantic import ConfigDict, Field, model_validator from typing_extensions import Self from nipoppy.tabular.base import BaseTabular, BaseTabularModel -from nipoppy.utils import FIELD_DESCRIPTION_MAP +from nipoppy.utils import ( + FIELD_DESCRIPTION_MAP, + check_participant_id_strict, + check_session_strict, +) class ManifestModel(BaseTabularModel): @@ -28,7 +32,7 @@ class ManifestModel(BaseTabularModel): ) @classmethod - def validate_fields(cls, data: dict): + def 
_validate_before_fields(cls, data: dict): """Validate manifest-specific fields.""" datatype = data.get(Manifest.col_datatype) if datatype is not None and not isinstance(datatype, list): @@ -42,6 +46,13 @@ def validate_fields(cls, data: dict): ) return data + @model_validator(mode="after") + def validate_after(self) -> Self: + """Validate fields after instance creation.""" + check_participant_id_strict(self.participant_id) + check_session_strict(self.session) + return self + # allow extra columns model_config = ConfigDict(extra="allow") diff --git a/nipoppy_cli/nipoppy/utils.py b/nipoppy_cli/nipoppy/utils.py index 46941535..8a5c75c4 100644 --- a/nipoppy_cli/nipoppy/utils.py +++ b/nipoppy_cli/nipoppy/utils.py @@ -67,6 +67,20 @@ def check_participant(participant: Optional[str]): return str(participant).removeprefix(BIDS_SUBJECT_PREFIX) +def check_participant_id_strict(participant_id: str): + """ + Make sure participant_id does not have the BIDS prefix. + + To use when validating user-provided files (e.g. the manifest). + """ + if participant_id.startswith(BIDS_SUBJECT_PREFIX): + raise ValueError( + f'Participant ID should not start with "{BIDS_SUBJECT_PREFIX}"' + f", got {participant_id}" + ) + return participant_id + + def check_session(session: Optional[str]): """Check/process a session string.""" if session is None: @@ -80,6 +94,19 @@ def check_session(session: Optional[str]): return f"{BIDS_SESSION_PREFIX}{session}" +def check_session_strict(session: Optional[str]): + """ + Make sure session has the BIDS prefix. + + To use when validating user-provided files (e.g. the manifest). 
+ """ + if session is not None and not session.startswith(BIDS_SESSION_PREFIX): + raise ValueError( + f'Session should start with "{BIDS_SESSION_PREFIX}"' f", got {session}" + ) + return session + + def strip_session(session: Optional[str]): """Strip the BIDS prefix from a session string.""" if session is None: diff --git a/nipoppy_cli/nipoppy/workflows/base.py b/nipoppy_cli/nipoppy/workflows/base.py index 791a67fa..66392ce6 100644 --- a/nipoppy_cli/nipoppy/workflows/base.py +++ b/nipoppy_cli/nipoppy/workflows/base.py @@ -17,6 +17,7 @@ from nipoppy.layout import DatasetLayout from nipoppy.logger import get_logger from nipoppy.tabular.base import BaseTabular +from nipoppy.tabular.dicom_dir_map import DicomDirMap from nipoppy.tabular.doughnut import Doughnut, generate_doughnut from nipoppy.tabular.manifest import Manifest from nipoppy.utils import StrOrPathLike, add_path_timestamp @@ -270,7 +271,7 @@ def config(self) -> Config: @cached_property def manifest(self) -> Manifest: """Load the manifest.""" - fpath_manifest = self.layout.fpath_manifest + fpath_manifest = Path(self.layout.fpath_manifest) expected_sessions = self.config.SESSIONS expected_visits = self.config.VISITS try: @@ -286,7 +287,7 @@ def manifest(self) -> Manifest: def doughnut(self) -> Doughnut: """Load the doughnut.""" logger = self.logger - fpath_doughnut = self.layout.fpath_doughnut + fpath_doughnut = Path(self.layout.fpath_doughnut) try: return Doughnut.load(fpath_doughnut) except FileNotFoundError: @@ -296,6 +297,7 @@ def doughnut(self) -> Doughnut: ) doughnut = generate_doughnut( manifest=self.manifest, + dicom_dir_map=self.dicom_dir_map, dpath_downloaded=self.layout.dpath_raw_dicom, dpath_organized=self.layout.dpath_sourcedata, dpath_bidsified=self.layout.dpath_bids, @@ -314,3 +316,21 @@ def doughnut(self) -> Doughnut: ) return doughnut + + @cached_property + def dicom_dir_map(self) -> DicomDirMap: + """Get the DICOM directory mapping.""" + fpath_dicom_dir_map = self.config.DICOM_DIR_MAP_FILE + 
if fpath_dicom_dir_map is not None: + fpath_dicom_dir_map = Path(fpath_dicom_dir_map) + if not fpath_dicom_dir_map.exists(): + raise FileNotFoundError( + "DICOM directory map file not found" + f": {self.config.DICOM_DIR_MAP_FILE}" + ) + + return DicomDirMap.load_or_generate( + manifest=self.manifest, + fpath_dicom_dir_map=fpath_dicom_dir_map, + participant_first=self.config.DICOM_DIR_PARTICIPANT_FIRST, + ) diff --git a/nipoppy_cli/nipoppy/workflows/dicom_reorg.py b/nipoppy_cli/nipoppy/workflows/dicom_reorg.py index 07c598f8..ba7d678b 100644 --- a/nipoppy_cli/nipoppy/workflows/dicom_reorg.py +++ b/nipoppy_cli/nipoppy/workflows/dicom_reorg.py @@ -5,10 +5,23 @@ from pathlib import Path from typing import Optional +import pydicom + from nipoppy.utils import StrOrPathLike from nipoppy.workflows.base import BaseWorkflow +def is_derived_dicom(fpath: Path) -> bool: + """ + Read a DICOM file's header and check if it is a derived file. + + Some BIDS converters (e.g. Heudiconv) do not support derived DICOM files. + """ + dcm_info = pydicom.dcmread(fpath) + img_types = dcm_info.ImageType + return "DERIVED" in img_types + + class DicomReorgWorkflow(BaseWorkflow): """Workflow for organizing raw DICOM files.""" @@ -16,6 +29,7 @@ def __init__( self, dpath_root: StrOrPathLike, copy_files: bool = False, + check_dicoms: bool = False, fpath_layout: Optional[StrOrPathLike] = None, logger: Optional[logging.Logger] = None, dry_run: bool = False, @@ -29,21 +43,18 @@ def __init__( dry_run=dry_run, ) self.copy_files = copy_files + self.check_dicoms = check_dicoms def get_fpaths_to_reorg( - self, participant: str, session: str, participant_first=True + self, + participant: str, + session: str, ) -> list[Path]: - """ - Get file paths to reorganize for a single participant and session. - - This method can be overridden if the raw DICOM layout is different than what - is typically expected. 
- """ - # support both participant-first and session-first raw DICOM layouts - if participant_first: - dpath_downloaded = self.layout.dpath_raw_dicom / participant / session - else: - dpath_downloaded = self.layout.dpath_raw_dicom / session / participant + """Get file paths to reorganize for a single participant and session.""" + dpath_downloaded = ( + self.layout.dpath_raw_dicom + / self.dicom_dir_map.get_dicom_dir(participant=participant, session=session) + ) # make sure directory exists if not dpath_downloaded.exists(): @@ -73,15 +84,24 @@ def apply_fname_mapping( def run_single(self, participant: str, session: str): """Reorganize downloaded DICOM files for a single participant and session.""" # get paths to reorganize - # TODO add config option for session-first or participant-first raw DICOM layout - fpaths_to_reorg = self.get_fpaths_to_reorg( - participant, session, participant_first=False - ) + fpaths_to_reorg = self.get_fpaths_to_reorg(participant, session) # do reorg dpath_reorganized: Path = self.layout.dpath_sourcedata / participant / session self.mkdir(dpath_reorganized) for fpath_source in fpaths_to_reorg: + # check file (though only error out if DICOM cannot be read) + if self.check_dicoms: + try: + if is_derived_dicom(fpath_source): + self.logger.warning( + f"Derived DICOM file detected: {fpath_source}" + ) + except Exception as exception: + raise RuntimeError( + f"Error checking DICOM file {fpath_source}: {exception}" + ) + fpath_dest = dpath_reorganized / self.apply_fname_mapping( fpath_source.name, participant=participant, session=session ) diff --git a/nipoppy_cli/nipoppy/workflows/doughnut.py b/nipoppy_cli/nipoppy/workflows/doughnut.py index 4196ee43..137f9a77 100644 --- a/nipoppy_cli/nipoppy/workflows/doughnut.py +++ b/nipoppy_cli/nipoppy/workflows/doughnut.py @@ -48,6 +48,7 @@ def run_main(self): doughnut = update_doughnut( doughnut=old_doughnut, manifest=self.manifest, + dicom_dir_map=self.dicom_dir_map, dpath_downloaded=dpath_downloaded, 
dpath_organized=dpath_organized, dpath_bidsified=dpath_bidsified, @@ -62,6 +63,7 @@ def run_main(self): logger.info(f"Did not find existing doughnut at {fpath_doughnut}") doughnut = generate_doughnut( manifest=self.manifest, + dicom_dir_map=self.dicom_dir_map, dpath_downloaded=dpath_downloaded, dpath_organized=dpath_organized, dpath_bidsified=dpath_bidsified, diff --git a/nipoppy_cli/pyproject.toml b/nipoppy_cli/pyproject.toml index 69b86109..c9947289 100644 --- a/nipoppy_cli/pyproject.toml +++ b/nipoppy_cli/pyproject.toml @@ -24,6 +24,7 @@ dependencies = [ "pandas", "pybids", "pydantic", + "pydicom", "rich", "rich_argparse", "typing-extensions", diff --git a/nipoppy_cli/tests/conftest.py b/nipoppy_cli/tests/conftest.py index 578bc9dc..947b511b 100644 --- a/nipoppy_cli/tests/conftest.py +++ b/nipoppy_cli/tests/conftest.py @@ -136,7 +136,7 @@ def _fake_dicoms( max_n_files_per_image: int = 5, min_n_subdir_levels: int = 1, max_n_subdir_levels: int = 2, - participant_first: bool = False, + participant_first: bool = True, max_dname_dicom: int = 1000000, rng_seed: int = 3791, ): @@ -200,7 +200,7 @@ def fake_dicoms_downloaded( max_n_files_per_image: int = 5, min_n_subdir_levels: int = 1, max_n_subdir_levels: int = 2, - participant_first: bool = False, + participant_first: bool = True, max_dname_dicom: int = 1000000, rng_seed: int = 3791, ): @@ -229,7 +229,7 @@ def fake_dicoms_organized( n_images: int = 3, min_n_files_per_image: int = 1, max_n_files_per_image: int = 5, - participant_first: bool = False, + participant_first: bool = True, max_dname_dicom: int = 1000000, rng_seed: int = 3791, ): diff --git a/nipoppy_cli/tests/data/config_invalid2.json b/nipoppy_cli/tests/data/config_invalid2.json new file mode 100644 index 00000000..8c57850e --- /dev/null +++ b/nipoppy_cli/tests/data/config_invalid2.json @@ -0,0 +1,31 @@ +{ + "DATASET_NAME": "my_dataset", + "VISITS": [ + "V01", + "V02" + ], + "SESSIONS": [ + "V01" + ], + "PROC_PIPELINES": { + "mriqc": { + "23.1.0": { + 
"CONTAINER": "mriqc_{}.sif", + "URI": "docker://nipreps/mriqc:{version}", + "INVOCATION": {} + } + }, + "fmriprep": { + "20.2.7": { + "CONTAINER": "fmriprep_{}.sif", + "URI": "docker://nipreps/fmriprep:{version}", + "INVOCATION": {} + } + }, + "freesurfer": { + "6.0.1": { + "DESCRIPTION": "FreeSurfer version associated with fMRIPrep 20.2.7" + } + } + } +} diff --git a/nipoppy_cli/tests/data/dicom-derived.dcm b/nipoppy_cli/tests/data/dicom-derived.dcm new file mode 100644 index 00000000..78238aaa Binary files /dev/null and b/nipoppy_cli/tests/data/dicom-derived.dcm differ diff --git a/nipoppy_cli/tests/data/dicom-not_derived.dcm b/nipoppy_cli/tests/data/dicom-not_derived.dcm new file mode 100644 index 00000000..47fae99d Binary files /dev/null and b/nipoppy_cli/tests/data/dicom-not_derived.dcm differ diff --git a/nipoppy_cli/tests/data/dicom_dir_map1.csv b/nipoppy_cli/tests/data/dicom_dir_map1.csv new file mode 100644 index 00000000..883f3347 --- /dev/null +++ b/nipoppy_cli/tests/data/dicom_dir_map1.csv @@ -0,0 +1,4 @@ +participant_id,session,participant_dicom_dir +01,ses-1,path/for/sub-01/ses-1 +01,ses-2,path/for/sub-01/ses-2 +02,ses-1,path/for/sub-02/ses-1 diff --git a/nipoppy_cli/tests/data/dicom_dir_map2.csv b/nipoppy_cli/tests/data/dicom_dir_map2.csv new file mode 100644 index 00000000..93d0b4d5 --- /dev/null +++ b/nipoppy_cli/tests/data/dicom_dir_map2.csv @@ -0,0 +1,4 @@ +participant_id,session,participant_dicom_dir +01,ses-BL,all_BL_files_are_together +01,ses-M12,all_M12_files_are_together +02,ses-BL,all_BL_files_are_together diff --git a/nipoppy_cli/tests/data/dicom_dir_map_invalid1.csv b/nipoppy_cli/tests/data/dicom_dir_map_invalid1.csv new file mode 100644 index 00000000..a1021200 --- /dev/null +++ b/nipoppy_cli/tests/data/dicom_dir_map_invalid1.csv @@ -0,0 +1,4 @@ +participant_id,session,dicom_dir +01,ses-1,path/for/sub-01/ses-1 +01,ses-2,path/for/sub-01/ses-2 +02,ses-1,path/for/sub-02/ses-1 diff --git a/nipoppy_cli/tests/data/dicom_dir_map_invalid2.csv 
b/nipoppy_cli/tests/data/dicom_dir_map_invalid2.csv new file mode 100644 index 00000000..7d370362 --- /dev/null +++ b/nipoppy_cli/tests/data/dicom_dir_map_invalid2.csv @@ -0,0 +1,4 @@ +participant_id,participant_dicom_dir +01,path/for/sub-01/ses-1 +01,path/for/sub-01/ses-2 +02,path/for/sub-02/ses-1 diff --git a/nipoppy_cli/tests/data/dicom_dir_map_invalid3.csv b/nipoppy_cli/tests/data/dicom_dir_map_invalid3.csv new file mode 100644 index 00000000..d37218df --- /dev/null +++ b/nipoppy_cli/tests/data/dicom_dir_map_invalid3.csv @@ -0,0 +1,4 @@ +participant_id,session,participant_dicom_dir +01,ses-1,path/for/sub-01/ses-1 +01,ses-2,path/for/sub-01/ses-2 +sub-02,ses-1,path/for/sub-02/ses-1 diff --git a/nipoppy_cli/tests/data/dicom_dir_map_invalid4.csv b/nipoppy_cli/tests/data/dicom_dir_map_invalid4.csv new file mode 100644 index 00000000..3d1124a2 --- /dev/null +++ b/nipoppy_cli/tests/data/dicom_dir_map_invalid4.csv @@ -0,0 +1,4 @@ +participant_id,session,participant_dicom_dir +01,ses-1,path/for/sub-01/ses-1 +01,ses-2,path/for/sub-01/ses-2 +02,1,path/for/sub-02/ses-1 diff --git a/nipoppy_cli/tests/data/manifest_invalid3.csv b/nipoppy_cli/tests/data/manifest_invalid3.csv new file mode 100644 index 00000000..ff0e2919 --- /dev/null +++ b/nipoppy_cli/tests/data/manifest_invalid3.csv @@ -0,0 +1,6 @@ +participant_id,visit,session,datatype +01,BL,ses-BL,"['anat']" +01,M12,ses-M12,"['anat']" +02,BL,ses-BL,"['anat','dwi']" +02,M12,ses-M12,"['anat','dwi']" +sub-01,BL,ses-BL,"['anat']" diff --git a/nipoppy_cli/tests/data/manifest_invalid4.csv b/nipoppy_cli/tests/data/manifest_invalid4.csv new file mode 100644 index 00000000..1107c083 --- /dev/null +++ b/nipoppy_cli/tests/data/manifest_invalid4.csv @@ -0,0 +1,6 @@ +participant_id,visit,session,datatype +01,BL,ses-BL,"['anat']" +01,M12,ses-M12,"['anat']" +02,BL,ses-BL,"['anat','dwi']" +02,M12,ses-M12,"['anat','dwi']" +03,BL,BL,"['anat']" diff --git a/nipoppy_cli/tests/test_config_main.py b/nipoppy_cli/tests/test_config_main.py index 
c1b89e71..e80b4357 100644 --- a/nipoppy_cli/tests/test_config_main.py +++ b/nipoppy_cli/tests/test_config_main.py @@ -1,6 +1,7 @@ """Tests for the config module.""" import json +from contextlib import nullcontext from pathlib import Path import pytest @@ -69,6 +70,29 @@ def test_sessions_inferred(visits, expected_sessions): assert config.SESSIONS == expected_sessions +@pytest.mark.parametrize( + "dicom_dir_map_file,dicom_dir_participant_first,is_valid", + [ + (None, None, True), + ("path", None, True), + (None, True, True), + (None, False, True), + ("path", True, False), + ], +) +def test_check_dicom_dir_options( + valid_config_data, dicom_dir_map_file, dicom_dir_participant_first, is_valid +): + valid_config_data["DICOM_DIR_MAP_FILE"] = dicom_dir_map_file + valid_config_data["DICOM_DIR_PARTICIPANT_FIRST"] = dicom_dir_participant_first + with ( + pytest.raises(ValueError, match="Cannot specify both") + if not is_valid + else nullcontext() + ): + assert isinstance(Config(**valid_config_data), Config) + + @pytest.mark.parametrize( "data_root,data_pipeline,data_expected", [ @@ -213,6 +237,13 @@ def test_load(path): assert hasattr(config, field) -def test_load_missing_required(): +@pytest.mark.parametrize( + "path", + [ + DPATH_TEST_DATA / "config_invalid1.json", # missing required + DPATH_TEST_DATA / "config_invalid2.json", # invalid sessions + ], +) +def test_load_invalid(path): with pytest.raises(ValueError): - Config.load(DPATH_TEST_DATA / "config_invalid1.json") + Config.load(path) diff --git a/nipoppy_cli/tests/test_logger.py b/nipoppy_cli/tests/test_logger.py index a7b2a457..c14611bf 100644 --- a/nipoppy_cli/tests/test_logger.py +++ b/nipoppy_cli/tests/test_logger.py @@ -16,6 +16,25 @@ def test_get_logger(level: int, name: str): assert logger.name == name +def test_get_logger_stdout(capsys: pytest.CaptureFixture): + logger = get_logger(level=logging.DEBUG) + logger.debug("debug") + logger.info("info") + captured = capsys.readouterr() + assert captured.out + 
assert not captured.err + + +def test_get_logger_stderr(capsys: pytest.CaptureFixture): + logger = get_logger(level=logging.DEBUG) + logger.warning("warning") + logger.error("error") + logger.critical("critical") + captured = capsys.readouterr() + assert not captured.out + assert captured.err + + @pytest.mark.parametrize( "level", [logging.DEBUG, logging.INFO, logging.WARNING, logging.ERROR] ) diff --git a/nipoppy_cli/tests/test_parser.py b/nipoppy_cli/tests/test_parser.py index 88dc6710..e86291cf 100644 --- a/nipoppy_cli/tests/test_parser.py +++ b/nipoppy_cli/tests/test_parser.py @@ -112,6 +112,7 @@ def test_add_subparser_doughnut(args): [ ["--dataset-root", "my_dataset"], ["--dataset-root", "my_dataset", "--copy-files"], + ["--dataset-root", "my_dataset", "--check-dicoms"], ], ) def test_add_subparser_dicom_reorg(args): diff --git a/nipoppy_cli/tests/test_tabular_dicom_dir_map.py b/nipoppy_cli/tests/test_tabular_dicom_dir_map.py new file mode 100644 index 00000000..40d5430d --- /dev/null +++ b/nipoppy_cli/tests/test_tabular_dicom_dir_map.py @@ -0,0 +1,74 @@ +"""Tests for the DICOM directory map classes.""" + +import pytest + +from nipoppy.tabular.dicom_dir_map import DicomDirMap +from nipoppy.tabular.manifest import Manifest + +from .conftest import DPATH_TEST_DATA + + +@pytest.mark.parametrize( + "fname", + ["dicom_dir_map1.csv", "dicom_dir_map2.csv"], +) +def test_load(fname): + assert isinstance(DicomDirMap.load(DPATH_TEST_DATA / fname), DicomDirMap) + + +@pytest.mark.parametrize( + "fname", + [ + "dicom_dir_map_invalid1.csv", + "dicom_dir_map_invalid2.csv", + "dicom_dir_map_invalid3.csv", + "dicom_dir_map_invalid4.csv", + ], +) +def test_load_invalid(fname): + with pytest.raises(ValueError): + DicomDirMap.load(DPATH_TEST_DATA / fname) + + +@pytest.mark.parametrize( + "fpath_dicom_dir_map", + [ + DPATH_TEST_DATA / "dicom_dir_map1.csv", + str(DPATH_TEST_DATA / "dicom_dir_map1.csv"), + ], +) +def test_load_or_generate_load(fpath_dicom_dir_map): + dicom_dir_map = 
DicomDirMap.load_or_generate( + manifest=Manifest(), + fpath_dicom_dir_map=fpath_dicom_dir_map, + participant_first=True, + ) + assert isinstance(dicom_dir_map, DicomDirMap) + + +@pytest.mark.parametrize( + "participant_ids,sessions,participant_first,expected", + [ + (["01", "02"], ["ses-1", "ses-2"], True, ["01/ses-1", "02/ses-2"]), + (["P01", "P02"], ["ses-BL", "ses-BL"], False, ["ses-BL/P01", "ses-BL/P02"]), + ], +) +def test_load_or_generate_generate( + participant_ids, sessions, participant_first, expected +): + manifest = Manifest( + data={ + Manifest.col_participant_id: participant_ids, + Manifest.col_visit: sessions, + Manifest.col_session: sessions, + Manifest.col_datatype: [[] for _ in participant_ids], + } + ) + + dicom_dir_map = DicomDirMap.load_or_generate( + manifest=manifest, + fpath_dicom_dir_map=None, + participant_first=participant_first, + ) + + assert dicom_dir_map[DicomDirMap.col_participant_dicom_dir].tolist() == expected diff --git a/nipoppy_cli/tests/test_tabular_doughnut.py b/nipoppy_cli/tests/test_tabular_doughnut.py index 95c36dfd..18980b59 100644 --- a/nipoppy_cli/tests/test_tabular_doughnut.py +++ b/nipoppy_cli/tests/test_tabular_doughnut.py @@ -5,6 +5,7 @@ import pytest +from nipoppy.tabular.dicom_dir_map import DicomDirMap from nipoppy.tabular.doughnut import Doughnut, generate_doughnut, update_doughnut from nipoppy.utils import StrOrPathLike @@ -208,6 +209,9 @@ def test_generate_and_update( # generate the doughnut doughnut1 = generate_doughnut( manifest=manifest1, + dicom_dir_map=DicomDirMap.load_or_generate( + manifest=manifest1, fpath_dicom_dir_map=None, participant_first=True + ), dpath_downloaded=dpath_downloaded, dpath_organized=dpath_organized, dpath_bidsified=dpath_bidsified, @@ -230,6 +234,9 @@ def test_generate_and_update( doughnut2 = update_doughnut( doughnut=doughnut1, manifest=manifest2, + dicom_dir_map=DicomDirMap.load_or_generate( + manifest=manifest2, fpath_dicom_dir_map=None, participant_first=True + ), 
dpath_downloaded=dpath_downloaded, dpath_organized=dpath_organized, dpath_bidsified=dpath_bidsified, @@ -270,6 +277,9 @@ def test_generate_missing_paths(tmp_path: Path): doughnut = generate_doughnut( manifest=manifest, + dicom_dir_map=DicomDirMap.load_or_generate( + manifest=manifest, fpath_dicom_dir_map=None, participant_first=True + ), dpath_downloaded=dpath_downloaded, dpath_organized=dpath_organized, dpath_bidsified=dpath_bidsified, diff --git a/nipoppy_cli/tests/test_tabular_manifest.py b/nipoppy_cli/tests/test_tabular_manifest.py index 746e709a..14a5d488 100644 --- a/nipoppy_cli/tests/test_tabular_manifest.py +++ b/nipoppy_cli/tests/test_tabular_manifest.py @@ -38,6 +38,8 @@ def test_load_keep_extra_cols(): (DPATH_TEST_DATA / "manifest2.csv", True), (DPATH_TEST_DATA / "manifest_invalid1.csv", False), (DPATH_TEST_DATA / "manifest_invalid2.csv", False), + (DPATH_TEST_DATA / "manifest_invalid3.csv", False), + (DPATH_TEST_DATA / "manifest_invalid4.csv", False), ], ) def test_validate(fpath, is_valid): diff --git a/nipoppy_cli/tests/test_utils.py b/nipoppy_cli/tests/test_utils.py index 8ed7c8c7..2ad83f60 100644 --- a/nipoppy_cli/tests/test_utils.py +++ b/nipoppy_cli/tests/test_utils.py @@ -2,6 +2,7 @@ import json import re +from contextlib import nullcontext from pathlib import Path from typing import Optional @@ -14,7 +15,9 @@ add_path_suffix, add_path_timestamp, check_participant, + check_participant_id_strict, check_session, + check_session_strict, dicom_id_to_bids_id, get_pipeline_tag, load_json, @@ -61,6 +64,16 @@ def test_check_participant(participant, expected): assert check_participant(participant) == expected +@pytest.mark.parametrize("participant_id,is_valid", [("01", True), ("sub-01", False)]) +def test_check_participant_id_strict(participant_id, is_valid): + with ( + pytest.raises(ValueError, match="Participant ID should not start with") + if not is_valid + else nullcontext() + ): + assert check_participant_id_strict(participant_id) == participant_id + 
+ @pytest.mark.parametrize( "session,expected", [("ses-BL", "ses-BL"), ("BL", "ses-BL"), ("M12", "ses-M12"), (None, None)], @@ -69,6 +82,16 @@ def test_check_session(session, expected): assert check_session(session) == expected +@pytest.mark.parametrize("session,is_valid", [("ses-1", True), ("1", False)]) +def test_check_session_strict(session, is_valid): + with ( + pytest.raises(ValueError, match="Session should start with") + if not is_valid + else nullcontext() + ): + assert check_session_strict(session) == session + + @pytest.mark.parametrize( "session,expected", [("ses-BL", "BL"), ("BL", "BL"), ("ses-01", "01"), (None, None)] ) diff --git a/nipoppy_cli/tests/test_workflow_base.py b/nipoppy_cli/tests/test_workflow_base.py index e205de74..5701e20d 100644 --- a/nipoppy_cli/tests/test_workflow_base.py +++ b/nipoppy_cli/tests/test_workflow_base.py @@ -9,12 +9,13 @@ from nipoppy.config.main import Config from nipoppy.logger import get_logger +from nipoppy.tabular.dicom_dir_map import DicomDirMap from nipoppy.tabular.manifest import Manifest from nipoppy.utils import FPATH_SAMPLE_CONFIG, FPATH_SAMPLE_MANIFEST from nipoppy.workflows.base import BaseWorkflow from .conftest import datetime_fixture # noqa F401 -from .conftest import create_empty_dataset, get_config +from .conftest import DPATH_TEST_DATA, create_empty_dataset, get_config, prepare_dataset @pytest.fixture(params=[get_logger("my_logger"), None], scope="function") @@ -27,6 +28,8 @@ def run_main(self): workflow = DummyWorkflow( dpath_root=dpath_root, name="my_workflow", logger=request.param ) + manifest = prepare_dataset(participants_and_sessions_manifest={}) + manifest.save_with_backup(workflow.layout.fpath_manifest) workflow.logger.setLevel(logging.DEBUG) # capture all logs return workflow @@ -144,3 +147,21 @@ def test_manifest(workflow: BaseWorkflow): def test_manifest_not_found(workflow: BaseWorkflow): with pytest.raises(FileNotFoundError): workflow.manifest + + +def test_dicom_dir_map(workflow: 
BaseWorkflow): + workflow.config = get_config() + assert isinstance(workflow.dicom_dir_map, DicomDirMap) + + +def test_dicom_dir_map_custom(workflow: BaseWorkflow): + workflow.config = get_config() + workflow.config.DICOM_DIR_MAP_FILE = DPATH_TEST_DATA / "dicom_dir_map1.csv" + assert isinstance(workflow.dicom_dir_map, DicomDirMap) + + +def test_dicom_dir_map_not_found(workflow: BaseWorkflow): + workflow.config = get_config() + workflow.config.DICOM_DIR_MAP_FILE = "fake_path" + with pytest.raises(FileNotFoundError, match="DICOM directory map file not found"): + workflow.dicom_dir_map diff --git a/nipoppy_cli/tests/test_workflow_dicom_reorg.py b/nipoppy_cli/tests/test_workflow_dicom_reorg.py index d13e1a32..ef7921b0 100644 --- a/nipoppy_cli/tests/test_workflow_dicom_reorg.py +++ b/nipoppy_cli/tests/test_workflow_dicom_reorg.py @@ -1,13 +1,27 @@ """Tests for DicomReorgWorkflow.""" +import logging +import shutil from pathlib import Path import pytest +from nipoppy.tabular.dicom_dir_map import DicomDirMap from nipoppy.tabular.manifest import Manifest -from nipoppy.workflows.dicom_reorg import DicomReorgWorkflow +from nipoppy.workflows.dicom_reorg import DicomReorgWorkflow, is_derived_dicom -from .conftest import create_empty_dataset, get_config, prepare_dataset +from .conftest import DPATH_TEST_DATA, create_empty_dataset, get_config, prepare_dataset + + +@pytest.mark.parametrize( + "fpath,expected_result", + [ + (DPATH_TEST_DATA / "dicom-not_derived.dcm", False), + (DPATH_TEST_DATA / "dicom-derived.dcm", True), + ], +) +def test_is_derived_dicom(fpath, expected_result): + assert is_derived_dicom(fpath) == expected_result @pytest.mark.parametrize( @@ -27,9 +41,16 @@ def test_get_fpaths_to_reorg( ): dpath_root = tmp_path / "my_dataset" + manifest = prepare_dataset( + participants_and_sessions_manifest={participant: [session]} + ) + workflow = DicomReorgWorkflow(dpath_root=dpath_root) + workflow.dicom_dir_map = DicomDirMap.load_or_generate( + manifest=manifest, 
fpath_dicom_dir_map=None, participant_first=participant_first + ) for fpath in fpaths: - fpath_full = workflow.layout.dpath_raw_dicom / fpath + fpath_full: Path = workflow.layout.dpath_raw_dicom / fpath fpath_full.parent.mkdir(parents=True, exist_ok=True) fpath_full.touch() @@ -37,14 +58,22 @@ def test_get_fpaths_to_reorg( workflow.get_fpaths_to_reorg( participant=participant, session=session, - participant_first=participant_first, ) ) == len(fpaths) def test_get_fpaths_to_reorg_error_not_found(tmp_path: Path): dpath_root = tmp_path / "my_dataset" + participant = "XXX" + session = "ses-X" + workflow = DicomReorgWorkflow(dpath_root=dpath_root) + manifest = prepare_dataset( + participants_and_sessions_manifest={participant: [session]} + ) + workflow.dicom_dir_map = DicomDirMap.load_or_generate( + manifest=manifest, fpath_dicom_dir_map=None, participant_first=True + ) with pytest.raises(FileNotFoundError, match="Raw DICOM directory not found"): workflow.get_fpaths_to_reorg("XXX", "ses-X") @@ -96,12 +125,20 @@ def test_run_single_error_file_exists(tmp_path: Path): participant = "01" session = "ses-1" dataset_name = "my_dataset" + workflow = DicomReorgWorkflow(dpath_root=tmp_path / dataset_name) + manifest = prepare_dataset( + participants_and_sessions_manifest={participant: [session]} + ) + workflow.dicom_dir_map = DicomDirMap.load_or_generate( + manifest=manifest, fpath_dicom_dir_map=None, participant_first=True + ) + # create the same file in both the downloaded and organized directories fname = "test.dcm" for fpath in [ - workflow.layout.dpath_raw_dicom / session / participant / fname, + workflow.layout.dpath_raw_dicom / participant / session / fname, workflow.layout.dpath_sourcedata / participant / session / fname, ]: fpath.parent.mkdir(parents=True, exist_ok=True) @@ -111,12 +148,73 @@ def test_run_single_error_file_exists(tmp_path: Path): workflow.run_single(participant, session) +def test_run_single_invalid_dicom(tmp_path: Path, caplog: 
pytest.LogCaptureFixture): + participant = "01" + session = "ses-1" + dataset_name = "my_dataset" + workflow = DicomReorgWorkflow(dpath_root=tmp_path / dataset_name, check_dicoms=True) + + manifest = prepare_dataset( + participants_and_sessions_manifest={participant: [session]} + ) + workflow.dicom_dir_map = DicomDirMap.load_or_generate( + manifest=manifest, fpath_dicom_dir_map=None, participant_first=True + ) + + # use derived DICOM file + fpath_dicom = workflow.layout.dpath_raw_dicom / participant / session / "test.dcm" + fpath_dicom.parent.mkdir(parents=True, exist_ok=True) + shutil.copyfile(DPATH_TEST_DATA / "dicom-derived.dcm", fpath_dicom) + + try: + workflow.run_single(participant, session) + except Exception: + pass + + assert any( + [ + "Derived DICOM file detected" in record.message + and record.levelno == logging.WARNING + for record in caplog.records + ] + ) + + +def test_run_single_error_dicom_read(tmp_path: Path): + participant = "01" + session = "ses-1" + dataset_name = "my_dataset" + workflow = DicomReorgWorkflow(dpath_root=tmp_path / dataset_name, check_dicoms=True) + + manifest = prepare_dataset( + participants_and_sessions_manifest={participant: [session]} + ) + workflow.dicom_dir_map = DicomDirMap.load_or_generate( + manifest=manifest, fpath_dicom_dir_map=None, participant_first=True + ) + + # create an invalid DICOM file + fname = "test.dcm" + fpath = workflow.layout.dpath_raw_dicom / participant / session / fname + fpath.parent.mkdir(parents=True, exist_ok=True) + fpath.touch() + + with pytest.raises(RuntimeError, match="Error checking DICOM file"): + workflow.run_single(participant, session) + + def test_copy_files_default(tmp_path: Path): dataset_name = "my_dataset" workflow = DicomReorgWorkflow(dpath_root=tmp_path / dataset_name) assert workflow.copy_files is False +def test_check_dicoms_default(tmp_path: Path): + dataset_name = "my_dataset" + workflow = DicomReorgWorkflow(dpath_root=tmp_path / dataset_name) + assert workflow.check_dicoms 
is False + + @pytest.mark.parametrize( "participants_and_sessions_manifest,participants_and_sessions_downloaded", [