Skip to content

Commit

Permalink
[ENH] Add DICOM header checking and DicomDirMap to `DicomReorgWorkf…
Browse files Browse the repository at this point in the history
…low` (nipoppy#229)

* add dicom-checking functionality and fix logging things for testing

* split console logging to stdout/stderr instead of just stdout

* add options for using custom mapping or session-first DICOM directories for reorg/doughnut

* fix Sphinx warnings

* add tests for manifest validation

* add tests for DicomDirMap

* more tests + light refactor of participant/session strict validation

* add test for invalid path to DicomDirMap

* use Myst substitutions and default layout object for paths in docs

* slightly update CLI argument descriptions

* import Self from typing_extensions

* also do `from __future__ import annotations`...

* add Codecov config file
  • Loading branch information
michellewang authored May 31, 2024
1 parent cccdbf6 commit 8991806
Show file tree
Hide file tree
Showing 37 changed files with 738 additions and 78 deletions.
4 changes: 4 additions & 0 deletions codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
coverage:
ignore:
- nipoppy/ # legacy code
- nipoppy_cli/docs/ # documentation
10 changes: 9 additions & 1 deletion nipoppy_cli/docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
https://www.sphinx-doc.org/en/master/usage/configuration.html
"""

# for substitutions
from nipoppy.layout import DEFAULT_LAYOUT_INFO

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

Expand Down Expand Up @@ -56,7 +59,12 @@

# -- MyST configuration -------------------------------------------------------

myst_enable_extensions = ["fieldlist"]
myst_enable_extensions = ["fieldlist", "substitution"]

myst_substitutions = {
"fpath_manifest": f"`{DEFAULT_LAYOUT_INFO.fpath_manifest}`",
"fpath_config": f"`{DEFAULT_LAYOUT_INFO.fpath_config}`",
}

# -- Autodoc/AutoAPI configuration ----------------------------------------------------

Expand Down
6 changes: 3 additions & 3 deletions nipoppy_cli/docs/source/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ The [`nipoppy init`](#cli_reference/init) command copies examples of these files
(customizing-config)=
### Customizing the configuration file

The configuration file at `<PATH_TO_DATASET_ROOT>/proc/global_configs.json` contains general information about a dataset (e.g. name, visit and session names) and configurations for running processing pipelines (e.g., pipeline version and runtime parameters).
The configuration file at {{fpath_config}} contains general information about a dataset (e.g. name, visit and session names) and configurations for running processing pipelines (e.g., pipeline version and runtime parameters).

The example config file contains configurations for all BIDS conversion and image processing software that are supported out-of-the-box by Nipoppy. You should replace the placeholder strings/substrings (e.g. `<DATASET_NAME>`) by more appropriate values for your dataset. See dropdown below (lines that should be changed are highlighted).

Expand All @@ -45,7 +45,7 @@ You can also delete (or not) any configuration for a software/version that you d
---
class: dropdown
---
Here is the default content of `<PATH_TO_DATASET_ROOT>/proc/global_configs.json`:
Here is the default content of {{fpath_config}}:
```{literalinclude} ../../nipoppy/data/examples/sample_global_configs.json
---
linenos: True
Expand All @@ -63,7 +63,7 @@ See the {ref}`schema reference <config-schema>` for more information about each

### Generating the manifest file

The manifest file at `<PATH_TO_DATASET_ROOT>/tabular/manifest.csv` contains *ground truth* information about the participants and visits/sessions available for a dataset.
The manifest file at {{fpath_manifest}} contains *ground truth* information about the participants and visits/sessions available for a dataset.

There must be only **one row** per unique participant/visit combination.

Expand Down
26 changes: 20 additions & 6 deletions nipoppy_cli/nipoppy/cli/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from argparse import ArgumentParser, HelpFormatter, _ActionsContainer, _SubParsersAction
from pathlib import Path

from nipoppy.layout import DEFAULT_LAYOUT_INFO
from nipoppy.utils import (
BIDS_SESSION_PREFIX,
BIDS_SUBJECT_PREFIX,
Expand Down Expand Up @@ -72,13 +73,13 @@ def add_args_pipeline(parser: _ActionsContainer) -> _ActionsContainer:
"--pipeline",
type=str,
required=True,
help="Pipeline name.",
help="Pipeline name, as written in the config file.",
)
parser.add_argument(
"--pipeline-version",
type=str,
required=False,
help="Pipeline version.",
help="Pipeline version, as written in the config file.",
)
return parser

Expand Down Expand Up @@ -178,7 +179,7 @@ def add_subparser_doughnut(
action="store_true",
help=(
"Set all statuses to False in newly added records"
" (regardless of what is on disk)."
" (regardless of what is on disk). May be useful to reduce runtime."
),
)
parser.add_argument(
Expand All @@ -197,7 +198,11 @@ def add_subparser_dicom_reorg(
formatter_class: type[HelpFormatter] = HelpFormatter,
) -> ArgumentParser:
"""Add subparser for reorg command."""
description = "(Re)organize raw DICOM files." # TODO give paths in layout model
description = (
"(Re)organize raw (DICOM) files, from the raw DICOM directory "
f"({DEFAULT_LAYOUT_INFO.dpath_raw_dicom}) to the organized "
f"sourcedata directory ({DEFAULT_LAYOUT_INFO.dpath_sourcedata})."
)
parser = subparsers.add_parser(
COMMAND_DICOM_REORG,
description=description,
Expand All @@ -209,7 +214,16 @@ def add_subparser_dicom_reorg(
parser.add_argument(
"--copy-files",
action="store_true",
help=("Copy files when reorganizing (default: create symlinks)."),
help="Copy files when reorganizing (default: create symlinks).",
)
parser.add_argument(
"--check-dicoms",
action="store_true",
help=(
"Read DICOM file headers when reorganizing and check if they have the "
'"DERIVED" image type (which can be problematic for some BIDS '
"converters). The paths to the derived DICOMs will be written to the log."
),
)
return parser

Expand All @@ -232,7 +246,7 @@ def add_subparser_bids_conversion(
"--pipeline-step",
type=str,
required=False,
help="Pipeline step.",
help="Pipeline step, as written in the config file.",
)
parser = add_args_participant_and_session(parser)
parser = add_arg_simulate(parser)
Expand Down
59 changes: 56 additions & 3 deletions nipoppy_cli/nipoppy/config/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,15 @@

from nipoppy.config.container import ModelWithContainerConfig
from nipoppy.config.pipeline import PipelineConfig
from nipoppy.utils import StrOrPathLike, check_session, load_json
from nipoppy.layout import DEFAULT_LAYOUT_INFO
from nipoppy.tabular.dicom_dir_map import DicomDirMap
from nipoppy.utils import (
BIDS_SESSION_PREFIX,
StrOrPathLike,
check_session,
check_session_strict,
load_json,
)


class Config(ModelWithContainerConfig):
Expand All @@ -19,12 +27,33 @@ class Config(ModelWithContainerConfig):
DATASET_NAME: str = Field(description="Name of the dataset")
VISITS: list[str] = Field(description="List of visits available in the study")
SESSIONS: Optional[list[str]] = Field(
default=None,
default=None, # will be a list after validation
description=(
"List of sessions available in the study"
"List of BIDS-compliant sessions available in the study"
f', prefixed with "{BIDS_SESSION_PREFIX}"'
" (inferred from VISITS if not given)"
),
)
DICOM_DIR_MAP_FILE: Optional[Path] = Field(
default=None,
description=(
"Path to a CSV file mapping participant IDs to DICOM directories"
", to be used in the DICOM reorg step. Note: this field and "
"DICOM_DIR_PARTICIPANT_FIRST cannot both be specified"
f'. The CSV should have three columns: "{DicomDirMap.col_participant_id}"'
f' , "{DicomDirMap.col_session}"'
f', and "{DicomDirMap.col_participant_dicom_dir}"'
),
)
DICOM_DIR_PARTICIPANT_FIRST: Optional[bool] = Field(
default=None,
description=(
"Whether subdirectories under the raw dicom directory (default: "
f"{DEFAULT_LAYOUT_INFO.dpath_raw_dicom}) follow the pattern "
"<PARTICIPANT>/<SESSION> (default) or <SESSION>/<PARTICIPANT>. Note: "
"this field and and DICOM_DIR_MAP_FILE cannot both be specified"
),
)
BIDS: dict[str, dict[str, dict[str, PipelineConfig]]] = Field(
default={}, description="Configurations for BIDS converters, if any"
)
Expand All @@ -34,6 +63,27 @@ class Config(ModelWithContainerConfig):

model_config = ConfigDict(extra="allow")

def _check_sessions_have_prefix(self) -> Self:
"""Check that sessions have the BIDS prefix."""
for session in self.SESSIONS:
check_session_strict(session)
return self

def _check_dicom_dir_options(self) -> Self:
"""Check that only one DICOM directory mapping option is given."""
if (
self.DICOM_DIR_MAP_FILE is not None
and self.DICOM_DIR_PARTICIPANT_FIRST is not None
):
raise ValueError(
"Cannot specify both DICOM_DIR_MAP_FILE and DICOM_DIR_PARTICIPANT_FIRST"
)
# otherwise set the default if needed
elif self.DICOM_DIR_PARTICIPANT_FIRST is None:
self.DICOM_DIR_PARTICIPANT_FIRST = True

return self

def _check_no_duplicate_pipeline(self) -> Self:
"""Check that BIDS and PROC_PIPELINES do not have common pipelines."""
bids_pipelines = set(self.BIDS.keys())
Expand All @@ -43,6 +93,7 @@ def _check_no_duplicate_pipeline(self) -> Self:
"Cannot have the same pipeline under BIDS and PROC_PIPELINES"
f", got {bids_pipelines} and {proc_pipelines}"
)
return self

def _propagate_container_config(self) -> Self:
"""Propagate the container config to all pipelines."""
Expand Down Expand Up @@ -82,6 +133,8 @@ def check_input(cls, data: Any):
@model_validator(mode="after")
def validate_and_process(self) -> Self:
"""Validate and process the configuration."""
self._check_sessions_have_prefix()
self._check_dicom_dir_options()
self._check_no_duplicate_pipeline()
self._propagate_container_config()
return self
Expand Down
4 changes: 4 additions & 0 deletions nipoppy_cli/nipoppy/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,3 +309,7 @@ def get_dpath_bids_db(
pipeline_name, pipeline_version, participant=participant, session=session
)
return self.dpath_bids_db / dname


# for printing defaults in docs
DEFAULT_LAYOUT_INFO = DatasetLayout(dpath_root="<DATASET_ROOT>")
33 changes: 25 additions & 8 deletions nipoppy_cli/nipoppy/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pathlib import Path
from typing import Optional

from rich.console import Console
from rich.logging import RichHandler

from nipoppy.utils import StrOrPathLike
Expand All @@ -13,16 +14,32 @@
FORMAT_FILE = "%(asctime)s %(levelname)-7s %(message)s"


def get_logger(name: Optional[str] = None, level: int = logging.INFO) -> logging.Logger:
def get_logger(
name: Optional[str] = "nipoppy", level: int = logging.INFO
) -> logging.Logger:
"""Create/get a logger with rich formatting."""
logging.basicConfig(
level=level,
format=FORMAT_RICH,
datefmt=DATE_FORMAT,
handlers=[RichHandler(show_time=False, markup=True, rich_tracebacks=True)],
force=True,
# create logger
logger = logging.getLogger(name=name)
logger.setLevel(level)

# stream WARNING and above to stderr with rich formatting
stderr_handler = RichHandler(
console=Console(stderr=True), show_time=False, markup=True, rich_tracebacks=True
)
stderr_handler.addFilter(lambda record: record.levelno >= logging.WARNING)
logger.addHandler(stderr_handler)

# stream levels below WARNING to stdout with rich formatting
stdout_handler = RichHandler(
console=Console(stderr=False),
show_time=False,
markup=True,
rich_tracebacks=True,
)
return logging.getLogger(name=name)
stdout_handler.addFilter(lambda record: record.levelno < logging.WARNING)
logger.addHandler(stdout_handler)

return logger


def add_logfile(logger: logging.Logger, fpath_log: StrOrPathLike) -> None:
Expand Down
8 changes: 4 additions & 4 deletions nipoppy_cli/nipoppy/tabular/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ class BaseTabularModel(BaseModel):
Helper class for validating tabular data.
Subclasses should define fields and their types,
and optionally override the _validate_fields() method.
and optionally override the _validate_before_fields() method.
"""

@model_validator(mode="before")
@classmethod
def validate_input(cls, data: Any):
def validate_before(cls, data: Any):
"""Validate the raw input."""
if isinstance(data, dict):
# generic validation
Expand Down Expand Up @@ -52,12 +52,12 @@ def validate_input(cls, data: Any):

# model-specific validation
# to be overridden in subclass if needed
data = cls.validate_fields(data)
data = cls._validate_before_fields(data)

return data

@classmethod
def validate_fields(cls, data: dict):
def _validate_before_fields(cls, data: dict):
"""Validate model-specific fields. To be overridden in subclass if needed."""
return data

Expand Down
Loading

0 comments on commit 8991806

Please sign in to comment.