diff --git a/.gitignore b/.gitignore index f877086d..4a624115 100644 --- a/.gitignore +++ b/.gitignore @@ -33,3 +33,7 @@ env/ # VS Code .vscode/ + +# docs +nipoppy_cli/docs/build +nipoppy_cli/docs/source/schemas/*.json diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..c4859bda --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,27 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.11" + jobs: + pre_build: + - python nipoppy_cli/docs/scripts/pydantic_to_jsonschema.py + +python: + install: + - method: pip + path: nipoppy_cli + extra_requirements: + - doc + +# Build documentation with Sphinx +sphinx: + configuration: nipoppy_cli/docs/source/conf.py + fail_on_warning: true diff --git a/README.md b/README.md index 7d542dba..c27b35ad 100644 --- a/README.md +++ b/README.md @@ -1,26 +1,30 @@ -# Nipoppy: Parkinson's Progression Markers Initiative dataset +# Nipoppy -This repository contains code to process tabular and imaging data from the Parkinson's Progression Markers Initiative (PPMI) dataset. It is a fork of the main [Nipoppy](https://github.com/neurodatascience/nipoppy) repository. Nipoppy is a lightweight workflow management and harmonization tools for MRI and clinical data. This fork adds scripts, configuration files, and downstream analyses that are specific to PPMI. +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8084759.svg)](https://doi.org/10.5281/zenodo.8084759) +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/license/mit) +[![codecov](https://codecov.io/gh/neurodatascience/nipoppy/graph/badge.svg?token=SN38ITRO4M)](https://codecov.io/gh/neurodatascience/nipoppy) +[![https://github.com/psf/black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://black.readthedocs.io/en/stable/) +[![Documentation Status](https://readthedocs.org/projects/nipoppy/badge/?version=latest)](https://nipoppy.readthedocs.io/en/latest/?badge=latest) -## BIDS data file naming +Nipoppy is a lightweight framework for standardized organization and processing of neuroimaging-clinical datasets. Its goal is to help users adopt the +[FAIR](https://www.go-fair.org/fair-principles/) principles +and improve the reproducibility of studies. - -The [tabular/ppmi_imaging_descriptions.json](https://github.com/neurodatascience/nipoppy-ppmi/blob/main/nipoppy/workflow/tabular/ppmi_imaging_descriptions.json) file is used to determine the BIDS datatype and suffix (contrast) associated with an image's MRI series description. It will be updated as new data is processed. +The framework includes three components: -Here is a description of the available BIDS data and the tags that can appear in their filenames: +1. A specification for dataset organization that extends the [Brain Imaging Data Structure (BIDS) standard](https://bids.neuroimaging.io/) by providing additional guidelines for tabular (e.g., phenotypic) data and imaging derivatives. 
-- `anat`
-  - The available suffixes are: `T1w`, `T2w`, `T2starw`, and `FLAIR`
-  - Most images have an `acq` tag:
-    - Non-neuromelanin images: `acq-<plane><type>`, where
-      - `<plane>` is one of: `sag`, `ax`, or `cor` (for sagittal, axial, or coronal scans respectively)
-      - `<type>` is one of: `2D`, or `3D`
-    - Neuromelanin images: `acq-NM`
-    - For some images, the acquisition plane (`sag`/`ax`/`cor`) or type (`2D`/`3D`) cannot be easily obtained. In those cases, the filename will not contain an `acq` tag.
-- `dwi`
-  - All imaging files have the `dwi` suffix.
-  - Most images have a `dir` tag corresponding to the phase-encoding direction. This is one of: `LR`, `RL`, `AP`, or `PA`
-  - Images where the phase-encoding direction cannot be easily inferred from the series description string do not have a `dir` tag.
-  - Some participants have multi-shell sequences for their diffusion data. These files will have an additional `acq-B<value>` tag, where `<value>` is the b-value for that sequence.
+   ![Nipoppy specification](nipoppy_cli/docs/source/_static/img/nipoppy_specification.jpg)
-Currently, only structural (`anat`) and diffusion (`dwi`) MRI data are supported. Functional (`func`) data has not been converted to the BIDS format yet.
+2. A protocol for data organization, curation and processing, with steps that include the following:
+   - **Organization** of raw data, including conversion of raw DICOMs (or NIfTIs) to [BIDS](https://bids.neuroimaging.io/)
+   - **Processing** of imaging data with existing or custom pipelines
+   - **Tracking** of data availability and processing status
+   - **Extraction** of imaging-derived phenotypes (IDPs) for downstream statistical modelling and analysis
+
+   ![Nipoppy protocol](nipoppy_cli/docs/source/_static/img/nipoppy_protocol.jpg)
+
+3. A **command-line interface** and **Python package** that provide user-friendly tools for applying the framework. The tools build upon existing technologies such as the [Apptainer container platform](https://apptainer.org/) and the [Boutiques descriptor framework](https://boutiques.github.io/). Several existing containerized pipelines are supported out-of-the-box, and new pipelines can be added easily by the user.
+   - We have also developed a [**web dashboard**](https://digest.neurobagel.org) for interactive visualizations of imaging and phenotypic data availability.
+
+See the [documentation website](https://neurobagel.org/nipoppy/overview/) for more information!
diff --git a/README_PPMI.md b/README_PPMI.md
new file mode 100644
index 00000000..7d542dba
--- /dev/null
+++ b/README_PPMI.md
@@ -0,0 +1,26 @@
+# Nipoppy: Parkinson's Progression Markers Initiative dataset
+
+This repository contains code to process tabular and imaging data from the Parkinson's Progression Markers Initiative (PPMI) dataset. It is a fork of the main [Nipoppy](https://github.com/neurodatascience/nipoppy) repository. Nipoppy is a lightweight workflow management and harmonization tool for MRI and clinical data. This fork adds scripts, configuration files, and downstream analyses that are specific to PPMI.
+
+## BIDS data file naming
+
+
+The [tabular/ppmi_imaging_descriptions.json](https://github.com/neurodatascience/nipoppy-ppmi/blob/main/nipoppy/workflow/tabular/ppmi_imaging_descriptions.json) file is used to determine the BIDS datatype and suffix (contrast) associated with an image's MRI series description. It will be updated as new data is processed.
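+
+For illustration only, the lookup amounts to something like the sketch below. This is a hypothetical sketch: the exact structure of `ppmi_imaging_descriptions.json` (assumed here to map datatype to suffix to a list of known series descriptions) and the helper name are illustrative, not part of the actual code.
+
+```python
+import json
+
+# Assumed (hypothetical) structure:
+# {"anat": {"T1w": [<series descriptions>, ...], ...}, "dwi": {...}}
+with open("nipoppy/workflow/tabular/ppmi_imaging_descriptions.json") as f:
+    descriptions = json.load(f)
+
+
+def infer_bids_entities(series_description: str):
+    """Return (datatype, suffix) for a series description, or None if unknown."""
+    for datatype, suffixes in descriptions.items():
+        for suffix, known_descriptions in suffixes.items():
+            if series_description in known_descriptions:
+                return datatype, suffix
+    return None
+```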
+
+Here is a description of the available BIDS data and the tags that can appear in their filenames:
+
+- `anat`
+  - The available suffixes are: `T1w`, `T2w`, `T2starw`, and `FLAIR`
+  - Most images have an `acq` tag:
+    - Non-neuromelanin images: `acq-<plane><type>`, where
+      - `<plane>` is one of: `sag`, `ax`, or `cor` (for sagittal, axial, or coronal scans respectively)
+      - `<type>` is one of: `2D`, or `3D`
+    - Neuromelanin images: `acq-NM`
+    - For some images, the acquisition plane (`sag`/`ax`/`cor`) or type (`2D`/`3D`) cannot be easily obtained. In those cases, the filename will not contain an `acq` tag.
+- `dwi`
+  - All imaging files have the `dwi` suffix.
+  - Most images have a `dir` tag corresponding to the phase-encoding direction. This is one of: `LR`, `RL`, `AP`, or `PA`
+  - Images where the phase-encoding direction cannot be easily inferred from the series description string do not have a `dir` tag.
+  - Some participants have multi-shell sequences for their diffusion data. These files will have an additional `acq-B<value>` tag, where `<value>` is the b-value for that sequence.
+
+Currently, only structural (`anat`) and diffusion (`dwi`) MRI data are supported. Functional (`func`) data has not been converted to the BIDS format yet.
diff --git a/docs/README.md b/docs/README.md
deleted file mode 100644
index 46777cd7..00000000
--- a/docs/README.md
+++ /dev/null
@@ -1,67 +0,0 @@
-[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8084759.svg)](https://doi.org/10.5281/zenodo.8084759)
-
-# nipoppy
-
-A workflow manager for curating MRI and tabular data and standardized processing.
-
-_Pull-Organize-Process-Push-Yey!_
-
-**Note: This is a template branch which is typically customized for a specific dataset**
-
-## Documentation
-
-### nipoppy modules
-
-- [nipoppy](https://neurobagel.org/nipoppy/overview/)
-
-### Individual containerized pipelines:
-
-- [Heudionv](https://heudiconv.readthedocs.io/en/latest/installation.html#singularity)
-- [MRIQC](https://mriqc.readthedocs.io/en/stable/)
-- [fMRIPrep](https://fmriprep.org/en/1.5.5/singularity.html)
-- [TractoFlow](https://github.com/scilus/tractoflow)
-- [MAGeT Brain](https://github.com/CoBrALab/MAGeTbrain)
-
-### GH workflow for contributing to template and dataset-specific forks
-
-![nipoppy_gh_contribution_workflow](https://user-images.githubusercontent.com/29051929/236941655-f7dcc981-a2f4-4f3f-b1fc-8c2afdfcc8cf.png)
-
-### Organization
-* Under the `neurodatascience` GitHub organization:
-    * The `nipoppy(*)` ("template") repository contains all common code-base: `neurodatascience/nipoppy`
-    * Make a fork for each dataset: `neurodatascience/nipoppy-[dataset]`
-* Under the `user` GitHub account:
-    * Make a fork of `nipoppy(*)` ("template") repository: `<user>/nipoppy`
-* Local machine
-    * Clone all the `neurodatascience/nipoppy-[dataset]` and the `<user>/nipoppy` repos.
-
-### Basic principles
-* `nipoppy(*)` is the code-base common across all dataset forks
-* `nipoppy-[dataset]` will have additional files but there **should not be** different versions of the same file (including `README.md`) between `nipoppy(*)` and `nipoppy-[dataset]`
-* Branch-protection are set to avoid direct commits to all main branches.
Contributions should be done through PRs only -* Updates to `nipoppy(*)` and `nipoppy-[dataset]` will follow separate paths requiring different repo-clones on the local machine -* GH-Actions are used to distribute common changes from `nipoppy(*)` to `nipoppy-[dataset]` -* Nothing is pushed from `neurodatascience/nipoppy-[dataset]` to `neurodatascience/nipoppy` - -### Contribution steps: - * Changes that apply to all datasets (e.g. bids conversion, pipeline run scripts, tracker scripts): - * Make a user fork of `neurodatascience/nipoppy`: `[gh-username]/nipoppy` - * Clone `[gh-username]/nipoppy` locally - * Push to `[gh-username]/nipoppy` on GitHub - * PR from `[gh-username]/nipoppy` to `neurodatascience/nipoppy` - * Any time a PR is merged to `neurodatascience/nipoppy:main`, the newly added changes (common across dataset) will propagate automatically to **all** `neurodatascience/nipoppy-[dataset]` forks through a GitHub Actions workflow - * In each dataset fork, the `main-upstream` branch is created/updated automatically to match `neurodatascience/nipoppy:main` - * A PR labelled "automerge" is automatically created to incorporate changes from `main-upstream` to `main` - * "Automerge" PRs are approved and merged automatically if there are no merge conflicts - * If merge conflicts exist, they must be resolved manually. Then the PR needs to be **merged**, without squashing or rebasing - * Changes that apply to individual datasets (e.g. dicom wrangling, statistical analysis) - * Clone `neurodatascience/nipoppy-[dataset]` locally - * Make a new branch when working on a new feature - * `main` is protected on these forks as well - all contributions have to be made through dev branches. - * ***IMPORTANT***: need to be careful with branch names, they should be unique - * PR from `neurodatascience/nipoppy-[dataset]:[feature_branch]` to `neurodatascience/nipoppy-[dataset]:main` - * Delete branch when done - * Adding a new dataset - * Make a new fork of the template repo (`neurodatascience/nipoppy`) called `neurodatascience/nipoppy-[dataset]` - * Update the GitHub Actions workflow file to add the new fork to the job matrix - * Add dataset-specific files to the fork and begin processing (see the [`nipoppy` documentation](https://neurobagel.org/nipoppy/workflow/dicom_org/) for more information) diff --git a/nipoppy/extractors/fmriprep/run_FC.py b/nipoppy/extractors/fmriprep/run_FC.py index 2fe940d7..0b99d9a2 100644 --- a/nipoppy/extractors/fmriprep/run_FC.py +++ b/nipoppy/extractors/fmriprep/run_FC.py @@ -229,7 +229,7 @@ def run(participant_id: str, if output_dir is None: output_dir = f"{DATASET_ROOT}/derivatives/" - fmriprep_dir = f"{DATASET_ROOT}/derivatives/fmriprep/{FMRIPREP_VERSION}/output" + fmriprep_dir = f"{DATASET_ROOT}/derivatives/fmriprep/v{FMRIPREP_VERSION}/output" DKT_dir = f"{DATASET_ROOT}/derivatives/networks/0.9.0/output" FC_dir = f"{output_dir}/FC" @@ -290,4 +290,4 @@ def run(participant_id: str, with open(FC_config_file, 'r') as f: FC_configs = json.load(f) - run(participant_id, global_configs, FC_configs, session_id, output_dir) \ No newline at end of file + run(participant_id, global_configs, FC_configs, session_id, output_dir) diff --git a/nipoppy/extractors/freesurfer/run_structural_measures.py b/nipoppy/extractors/freesurfer/run_structural_measures.py index 3034f98d..50437440 100644 --- a/nipoppy/extractors/freesurfer/run_structural_measures.py +++ b/nipoppy/extractors/freesurfer/run_structural_measures.py @@ -14,10 +14,10 @@ # Globals # Brainload has two separate functions to 
extract aseg data.
 measure_column_names = ["StructName","Structure","Description","Volume_mm3", "unit"]
-aseg_cols = ["StructName", "Volume_mm3"]
-dkt_cols = ["StructName", "ThickAvg"]
 
 def get_aseg_stats(participant_stats_dir, aseg_cols):
+    """ Parses the aseg.stats file
+    """
     aseg_cols = ["StructName", "Volume_mm3"]
     aseg_stats = bl.stat(f'{participant_stats_dir}/aseg.stats')
     table_df = pd.DataFrame(aseg_stats["table_data"], columns=aseg_stats["table_column_headers"])[aseg_cols]
@@ -25,17 +25,19 @@ def get_aseg_stats(participant_stats_dir, aseg_cols):
     _df = pd.concat([table_df,measure_df],axis=0)
     return _df
 
-def get_aparc_stats(participant_stats_dir, aparc_cols, parcel="aparc.DKTatlas"):
+def get_DKT_stats(participant_stats_dir, dkt_cols, parcel="aparc.DKTatlas"):
+    """ Parses the <hemi>.aparc.DKTatlas.stats file
+    """
     hemi = "lh"
     stat_file = f"{hemi}.{parcel}.stats"
     lh_dkt_stats = bl.stat(f'{participant_stats_dir}/{stat_file}')
-    lh_df = pd.DataFrame(lh_dkt_stats["table_data"], columns=lh_dkt_stats["table_column_headers"])[aparc_cols]
+    lh_df = pd.DataFrame(lh_dkt_stats["table_data"], columns=lh_dkt_stats["table_column_headers"])[dkt_cols]
     lh_df["hemi"] = hemi
 
     hemi = "rh"
     stat_file = f"{hemi}.{parcel}.stats"
-    rh_dkt_stats = bl.stat(f'{participant_stats_dir}/rh.aparc.DKTatlas.stats')
-    rh_df = pd.DataFrame(rh_dkt_stats["table_data"], columns=rh_dkt_stats["table_column_headers"])[aparc_cols]
+    rh_dkt_stats = bl.stat(f'{participant_stats_dir}/{stat_file}')
+    rh_df = pd.DataFrame(rh_dkt_stats["table_data"], columns=rh_dkt_stats["table_column_headers"])[dkt_cols]
    rh_df["hemi"] = hemi
 
     _df = pd.concat([lh_df,rh_df], axis=0)
@@ -52,7 +54,7 @@ def get_aparc_stats(participant_stats_dir, aparc_cols, parcel="aparc.DKTatlas"):
 parser.add_argument('--FS_config', type=str, help='path to freesurfer configs for a given nipoppy dataset', required=True)
 parser.add_argument('--participants_list', default=None, help='path to participants list (csv or tsv)')
 parser.add_argument('--session_id', type=str, help='session id for the participant', required=True)
-parser.add_argument('--save_dir', default='./', help='path to save_dir')
+parser.add_argument('--output_dir', default=None, help='path to save extracted output (default: derivatives/freesurfer/v<FS_version>/IDP/<session>/)')
 
 args = parser.parse_args()
@@ -60,9 +62,8 @@ def get_aparc_stats(participant_stats_dir, aparc_cols, parcel="aparc.DKTatlas"):
 FS_config_file = args.FS_config
 participants_list = args.participants_list
 session_id = args.session_id
-save_dir = args.save_dir
-
 session = f"ses-{session_id}"
+output_dir = args.output_dir
 
 # Read global configs
 with open(global_config_file, 'r') as f:
@@ -77,9 +78,12 @@ def get_aparc_stats(participant_stats_dir, aparc_cols, parcel="aparc.DKTatlas"):
 stat_configs = FS_configs["stat_configs"]
 stat_config_names = stat_configs.keys()
 
-print(f"Using dataset root: {DATASET_ROOT} and FreeSurfer version: {FS_version}")
+print(f"Using dataset root: {DATASET_ROOT} and FreeSurfer version: v{FS_version}")
 print(f"Using stat configs: {stat_config_names}")
 
+if output_dir == None:
+    output_dir = f"{DATASET_ROOT}/derivatives/freesurfer/v{FS_version}/IDP/{session}/"
+
 if participants_list == None:
     # use doughnut
     doughnut_file = f"{DATASET_ROOT}/scratch/raw_dicom/doughnut.csv"
@@ -97,17 +101,17 @@
 
 # Extract stats for each participant
-fs_output_dir = f"{DATASET_ROOT}/derivatives/freesurfer/{FS_version}/output/{session}/"
+fs_output_dir =
f"{DATASET_ROOT}/derivatives/freesurfer/v{FS_version}/output/{session}/" aseg_df = pd.DataFrame() -aparc_df = pd.DataFrame() +dkt_df = pd.DataFrame() for participant_id in bids_participants: participant_stats_dir = f"{fs_output_dir}{participant_id}/stats/" print(f"Extracting stats for participant: {participant_id}") for config_name, config_cols in stat_configs.items(): print(f"Extracting data for config: {config_name}") - if config_name.strip() == "aseg": + if config_name.strip().lower() == "aseg": try: _df = get_aseg_stats(participant_stats_dir, config_cols) # transpose it to wideform @@ -122,9 +126,9 @@ def get_aparc_stats(participant_stats_dir, aparc_cols, parcel="aparc.DKTatlas"): except: print(f"Error parsing aseg data for {participant_id}") - elif config_name.strip() == "aparc": + elif config_name.strip().lower() == "dkt": try: - _df = get_aparc_stats(participant_stats_dir, config_cols) + _df = get_DKT_stats(participant_stats_dir, config_cols) # transpose it to wideform names_col = config_cols[0] values_col = config_cols[1] @@ -132,26 +136,25 @@ def get_aparc_stats(participant_stats_dir, aparc_cols, parcel="aparc.DKTatlas"): vals = [participant_id] + list(_df[values_col]) _df_wide = pd.DataFrame(columns=cols) _df_wide.loc[0] = vals - aparc_df = pd.concat([aparc_df,_df_wide], axis=0) + dkt_df = pd.concat([dkt_df,_df_wide], axis=0) except Exception as e: - print(f"Error parsing aparc data for {participant_id} with exception: {e}") + print(f"Error parsing dkt data for {participant_id} with exception: {e}") else: print(f"Unknown stat config: {config_name}") # Save configs -print(f"Saving collated stat tables at: {save_dir}") -aseg_csv = f"{save_dir}/aseg.csv" -aparc_csv = f"{save_dir}/aparc.csv" +print(f"Saving collated stat tables at: {output_dir}") +aseg_csv = f"{output_dir}/aseg.csv" +dkt_csv = f"{output_dir}/dkt.csv" if len(aseg_df) > 0: aseg_df.to_csv(aseg_csv, index=None) else: print("aseg_df is empty") -if len(aparc_df) > 0: - aparc_df.to_csv(aparc_csv, index=None) +if len(dkt_df) > 0: + dkt_df.to_csv(dkt_csv, index=None) else: - print("aparc_df is empty") - + print("dkt_df is empty") \ No newline at end of file diff --git a/nipoppy/extractors/maget_brain/prepare_data.py b/nipoppy/extractors/maget_brain/prepare_data.py index 24be4969..81897d0c 100644 --- a/nipoppy/extractors/maget_brain/prepare_data.py +++ b/nipoppy/extractors/maget_brain/prepare_data.py @@ -50,15 +50,17 @@ def get_masked_image(img_path, mask_path, masked_img_path): fmriprep_dir = f"{DATASET_ROOT}/derivatives/fmriprep/{fmriprep_version}/output/" maget_dir = f"{DATASET_ROOT}/derivatives/maget_brain/{maget_version}/output/" maget_preproc_T1w_nii_dir = f"{maget_dir}/ses-{session_id}/preproc_T1w_nii/" +maget_proc_list_file = f"{maget_preproc_T1w_nii_dir}proc_participant.csv" # Check / create maget subdirs Path(maget_preproc_T1w_nii_dir).mkdir(parents=True, exist_ok=True) -# get all the subject ids -manifest_csv = f"{DATASET_ROOT}/tabular/manifest.csv" -manifest_df = pd.read_csv(manifest_csv) -bids_id_list = manifest_df["bids_id"].unique() +# get all the subject ids from the doughnut +doughnut_csv = f"{DATASET_ROOT}/scratch/raw_dicom/doughnut.csv" +doughnut_df = pd.read_csv(doughnut_csv) +bids_id_list = doughnut_df["bids_id"].unique() +proc_participants = [] # To be replaced when maget-brain tracker is written... 
 for bids_id in bids_id_list:
     if run_id == None:
         img_file_name = f"{bids_id}_ses-{session_id}_desc-preproc_T1w.nii.gz"
@@ -74,8 +76,14 @@ def get_masked_image(img_path, mask_path, masked_img_path):
         mask_path = f"{fmriprep_dir}/{bids_id}/ses-{session_id}/anat/{mask_file_name}"
         masked_img_path = f"{maget_preproc_T1w_nii_dir}/{masked_img_file_name}"
-    try:
-        get_masked_image(img_path, mask_path, masked_img_path)
-    except Exception as e:
-        print(e)
-    
\ No newline at end of file
+    # Check if the masked image exists
+    if os.path.isfile(masked_img_path):
+        print(f"Participant segmentation already exists: {bids_id}")
+    else:
+        try:
+            get_masked_image(img_path, mask_path, masked_img_path)
+            proc_participants.append(bids_id)
+        except Exception as e:
+            print(e)
+
+pd.DataFrame(data=proc_participants).to_csv(maget_proc_list_file, header=False, index=False)
\ No newline at end of file
diff --git a/nipoppy/trackers/run_tracker.py b/nipoppy/trackers/run_tracker.py
index 5f389140..bc5cc996 100755
--- a/nipoppy/trackers/run_tracker.py
+++ b/nipoppy/trackers/run_tracker.py
@@ -41,6 +41,7 @@
     "fmriprep": ["anat"],
     "mriqc": ["anat"],
     "tractoflow": ["anat", "dwi"],
+    "maget_brain": ["anat"]
 }
 ALL_DATATYPES = sorted(["anat", "dwi", "func", "fmap"])
 BIDS_PIPES = ["mriqc","fmriprep"]
diff --git a/nipoppy_cli/README.md b/nipoppy_cli/README.md
deleted file mode 100644
index 8c731ae2..00000000
--- a/nipoppy_cli/README.md
+++ /dev/null
@@ -1,26 +0,0 @@
-![License](https://img.shields.io/badge/license-MIT-blue.svg)
-[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8084759.svg)](https://doi.org/10.5281/zenodo.8084759)
-![https://github.com/psf/black](https://img.shields.io/badge/code%20style-black-000000.svg)
-
-# Nipoppy
-
-A framework for standardized organization and processing of neuroimaging-clinical datasets.
-
-## Developer setup
-
-Clone this repo:
-
-```bash
-git clone https://github.com/neurodatascience/nipoppy.git
-```
-
-Install `nipoppy` in editable mode with `dev` dependencies:
-```bash
-cd nipoppy
-pip install -e .[dev]
-```
-
-Set up `pre-commit`:
-```bash
-pre-commit install
-```
diff --git a/nipoppy_cli/docs/Makefile b/nipoppy_cli/docs/Makefile
new file mode 100644
index 00000000..71398749
--- /dev/null
+++ b/nipoppy_cli/docs/Makefile
@@ -0,0 +1,21 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	./scripts/pydantic_to_jsonschema.py
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/nipoppy_cli/docs/make.bat b/nipoppy_cli/docs/make.bat
new file mode 100644
index 00000000..747ffb7b
--- /dev/null
+++ b/nipoppy_cli/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found.
Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/nipoppy_cli/docs/scripts/pydantic_to_jsonschema.py b/nipoppy_cli/docs/scripts/pydantic_to_jsonschema.py new file mode 100755 index 00000000..36c65cb3 --- /dev/null +++ b/nipoppy_cli/docs/scripts/pydantic_to_jsonschema.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +"""Script to automatically generate JSON schema files for Pydantic models.""" + +import json +from pathlib import Path + +from nipoppy.config.boutiques import BoutiquesConfig +from nipoppy.config.main import Config +from nipoppy.layout import LayoutConfig +from nipoppy.tabular.bagel import BagelModel +from nipoppy.tabular.doughnut import DoughnutModel +from nipoppy.tabular.manifest import ManifestModel + +DPATH_SCHEMA = Path(__file__).parent / ".." / "source" / "schemas" + +MODEL_FILENAME_MAP = { + BoutiquesConfig: "boutiques.json", + Config: "config.json", + LayoutConfig: "layout.json", + BagelModel: "bagel.json", + DoughnutModel: "doughnut.json", + ManifestModel: "manifest.json", +} + +if __name__ == "__main__": + # make sure schemas directory exists + if not DPATH_SCHEMA.exists(): + print(f"\tCreating {DPATH_SCHEMA}") + DPATH_SCHEMA.mkdir(parents=True) + + # generate schema files + for model, filename in MODEL_FILENAME_MAP.items(): + print(f"\tWriting JSON schema for {model.__name__} to {filename}") + fpath_schema = DPATH_SCHEMA / filename + + schema = model.model_json_schema() + + # move singularity config to last property + try: + CONTAINER_CONFIG = schema["properties"]["CONTAINER_CONFIG"] + del schema["properties"]["CONTAINER_CONFIG"] + schema["properties"]["CONTAINER_CONFIG"] = CONTAINER_CONFIG + except KeyError: + pass + + # # TODO figure out $ref/$$target things + # try: + # for subschema_name, subschema in schema["definitions"].items(): + # pass + # except KeyError: + # pass + + schema_str = json.dumps(schema, indent=4) + fpath_schema.write_text(schema_str) diff --git a/nipoppy_cli/docs/source/_static/img/nipoppy_protocol.jpg b/nipoppy_cli/docs/source/_static/img/nipoppy_protocol.jpg new file mode 100644 index 00000000..0a6f5637 Binary files /dev/null and b/nipoppy_cli/docs/source/_static/img/nipoppy_protocol.jpg differ diff --git a/nipoppy_cli/docs/source/_static/img/nipoppy_specification.jpg b/nipoppy_cli/docs/source/_static/img/nipoppy_specification.jpg new file mode 100644 index 00000000..09b13cce Binary files /dev/null and b/nipoppy_cli/docs/source/_static/img/nipoppy_specification.jpg differ diff --git a/nipoppy_cli/docs/source/_static/theme.css b/nipoppy_cli/docs/source/_static/theme.css new file mode 100644 index 00000000..729600cc --- /dev/null +++ b/nipoppy_cli/docs/source/_static/theme.css @@ -0,0 +1,7 @@ +.wy-table-responsive table td { + white-space: normal !important; +} + +.wy-table-responsive { + overflow: visible !important; +} diff --git a/nipoppy_cli/docs/source/_templates/autoapi/index.rst b/nipoppy_cli/docs/source/_templates/autoapi/index.rst new file mode 100644 index 00000000..8fe95514 --- /dev/null +++ b/nipoppy_cli/docs/source/_templates/autoapi/index.rst @@ -0,0 +1,15 @@ 
+Python API +========== + +This page contains auto-generated API reference documentation [#f1]_. + +.. toctree:: + :titlesonly: + + {% for page in pages %} + {% if page.top_level_object and page.display %} + {{ page.include_path }} + {% endif %} + {% endfor %} + +.. [#f1] Created with `sphinx-autoapi `_ diff --git a/nipoppy_cli/docs/source/cli_reference/convert.md b/nipoppy_cli/docs/source/cli_reference/convert.md new file mode 100644 index 00000000..eadacebd --- /dev/null +++ b/nipoppy_cli/docs/source/cli_reference/convert.md @@ -0,0 +1,14 @@ +# `nipoppy convert` + +```{note} +This command calls the {py:class}`nipoppy.workflows.bids_conversion.BidsConversionRunner` class from the Python {term}`API` internally. +``` + +```{argparse} +--- +ref: nipoppy.cli.parser.get_global_parser +prog: nipoppy +nodefault: true +path: bidsify +--- +``` diff --git a/nipoppy_cli/docs/source/cli_reference/doughnut.md b/nipoppy_cli/docs/source/cli_reference/doughnut.md new file mode 100644 index 00000000..97b3fbb3 --- /dev/null +++ b/nipoppy_cli/docs/source/cli_reference/doughnut.md @@ -0,0 +1,14 @@ +# `nipoppy doughnut` + +```{note} +This command calls the {py:class}`nipoppy.workflows.doughnut.DoughnutWorkflow` class from the Python {term}`API` internally. +``` + +```{argparse} +--- +ref: nipoppy.cli.parser.get_global_parser +prog: nipoppy +nodefault: true +path: doughnut +--- +``` diff --git a/nipoppy_cli/docs/source/cli_reference/index.md b/nipoppy_cli/docs/source/cli_reference/index.md new file mode 100644 index 00000000..067b77d2 --- /dev/null +++ b/nipoppy_cli/docs/source/cli_reference/index.md @@ -0,0 +1,26 @@ +# Command-line interface + +```{toctree} +--- +hidden: +includehidden: +titlesonly: +--- + +init.md +doughnut.md +reorg.md +convert.md +run.md +track.md +``` + + +```{argparse} +--- +ref: nipoppy.cli.parser.get_global_parser +prog: nipoppy +nosubcommands: true +noepilog: +--- +``` diff --git a/nipoppy_cli/docs/source/cli_reference/init.md b/nipoppy_cli/docs/source/cli_reference/init.md new file mode 100644 index 00000000..a5d33dff --- /dev/null +++ b/nipoppy_cli/docs/source/cli_reference/init.md @@ -0,0 +1,14 @@ +# `nipoppy init` + +```{note} +This command calls the {py:class}`nipoppy.workflows.dataset_init.InitWorkflow` class from the Python {term}`API` internally. +``` + +```{argparse} +--- +ref: nipoppy.cli.parser.get_global_parser +prog: nipoppy +nodefault: true +path: init +--- +``` diff --git a/nipoppy_cli/docs/source/cli_reference/reorg.md b/nipoppy_cli/docs/source/cli_reference/reorg.md new file mode 100644 index 00000000..7fb7d511 --- /dev/null +++ b/nipoppy_cli/docs/source/cli_reference/reorg.md @@ -0,0 +1,14 @@ +# `nipoppy reorg` + +```{note} +This command calls the {py:class}`nipoppy.workflows.dicom_reorg.DicomReorgWorkflow` class from the Python {term}`API` internally. +``` + +```{argparse} +--- +ref: nipoppy.cli.parser.get_global_parser +prog: nipoppy +nodefault: true +path: reorg +--- +``` diff --git a/nipoppy_cli/docs/source/cli_reference/run.md b/nipoppy_cli/docs/source/cli_reference/run.md new file mode 100644 index 00000000..4f943f1e --- /dev/null +++ b/nipoppy_cli/docs/source/cli_reference/run.md @@ -0,0 +1,14 @@ +# `nipoppy run` + +```{note} +This command calls the {py:class}`nipoppy.workflows.runner.PipelineRunner` class from the Python {term}`API` internally. 
+``` + +```{argparse} +--- +ref: nipoppy.cli.parser.get_global_parser +prog: nipoppy +nodefault: true +path: run +--- +``` diff --git a/nipoppy_cli/docs/source/cli_reference/track.md b/nipoppy_cli/docs/source/cli_reference/track.md new file mode 100644 index 00000000..cdf2ae59 --- /dev/null +++ b/nipoppy_cli/docs/source/cli_reference/track.md @@ -0,0 +1,14 @@ +# `nipoppy track` + +```{note} +This command calls the {py:class}`nipoppy.workflows.tracker.PipelineTracker` class from the Python {term}`API` internally. +``` + +```{argparse} +--- +ref: nipoppy.cli.parser.get_global_parser +prog: nipoppy +nodefault: true +path: track +--- +``` diff --git a/nipoppy_cli/docs/source/conf.py b/nipoppy_cli/docs/source/conf.py new file mode 100644 index 00000000..3620d600 --- /dev/null +++ b/nipoppy_cli/docs/source/conf.py @@ -0,0 +1,108 @@ +"""Configuration file for the Sphinx documentation builder. + +For the full list of built-in configuration values, see the documentation: +https://www.sphinx-doc.org/en/master/usage/configuration.html +""" + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "Nipoppy" +copyright = "2024, NeuroDataScience-ORIGAMI Lab" +author = "NeuroDataScience-ORIGAMI Lab" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "autoapi.extension", + "myst_parser", + "sphinxarg.ext", + "sphinx_copybutton", + "sphinx-jsonschema", + "sphinx_togglebutton", + "sphinx.ext.autodoc.typehints", + "sphinx.ext.intersphinx", + "sphinx.ext.napoleon", +] + +templates_path = ["_templates"] +exclude_patterns = [] + +nitpicky = True + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "furo" +html_static_path = ["_static"] + +# -- Furo configuration ------------------------------------------------------ +# https://pradyunsg.me/furo/customisation/#customisation +html_theme_options = { + "source_repository": "https://github.com/neurodatascience/nipoppy", + "source_branch": "main", + "source_directory": "nipoppy_cli/docs/source", +} + +# -- Intersphinx configuration ------------------------------------------------ +# https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html + +intersphinx_mapping = { + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), + "pydantic": ("https://docs.pydantic.dev/latest/", None), + "python": ("https://docs.python.org/3", None), +} + +# -- MyST configuration ------------------------------------------------------- + +myst_enable_extensions = ["fieldlist"] + +# -- Autodoc/AutoAPI configuration ---------------------------------------------------- + +autodoc_typehints = "description" + +autoapi_dirs = ["../../nipoppy"] +autoapi_options = [ + "members", + "undoc-members", + # "private-members", + "show-inheritance", + # "show-module-summary", + # "special-members", + "imported-members", +] +autoapi_member_order = "groupwise" +autoapi_own_page_level = "class" +autoapi_template_dir = "_templates/autoapi" + +# ignore some auto doc related warnings +# see https://github.com/sphinx-doc/sphinx/issues/10785 +nitpick_ignore = [ + ("py:class", "Path"), + ("py:class", "optional"), + ("py:class", "pd.DataFrame"), + ("py:class", "bids.BIDSLayout"), + ("py:class", 
"argparse.HelpFormatter"), + ("py:class", "argparse._SubParsersAction"), + ("py:class", "argparse._ActionsContainer"), +] + +# -- Copybutton configuration ------------------------------------------------- +copybutton_exclude = ".linenos, .gp" + +# -- JSON Schema configuration ------------------------------------------------ +jsonschema_options = { + "lift_definitions": True, + "auto_reference": True, + "auto_target": True, +} + +# # TODO +# def linkcode_resolve(domain, info): +# if domain != "py": +# return None +# if not info["module"]: +# return None +# filename = info["module"].replace(".", "/") +# return f"https://github.com/"" diff --git a/nipoppy_cli/docs/source/contributing.md b/nipoppy_cli/docs/source/contributing.md new file mode 100644 index 00000000..8619acd6 --- /dev/null +++ b/nipoppy_cli/docs/source/contributing.md @@ -0,0 +1,35 @@ +# Contributing + +Nipoppy is under active development, and we welcome outside contributions! + +Please report bugs or start a conversation about potential enhancements/new features by opening a new [GitHub issue](https://github.com/neurodatascience/nipoppy/issues/new). + +## Developer setup + +Fork the [repository](https://github.com/neurodatascience/nipoppy) on GitHub, then clone it and **install it with `dev` dependencies**, following instructions from [here](#github-install-section). + +Set up [`pre-commit`](https://pre-commit.com/) to apply automatic formatting/linting/etc. when making a new commit: +```{code-block} console +$ pre-commit install +``` + +## Running the test suite + +Within the root directory of the repo, run: +```{code-block} console +$ python -m pytest +``` + +## Building the documentation + +Move the the `docs` directory: +```{code-block} console +$ cd docs +``` + +Run: +```{code-block} console +$ make html +``` + +Then open the `build/html/index.html` file in a browser. diff --git a/nipoppy_cli/docs/source/glossary.md b/nipoppy_cli/docs/source/glossary.md new file mode 100644 index 00000000..8c0c3858 --- /dev/null +++ b/nipoppy_cli/docs/source/glossary.md @@ -0,0 +1,34 @@ +# Glossary + +```{glossary} + +API + Application Programming Interface, how software interacts with other software. + +CLI + Command-line interface, i.e. software that can be run in the Terminal. + +BIDS + The Brain Imaging Data Structure, a community standard for organizing neuroimaging (and other) data. See the [BIDS website](https://bids.neuroimaging.io/) for more information. + +Boutiques + A flexible framework for describing and executing command-line tools. Boutiques is based on JSON *descriptor* files that list tool inputs, outputs, error codes, and more. JSON *invocation* files are used to specify runtime parameters. See the [website](https://boutiques.github.io/) for more information. + +`conda` + An package and environment manager for Python (and other) environments. See the [`conda` website](https://conda.io/projects/conda/en/latest/user-guide/getting-started.html) for more information. + +HPC + High-perfomance computing system, i.e. a compute cluster or supercomputer. + +JSON + JavaScript Object Notation, a file format for storing and sharing data. JSON structures are combinations of *objects* (key-value pairs) and *arrays* (ordered lists). See the [website](https://www.json.org/json-en.html) for more information. + +MRI + Magnetic resonance imaging, the most widely used neuroimaging modality. + +PyPI + The [Python Package Index](https://pypi.org/), a repository of Python packages that are `pip`-installable. 
+
+`venv`
+    A Python built-in library for creating Python virtual environments. See the [Python documentation](https://docs.python.org/3/library/venv.html) for more information.
+```
diff --git a/nipoppy_cli/docs/source/index.md b/nipoppy_cli/docs/source/index.md
new file mode 100644
index 00000000..845a8660
--- /dev/null
+++ b/nipoppy_cli/docs/source/index.md
@@ -0,0 +1,52 @@
+```{include} ../../../README.md
+---
+relative-docs: nipoppy_cli/docs/
+relative-images:
+end-before: See the [documentation website]
+---
+```
+
+To get started, see the [Installation instructions](#installation) and/or the [Quickstart guide](#quickstart).
+
+```{toctree}
+---
+hidden:
+includehidden:
+titlesonly:
+caption: Overview
+---
+installation
+quickstart
+```
+
+```{toctree}
+---
+hidden:
+includehidden:
+titlesonly:
+caption: Detailed user guide
+---
+```
+
+```{toctree}
+---
+hidden:
+includehidden:
+titlesonly:
+caption: Reference
+---
+cli_reference/index
+autoapi/index
+schemas/index
+```
+
+```{toctree}
+---
+hidden:
+includehidden:
+titlesonly:
+caption: Other
+---
+contributing
+glossary
+```
diff --git a/nipoppy_cli/docs/source/installation.md b/nipoppy_cli/docs/source/installation.md
new file mode 100644
index 00000000..6ecfb890
--- /dev/null
+++ b/nipoppy_cli/docs/source/installation.md
@@ -0,0 +1,129 @@
+(installation-instructions)=
+# Installation
+
+Nipoppy is a Python package. We recommend installing it in a new or existing Python environment. The most common ways to create Python environments are through {term}`conda` and {term}`venv`.
+
+```{note}
+If you already have an existing Python environment set up, you can go directly to the [](#pip-install-section) section.
+```
+
+## Setting up a Python environment
+
+```{tip}
+If you do not already have Python set up on your system and/or wish to run Nipoppy locally, we recommend using {term}`conda` instead of {term}`venv`.
+```
+
+### {term}`conda` setup
+
+Install `conda` (e.g. through Miniconda) following instructions from [here](https://conda.io/projects/conda/en/latest/user-guide/install/index.html).
+
+Create a new environment (if needed) with a Python version of at least `3.9`. Here we call it `nipoppy_env`, but it can be named anything. In a Terminal window, run:
+```{code-block} console
+$ conda create --name nipoppy_env python=3.12
+```
+
+```{tip}
+The [conda cheatsheet](https://docs.conda.io/projects/conda/en/latest/user-guide/cheatsheet.html) is a useful reference for the most commonly used `conda` commands.
+```
+
+Activate the environment, e.g. by running:
+```{code-block} console
+$ conda activate nipoppy_env
+```
+
+### {term}`venv` setup
+
+*Note: These instructions assume you have an appropriate Python version installed.*
+
+Create the Python virtual environment in a directory of your choice. Here we call it `nipoppy_env`, but it can be named anything. In a Terminal window, run:
+```{code-block} console
+$ python -m venv nipoppy_env
+```
+
+```{note}
+If you have multiple versions of Python installed, you should specify which one to use (e.g. `python3.12` instead of `python` in the previous command)
+```
+
+````{admonition} On Compute Canada/Digital Research Alliance of Canada systems
+---
+class: dropdown
+---
+If you are using one of the [Compute Canada/Digital Research Alliance of Canada](https://docs.alliancecan.ca/wiki/Technical_documentation) {term}`HPC` systems, you should instead use `virtualenv`:
+```{code-block} console
+$ virtualenv --no-download nipoppy_env
+```
+
+See the [Compute Canada wiki](https://docs.alliancecan.ca/wiki/Python#Creating_and_using_a_virtual_environment) for more information.
+````
+
+Activate the virtual environment, e.g. by running:
+```{code-block} console
+$ source nipoppy_env/bin/activate
+```
+
+(pip-install-section)=
+## Installing the `nipoppy` package
+
+### From {term}`PyPI`
+
+% TODO
+We are actively working on publishing the package on PyPI, but for now it can only be installed by cloning the GitHub repository (see next section). Come back later for updates!
+
+% The latest release of Nipoppy can be installed from {term}`PyPI`. In a Terminal window, run:
+% ```{code-block} console
+% $ pip install nipoppy
+% ```
+
+(github-install-section)=
+### From GitHub
+
+If you wish to use the latest (potentially unstable) version of the package, you can get it from the [GitHub repository](https://github.com/neurodatascience/nipoppy).
+
+Clone the repository in a directory of your choice:
+```{code-block} console
+$ git clone https://github.com/neurodatascience/nipoppy.git
+```
+
+Move into that directory and the `nipoppy_cli` subdirectory:
+```{code-block} console
+$ cd nipoppy/nipoppy_cli
+```
+
+```{note}
+The `nipoppy_cli` subdirectory contains the newer version of the code, which has been refactored into a CLI. Eventually, it will become the only maintained version of the code. For the moment, the soon-to-be legacy code is still at the top level of the GitHub repository.
+```
+
+Install from the local source code in editable mode:
+```{code-block} console
+$ pip install -e .
+```
+
+````{note}
+You can also install the package with `dev` dependencies (e.g., for running tests and building documentation):
+```{code-block} console
+$ pip install -e '.[dev]'
+```
+````
+
+### Verifying the install
+
+% TODO replace with nipoppy --version once that is available
+Nipoppy was installed successfully if the {term}`CLI` runs. The following command should print a usage message and exit without error:
+```{code-block} console
+$ nipoppy -h
+```
+
+## Troubleshooting
+
+Please create a [GitHub issue](https://github.com/neurodatascience/nipoppy/issues/new) for any error not covered below.
+
+### Error when installing `pydantic-core`
+
+The latest version of the `pydantic-core` package (required by `pydantic`) is written in Rust, not pure Python. If the package needs to be compiled during the install, but Rust is not available, then there might be an error complaining that Rust and/or Cargo cannot be found. In that case, if you are on an {term}`HPC` system that uses `lmod`, try loading Rust before installing:
+```{code-block} console
+$ module load rust
+```
+
+## Next steps
+
+All done? See the [Quickstart guide](quickstart) next for instructions on how to set up a Nipoppy dataset and configure pipelines.
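+
+To recap, the complete first-time setup using the `conda` and GitHub routes described above condenses to the following (these are the same commands shown earlier on this page, not additional steps):
+
+```{code-block} console
+$ conda create --name nipoppy_env python=3.12
+$ conda activate nipoppy_env
+$ git clone https://github.com/neurodatascience/nipoppy.git
+$ cd nipoppy/nipoppy_cli
+$ pip install -e .
+$ nipoppy -h
+```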
diff --git a/nipoppy_cli/docs/source/quickstart.md b/nipoppy_cli/docs/source/quickstart.md
new file mode 100644
index 00000000..56e5b1b3
--- /dev/null
+++ b/nipoppy_cli/docs/source/quickstart.md
@@ -0,0 +1,107 @@
+# Quickstart
+
+```{note}
+See the [Installation instructions](#installation) first if you have not yet installed Nipoppy.
+```
+
+% TODO add note about starting from BIDS dataset once nipoppy import is implemented, maybe point to a tutorial page instead of cluttering the quickstart guide
+
+## Initializing a new dataset
+
+An empty directory tree can be generated by running [`nipoppy init`](#cli_reference/init) (replacing `<DATASET_ROOT>` with the appropriate path):
+
+```{code-block} console
+$ nipoppy init --dataset-root <DATASET_ROOT>
+```
+
+```{warning}
+There must not be anything already existing at `<DATASET_ROOT>`, or the command will result in an error.
+```
+
+The newly created directory tree follows the Nipoppy specification. Other Nipoppy commands expect all these directories to exist -- they will throw an error if that is not the case.
+
+```{tip}
+Each subdirectory contains a `README.md` file that briefly describes the purpose of the subdirectory and the type of data that it should contain.
+```
+
+## Creating/modifying required files
+
+Nipoppy requires two user-provided files in each dataset: a **{term}`JSON` configuration file** and a **tabular manifest file**. Commands will result in errors if either of these files is missing or invalid.
+
+```{note}
+The [`nipoppy init`](#cli_reference/init) command copies examples of these files to the expected paths within a new dataset, but you will most likely have to modify/overwrite them.
+```
+
+(customizing-config)=
+### Customizing the configuration file
+
+The configuration file at `<DATASET_ROOT>/proc/global_configs.json` contains general information about a dataset (e.g. name, visit and session names) and configurations for running processing pipelines (e.g., pipeline version and runtime parameters).
+
+The example config file contains configurations for all BIDS conversion and image processing software that are supported out-of-the-box by Nipoppy. You should replace the placeholder strings/substrings (e.g. `<DATASET_NAME>`) by more appropriate values for your dataset. See the dropdown below (lines that should be changed are highlighted).
+
+You can also delete (or keep) any configuration for a software/version that you do not plan to use.
+
+````{admonition} The example config file
+---
+class: dropdown
+---
+Here is the default content of `<DATASET_ROOT>/proc/global_configs.json`:
+```{literalinclude} ../../nipoppy/data/examples/sample_global_configs.json
+---
+linenos: True
+emphasize-lines: 2, 4, 7, 28, 35, 51, 56, 69, 82, 91, 104
+language: json
+---
+```
+````
+
+% TODO point to config pages for detailed guide once those are written
+
+```{tip}
+See the {ref}`schema reference <config-schema>` for more information about each config field.
+```
+
+### Generating the manifest file
+
+The manifest file at `<DATASET_ROOT>/tabular/manifest.csv` contains *ground truth* information about the participants and visits/sessions available for a dataset.
+
+There must be only **one row** per unique participant/visit combination.
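+
+A quick way to verify this constraint on a generated manifest is a sketch like the following (assuming `pandas` and the `participant_id`/`visit` column names used in the example manifest below):
+
+```{code-block} python
+import pandas as pd
+
+manifest = pd.read_csv("manifest.csv")
+# flag any participant/visit combination that appears more than once
+duplicated = manifest.duplicated(subset=["participant_id", "visit"], keep=False)
+print(manifest[duplicated])  # should be empty
+```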
+
+The example manifest looks like this:
+```{csv-table}
+---
+file: ../../nipoppy/data/examples/sample_manifest.csv
+header-rows: 1
+---
+```
+
+````{admonition} Raw content of the example manifest file
+---
+class: dropdown
+---
+```{literalinclude} ../../nipoppy/data/examples/sample_manifest.csv
+---
+linenos: True
+language: csv
+---
+```
+````
+
+It is extremely unlikely that this manifest file works for your dataset, so you will have to generate one yourself. We recommend writing a script, for reproducibility and for easy updates if more data is added to the dataset.
+
+% TODO point to example script for generating a manifest
+
+```{tip}
+See the {ref}`schema reference <manifest-schema>` for more information about each column.
+```
+
+## Next steps
+
+The rest of this documentation is still work in progress. As a starting point, refer to the [commands](cli_reference/index) associated with the data organization or processing step(s) you wish to perform.
+
+![Nipoppy protocol](_static/img/nipoppy_protocol.jpg)
+
+% TODO point to tutorials once they are created (running BIDS conversion, running a pipeline)
+
+% TODO mention that all other commands create log files in scratch/logs (?)
diff --git a/nipoppy_cli/docs/source/schemas/index.rst b/nipoppy_cli/docs/source/schemas/index.rst
new file mode 100644
index 00000000..dac0317d
--- /dev/null
+++ b/nipoppy_cli/docs/source/schemas/index.rst
@@ -0,0 +1,108 @@
+.. Note: This page has to be in ReST format, not Markdown
+.. because otherwise there are issues with linking between the schemas
+
+File schemas
+============
+
+This page contains auto-generated :term:`JSON` schemas [#f1]_ for the text files used or generated by Nipoppy.
+
+.. _config-schema:
+
+Global configuration file
+-------------------------
+
+.. include:: schema_howto.md
+   :parser: myst_parser.sphinx_
+
+.. admonition:: Info
+
+   ``PROC_PIPELINES`` and ``BIDS`` are nested objects (i.e. dictionaries),
+   where the final "leaf" values are ``PipelineConfig`` objects. All keys should
+   be strings.
+
+   ``PROC_PIPELINES`` expects two levels of nesting: one for the **pipeline name**,
+   and the other for the **pipeline version**.
+
+   ``BIDS`` expects three levels of nesting: one for the **pipeline name**, one for
+   the **pipeline version**, and the last one for the **name of the** :term:`BIDS` **conversion step**.
+
+   See the :ref:`Quickstart guide <customizing-config>` for an example config file
+   that shows these nested structures.
+
+Below is the schema used for the global configuration :term:`JSON` file.
+
+.. jsonschema:: config.json
+
+.. _manifest-schema:
+
+Manifest file
+-------------
+
+.. include:: schema_howto.md
+   :parser: myst_parser.sphinx_
+
+Here is the schema used for each row in the manifest file:
+
+.. jsonschema:: manifest.json
+   :lift_title: False
+
+Doughnut file
+-------------
+
+.. include:: schema_howto.md
+   :parser: myst_parser.sphinx_
+
+Here is the schema used for each row in the doughnut file:
+
+.. jsonschema:: doughnut.json
+   :lift_title: False
+
+Bagel file
+----------
+
+.. include:: schema_howto.md
+   :parser: myst_parser.sphinx_
+
+Here is the schema used for each row in the bagel file:
+
+.. jsonschema:: bagel.json
+   :lift_title: False
+
+
+Layout configuration
+--------------------
+
+.. include:: schema_howto.md
+   :parser: myst_parser.sphinx_
+
+.. admonition:: The default Nipoppy layout
+   :class: dropdown
+
+   The default Nipoppy layout uses the following configuration:
+
+   .. literalinclude:: ../../../nipoppy/data/layouts/layout-default.json
+      :linenos:
+      :language: json
+
+Here is the schema used for :term:`JSON` files that define the layout of the dataset:
+
+.. jsonschema:: layout.json
+   :lift_title: False
+
+Custom Nipoppy configurations in a Boutiques descriptor
+-------------------------------------------------------
+
+.. include:: schema_howto.md
+   :parser: myst_parser.sphinx_
+
+Nipoppy can read additional configurations from a Boutiques descriptor, under the
+``custom.nipoppy`` key. The :term:`JSON` object is expected to comply with the following schema:
+
+.. This has a link to the ContainerConfig schema pointing to the one that appears
+.. under the Config schema, but there is another ContainerConfig schema under the
+.. BoutiquesConfig schema
+.. jsonschema:: boutiques.json
+   :lift_title: False
+
+.. Footer
+.. [#f1] Created with `sphinx-jsonschema `_
diff --git a/nipoppy_cli/docs/source/schemas/schema_howto.md b/nipoppy_cli/docs/source/schemas/schema_howto.md
new file mode 100644
index 00000000..f7e3b6f9
--- /dev/null
+++ b/nipoppy_cli/docs/source/schemas/schema_howto.md
@@ -0,0 +1,11 @@
+```{admonition} How to read the schema table
+---
+class: note
+---
+- Read the table from top to bottom, left to right
+- **Required properties** are in **bold**
+- The *type* of each property is in *italics*
+  - Unless that property is an object described by another schema, in which case it is a link to that schema
+- Default values are shown for optional properties (`None` if empty)
+- See the [JSON schema docs](https://www.learnjsonschema.com/) for more details about keyword meanings
+```
diff --git a/nipoppy_cli/nipoppy/config/container.py b/nipoppy_cli/nipoppy/config/container.py
index 63e80c96..e9f813c9 100644
--- a/nipoppy_cli/nipoppy/config/container.py
+++ b/nipoppy_cli/nipoppy/config/container.py
@@ -8,7 +8,7 @@
 from pathlib import Path
 from typing import Any, Optional
 
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field
 
 from nipoppy.logger import get_logger
 
@@ -21,11 +21,33 @@ class ContainerConfig(BaseModel):
     """Model for container configuration."""
 
-    COMMAND: str = "apptainer"
-    SUBCOMMAND: str = "run"
-    ARGS: list[str] = []
-    ENV_VARS: dict[str, str] = {}
-    INHERIT: bool = True
+    COMMAND: str = Field(
+        default="apptainer",
+        description="Name of or path to Apptainer/Singularity executable",
+    )
+    SUBCOMMAND: str = Field(
+        default="run", description="Subcommand for Apptainer/Singularity call"
+    )
+    ARGS: list[str] = Field(
+        default=[],
+        description=(
+            "Arguments for Apptainer/Singularity call"
+            " (to be appended after the subcommand)"
+        ),
+    )
+    ENV_VARS: dict[str, str] = Field(
+        default={},
+        description=(
+            "Environment variables that should be available inside the container"
+        ),
+    )
+    INHERIT: bool = Field(
+        default=True,
+        description=(
+            "Whether this config should inherit from higher-level container configs."
+ " If false, will override higher-level configs" + ), + ) model_config = ConfigDict(extra="forbid") diff --git a/nipoppy_cli/nipoppy/config/main.py b/nipoppy_cli/nipoppy/config/main.py index 7de6d033..0836c12f 100644 --- a/nipoppy_cli/nipoppy/config/main.py +++ b/nipoppy_cli/nipoppy/config/main.py @@ -1,9 +1,9 @@ """Dataset configuration.""" from pathlib import Path -from typing import Any, Self +from typing import Any, Optional, Self -from pydantic import ConfigDict, model_validator +from pydantic import ConfigDict, Field, model_validator from nipoppy.config.container import ModelWithContainerConfig from nipoppy.config.pipeline import PipelineConfig @@ -13,11 +13,21 @@ class Config(ModelWithContainerConfig): """Model for dataset configuration.""" - DATASET_NAME: str - VISITS: list[str] - SESSIONS: list[str] = [] - BIDS: dict[str, dict[str, dict[str, PipelineConfig]]] = {} - PROC_PIPELINES: dict[str, dict[str, PipelineConfig]] + DATASET_NAME: str = Field(description="Name of the dataset") + VISITS: list[str] = Field(description="List of visits available in the study") + SESSIONS: Optional[list[str]] = Field( + default=None, + description=( + "List of sessions available in the study" + " (inferred from VISITS if not given)" + ), + ) + BIDS: dict[str, dict[str, dict[str, PipelineConfig]]] = Field( + default={}, description="Configurations for BIDS converters, if any" + ) + PROC_PIPELINES: dict[str, dict[str, PipelineConfig]] = Field( + description="Configurations for processing pipelines" + ) model_config = ConfigDict(extra="allow") diff --git a/nipoppy_cli/nipoppy/data/layouts/layout-0.1.0.json b/nipoppy_cli/nipoppy/data/layouts/layout-0.1.0.json index c131eec3..9e030498 100644 --- a/nipoppy_cli/nipoppy/data/layouts/layout-0.1.0.json +++ b/nipoppy_cli/nipoppy/data/layouts/layout-0.1.0.json @@ -17,7 +17,7 @@ }, "dpath_releases": { "path": "releases", - "description": "This directory is contains dataset releases (e.g., associated with specific publications)." + "description": "This directory contains dataset releases (e.g., associated with specific publications)." }, "dpath_proc": { "path": "proc", diff --git a/nipoppy_cli/nipoppy/data/layouts/layout-default.json b/nipoppy_cli/nipoppy/data/layouts/layout-default.json index 92159610..e7b68aef 100644 --- a/nipoppy_cli/nipoppy/data/layouts/layout-default.json +++ b/nipoppy_cli/nipoppy/data/layouts/layout-default.json @@ -9,7 +9,7 @@ }, "dpath_sourcedata": { "path": "sourcedata", - "description": "This directory contains the organized source data that is ready for BIDS conversion." + "description": "This directory is for the organized source data that is ready for BIDS conversion." }, "dpath_downloads": { "path": "downloads", @@ -17,15 +17,15 @@ }, "dpath_releases": { "path": "releases", - "description": "This directory is contains dataset releases (e.g., associated with specific publications)." + "description": "This directory is for dataset releases (e.g., associated with specific publications)." }, "dpath_proc": { "path": "proc", - "description": "This directory contains the Nipoppy configuration information and custom code." + "description": "This directory is for storing the Nipoppy configuration information and custom code." }, "dpath_containers": { "path": "proc/containers", - "description": "This directory contains the container image files for the processing pipelines." + "description": "This directory is for storing the container image files for the BIDS conversion/processing pipelines." 
}, "dpath_descriptors": { "path": "proc/descriptors", @@ -41,7 +41,7 @@ }, "dpath_pybids": { "path": "proc/pybids", - "description": "This directory contains PyBIDS-specific data/configurations." + "description": "This directory is for PyBIDS-specific data/configurations." }, "dpath_bids_db": { "path": "proc/pybids/bids_db", @@ -65,15 +65,15 @@ }, "dpath_tabular": { "path": "tabular", - "description": "This directory contains tabular/phenotypic data." + "description": "This directory is for tabular/phenotypic data." }, "dpath_assessments": { "path": "tabular/assessments", - "description": "This directory contains assessments data." + "description": "This directory is for assessments data." }, "dpath_demographics": { "path": "tabular/demographics", - "description": "This directory contains demographics data." + "description": "This directory is for demographics data." }, "fpath_config": { "path": "proc/global_configs.json", diff --git a/nipoppy_cli/nipoppy/layout.py b/nipoppy_cli/nipoppy/layout.py index d46326fd..2bda1169 100644 --- a/nipoppy_cli/nipoppy/layout.py +++ b/nipoppy_cli/nipoppy/layout.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Any, Optional, Tuple -from pydantic import BaseModel, ConfigDict +from pydantic import BaseModel, ConfigDict, Field from nipoppy.base import Base from nipoppy.utils import FPATH_DEFAULT_LAYOUT, get_pipeline_tag, load_json @@ -16,8 +16,11 @@ class PathInfo(BaseModel): _is_directory: bool _is_required: bool = True - path: Path - description: Optional[str] = None + path: Path = Field(description="Relative path to the file or directory") + description: Optional[str] = Field( + default=None, + description="Description of the function of the file or directory", + ) class DpathInfo(PathInfo): @@ -43,30 +46,69 @@ class LayoutConfig(BaseModel): model_config = ConfigDict(extra="forbid") - dpath_bids: DpathInfo - dpath_derivatives: DpathInfo - dpath_sourcedata: DpathInfo - dpath_downloads: DpathInfo - dpath_proc: DpathInfo - dpath_releases: DpathInfo - dpath_containers: DpathInfo - dpath_descriptors: DpathInfo - dpath_invocations: DpathInfo - dpath_scripts: DpathInfo - dpath_pybids: DpathInfo - dpath_bids_db: DpathInfo - dpath_bids_ignore_patterns: DpathInfo - dpath_scratch: DpathInfo - dpath_raw_dicom: DpathInfo - dpath_logs: DpathInfo - dpath_tabular: DpathInfo - dpath_assessments: DpathInfo - dpath_demographics: DpathInfo - - fpath_config: FpathInfo - fpath_manifest: FpathInfo - fpath_doughnut: OptionalFpathInfo - fpath_imaging_bagel: OptionalFpathInfo + dpath_bids: DpathInfo = Field(description="Directory for raw imaging data in BIDS") + dpath_derivatives: DpathInfo = Field( + description="Directory for imaging derivatives" + ) + dpath_sourcedata: DpathInfo = Field( + description="Directory for raw imaging data that is not yet in BIDS" + ) + dpath_downloads: DpathInfo = Field(description="Directory for downloaded data") + dpath_proc: DpathInfo = Field( + description=( + "Directory for configurations or other files needed when or generated" + " before running pipelines" + ) + ) + dpath_releases: DpathInfo = Field(description="Directory for data releases") + dpath_containers: DpathInfo = Field( + description="Directory for storing container images" + ) + dpath_descriptors: DpathInfo = Field( + description="Directory for storing Boutiques descriptors for pipelines" + ) + dpath_invocations: DpathInfo = Field( + description="Directory for storing Boutiques invocations for pipelines" + ) + dpath_scripts: DpathInfo = Field(description="Directory for 
diff --git a/nipoppy_cli/nipoppy/layout.py b/nipoppy_cli/nipoppy/layout.py
index d46326fd..2bda1169 100644
--- a/nipoppy_cli/nipoppy/layout.py
+++ b/nipoppy_cli/nipoppy/layout.py
@@ -4,7 +4,7 @@
 from pathlib import Path
 from typing import Any, Optional, Tuple
 
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, Field
 
 from nipoppy.base import Base
 from nipoppy.utils import FPATH_DEFAULT_LAYOUT, get_pipeline_tag, load_json
@@ -16,8 +16,11 @@
 class PathInfo(BaseModel):
 
     _is_directory: bool
     _is_required: bool = True
 
-    path: Path
-    description: Optional[str] = None
+    path: Path = Field(description="Relative path to the file or directory")
+    description: Optional[str] = Field(
+        default=None,
+        description="Description of the function of the file or directory",
+    )
 
 
 class DpathInfo(PathInfo):
@@ -43,30 +46,69 @@
 class LayoutConfig(BaseModel):
 
     model_config = ConfigDict(extra="forbid")
 
-    dpath_bids: DpathInfo
-    dpath_derivatives: DpathInfo
-    dpath_sourcedata: DpathInfo
-    dpath_downloads: DpathInfo
-    dpath_proc: DpathInfo
-    dpath_releases: DpathInfo
-    dpath_containers: DpathInfo
-    dpath_descriptors: DpathInfo
-    dpath_invocations: DpathInfo
-    dpath_scripts: DpathInfo
-    dpath_pybids: DpathInfo
-    dpath_bids_db: DpathInfo
-    dpath_bids_ignore_patterns: DpathInfo
-    dpath_scratch: DpathInfo
-    dpath_raw_dicom: DpathInfo
-    dpath_logs: DpathInfo
-    dpath_tabular: DpathInfo
-    dpath_assessments: DpathInfo
-    dpath_demographics: DpathInfo
-
-    fpath_config: FpathInfo
-    fpath_manifest: FpathInfo
-    fpath_doughnut: OptionalFpathInfo
-    fpath_imaging_bagel: OptionalFpathInfo
+    dpath_bids: DpathInfo = Field(description="Directory for raw imaging data in BIDS")
+    dpath_derivatives: DpathInfo = Field(
+        description="Directory for imaging derivatives"
+    )
+    dpath_sourcedata: DpathInfo = Field(
+        description="Directory for raw imaging data that is not yet in BIDS"
+    )
+    dpath_downloads: DpathInfo = Field(description="Directory for downloaded data")
+    dpath_proc: DpathInfo = Field(
+        description=(
+            "Directory for configurations or other files needed when,"
+            " or generated before, running pipelines"
+        )
+    )
+    dpath_releases: DpathInfo = Field(description="Directory for data releases")
+    dpath_containers: DpathInfo = Field(
+        description="Directory for storing container images"
+    )
+    dpath_descriptors: DpathInfo = Field(
+        description="Directory for storing Boutiques descriptors for pipelines"
+    )
+    dpath_invocations: DpathInfo = Field(
+        description="Directory for storing Boutiques invocations for pipelines"
+    )
+    dpath_scripts: DpathInfo = Field(description="Directory for storing custom scripts")
+    dpath_pybids: DpathInfo = Field(
+        description="Directory for storing PyBIDS configurations/databases"
+    )
+    dpath_bids_db: DpathInfo = Field(
+        description=(
+            "Directory for PyBIDS databases (should be under ``dpath_pybids``)"
+        )
+    )
+    dpath_bids_ignore_patterns: DpathInfo = Field(
+        description="Directory for PyBIDS indexing configurations"
+    )
+    dpath_scratch: DpathInfo = Field(description="Directory for temporary files")
+    dpath_raw_dicom: DpathInfo = Field(
+        description="Directory for raw, unorganized DICOM files"
+    )
+    dpath_logs: DpathInfo = Field(description="Directory for logs generated by Nipoppy")
+    dpath_tabular: DpathInfo = Field(description="Directory for tabular data")
+    dpath_assessments: DpathInfo = Field(
+        description="Directory for tabular assessment data"
+    )
+    dpath_demographics: DpathInfo = Field(
+        description="Directory for tabular demographic data"
+    )
+
+    fpath_config: FpathInfo = Field(description="Path to the configuration file")
+    fpath_manifest: FpathInfo = Field(description="Path to the manifest file")
+    fpath_doughnut: OptionalFpathInfo = Field(
+        description=(
+            "Path to the doughnut file (for tracking the "
+            "DICOM-to-BIDS conversion process)"
+        )
+    )
+    fpath_imaging_bagel: OptionalFpathInfo = Field(
+        description=(
+            "Path to the imaging bagel file (for tracking imaging derivative "
+            "availability at the participant level)"
+        )
+    )
 
     @cached_property
     def path_labels(self) -> list[str]:
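As a sanity check on how these pieces fit together, the packaged default layout file shown earlier should parse into ``LayoutConfig``. A hedged sketch (populating the model via ``**load_json(...)`` is an assumption about its intended use; both names are imported in ``nipoppy.layout`` above):

```python
from nipoppy.layout import LayoutConfig
from nipoppy.utils import FPATH_DEFAULT_LAYOUT, load_json

# Validate the packaged default layout against the model defined above.
layout_config = LayoutConfig(**load_json(FPATH_DEFAULT_LAYOUT))
print(layout_config.dpath_containers.path)  # proc/containers
```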
+ """ + + participant_id: str = Field( + title="Participant ID", + description=f"{FIELD_DESCRIPTION_MAP['participant_id']} (as in the manifest)", + ) + bids_id: Optional[str] = Field( + default=None, title="BIDS ID", description=FIELD_DESCRIPTION_MAP["bids_id"] + ) + session: str = Field(description=FIELD_DESCRIPTION_MAP["session"]) + pipeline_name: str = Field(description="The name of the pipeline being tracked") + pipeline_version: str = Field( + description="The version of the pipeline being tracked" + ) + pipeline_complete: str = Field( + description="The status of the pipeline run for this participant-visit pair" + ) @field_validator("pipeline_complete") @classmethod diff --git a/nipoppy_cli/nipoppy/tabular/doughnut.py b/nipoppy_cli/nipoppy/tabular/doughnut.py index 382ff778..e70e0817 100644 --- a/nipoppy_cli/nipoppy/tabular/doughnut.py +++ b/nipoppy_cli/nipoppy/tabular/doughnut.py @@ -4,20 +4,44 @@ from pathlib import Path from typing import Optional, Self +from pydantic import Field + from nipoppy.logger import get_logger from nipoppy.tabular.manifest import Manifest, ManifestModel -from nipoppy.utils import participant_id_to_bids_id, participant_id_to_dicom_id +from nipoppy.utils import ( + FIELD_DESCRIPTION_MAP, + participant_id_to_bids_id, + participant_id_to_dicom_id, +) class DoughnutModel(ManifestModel): - """Model for the doughnut file.""" - - participant_dicom_dir: str - dicom_id: str - bids_id: str - downloaded: bool - organized: bool - bidsified: bool + """ + An internally- or user-generated file to keep track of the BIDS conversion process. + + Should contain exactly the same data as the manifest, with some additional columns. + """ + + participant_dicom_dir: str = Field( + title="Participant DICOM directory", + description=( + "Path to the directory containing raw DICOMs " + "(in potentially messy tree structure) for the participant-visit pair, " + "relative to the raw data directory" + ), + ) + dicom_id: str = Field( + title="DICOM ID", + description="Participant identifier used in DICOM file names/paths", + ) + bids_id: str = Field(title="BIDS ID", description=FIELD_DESCRIPTION_MAP["bids_id"]) + downloaded: bool = Field(description="Whether files are available on disk") + organized: bool = Field( + description="Whether files have been organized in the sourcedata directory" + ) + bidsified: bool = Field( + title="BIDSified", description="Whether files have been converted to BIDS" + ) class Doughnut(Manifest): diff --git a/nipoppy_cli/nipoppy/tabular/manifest.py b/nipoppy_cli/nipoppy/tabular/manifest.py index 2ceb8245..590ed297 100644 --- a/nipoppy_cli/nipoppy/tabular/manifest.py +++ b/nipoppy_cli/nipoppy/tabular/manifest.py @@ -3,18 +3,26 @@ from typing import Optional, Self import pandas as pd -from pydantic import ConfigDict +from pydantic import ConfigDict, Field from nipoppy.tabular.base import BaseTabular, BaseTabularModel +from nipoppy.utils import FIELD_DESCRIPTION_MAP class ManifestModel(BaseTabularModel): - """Model for the manifest.""" - - participant_id: str - visit: str - session: Optional[str] - datatype: Optional[list[str]] + """A user-provided listing of participant and visits available in the dataset.""" + + participant_id: str = Field( + title="Participant ID", description=FIELD_DESCRIPTION_MAP["participant_id"] + ) + visit: str = Field(description=FIELD_DESCRIPTION_MAP["visit"]) + session: Optional[str] = Field(description=FIELD_DESCRIPTION_MAP["session"]) + datatype: Optional[list[str]] = Field( + description=( + "Imaging datatype, as recognized by 
diff --git a/nipoppy_cli/nipoppy/tabular/manifest.py b/nipoppy_cli/nipoppy/tabular/manifest.py
index 2ceb8245..590ed297 100644
--- a/nipoppy_cli/nipoppy/tabular/manifest.py
+++ b/nipoppy_cli/nipoppy/tabular/manifest.py
@@ -3,18 +3,26 @@
 from typing import Optional, Self
 
 import pandas as pd
-from pydantic import ConfigDict
+from pydantic import ConfigDict, Field
 
 from nipoppy.tabular.base import BaseTabular, BaseTabularModel
+from nipoppy.utils import FIELD_DESCRIPTION_MAP
 
 
 class ManifestModel(BaseTabularModel):
-    """Model for the manifest."""
-
-    participant_id: str
-    visit: str
-    session: Optional[str]
-    datatype: Optional[list[str]]
+    """A user-provided listing of participants and visits available in the dataset."""
+
+    participant_id: str = Field(
+        title="Participant ID", description=FIELD_DESCRIPTION_MAP["participant_id"]
+    )
+    visit: str = Field(description=FIELD_DESCRIPTION_MAP["visit"])
+    session: Optional[str] = Field(description=FIELD_DESCRIPTION_MAP["session"])
+    datatype: Optional[list[str]] = Field(
+        description=(
+            "Imaging datatype, as recognized by BIDS (see "
+            "https://bids-specification.readthedocs.io/en/stable/common-principles.html)"  # noqa E501
+        )
+    )
 
     @classmethod
     def validate_fields(cls, data: dict):
diff --git a/nipoppy_cli/nipoppy/utils.py b/nipoppy_cli/nipoppy/utils.py
index f78b504f..085df8e9 100644
--- a/nipoppy_cli/nipoppy/utils.py
+++ b/nipoppy_cli/nipoppy/utils.py
@@ -26,6 +26,14 @@
 DPATH_LAYOUTS = DPATH_DATA / "layouts"
 FPATH_DEFAULT_LAYOUT = DPATH_LAYOUTS / "layout-default.json"
 
+# descriptions for common fields in the Pydantic models
+FIELD_DESCRIPTION_MAP = {
+    "bids_id": "BIDS-compliant participant identifier (e.g., sub-01)",
+    "participant_id": "Participant identifier",
+    "session": "BIDS-compliant identifier for the imaging session (e.g., ses-1)",
+    "visit": "Visit identifier",
+}
+
 
 def participant_id_to_dicom_id(participant_id: str):
     """Convert a participant ID to a BIDS-compatible DICOM ID."""
diff --git a/nipoppy_cli/pyproject.toml b/nipoppy_cli/pyproject.toml
index ab77d570..f37c82d5 100644
--- a/nipoppy_cli/pyproject.toml
+++ b/nipoppy_cli/pyproject.toml
@@ -30,17 +30,29 @@ dependencies = [
 description = "Standardized organization and processing of neuroimaging-clinical datasets"
 license = { file = "LICENSE" }
 name = "nipoppy"
-readme = "README.md"
+readme = "../README.md" # TODO requires_python
 version = "1.0.0" # TODO eventually use dynamic versioning
 
 [project.optional-dependencies]
-dev = ["nipoppy[test]", "pre-commit"]
+dev = ["nipoppy[doc]", "nipoppy[test]", "pre-commit"]
+doc = [
+    "furo>=2024.1.29",
+    "pygments-csv-lexer>=0.1.3",
+    "sphinx>=7.2.6",
+    "sphinx-argparse>=0.4.0",
+    "sphinx-autoapi>=3.0.0",
+    "sphinx-copybutton>=0.5.2",
+    "sphinx-jsonschema>=1.19.1",
+    "sphinx-togglebutton>=0.3.2",
+    "mdit-py-plugins>=0.4.0",
+    "myst-parser>=2.0.0",
+]
 test = [
     "pytest>=6.0.0",
     "pytest-cov",
     "pytest-mock",
-    "fids@git+https://github.com/neurodatascience/FIDS.git",
+    "fids>=0.1.0",
 ]
 tests = ["nipoppy[test]"] # alias in case of typo
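A closing note on why the ``Field(description=...)`` annotations throughout this changeset matter: Pydantic v2 propagates them into generated JSON schemas, which is what the schema pages in the docs render. A minimal sketch (``model_json_schema`` is standard Pydantic v2; the printed value comes from ``FIELD_DESCRIPTION_MAP`` above):

```python
from nipoppy.tabular.manifest import ManifestModel

# Field descriptions added in this diff surface in the JSON schema,
# which the docs build renders with sphinx-jsonschema.
schema = ManifestModel.model_json_schema()
print(schema["properties"]["participant_id"]["description"])
# -> "Participant identifier"
```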