From 9f3d30744d9785f6b9343d1561f927a2f8dbd3c4 Mon Sep 17 00:00:00 2001
From: Nikhil Bhagwat
Date: Wed, 13 Dec 2023 23:57:11 -0500
Subject: [PATCH] removed version (i.e. v) prefix from run scripts to match
 trackers and refactored FS extractor (#188)

* moving sample_global_configs.json and tree.json to the nipoppy subdir
* fixed import paths after refactor
* fixed import paths after refactor
* refactored and cleaned up mriqc run script
* refactored and cleaned up mriqc run script
* Started tracker refactoring to use doughnut
* added a catalog function to identify new proc-participants from bagel and doughnut
* added a catalog function to identify new proc-participants from bagel and doughnut
* added custom (new subjects only) pybids layout (sqldb) generation
* fixed sessions loop and incorporated utils.load_status into run_tracker.py
* fixed import path errors and incorporated tracker.py for status flags
* fixed global var imports and logging levels
* updated sample_run_nipoppy to set log-level and prototypical mriqc run with tracker
* updated bids_tracker to match proc_pipe tracker schema
* minor fixes and comments
* fixed Pandas future warning on setting an item of incompatible dtype
* fixed another Pandas future warning on setting an item of incompatible dtype
* 1) Updated mriqc and fmriprep run scripts to bind complete bids_dir path, 2) added sqldb generation with ignore list for subjects and datatype+acq, 3) updated sample_run_nipoppy.py to show these two functionalities.
* fixed fmriprep pytest
* fixed codespell
* fixed NM filename pattern
* added functionality to custom-map participant_id to bids_id
* fixed minor codespell errors
* fixed errors from previous merge conflict resolution
* updated sample_run_nipoppy to run tractoflow, renamed and moved check_dicom_status, and fixed minor bugs and logging in run_tracker
* fixed session_id typo and optimized tracker runs
* fixed FS utils function
* added acq option to all trackers
* added acq option to all trackers (fixed merge conflict)
* fixed typos and added support for acq tag in mriqc tracker
* fixed tractoflow subject dir path issues and added INCOMPLETE status
* refactored FS extraction using brainload package
* fixed hemisphere naming
* fixed aseg extraction
* removed version (i.e. v) prefix from run scripts to match trackers
* fixed a typo and removed legacy FS extractor script
---
 nipoppy/extractors/fmriprep/run_FC.py          |   4 +-
 .../freesurfer/run_structural_measures.py      | 159 ++++++++++++++++++
 .../freesurfer/sample_FS_configs.json          |   9 +
 .../extractors/maget_brain/prepare_data.py     |   4 +-
 nipoppy/sample_run_nipoppy.py                  |   2 +-
 .../proc_pipe/fmriprep/run_fmriprep.py         |   4 +-
 nipoppy/workflow/proc_pipe/mriqc/run_mriqc.py  |   4 +-
 .../proc_pipe/tractoflow/run_tractoflow.py     |   2 +-
 8 files changed, 178 insertions(+), 10 deletions(-)
 create mode 100644 nipoppy/extractors/freesurfer/run_structural_measures.py
 create mode 100644 nipoppy/extractors/freesurfer/sample_FS_configs.json

diff --git a/nipoppy/extractors/fmriprep/run_FC.py b/nipoppy/extractors/fmriprep/run_FC.py
index 540be208..d77a5121 100644
--- a/nipoppy/extractors/fmriprep/run_FC.py
+++ b/nipoppy/extractors/fmriprep/run_FC.py
@@ -222,8 +222,8 @@ def run(participant_id: str,
     if output_dir is None:
         output_dir = f"{DATASET_ROOT}/derivatives/"
 
-    fmriprep_dir = f"{DATASET_ROOT}/derivatives/fmriprep/v{FMRIPREP_VERSION}/output"
-    DKT_dir = f"{DATASET_ROOT}/derivatives/networks/v0.9.0/output"
+    fmriprep_dir = f"{DATASET_ROOT}/derivatives/fmriprep/{FMRIPREP_VERSION}/output"
+    DKT_dir = f"{DATASET_ROOT}/derivatives/networks/0.9.0/output"
     FC_dir = f"{output_dir}/FC"
 
     # assess FC
diff --git a/nipoppy/extractors/freesurfer/run_structural_measures.py b/nipoppy/extractors/freesurfer/run_structural_measures.py
new file mode 100644
index 00000000..3034f98d
--- /dev/null
+++ b/nipoppy/extractors/freesurfer/run_structural_measures.py
@@ -0,0 +1,159 @@
+import numpy as np
+import pandas as pd
+import json
+import os
+import glob
+import argparse
+import brainload as bl
+from nipoppy.workflow.utils import (
+    COL_CONV_STATUS,
+    COL_SESSION_MANIFEST,
+    COL_BIDS_ID_MANIFEST,
+)
+
+# Globals
+# Brainload has two separate functions to extract aseg data.
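+# bl.stat() parses a FreeSurfer stats file into a dict with "table_data",
+# "table_column_headers", and "measures" keys (used throughout below).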
+measure_column_names = ["StructName", "Structure", "Description", "Volume_mm3", "unit"]
+aseg_cols = ["StructName", "Volume_mm3"]
+dkt_cols = ["StructName", "ThickAvg"]
+
+def get_aseg_stats(participant_stats_dir, aseg_cols):
+    aseg_stats = bl.stat(f'{participant_stats_dir}/aseg.stats')
+    table_df = pd.DataFrame(aseg_stats["table_data"], columns=aseg_stats["table_column_headers"])[aseg_cols]
+    measure_df = pd.DataFrame(data=aseg_stats["measures"], columns=measure_column_names)[aseg_cols]
+    _df = pd.concat([table_df, measure_df], axis=0)
+    return _df
+
+def get_aparc_stats(participant_stats_dir, aparc_cols, parcel="aparc.DKTatlas"):
+    hemi = "lh"
+    stat_file = f"{hemi}.{parcel}.stats"
+    lh_dkt_stats = bl.stat(f'{participant_stats_dir}/{stat_file}')
+    lh_df = pd.DataFrame(lh_dkt_stats["table_data"], columns=lh_dkt_stats["table_column_headers"])[aparc_cols]
+    lh_df["hemi"] = hemi
+
+    hemi = "rh"
+    stat_file = f"{hemi}.{parcel}.stats"
+    rh_dkt_stats = bl.stat(f'{participant_stats_dir}/{stat_file}')
+    rh_df = pd.DataFrame(rh_dkt_stats["table_data"], columns=rh_dkt_stats["table_column_headers"])[aparc_cols]
+    rh_df["hemi"] = hemi
+
+    _df = pd.concat([lh_df, rh_df], axis=0)
+
+    return _df
+
+HELPTEXT = """
+Script to parse and collate FreeSurfer stats files across subjects
+"""
+
+parser = argparse.ArgumentParser(description=HELPTEXT)
+
+parser.add_argument('--global_config', type=str, help='path to global configs for a given nipoppy dataset', required=True)
+parser.add_argument('--FS_config', type=str, help='path to freesurfer configs for a given nipoppy dataset', required=True)
+parser.add_argument('--participants_list', default=None, help='path to participants list (csv or tsv)')
+parser.add_argument('--session_id', type=str, help='session id for the participant', required=True)
+parser.add_argument('--save_dir', default='./', help='path to save_dir')
+
+args = parser.parse_args()
+
+global_config_file = args.global_config
+FS_config_file = args.FS_config
+participants_list = args.participants_list
+session_id = args.session_id
+save_dir = args.save_dir
+
+session = f"ses-{session_id}"
+
+# Read global configs
+with open(global_config_file, 'r') as f:
+    global_configs = json.load(f)
+
+# Read FS configs
+with open(FS_config_file, 'r') as f:
+    FS_configs = json.load(f)
+
+DATASET_ROOT = global_configs["DATASET_ROOT"]
+FS_version = FS_configs["version"]
+stat_configs = FS_configs["stat_configs"]
+stat_config_names = stat_configs.keys()
+
+print(f"Using dataset root: {DATASET_ROOT} and FreeSurfer version: {FS_version}")
+print(f"Using stat configs: {stat_config_names}")
+
+if participants_list is None:
+    # use doughnut
+    doughnut_file = f"{DATASET_ROOT}/scratch/raw_dicom/doughnut.csv"
+    doughnut_df = pd.read_csv(doughnut_file)
+    doughnut_df[COL_CONV_STATUS] = doughnut_df[COL_CONV_STATUS].astype(bool)
+    bids_participants = doughnut_df[(doughnut_df[COL_SESSION_MANIFEST]==session) & (doughnut_df[COL_CONV_STATUS])][COL_BIDS_ID_MANIFEST].unique()
+    n_bids_participants = len(bids_participants)
+    print(f"Running all {n_bids_participants} participants in doughnut with session: {session}")
+else:
+    # use custom list
+    bids_participants = list(pd.read_csv(participants_list)["participant_id"])
+
+    n_bids_participants = len(bids_participants)
+    print(f"Running {n_bids_participants} participants from the list with session: {session}")
+
+
+# Extract stats for each participant
+fs_output_dir = f"{DATASET_ROOT}/derivatives/freesurfer/{FS_version}/output/{session}/"
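+# Collate one wide row per participant: columns are the region/measure names,
+# values are the corresponding volumes (aseg) or average thicknesses (aparc).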
f"{DATASET_ROOT}/derivatives/freesurfer/{FS_version}/output/{session}/" + +aseg_df = pd.DataFrame() +aparc_df = pd.DataFrame() +for participant_id in bids_participants: + participant_stats_dir = f"{fs_output_dir}{participant_id}/stats/" + print(f"Extracting stats for participant: {participant_id}") + + for config_name, config_cols in stat_configs.items(): + print(f"Extracting data for config: {config_name}") + if config_name.strip() == "aseg": + try: + _df = get_aseg_stats(participant_stats_dir, config_cols) + # transpose it to wideform + names_col = config_cols[0] + values_col = config_cols[1] + cols = ["participant_id"] + list(_df[names_col].values) + vals = [participant_id] + list(_df[values_col].values) + _df_wide = pd.DataFrame(columns=cols) + _df_wide.loc[0] = vals + aseg_df = pd.concat([aseg_df,_df_wide], axis=0) + + except: + print(f"Error parsing aseg data for {participant_id}") + + elif config_name.strip() == "aparc": + try: + _df = get_aparc_stats(participant_stats_dir, config_cols) + # transpose it to wideform + names_col = config_cols[0] + values_col = config_cols[1] + cols = ["participant_id"] + list(_df["hemi"] + "." + _df[names_col]) + vals = [participant_id] + list(_df[values_col]) + _df_wide = pd.DataFrame(columns=cols) + _df_wide.loc[0] = vals + aparc_df = pd.concat([aparc_df,_df_wide], axis=0) + + except Exception as e: + print(f"Error parsing aparc data for {participant_id} with exception: {e}") + + else: + print(f"Unknown stat config: {config_name}") + +# Save configs +print(f"Saving collated stat tables at: {save_dir}") +aseg_csv = f"{save_dir}/aseg.csv" +aparc_csv = f"{save_dir}/aparc.csv" + +if len(aseg_df) > 0: + aseg_df.to_csv(aseg_csv, index=None) +else: + print("aseg_df is empty") + +if len(aparc_df) > 0: + aparc_df.to_csv(aparc_csv, index=None) +else: + print("aparc_df is empty") + diff --git a/nipoppy/extractors/freesurfer/sample_FS_configs.json b/nipoppy/extractors/freesurfer/sample_FS_configs.json new file mode 100644 index 00000000..395c124f --- /dev/null +++ b/nipoppy/extractors/freesurfer/sample_FS_configs.json @@ -0,0 +1,9 @@ +{ + "version": "v6.0.1", + "stat_configs": { + "aseg": ["StructName", "Volume_mm3"], + "aparc": ["StructName", "ThickAvg"] + }, + "run": "run-1", + "space": "fsaverage" +} \ No newline at end of file diff --git a/nipoppy/extractors/maget_brain/prepare_data.py b/nipoppy/extractors/maget_brain/prepare_data.py index 19c6fbd1..24be4969 100644 --- a/nipoppy/extractors/maget_brain/prepare_data.py +++ b/nipoppy/extractors/maget_brain/prepare_data.py @@ -47,8 +47,8 @@ def get_masked_image(img_path, mask_path, masked_img_path): fmriprep_version = global_configs["PROC_PIPELINES"]["fmriprep"]["VERSION"] maget_version = global_configs["PROC_PIPELINES"]["maget_brain"]["VERSION"] -fmriprep_dir = f"{DATASET_ROOT}/derivatives/fmriprep/v{fmriprep_version}/output/" -maget_dir = f"{DATASET_ROOT}/derivatives/maget_brain/v{maget_version}/output/" +fmriprep_dir = f"{DATASET_ROOT}/derivatives/fmriprep/{fmriprep_version}/output/" +maget_dir = f"{DATASET_ROOT}/derivatives/maget_brain/{maget_version}/output/" maget_preproc_T1w_nii_dir = f"{maget_dir}/ses-{session_id}/preproc_T1w_nii/" # Check / create maget subdirs diff --git a/nipoppy/sample_run_nipoppy.py b/nipoppy/sample_run_nipoppy.py index d308eb0c..b210b8ce 100644 --- a/nipoppy/sample_run_nipoppy.py +++ b/nipoppy/sample_run_nipoppy.py @@ -80,7 +80,7 @@ def refresh_bids_db(global_configs, session_id, pipeline, ignore_patterns, logge # bids_db_path FMRIPREP_VERSION = 
global_configs["PROC_PIPELINES"]["fmriprep"]["VERSION"] output_dir = f"{DATASET_ROOT}/derivatives/" -fmriprep_dir = f"{output_dir}/fmriprep/v{FMRIPREP_VERSION}" +fmriprep_dir = f"{output_dir}/fmriprep/{FMRIPREP_VERSION}" session_id = args.session_id session = f"ses-{session_id}" diff --git a/nipoppy/workflow/proc_pipe/fmriprep/run_fmriprep.py b/nipoppy/workflow/proc_pipe/fmriprep/run_fmriprep.py index 558040a8..f9394a78 100644 --- a/nipoppy/workflow/proc_pipe/fmriprep/run_fmriprep.py +++ b/nipoppy/workflow/proc_pipe/fmriprep/run_fmriprep.py @@ -130,10 +130,10 @@ def run(participant_id: str, bids_dir = f"{DATASET_ROOT}/bids/" proc_dir = f"{DATASET_ROOT}/proc/" - fmriprep_dir = f"{output_dir}/fmriprep/v{FMRIPREP_VERSION}" + fmriprep_dir = f"{output_dir}/fmriprep/{FMRIPREP_VERSION}" # Check and create session_dirs for freesurfer since it won't happen automatically - fs_dir = f"{output_dir}/freesurfer/v{FS_VERSION}/output/ses-{session_id}" + fs_dir = f"{output_dir}/freesurfer/{FS_VERSION}/output/ses-{session_id}" Path(fs_dir).mkdir(parents=True, exist_ok=True) # Copy FS license in the session specific output dir (to be seen by Singularity container) diff --git a/nipoppy/workflow/proc_pipe/mriqc/run_mriqc.py b/nipoppy/workflow/proc_pipe/mriqc/run_mriqc.py index 3e3bb298..04d91f78 100644 --- a/nipoppy/workflow/proc_pipe/mriqc/run_mriqc.py +++ b/nipoppy/workflow/proc_pipe/mriqc/run_mriqc.py @@ -37,11 +37,11 @@ def run(participant_id, global_configs, session_id, output_dir, modalities, bids output_dir = f"{DATASET_ROOT}/derivatives" # create output dir - mriqc_output_dir = f"{output_dir}/mriqc/v{MRIQC_VERSION}/output/" + mriqc_output_dir = f"{output_dir}/mriqc/{MRIQC_VERSION}/output/" Path(mriqc_output_dir).mkdir(parents=True, exist_ok=True) # create working dir (intermediate files) - mriqc_work_dir = f"{output_dir}/mriqc/v{MRIQC_VERSION}/work/" + mriqc_work_dir = f"{output_dir}/mriqc/{MRIQC_VERSION}/work/" Path(mriqc_work_dir).mkdir(parents=True, exist_ok=True) logger.info("Starting mriqc run...") diff --git a/nipoppy/workflow/proc_pipe/tractoflow/run_tractoflow.py b/nipoppy/workflow/proc_pipe/tractoflow/run_tractoflow.py index d4bcc6bf..fdff1cf8 100644 --- a/nipoppy/workflow/proc_pipe/tractoflow/run_tractoflow.py +++ b/nipoppy/workflow/proc_pipe/tractoflow/run_tractoflow.py @@ -445,7 +445,7 @@ def run(participant_id, global_configs, session_id, output_dir, use_bids_filter, ## build paths to files bids_dir = f"{DATASET_ROOT}/bids" - tractoflow_dir = f"{output_dir}/tractoflow/v{TRACTOFLOW_VERSION}" + tractoflow_dir = f"{output_dir}/tractoflow/{TRACTOFLOW_VERSION}" ## Copy bids_filter.json if use_bids_filter: