From 9f3d30744d9785f6b9343d1561f927a2f8dbd3c4 Mon Sep 17 00:00:00 2001
From: Nikhil Bhagwat
Date: Wed, 13 Dec 2023 23:57:11 -0500
Subject: [PATCH] removed version (i.e. v) prefix from run scripts to match
 trackers and refactored FS extractor (#188)

* moving sample_global_configs.json and tree.json to the nipoppy subdir
* fixed import paths after refactor
* fixed import paths after refactor
* refactored and cleaned up mriqc run script
* refactored and cleaned up mriqc run script
* Started tracker refactoring to use doughnut
* added a catalog function to identify new proc-participants from bagel and doughnut
* added a catalog function to identify new proc-participants from bagel and doughnut
* added custom (new subjects only) pybids layout (sqldb) generation
* fixed sessions loop and incorporated utils.load_status into run_tracker.py
* fixed import path errors and incorporated tracker.py for status flags
* fixed global var imports and logging levels
* updated sample_run_nipoppy to set log-level and prototypical mriqc run with tracker
* updated bids_tracker to match proc_pipe tracker schema
* minor fixes and comments
* fixed Pandas future warning on setting an item of incompatible dtype
* fixed another Pandas future warning on setting an item of incompatible dtype
* 1) Updated mriqc and fmriprep run scripts to bind complete bids_dir path, 2) added sqldb generation with ignore list for subjects and datatype+acq, 3) updated sample_run_nipoppy.py to show these two functionalities.
* fixed fmriprep pytest
* fixed codespell
* fixed NM filename pattern
* added functionality to custom-map participant_id to bids_id
* fixed minor codespell errors
* fixed errors from previous merge conflict resolution
* updated sample_run_nipoppy to run tractoflow, renamed and moved check_dicom_status, and fixed minor bugs and logging in run_tracker
* fixed session_id typo and optimized tracker runs
* fixed FS utils function
* added acq option to all trackers
* added acq option to all trackers (fixed merge conflict)
* fixed typos and added support for acq tag in mriqc tracker
* fixed tractoflow subject dir path issues and added INCOMPLETE status
* refactored FS extraction using brainload package
* fixed hemisphere naming
* fixed aseg extraction
* removed version (i.e. v) prefix from run scripts to match trackers
* fixed a typo and removed legacy FS extractor script
---
 nipoppy/extractors/fmriprep/run_FC.py          |   4 +-
 .../freesurfer/run_structural_measures.py      | 159 ++++++++++++++++++
 .../freesurfer/sample_FS_configs.json          |   9 +
 .../extractors/maget_brain/prepare_data.py     |   4 +-
 nipoppy/sample_run_nipoppy.py                  |   2 +-
 .../proc_pipe/fmriprep/run_fmriprep.py         |   4 +-
 nipoppy/workflow/proc_pipe/mriqc/run_mriqc.py  |   4 +-
 .../proc_pipe/tractoflow/run_tractoflow.py     |   2 +-
 8 files changed, 178 insertions(+), 10 deletions(-)
 create mode 100644 nipoppy/extractors/freesurfer/run_structural_measures.py
 create mode 100644 nipoppy/extractors/freesurfer/sample_FS_configs.json

diff --git a/nipoppy/extractors/fmriprep/run_FC.py b/nipoppy/extractors/fmriprep/run_FC.py
index 540be208..d77a5121 100644
--- a/nipoppy/extractors/fmriprep/run_FC.py
+++ b/nipoppy/extractors/fmriprep/run_FC.py
@@ -222,8 +222,8 @@ def run(participant_id: str,
     if output_dir is None:
         output_dir = f"{DATASET_ROOT}/derivatives/"
 
-    fmriprep_dir = f"{DATASET_ROOT}/derivatives/fmriprep/v{FMRIPREP_VERSION}/output"
-    DKT_dir = f"{DATASET_ROOT}/derivatives/networks/v0.9.0/output"
+    fmriprep_dir = f"{DATASET_ROOT}/derivatives/fmriprep/{FMRIPREP_VERSION}/output"
+    DKT_dir = f"{DATASET_ROOT}/derivatives/networks/0.9.0/output"
     FC_dir = f"{output_dir}/FC"
 
     # assess FC
diff --git a/nipoppy/extractors/freesurfer/run_structural_measures.py b/nipoppy/extractors/freesurfer/run_structural_measures.py
new file mode 100644
index 00000000..3034f98d
--- /dev/null
+++ b/nipoppy/extractors/freesurfer/run_structural_measures.py
@@ -0,0 +1,159 @@
+import numpy as np
+import pandas as pd
+import json
+import os
+import glob
+import argparse
+import brainload as bl
+from nipoppy.workflow.utils import (
+    COL_CONV_STATUS,
+    COL_SESSION_MANIFEST,
+    COL_BIDS_ID_MANIFEST,
+)
+
+# Globals
+# Brainload has two separate functions to extract aseg data.
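+# bl.stat() parses a FreeSurfer stats file into a dict with "table_data",
+# "table_column_headers", and "measures" keys (used throughout below).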
+measure_column_names = ["StructName", "Structure", "Description", "Volume_mm3", "unit"]
+aseg_cols = ["StructName", "Volume_mm3"]
+dkt_cols = ["StructName", "ThickAvg"]
+
+def get_aseg_stats(participant_stats_dir, aseg_cols):
+    aseg_stats = bl.stat(f'{participant_stats_dir}/aseg.stats')
+    table_df = pd.DataFrame(aseg_stats["table_data"], columns=aseg_stats["table_column_headers"])[aseg_cols]
+    measure_df = pd.DataFrame(data=aseg_stats["measures"], columns=measure_column_names)[aseg_cols]
+    _df = pd.concat([table_df, measure_df], axis=0)
+    return _df
+
+def get_aparc_stats(participant_stats_dir, aparc_cols, parcel="aparc.DKTatlas"):
+    hemi = "lh"
+    stat_file = f"{hemi}.{parcel}.stats"
+    lh_dkt_stats = bl.stat(f'{participant_stats_dir}/{stat_file}')
+    lh_df = pd.DataFrame(lh_dkt_stats["table_data"], columns=lh_dkt_stats["table_column_headers"])[aparc_cols]
+    lh_df["hemi"] = hemi
+
+    hemi = "rh"
+    stat_file = f"{hemi}.{parcel}.stats"
+    rh_dkt_stats = bl.stat(f'{participant_stats_dir}/{stat_file}')
+    rh_df = pd.DataFrame(rh_dkt_stats["table_data"], columns=rh_dkt_stats["table_column_headers"])[aparc_cols]
+    rh_df["hemi"] = hemi
+
+    _df = pd.concat([lh_df, rh_df], axis=0)
+
+    return _df
+
+HELPTEXT = """
+Script to parse and collate FreeSurfer stats files across subjects
+"""
+
+parser = argparse.ArgumentParser(description=HELPTEXT)
+
+parser.add_argument('--global_config', type=str, help='path to global configs for a given nipoppy dataset', required=True)
+parser.add_argument('--FS_config', type=str, help='path to freesurfer configs for a given nipoppy dataset', required=True)
+parser.add_argument('--participants_list', default=None, help='path to participants list (csv or tsv)')
+parser.add_argument('--session_id', type=str, help='session id for the participant', required=True)
+parser.add_argument('--save_dir', default='./', help='path to save_dir')
+
+args = parser.parse_args()
+
+global_config_file = args.global_config
+FS_config_file = args.FS_config
+participants_list = args.participants_list
+session_id = args.session_id
+save_dir = args.save_dir
+
+session = f"ses-{session_id}"
+
+# Read global configs
+with open(global_config_file, 'r') as f:
+    global_configs = json.load(f)
+
+# Read FS configs
+with open(FS_config_file, 'r') as f:
+    FS_configs = json.load(f)
+
+DATASET_ROOT = global_configs["DATASET_ROOT"]
+FS_version = FS_configs["version"]
+stat_configs = FS_configs["stat_configs"]
+stat_config_names = stat_configs.keys()
+
+print(f"Using dataset root: {DATASET_ROOT} and FreeSurfer version: {FS_version}")
+print(f"Using stat configs: {stat_config_names}")
+
+if participants_list is None:
+    # use doughnut
+    doughnut_file = f"{DATASET_ROOT}/scratch/raw_dicom/doughnut.csv"
+    doughnut_df = pd.read_csv(doughnut_file)
+    doughnut_df[COL_CONV_STATUS] = doughnut_df[COL_CONV_STATUS].astype(bool)
+    bids_participants = doughnut_df[(doughnut_df[COL_SESSION_MANIFEST]==session) & (doughnut_df[COL_CONV_STATUS])][COL_BIDS_ID_MANIFEST].unique()
+    n_bids_participants = len(bids_participants)
+    print(f"Running all {n_bids_participants} participants in doughnut with session: {session}")
+else:
+    # use custom list
+    bids_participants = list(pd.read_csv(participants_list)["participant_id"])
+
+    n_bids_participants = len(bids_participants)
+    print(f"Running {n_bids_participants} participants from the list with session: {session}")
+
+
+# Extract stats for each participant
+fs_output_dir = f"{DATASET_ROOT}/derivatives/freesurfer/{FS_version}/output/{session}/"
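+# Collate one wide row per participant: columns are the region/measure names,
+# values are the corresponding volumes (aseg) or average thicknesses (aparc).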
f"{DATASET_ROOT}/derivatives/freesurfer/{FS_version}/output/{session}/" + +aseg_df = pd.DataFrame() +aparc_df = pd.DataFrame() +for participant_id in bids_participants: + participant_stats_dir = f"{fs_output_dir}{participant_id}/stats/" + print(f"Extracting stats for participant: {participant_id}") + + for config_name, config_cols in stat_configs.items(): + print(f"Extracting data for config: {config_name}") + if config_name.strip() == "aseg": + try: + _df = get_aseg_stats(participant_stats_dir, config_cols) + # transpose it to wideform + names_col = config_cols[0] + values_col = config_cols[1] + cols = ["participant_id"] + list(_df[names_col].values) + vals = [participant_id] + list(_df[values_col].values) + _df_wide = pd.DataFrame(columns=cols) + _df_wide.loc[0] = vals + aseg_df = pd.concat([aseg_df,_df_wide], axis=0) + + except: + print(f"Error parsing aseg data for {participant_id}") + + elif config_name.strip() == "aparc": + try: + _df = get_aparc_stats(participant_stats_dir, config_cols) + # transpose it to wideform + names_col = config_cols[0] + values_col = config_cols[1] + cols = ["participant_id"] + list(_df["hemi"] + "." + _df[names_col]) + vals = [participant_id] + list(_df[values_col]) + _df_wide = pd.DataFrame(columns=cols) + _df_wide.loc[0] = vals + aparc_df = pd.concat([aparc_df,_df_wide], axis=0) + + except Exception as e: + print(f"Error parsing aparc data for {participant_id} with exception: {e}") + + else: + print(f"Unknown stat config: {config_name}") + +# Save configs +print(f"Saving collated stat tables at: {save_dir}") +aseg_csv = f"{save_dir}/aseg.csv" +aparc_csv = f"{save_dir}/aparc.csv" + +if len(aseg_df) > 0: + aseg_df.to_csv(aseg_csv, index=None) +else: + print("aseg_df is empty") + +if len(aparc_df) > 0: + aparc_df.to_csv(aparc_csv, index=None) +else: + print("aparc_df is empty") + diff --git a/nipoppy/extractors/freesurfer/sample_FS_configs.json b/nipoppy/extractors/freesurfer/sample_FS_configs.json new file mode 100644 index 00000000..395c124f --- /dev/null +++ b/nipoppy/extractors/freesurfer/sample_FS_configs.json @@ -0,0 +1,9 @@ +{ + "version": "v6.0.1", + "stat_configs": { + "aseg": ["StructName", "Volume_mm3"], + "aparc": ["StructName", "ThickAvg"] + }, + "run": "run-1", + "space": "fsaverage" +} \ No newline at end of file diff --git a/nipoppy/extractors/maget_brain/prepare_data.py b/nipoppy/extractors/maget_brain/prepare_data.py index 19c6fbd1..24be4969 100644 --- a/nipoppy/extractors/maget_brain/prepare_data.py +++ b/nipoppy/extractors/maget_brain/prepare_data.py @@ -47,8 +47,8 @@ def get_masked_image(img_path, mask_path, masked_img_path): fmriprep_version = global_configs["PROC_PIPELINES"]["fmriprep"]["VERSION"] maget_version = global_configs["PROC_PIPELINES"]["maget_brain"]["VERSION"] -fmriprep_dir = f"{DATASET_ROOT}/derivatives/fmriprep/v{fmriprep_version}/output/" -maget_dir = f"{DATASET_ROOT}/derivatives/maget_brain/v{maget_version}/output/" +fmriprep_dir = f"{DATASET_ROOT}/derivatives/fmriprep/{fmriprep_version}/output/" +maget_dir = f"{DATASET_ROOT}/derivatives/maget_brain/{maget_version}/output/" maget_preproc_T1w_nii_dir = f"{maget_dir}/ses-{session_id}/preproc_T1w_nii/" # Check / create maget subdirs diff --git a/nipoppy/sample_run_nipoppy.py b/nipoppy/sample_run_nipoppy.py index d308eb0c..b210b8ce 100644 --- a/nipoppy/sample_run_nipoppy.py +++ b/nipoppy/sample_run_nipoppy.py @@ -80,7 +80,7 @@ def refresh_bids_db(global_configs, session_id, pipeline, ignore_patterns, logge # bids_db_path FMRIPREP_VERSION = 
global_configs["PROC_PIPELINES"]["fmriprep"]["VERSION"] output_dir = f"{DATASET_ROOT}/derivatives/" -fmriprep_dir = f"{output_dir}/fmriprep/v{FMRIPREP_VERSION}" +fmriprep_dir = f"{output_dir}/fmriprep/{FMRIPREP_VERSION}" session_id = args.session_id session = f"ses-{session_id}" diff --git a/nipoppy/workflow/proc_pipe/fmriprep/run_fmriprep.py b/nipoppy/workflow/proc_pipe/fmriprep/run_fmriprep.py index 558040a8..f9394a78 100644 --- a/nipoppy/workflow/proc_pipe/fmriprep/run_fmriprep.py +++ b/nipoppy/workflow/proc_pipe/fmriprep/run_fmriprep.py @@ -130,10 +130,10 @@ def run(participant_id: str, bids_dir = f"{DATASET_ROOT}/bids/" proc_dir = f"{DATASET_ROOT}/proc/" - fmriprep_dir = f"{output_dir}/fmriprep/v{FMRIPREP_VERSION}" + fmriprep_dir = f"{output_dir}/fmriprep/{FMRIPREP_VERSION}" # Check and create session_dirs for freesurfer since it won't happen automatically - fs_dir = f"{output_dir}/freesurfer/v{FS_VERSION}/output/ses-{session_id}" + fs_dir = f"{output_dir}/freesurfer/{FS_VERSION}/output/ses-{session_id}" Path(fs_dir).mkdir(parents=True, exist_ok=True) # Copy FS license in the session specific output dir (to be seen by Singularity container) diff --git a/nipoppy/workflow/proc_pipe/mriqc/run_mriqc.py b/nipoppy/workflow/proc_pipe/mriqc/run_mriqc.py index 3e3bb298..04d91f78 100644 --- a/nipoppy/workflow/proc_pipe/mriqc/run_mriqc.py +++ b/nipoppy/workflow/proc_pipe/mriqc/run_mriqc.py @@ -37,11 +37,11 @@ def run(participant_id, global_configs, session_id, output_dir, modalities, bids output_dir = f"{DATASET_ROOT}/derivatives" # create output dir - mriqc_output_dir = f"{output_dir}/mriqc/v{MRIQC_VERSION}/output/" + mriqc_output_dir = f"{output_dir}/mriqc/{MRIQC_VERSION}/output/" Path(mriqc_output_dir).mkdir(parents=True, exist_ok=True) # create working dir (intermediate files) - mriqc_work_dir = f"{output_dir}/mriqc/v{MRIQC_VERSION}/work/" + mriqc_work_dir = f"{output_dir}/mriqc/{MRIQC_VERSION}/work/" Path(mriqc_work_dir).mkdir(parents=True, exist_ok=True) logger.info("Starting mriqc run...") diff --git a/nipoppy/workflow/proc_pipe/tractoflow/run_tractoflow.py b/nipoppy/workflow/proc_pipe/tractoflow/run_tractoflow.py index d4bcc6bf..fdff1cf8 100644 --- a/nipoppy/workflow/proc_pipe/tractoflow/run_tractoflow.py +++ b/nipoppy/workflow/proc_pipe/tractoflow/run_tractoflow.py @@ -445,7 +445,7 @@ def run(participant_id, global_configs, session_id, output_dir, use_bids_filter, ## build paths to files bids_dir = f"{DATASET_ROOT}/bids" - tractoflow_dir = f"{output_dir}/tractoflow/v{TRACTOFLOW_VERSION}" + tractoflow_dir = f"{output_dir}/tractoflow/{TRACTOFLOW_VERSION}" ## Copy bids_filter.json if use_bids_filter: