removed version (i.e. v) prefix from run scripts to match trackers and refactored FS extractor (nipoppy#188)

* moving sample_global_configs.json and tree.json to the nipoppy subdir

* fixed import paths after refactor

* fixed import paths after refactor

* refactored and cleaned up mriqc run script

* refactored and cleaned up mriqc run script

* Started tracker refactoring to use doughnut

* added a catalog function to identify new proc-participants from bagel and doughnut

* added a catalog function to identify new proc-participants from bagel and doughnut

* added custom (new subjects only) pybids layout (sqldb) generation

* fixed sessions loop and incorporated utils.load_status into run_tracker.py

* fixed import path errors and incorporated tracker.py for status flags

* fixed global var imports and logging levels

* updated sample_run_nipoppy to set log-level and added a prototypical mriqc run with tracker

* updated bids_tracker to match proc_pipe tracker schema

* minor fixes and comments

* fixed Pandas future warning on setting an item of incompatible dtype

* fixed another Pandas future warning on setting an item of incompatible dtype

* 1) Updated mriqc and fmriprep run scripts to bind the complete bids_dir path, 2) added sqldb generation with an ignore list for subjects and datatype+acq (see the pybids sketch after the sample_run_nipoppy.py diff below), 3) updated sample_run_nipoppy.py to demonstrate these two functionalities.

* fixed fmriprep pytest

* fixed codespell

* fixed NM filename pattern

* added functionality to custom map participant_id to bids_id

* fixed minor codespell errors

* fixed errors from previous merge conflict resolution

* updated sample_run_nipoppy to run tractoflow, renamed and moved check_dicom_status, and fixed minor bugs and logging in run_tracker

* fixed session_id typo and optimized tracker runs

* fixed FS utils function

* added acq option to all trackers

* added acq option to all trackers (fixed merge conflict)

* fixed typos and added support for acq tag in mriqc tracker

* fixed tractoflow subject dir path issues and added INCOMPLETE status

* refactored FS extraction using brainload package

* fixed hemisphere naming

* fixed aseg extraction

* remove version (i.e. v) prefix from run scripts to match trackers

* fixed a typo and removed legacy FS extractor script
nikhil153 authored Dec 14, 2023
1 parent f0fa91c commit 9f3d307
Showing 8 changed files with 176 additions and 10 deletions.
4 changes: 2 additions & 2 deletions nipoppy/extractors/fmriprep/run_FC.py
@@ -222,8 +222,8 @@ def run(participant_id: str,
     if output_dir is None:
         output_dir = f"{DATASET_ROOT}/derivatives/"
 
-    fmriprep_dir = f"{DATASET_ROOT}/derivatives/fmriprep/v{FMRIPREP_VERSION}/output"
-    DKT_dir = f"{DATASET_ROOT}/derivatives/networks/v0.9.0/output"
+    fmriprep_dir = f"{DATASET_ROOT}/derivatives/fmriprep/{FMRIPREP_VERSION}/output"
+    DKT_dir = f"{DATASET_ROOT}/derivatives/networks/0.9.0/output"
     FC_dir = f"{output_dir}/FC"
 
     # assess FC
157 changes: 157 additions & 0 deletions nipoppy/extractors/freesurfer/run_structural_measures.py
@@ -0,0 +1,157 @@
import numpy as np
import pandas as pd
import json
import os
import glob
import argparse
import brainload as bl
from nipoppy.workflow.utils import (
    COL_CONV_STATUS,
    COL_SESSION_MANIFEST,
    COL_BIDS_ID_MANIFEST,
)

# Globals
# brainload returns aseg data in two parts: a stats table and a set of global measures.
measure_column_names = ["StructName", "Structure", "Description", "Volume_mm3", "unit"]
aseg_cols = ["StructName", "Volume_mm3"]
dkt_cols = ["StructName", "ThickAvg"]

def get_aseg_stats(participant_stats_dir, aseg_cols):
    # collate the subcortical table and the global measures into a single long-form frame
    aseg_stats = bl.stat(f'{participant_stats_dir}/aseg.stats')
    table_df = pd.DataFrame(aseg_stats["table_data"], columns=aseg_stats["table_column_headers"])[aseg_cols]
    measure_df = pd.DataFrame(data=aseg_stats["measures"], columns=measure_column_names)[aseg_cols]
    _df = pd.concat([table_df, measure_df], axis=0)
    return _df

def get_aparc_stats(participant_stats_dir, aparc_cols, parcel="aparc.DKTatlas"):
    hemi = "lh"
    stat_file = f"{hemi}.{parcel}.stats"
    lh_dkt_stats = bl.stat(f'{participant_stats_dir}/{stat_file}')
    lh_df = pd.DataFrame(lh_dkt_stats["table_data"], columns=lh_dkt_stats["table_column_headers"])[aparc_cols]
    lh_df["hemi"] = hemi

    hemi = "rh"
    stat_file = f"{hemi}.{parcel}.stats"
    rh_dkt_stats = bl.stat(f'{participant_stats_dir}/{stat_file}')
    rh_df = pd.DataFrame(rh_dkt_stats["table_data"], columns=rh_dkt_stats["table_column_headers"])[aparc_cols]
    rh_df["hemi"] = hemi

    _df = pd.concat([lh_df, rh_df], axis=0)

    return _df

HELPTEXT = """
Script to parse and collate FreeSurfer stats files across subjects
"""

parser = argparse.ArgumentParser(description=HELPTEXT)

parser.add_argument('--global_config', type=str, help='path to global configs for a given nipoppy dataset', required=True)
parser.add_argument('--FS_config', type=str, help='path to FreeSurfer configs for a given nipoppy dataset', required=True)
parser.add_argument('--participants_list', default=None, help='path to participants list (csv or tsv)')
parser.add_argument('--session_id', type=str, help='session id for the participant', required=True)
parser.add_argument('--save_dir', default='./', help='path to save_dir')

args = parser.parse_args()

global_config_file = args.global_config
FS_config_file = args.FS_config
participants_list = args.participants_list
session_id = args.session_id
save_dir = args.save_dir

session = f"ses-{session_id}"

# Read global configs
with open(global_config_file, 'r') as f:
    global_configs = json.load(f)

# Read FS configs
with open(FS_config_file, 'r') as f:
    FS_configs = json.load(f)

DATASET_ROOT = global_configs["DATASET_ROOT"]
FS_version = FS_configs["version"]
stat_configs = FS_configs["stat_configs"]
stat_config_names = stat_configs.keys()

print(f"Using dataset root: {DATASET_ROOT} and FreeSurfer version: {FS_version}")
print(f"Using stat configs: {stat_config_names}")

if participants_list is None:
    # use doughnut to select converted participants for this session
    doughnut_file = f"{DATASET_ROOT}/scratch/raw_dicom/doughnut.csv"
    doughnut_df = pd.read_csv(doughnut_file)
    doughnut_df[COL_CONV_STATUS] = doughnut_df[COL_CONV_STATUS].astype(bool)
    bids_participants = doughnut_df[(doughnut_df[COL_SESSION_MANIFEST] == session) & (doughnut_df[COL_CONV_STATUS])][COL_BIDS_ID_MANIFEST].unique()
    n_bids_participants = len(bids_participants)
    print(f"Running all {n_bids_participants} participants in doughnut with session: {session}")
else:
    # use custom list
    bids_participants = list(pd.read_csv(participants_list)["participant_id"])
    n_bids_participants = len(bids_participants)
    print(f"Running {n_bids_participants} participants from the list with session: {session}")


# Extract stats for each participant
fs_output_dir = f"{DATASET_ROOT}/derivatives/freesurfer/{FS_version}/output/{session}/"

aseg_df = pd.DataFrame()
aparc_df = pd.DataFrame()
for participant_id in bids_participants:
    participant_stats_dir = f"{fs_output_dir}{participant_id}/stats/"
    print(f"Extracting stats for participant: {participant_id}")

    for config_name, config_cols in stat_configs.items():
        print(f"Extracting data for config: {config_name}")
        if config_name.strip() == "aseg":
            try:
                _df = get_aseg_stats(participant_stats_dir, config_cols)
                # transpose to wide form: one row per participant, one column per structure
                names_col = config_cols[0]
                values_col = config_cols[1]
                cols = ["participant_id"] + list(_df[names_col].values)
                vals = [participant_id] + list(_df[values_col].values)
                _df_wide = pd.DataFrame(columns=cols)
                _df_wide.loc[0] = vals
                aseg_df = pd.concat([aseg_df, _df_wide], axis=0)

            except Exception as e:
                print(f"Error parsing aseg data for {participant_id} with exception: {e}")

        elif config_name.strip() == "aparc":
            try:
                _df = get_aparc_stats(participant_stats_dir, config_cols)
                # transpose to wide form: one row per participant, columns prefixed by hemisphere
                names_col = config_cols[0]
                values_col = config_cols[1]
                cols = ["participant_id"] + list(_df["hemi"] + "." + _df[names_col])
                vals = [participant_id] + list(_df[values_col])
                _df_wide = pd.DataFrame(columns=cols)
                _df_wide.loc[0] = vals
                aparc_df = pd.concat([aparc_df, _df_wide], axis=0)

            except Exception as e:
                print(f"Error parsing aparc data for {participant_id} with exception: {e}")

        else:
            print(f"Unknown stat config: {config_name}")

# Save collated tables
print(f"Saving collated stat tables at: {save_dir}")
aseg_csv = f"{save_dir}/aseg.csv"
aparc_csv = f"{save_dir}/aparc.csv"

if len(aseg_df) > 0:
    aseg_df.to_csv(aseg_csv, index=None)
else:
    print("aseg_df is empty")

if len(aparc_df) > 0:
    aparc_df.to_csv(aparc_csv, index=None)
else:
    print("aparc_df is empty")

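For reference, a hypothetical invocation of the new extractor (the global config path and session id are illustrative placeholders; the sample FS config shown in the next file can serve as --FS_config):

python nipoppy/extractors/freesurfer/run_structural_measures.py --global_config <DATASET_ROOT>/proc/global_configs.json --FS_config nipoppy/extractors/freesurfer/sample_FS_configs.json --session_id 01 --save_dir ./

If --participants_list is omitted, the script falls back to the doughnut file to select converted participants for the session, and it writes the collated tables to aseg.csv and aparc.csv under --save_dir.
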
9 changes: 9 additions & 0 deletions nipoppy/extractors/freesurfer/sample_FS_configs.json
@@ -0,0 +1,9 @@
{
    "version": "v6.0.1",
    "stat_configs": {
        "aseg": ["StructName", "Volume_mm3"],
        "aparc": ["StructName", "ThickAvg"]
    },
    "run": "run-1",
    "space": "fsaverage"
}
4 changes: 2 additions & 2 deletions nipoppy/extractors/maget_brain/prepare_data.py
@@ -47,8 +47,8 @@ def get_masked_image(img_path, mask_path, masked_img_path):
 fmriprep_version = global_configs["PROC_PIPELINES"]["fmriprep"]["VERSION"]
 maget_version = global_configs["PROC_PIPELINES"]["maget_brain"]["VERSION"]
 
-fmriprep_dir = f"{DATASET_ROOT}/derivatives/fmriprep/v{fmriprep_version}/output/"
-maget_dir = f"{DATASET_ROOT}/derivatives/maget_brain/v{maget_version}/output/"
+fmriprep_dir = f"{DATASET_ROOT}/derivatives/fmriprep/{fmriprep_version}/output/"
+maget_dir = f"{DATASET_ROOT}/derivatives/maget_brain/{maget_version}/output/"
 maget_preproc_T1w_nii_dir = f"{maget_dir}/ses-{session_id}/preproc_T1w_nii/"
 
 # Check / create maget subdirs
2 changes: 1 addition & 1 deletion nipoppy/sample_run_nipoppy.py
@@ -80,7 +80,7 @@ def refresh_bids_db(global_configs, session_id, pipeline, ignore_patterns, logger
 # bids_db_path
 FMRIPREP_VERSION = global_configs["PROC_PIPELINES"]["fmriprep"]["VERSION"]
 output_dir = f"{DATASET_ROOT}/derivatives/"
-fmriprep_dir = f"{output_dir}/fmriprep/v{FMRIPREP_VERSION}"
+fmriprep_dir = f"{output_dir}/fmriprep/{FMRIPREP_VERSION}"
 
 session_id = args.session_id
 session = f"ses-{session_id}"
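Context for the refresh_bids_db hunk above: the commit message mentions generating a pybids SQL database with an ignore list for subjects and datatype+acq patterns. Below is a minimal, hypothetical sketch of that idea using the public pybids API; the paths, patterns, and variable names are illustrative assumptions, not the repository's actual implementation.

import re
from bids import BIDSLayout
from bids.layout import BIDSLayoutIndexer

# Hypothetical paths; the real scripts derive these from the dataset's global configs.
bids_dir = "/data/my_dataset/bids"
bids_db_path = "/data/my_dataset/proc/bids_db"

# Hypothetical ignore list: one excluded subject and one datatype+acq pattern.
ignore_patterns = [
    re.compile(r"/sub-XX01/"),       # skip a specific subject (example)
    re.compile(r"anat/.*acq-NM"),    # skip an acquisition within a datatype (example)
]

# Index the dataset into a SQL-backed layout, skipping the ignored patterns.
indexer = BIDSLayoutIndexer(validate=False, ignore=ignore_patterns)
layout = BIDSLayout(bids_dir, indexer=indexer, database_path=bids_db_path, reset_database=True)
print(layout)

Applying the ignore list at indexing time keeps excluded subjects and acquisitions out of the SQL database entirely, so downstream queries against the layout do not need to re-filter them.
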
4 changes: 2 additions & 2 deletions nipoppy/workflow/proc_pipe/fmriprep/run_fmriprep.py
@@ -130,10 +130,10 @@ def run(participant_id: str,
 
     bids_dir = f"{DATASET_ROOT}/bids/"
     proc_dir = f"{DATASET_ROOT}/proc/"
-    fmriprep_dir = f"{output_dir}/fmriprep/v{FMRIPREP_VERSION}"
+    fmriprep_dir = f"{output_dir}/fmriprep/{FMRIPREP_VERSION}"
 
     # Check and create session_dirs for freesurfer since it won't happen automatically
-    fs_dir = f"{output_dir}/freesurfer/v{FS_VERSION}/output/ses-{session_id}"
+    fs_dir = f"{output_dir}/freesurfer/{FS_VERSION}/output/ses-{session_id}"
     Path(fs_dir).mkdir(parents=True, exist_ok=True)
 
     # Copy FS license in the session specific output dir (to be seen by Singularity container)
4 changes: 2 additions & 2 deletions nipoppy/workflow/proc_pipe/mriqc/run_mriqc.py
@@ -37,11 +37,11 @@ def run(participant_id, global_configs, session_id, output_dir, modalities, bids
     output_dir = f"{DATASET_ROOT}/derivatives"
 
     # create output dir
-    mriqc_output_dir = f"{output_dir}/mriqc/v{MRIQC_VERSION}/output/"
+    mriqc_output_dir = f"{output_dir}/mriqc/{MRIQC_VERSION}/output/"
     Path(mriqc_output_dir).mkdir(parents=True, exist_ok=True)
 
     # create working dir (intermediate files)
-    mriqc_work_dir = f"{output_dir}/mriqc/v{MRIQC_VERSION}/work/"
+    mriqc_work_dir = f"{output_dir}/mriqc/{MRIQC_VERSION}/work/"
     Path(mriqc_work_dir).mkdir(parents=True, exist_ok=True)
 
     logger.info("Starting mriqc run...")
2 changes: 1 addition & 1 deletion nipoppy/workflow/proc_pipe/tractoflow/run_tractoflow.py
@@ -445,7 +445,7 @@ def run(participant_id, global_configs, session_id, output_dir, use_bids_filter,
 
     ## build paths to files
     bids_dir = f"{DATASET_ROOT}/bids"
-    tractoflow_dir = f"{output_dir}/tractoflow/v{TRACTOFLOW_VERSION}"
+    tractoflow_dir = f"{output_dir}/tractoflow/{TRACTOFLOW_VERSION}"
 
     ## Copy bids_filter.json
     if use_bids_filter:
