+
+ {# Carpet/line plots for pre- and post-regression data, concatenated across runs. #}
+ {% include "concatenated_task_static_plots.html.jinja" %}
+
{% for run_dict in task_files %}
- {% set task = run_dict["task"] %}
- {% set run = run_dict["run"] %}
+ {% set key = run_dict["key"] %}
{% set registration_files = run_dict["registration_files"] %}
{% set registration_titles = run_dict["registration_titles"] %}
{% set bold = run_dict["bold"] %}
@@ -38,7 +42,7 @@
Add the task name for the next few rows.
#}
- task-{{ task }} run-{{ run }}:
+ {{ key }}:
{# Full rows for registration files #}
diff --git a/xcp_d/interfaces/execsummary.py b/xcp_d/interfaces/execsummary.py
index 6f8bb3c42..2dccd93c6 100644
--- a/xcp_d/interfaces/execsummary.py
+++ b/xcp_d/interfaces/execsummary.py
@@ -4,6 +4,8 @@
import re
from pathlib import Path
+import numpy as np
+import pandas as pd
from bids.layout import BIDSLayout, Query
from bs4 import BeautifulSoup
from jinja2 import Environment, FileSystemLoader, Markup
@@ -144,6 +146,24 @@ def collect_inputs(self):
self.structural_files_ = structural_files
+ # Collect figures for concatenated resting-state data (if any)
+ concatenated_rest_files = {}
+
+ query = {
+ "subject": self.subject_id,
+ "task": "rest",
+ "run": Query.NONE,
+ "desc": "preprocESQC",
+ "suffix": "bold",
+ "extension": ".svg",
+ }
+ concatenated_rest_files["preproc_carpet"] = self._get_bids_file(query)
+
+ query["desc"] = "postprocESQC"
+ concatenated_rest_files["postproc_carpet"] = self._get_bids_file(query)
+
+ self.concatenated_rest_files_ = concatenated_rest_files
+
# Determine the unique entity-sets for the task data.
postproc_files = self.layout.get(
subject=self.subject_id,
@@ -166,34 +186,42 @@ def collect_inputs(self):
task_entity_sets = []
for entity_set in unique_entity_sets:
for entity in ORDERING:
- entity_set[entity] = entity_set.get(entity, Query.NONE)
+ entity_set[entity] = entity_set.get(entity, np.nan)
task_entity_sets.append(entity_set)
- concatenated_rest_files = {}
+ # Now sort the entity sets by each entity
+ task_entity_sets = pd.DataFrame(task_entity_sets)
+ task_entity_sets = task_entity_sets.sort_values(by=task_entity_sets.columns.tolist())
+ task_entity_sets = task_entity_sets.fillna(Query.NONE)
- query = {
- "subject": self.subject_id,
- "task": "rest",
- "run": Query.NONE,
- "desc": "preprocESQC",
- "suffix": "bold",
- "extension": ".svg",
- }
- concatenated_rest_files["preproc_carpet"] = self._get_bids_file(query)
-
- query["desc"] = "postcarpetplot"
- concatenated_rest_files["postproc_carpet"] = self._get_bids_file(query)
+ # Extract entities with variability
+ # This lets us name the sections based on multiple entities (not just task and run)
+ nunique = task_entity_sets.nunique()
+ nunique.loc["task"] = 2 # ensure we keep task
+ nunique.loc["run"] = 2 # ensure we keep run
+ cols_to_drop = nunique[nunique == 1].index
+ task_entity_namer = task_entity_sets.drop(cols_to_drop, axis=1)
- self.concatenated_rest_files_ = concatenated_rest_files
+ # Convert back to dictionary
+ task_entity_sets = task_entity_sets.to_dict(orient="records")
+ task_entity_namer = task_entity_namer.to_dict(orient="records")
task_files = []
- for task_entity_set in task_entity_sets:
- task_file_figures = task_entity_set.copy()
- task_file_figures[
- "key"
- ] = f"task-{task_entity_set['task']}_run-{task_entity_set.get('run', 0)}"
+ for i_set, task_entity_set in enumerate(task_entity_sets):
+ task_file_figures = {}
+
+ # Convert any floats in the name to ints
+ temp_dict = {}
+ for k, v in task_entity_namer[i_set].items():
+ try:
+ temp_dict[k] = int(v)
+ except (ValueError, TypeError):
+ temp_dict[k] = v
+
+ # String used for subsection headers
+ task_file_figures["key"] = " ".join([f"{k}-{v}" for k, v in temp_dict.items()])
query = {
"subject": self.subject_id,
@@ -222,10 +250,12 @@ def collect_inputs(self):
task_file_figures["registration_files"].append(found_file)
- task_files.append(task_file_figures)
+ # If there is no mean BOLD figure, then the "run" was made by the concatenation workflow.
+ # Skip the concatenated resting-state scan, since it has its own section.
+ if query["task"] == "rest" and not task_file_figures["bold"]:
+ continue
- # Sort the files by the desired key
- task_files = sorted(task_files, key=lambda d: d["key"])
+ task_files.append(task_file_figures)
self.task_files_ = task_files
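
For context, the sorting-and-naming logic above boils down to the following minimal, standalone sketch. The entity sets are made up for illustration (the real ones come from the BIDS layout), and "acq" stands in for any entity that never varies and therefore gets dropped from the section headers:

import pandas as pd

# Hypothetical entity sets (illustration only).
entity_sets = [
    {"task": "rest", "acq": "singleband", "run": 2.0},
    {"task": "rest", "acq": "singleband", "run": 1.0},
]

df = pd.DataFrame(entity_sets)
df = df.sort_values(by=df.columns.tolist())

# Drop entities with no variability, but always keep task and run.
nunique = df.nunique()
nunique.loc["task"] = 2
nunique.loc["run"] = 2
namer = df.drop(nunique[nunique == 1].index, axis=1)

for record in namer.to_dict(orient="records"):
    parts = []
    for k, v in record.items():
        try:
            parts.append(f"{k}-{int(v)}")  # floats from NaN-padded columns become ints
        except (ValueError, TypeError):
            parts.append(f"{k}-{v}")
    print(" ".join(parts))  # prints "task-rest run-1", then "task-rest run-2"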
diff --git a/xcp_d/interfaces/plotting.py b/xcp_d/interfaces/plotting.py
index e0bb072d3..0d1d39322 100644
--- a/xcp_d/interfaces/plotting.py
+++ b/xcp_d/interfaces/plotting.py
@@ -277,7 +277,7 @@ class QCPlots(SimpleInterface):
output_spec = _QCPlotsOutputSpec
def _run_interface(self, runtime):
- # Load confound matrix and load motion with motion filtering
+ # Load the confound matrix and the motion parameters, without motion filtering
confounds_df = pd.read_table(self.inputs.fmriprep_confounds_file)
preproc_motion_df = load_motion(
confounds_df.copy(),
@@ -297,6 +297,7 @@ def _run_interface(self, runtime):
censoring_df = pd.read_table(self.inputs.temporal_mask)
tmask_arr = censoring_df["framewise_displacement"].values
num_censored_volumes = int(tmask_arr.sum())
+ num_retained_volumes = int((tmask_arr == 0).sum())
# Apply temporal mask to interpolated/full data
rmsd_censored = rmsd[tmask_arr == 0]
@@ -382,40 +383,57 @@ def _run_interface(self, runtime):
# Calculate QC measures
mean_fd = np.mean(preproc_fd_timeseries)
- mean_rms = np.nanmean(rmsd_censored) # first value can be NaN if no dummy scans
+ mean_fd_post_censoring = np.mean(postproc_fd_timeseries)
+ mean_relative_rms = np.nanmean(rmsd_censored) # first value can be NaN if no dummy scans
mean_dvars_before_processing = np.mean(dvars_before_processing)
mean_dvars_after_processing = np.mean(dvars_after_processing)
- motionDVCorrInit = np.corrcoef(preproc_fd_timeseries, dvars_before_processing)[0][1]
- motionDVCorrFinal = np.corrcoef(postproc_fd_timeseries, dvars_after_processing)[0][1]
+ fd_dvars_correlation_initial = np.corrcoef(preproc_fd_timeseries, dvars_before_processing)[
+ 0, 1
+ ]
+ fd_dvars_correlation_final = np.corrcoef(postproc_fd_timeseries, dvars_after_processing)[
+ 0, 1
+ ]
rmsd_max_value = np.nanmax(rmsd_censored)
# A summary of all the values
qc_values_dict.update(
{
- "meanFD": [mean_fd],
- "relMeansRMSMotion": [mean_rms],
- "relMaxRMSMotion": [rmsd_max_value],
- "meanDVInit": [mean_dvars_before_processing],
- "meanDVFinal": [mean_dvars_after_processing],
+ "mean_fd": [mean_fd],
+ "mean_fd_post_censoring": [mean_fd_post_censoring],
+ "mean_relative_rms": [mean_relative_rms],
+ "max_relative_rms": [rmsd_max_value],
+ "mean_dvars_initial": [mean_dvars_before_processing],
+ "mean_dvars_final": [mean_dvars_after_processing],
+ "num_dummy_volumes": [dummy_scans],
"num_censored_volumes": [num_censored_volumes],
- "nVolsRemoved": [dummy_scans],
- "motionDVCorrInit": [motionDVCorrInit],
- "motionDVCorrFinal": [motionDVCorrFinal],
+ "num_retained_volumes": [num_retained_volumes],
+ "fd_dvars_correlation_initial": [fd_dvars_correlation_initial],
+ "fd_dvars_correlation_final": [fd_dvars_correlation_final],
}
)
qc_metadata = {
- "meanFD": {
+ "mean_fd": {
"LongName": "Mean Framewise Displacement",
"Description": (
"Average framewise displacement without any motion parameter filtering. "
"This value includes high-motion outliers, but not dummy volumes. "
"FD is calculated according to the Power definition."
),
- "Units": "mm",
+ "Units": "mm / volume",
+ "Term URL": "https://doi.org/10.1016/j.neuroimage.2011.10.018",
+ },
+ "mean_fd_post_censoring": {
+ "LongName": "Mean Framewise Displacement After Censoring",
+ "Description": (
+ "Average framewise displacement without any motion parameter filtering. "
+ "This value does not include high-motion outliers or dummy volumes. "
+ "FD is calculated according to the Power definition."
+ ),
+ "Units": "mm / volume",
"Term URL": "https://doi.org/10.1016/j.neuroimage.2011.10.018",
},
- "relMeansRMSMotion": {
+ "mean_relative_rms": {
"LongName": "Mean Relative Root Mean Squared",
"Description": (
"Average relative root mean squared calculated from motion parameters, "
@@ -424,7 +442,7 @@ def _run_interface(self, runtime):
),
"Units": "arbitrary",
},
- "relMaxRMSMotion": {
+ "max_relative_rms": {
"LongName": "Maximum Relative Root Mean Squared",
"Description": (
"Maximum relative root mean squared calculated from motion parameters, "
@@ -433,7 +451,7 @@ def _run_interface(self, runtime):
),
"Units": "arbitrary",
},
- "meanDVInit": {
+ "mean_dvars_initial": {
"LongName": "Mean DVARS Before Postprocessing",
"Description": (
"Average DVARS (temporal derivative of root mean squared variance over "
@@ -441,7 +459,7 @@ def _run_interface(self, runtime):
),
"TermURL": "https://doi.org/10.1016/j.neuroimage.2011.02.073",
},
- "meanDVFinal": {
+ "mean_dvars_final": {
"LongName": "Mean DVARS After Postprocessing",
"Description": (
"Average DVARS (temporal derivative of root mean squared variance over "
@@ -449,6 +467,12 @@ def _run_interface(self, runtime):
),
"TermURL": "https://doi.org/10.1016/j.neuroimage.2011.02.073",
},
+ "num_dummy_volumes": {
+ "LongName": "Number of Dummy Volumes",
+ "Description": (
+ "The number of non-steady state volumes removed from the time series by XCP-D."
+ ),
+ },
"num_censored_volumes": {
"LongName": "Number of Censored Volumes",
"Description": (
@@ -456,13 +480,14 @@ def _run_interface(self, runtime):
"This does not include dummy volumes."
),
},
- "nVolsRemoved": {
- "LongName": "Number of Dummy Volumes",
+ "num_retained_volumes": {
+ "LongName": "Number of Retained Volumes",
"Description": (
- "The number of non-steady state volumes removed from the time series by XCP-D."
+ "The number of volumes retained in the denoised dataset. "
+ "This does not include dummy volumes or high-motion outliers."
),
},
- "motionDVCorrInit": {
+ "fd_dvars_correlation_initial": {
"LongName": "FD-DVARS Correlation Before Postprocessing",
"Description": (
"The Pearson correlation coefficient between framewise displacement and DVARS "
@@ -470,7 +495,7 @@ def _run_interface(self, runtime):
"after removal of dummy volumes, but before removal of high-motion outliers."
),
},
- "motionDVCorrFinal": {
+ "fd_dvars_correlation_final": {
"LongName": "FD-DVARS Correlation After Postprocessing",
"Description": (
"The Pearson correlation coefficient between framewise displacement and DVARS "
diff --git a/xcp_d/interfaces/report.py b/xcp_d/interfaces/report.py
index 54ed64bf4..e960cac8f 100644
--- a/xcp_d/interfaces/report.py
+++ b/xcp_d/interfaces/report.py
@@ -30,12 +30,13 @@
\t\t
\t\t\t- BOLD volume space: {space}
\t\t\t- Repetition Time (TR): {TR:.03g}
-\t\t\t- Mean Framewise Displacement: {meanFD}
-\t\t\t- Mean Relative RMS Motion: {meanRMS}
-\t\t\t- Max Relative RMS Motion: {maxRMS}
+\t\t\t- Mean Framewise Displacement: {mean_fd}
+\t\t\t- Mean Relative RMS Motion: {mean_relative_rms}
+\t\t\t- Max Relative RMS Motion: {max_relative_rms}
\t\t\t- DVARS Before and After Processing : {dvars_before_after}
-\t\t\t- Correlation between DVARS and FD Before and After Processing : {corrfddv}
-\t\t\t- Number of Volumes Censored : {volcensored}
+\t\t\t- Correlation between DVARS and FD Before and After Processing :
+{fd_dvars_correlation}
+\t\t\t- Number of Volumes Censored : {num_vols_censored}
\t\t
"""
@@ -145,25 +146,28 @@ class FunctionalSummary(SummaryInterface):
def _generate_segment(self):
space = get_entity(self.inputs.bold_file, "space")
qcfile = pd.read_csv(self.inputs.qc_file)
- meanFD = str(round(qcfile["meanFD"][0], 4))
- meanRMS = str(round(qcfile["relMeansRMSMotion"][0], 4))
- maxRMS = str(round(qcfile["relMaxRMSMotion"][0], 4))
- dvars = f"{round(qcfile['meanDVInit'][0], 4)}, {round(qcfile['meanDVFinal'][0], 4)}"
+ mean_fd = str(round(qcfile["mean_fd"][0], 4))
+ mean_relative_rms = str(round(qcfile["mean_relative_rms"][0], 4))
+ max_relative_rms = str(round(qcfile["max_relative_rms"][0], 4))
+ dvars = (
+ f"{round(qcfile['mean_dvars_initial'][0], 4)}, "
+ f"{round(qcfile['mean_dvars_final'][0], 4)}"
+ )
fd_dvars_correlation = (
- f"{round(qcfile['motionDVCorrInit'][0], 4)}, "
- f"{round(qcfile['motionDVCorrFinal'][0], 4)}"
+ f"{round(qcfile['fd_dvars_correlation_initial'][0], 4)}, "
+ f"{round(qcfile['fd_dvars_correlation_final'][0], 4)}"
)
num_vols_censored = str(round(qcfile["num_censored_volumes"][0], 4))
return QC_TEMPLATE.format(
space=space,
TR=self.inputs.TR,
- meanFD=meanFD,
- meanRMS=meanRMS,
- maxRMS=maxRMS,
+ mean_fd=mean_fd,
+ mean_relative_rms=mean_relative_rms,
+ max_relative_rms=max_relative_rms,
dvars_before_after=dvars,
- corrfddv=fd_dvars_correlation,
- volcensored=num_vols_censored,
+ fd_dvars_correlation=fd_dvars_correlation,
+ num_vols_censored=num_vols_censored,
)
diff --git a/xcp_d/tests/test_cli.py b/xcp_d/tests/test_cli.py
index d919d8c7d..35a52a006 100644
--- a/xcp_d/tests/test_cli.py
+++ b/xcp_d/tests/test_cli.py
@@ -5,6 +5,7 @@
import numpy as np
import pandas as pd
import pytest
+from nipype import logging
from pkg_resources import resource_filename as pkgrf
from xcp_d.cli import combineqc, parser_utils, run
@@ -17,6 +18,8 @@
run_command,
)
+LOGGER = logging.getLogger("nipype.utils")
+
@pytest.mark.ds001419_nifti
def test_ds001419_nifti(data_dir, output_dir, working_dir):
@@ -210,6 +213,17 @@ def test_pnc_cifti(data_dir, output_dir, working_dir):
test_data_dir = get_test_data_path()
filter_file = os.path.join(test_data_dir, "pnc_cifti_filter.json")
+ # Make the last few volumes outliers to check https://github.com/PennLINC/xcp_d/issues/949
+ motion_file = os.path.join(
+ dataset_dir,
+ "sub-1648798153/ses-PNC1/func/"
+ "sub-1648798153_ses-PNC1_task-rest_acq-singleband_desc-confounds_timeseries.tsv",
+ )
+ motion_df = pd.read_table(motion_file)
+ motion_df.loc[56:, "trans_x"] = np.arange(1, 5) * 20
+ motion_df.to_csv(motion_file, sep="\t", index=False)
+ LOGGER.warning(f"Overwrote confounds file at {motion_file}.")
+
parameters = [
dataset_dir,
out_dir,
@@ -218,6 +232,7 @@ def test_pnc_cifti(data_dir, output_dir, working_dir):
"--nthreads=2",
"--omp-nthreads=2",
f"--bids-filter-file={filter_file}",
+ "--min-time=60",
"--nuisance-regressors=acompcor_gsr",
"--despike",
"--head_radius=40",
diff --git a/xcp_d/tests/test_utils_bids.py b/xcp_d/tests/test_utils_bids.py
index 1c31851e1..f7560f5fc 100644
--- a/xcp_d/tests/test_utils_bids.py
+++ b/xcp_d/tests/test_utils_bids.py
@@ -240,3 +240,52 @@ def test_get_entity(datasets):
)
with pytest.raises(ValueError, match="Unknown space"):
xbids.get_entity(fname, "space")
+
+
+def test_group_across_runs():
+ """Test group_across_runs."""
+ in_files = [
+ "/path/sub-01_task-axcpt_run-03_bold.nii.gz",
+ "/path/sub-01_task-rest_run-03_bold.nii.gz",
+ "/path/sub-01_task-rest_run-01_bold.nii.gz",
+ "/path/sub-01_task-axcpt_run-02_bold.nii.gz",
+ "/path/sub-01_task-rest_run-02_bold.nii.gz",
+ "/path/sub-01_task-axcpt_run-01_bold.nii.gz",
+ ]
+ grouped_files = xbids.group_across_runs(in_files)
+ assert isinstance(grouped_files, list)
+ assert len(grouped_files[0]) == 3
+ assert grouped_files[0] == [
+ "/path/sub-01_task-axcpt_run-01_bold.nii.gz",
+ "/path/sub-01_task-axcpt_run-02_bold.nii.gz",
+ "/path/sub-01_task-axcpt_run-03_bold.nii.gz",
+ ]
+ assert len(grouped_files[1]) == 3
+ assert grouped_files[1] == [
+ "/path/sub-01_task-rest_run-01_bold.nii.gz",
+ "/path/sub-01_task-rest_run-02_bold.nii.gz",
+ "/path/sub-01_task-rest_run-03_bold.nii.gz",
+ ]
+
+ in_files = [
+ "/path/sub-01_task-rest_dir-LR_run-2_bold.nii.gz",
+ "/path/sub-01_task-rest_dir-RL_run-1_bold.nii.gz",
+ "/path/sub-01_task-axcpt_dir-LR_bold.nii.gz",
+ "/path/sub-01_task-rest_dir-RL_run-2_bold.nii.gz",
+ "/path/sub-01_task-rest_dir-LR_run-1_bold.nii.gz",
+ "/path/sub-01_task-axcpt_dir-RL_bold.nii.gz",
+ ]
+ grouped_files = xbids.group_across_runs(in_files)
+ assert isinstance(grouped_files, list)
+ assert len(grouped_files[0]) == 2
+ assert grouped_files[0] == [
+ "/path/sub-01_task-axcpt_dir-LR_bold.nii.gz",
+ "/path/sub-01_task-axcpt_dir-RL_bold.nii.gz",
+ ]
+ assert len(grouped_files[1]) == 4
+ assert grouped_files[1] == [
+ "/path/sub-01_task-rest_dir-LR_run-1_bold.nii.gz",
+ "/path/sub-01_task-rest_dir-RL_run-1_bold.nii.gz",
+ "/path/sub-01_task-rest_dir-LR_run-2_bold.nii.gz",
+ "/path/sub-01_task-rest_dir-RL_run-2_bold.nii.gz",
+ ]
diff --git a/xcp_d/tests/test_utils_utils.py b/xcp_d/tests/test_utils_utils.py
index ddac086d2..33043d09a 100644
--- a/xcp_d/tests/test_utils_utils.py
+++ b/xcp_d/tests/test_utils_utils.py
@@ -152,6 +152,37 @@ def test_denoise_with_nilearn(ds001419_data, tmp_path_factory):
assert uncensored_denoised_bold.shape == (n_volumes, n_voxels)
assert interpolated_filtered_bold.shape == (n_volumes, n_voxels)
+ # Ensure that interpolation + filtering doesn't cause problems at beginning/end of scan
+ # Create an updated censoring file with outliers at the first and last two volumes
+ censoring_df = confounds_df[["framewise_displacement"]].copy()
+ censoring_df["framewise_displacement"] = False
+ censoring_df.iloc[:2, 0] = True
+ censoring_df.iloc[-2:, 0] = True
+ n_censored_volumes = censoring_df["framewise_displacement"].sum()
+ assert n_censored_volumes == 4
+ temporal_mask = os.path.join(tmpdir, "censoring.tsv")
+ censoring_df.to_csv(temporal_mask, sep="\t", index=False)
+
+ # Run without denoising or filtering
+ _, interpolated_filtered_bold = utils.denoise_with_nilearn(
+ preprocessed_bold=preprocessed_bold_arr,
+ confounds_file=None,
+ temporal_mask=temporal_mask,
+ low_pass=None,
+ high_pass=None,
+ filter_order=0,
+ TR=TR,
+ )
+ assert interpolated_filtered_bold.shape == (n_volumes, n_voxels)
+ # The first two volumes should be the same as the third (first non-outlier) volume
+ assert np.array_equal(interpolated_filtered_bold[0, :], interpolated_filtered_bold[2, :])
+ assert np.array_equal(interpolated_filtered_bold[1, :], interpolated_filtered_bold[2, :])
+ assert not np.array_equal(interpolated_filtered_bold[2, :], interpolated_filtered_bold[3, :])
+ # The last volume should be the same as the third-to-last (last non-outlier) volume
+ assert np.array_equal(interpolated_filtered_bold[-1, :], interpolated_filtered_bold[-3, :])
+ assert np.array_equal(interpolated_filtered_bold[-2, :], interpolated_filtered_bold[-3, :])
+ assert not np.array_equal(interpolated_filtered_bold[-3, :], interpolated_filtered_bold[-4, :])
+
def test_list_to_str():
"""Test the list_to_str function."""
diff --git a/xcp_d/utils/bids.py b/xcp_d/utils/bids.py
index 5e38fe999..64613dd90 100644
--- a/xcp_d/utils/bids.py
+++ b/xcp_d/utils/bids.py
@@ -874,7 +874,11 @@ def get_entity(filename, entity):
def group_across_runs(in_files):
- """Group preprocessed BOLD files by unique sets of entities, ignoring run.
+ """Group preprocessed BOLD files by unique sets of entities, ignoring run and direction.
+
+ We only ignore direction for the sake of HCP-format data.
+ This may lead to small problems for non-HCP datasets that differentiate scans based on
+ both run and direction.
Parameters
----------
@@ -891,20 +895,31 @@ def group_across_runs(in_files):
# First, extract run information and sort the input files by the runs,
# so that any cases where files are not already in ascending run order get fixed.
- run_numbers = []
+ run_numbers, directions = [], []
for in_file in in_files:
run = get_entity(in_file, "run")
if run is None:
run = 0
+ direction = get_entity(in_file, "dir")
+ if direction is None:
+ direction = "none"
+
run_numbers.append(int(run))
+ directions.append(direction)
+
+ # Combine the three lists into a list of tuples
+ combined_data = list(zip(run_numbers, directions, in_files))
+
+ # Sort the tuples by run, then direction, then filename
+ sorted_data = sorted(combined_data, key=lambda x: (x[0], x[1], x[2]))
- # Sort the files by the run numbers.
- zipped_pairs = zip(run_numbers, in_files)
- sorted_in_files = [x for _, x in sorted(zipped_pairs)]
+ # Sort the file list
+ sorted_in_files = [item[2] for item in sorted_data]
- # Extract the unique sets of entities (i.e., the filename, minus the run entity).
+ # Extract the unique sets of entities (i.e., the filename, minus the run and dir entities).
unique_filenames = [re.sub("_run-[0-9]+_", "_", os.path.basename(f)) for f in sorted_in_files]
+ unique_filenames = [re.sub("_dir-[0-9a-zA-Z]+_", "_", f) for f in unique_filenames]
# Assign each in_file to a group of files with the same entities, except run.
out_files, grouped_unique_filenames = [], []
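
The grouping loop that follows the context above is elided in this hunk; a minimal sketch of how the stripped filenames drive the grouping (an illustration under the regexes shown above, not the exact xcp_d code):

import os
import re

def _strip_run_and_dir(path):
    name = re.sub("_run-[0-9]+_", "_", os.path.basename(path))
    return re.sub("_dir-[0-9a-zA-Z]+_", "_", name)

def group_across_runs_sketch(sorted_in_files):
    # Assumes files are already sorted by (run, dir, filename).
    out_files, keys = [], []
    for in_file in sorted_in_files:
        key = _strip_run_and_dir(in_file)
        if key in keys:
            out_files[keys.index(key)].append(in_file)
        else:
            keys.append(key)
            out_files.append([in_file])
    return out_files

print(group_across_runs_sketch([
    "/path/sub-01_task-rest_dir-LR_run-1_bold.nii.gz",
    "/path/sub-01_task-rest_dir-RL_run-1_bold.nii.gz",
    "/path/sub-01_task-axcpt_bold.nii.gz",
]))  # [[rest LR run-1, rest RL run-1], [axcpt]]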
diff --git a/xcp_d/utils/hcp2fmriprep.py b/xcp_d/utils/hcp2fmriprep.py
index a11ae1829..21db0fe0e 100644
--- a/xcp_d/utils/hcp2fmriprep.py
+++ b/xcp_d/utils/hcp2fmriprep.py
@@ -3,6 +3,7 @@
"""Functions for converting HCP-format data to fMRIPrep format."""
import glob
import os
+import re
import nibabel as nb
import pandas as pd
@@ -199,8 +200,12 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent):
for base_task_name in task_names:
LOGGER.info(f"Processing {base_task_name}")
# NOTE: What is the first element in the folder name?
- _, task_id, dir_id = base_task_name.split("_")
+ _, base_task_id, dir_id = base_task_name.split("_")
+ match = re.match(r"([A-Za-z0-9]+[a-zA-Z]+)(\d+)$", base_task_id)
+ task_id = match.group(1).lower()
+ run_id = int(match.group(2))
task_ent = f"task-{task_id}"
+ run_ent = f"run-{run_id}"
dir_ent = f"dir-{dir_id}"
task_dir_orig = os.path.join(func_dir_orig, base_task_name)
@@ -209,7 +214,10 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent):
sbref_orig = os.path.join(task_dir_orig, "SBRef_dc.nii.gz")
boldref_fmriprep = os.path.join(
func_dir_fmriprep,
- f"{subses_ents}_{task_ent}_{dir_ent}_{volspace_ent}_{RES_ENT}_boldref.nii.gz",
+ (
+ f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_{volspace_ent}_{RES_ENT}_"
+ f"boldref.nii.gz"
+ ),
)
copy_dictionary[sbref_orig] = [boldref_fmriprep]
@@ -217,7 +225,7 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent):
bold_nifti_fmriprep = os.path.join(
func_dir_fmriprep,
(
- f"{subses_ents}_{task_ent}_{dir_ent}_{volspace_ent}_{RES_ENT}_"
+ f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_{volspace_ent}_{RES_ENT}_"
"desc-preproc_bold.nii.gz"
),
)
@@ -229,20 +237,20 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent):
)
bold_cifti_fmriprep = os.path.join(
func_dir_fmriprep,
- f"{subses_ents}_{task_ent}_{dir_ent}_space-fsLR_den-91k_bold.dtseries.nii",
+ f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_space-fsLR_den-91k_bold.dtseries.nii",
)
copy_dictionary[bold_cifti_orig] = [bold_cifti_fmriprep]
# More identity transforms
native_to_t1w_fmriprep = os.path.join(
func_dir_fmriprep,
- f"{subses_ents}_{task_ent}_{dir_ent}_from-scanner_to-T1w_mode-image_xfm.txt",
+ f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_from-scanner_to-T1w_mode-image_xfm.txt",
)
copy_dictionary[identity_xfm].append(native_to_t1w_fmriprep)
t1w_to_native_fmriprep = os.path.join(
func_dir_fmriprep,
- f"{subses_ents}_{task_ent}_{dir_ent}_from-T1w_to-scanner_mode-image_xfm.txt",
+ f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_from-T1w_to-scanner_mode-image_xfm.txt",
)
copy_dictionary[identity_xfm].append(t1w_to_native_fmriprep)
@@ -253,7 +261,10 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent):
}
bold_nifti_json_fmriprep = os.path.join(
func_dir_fmriprep,
- f"{subses_ents}_{task_ent}_{dir_ent}_{volspace_ent}_{RES_ENT}_desc-preproc_bold.json",
+ (
+ f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_{volspace_ent}_{RES_ENT}"
+ "_desc-preproc_bold.json"
+ ),
)
write_json(bold_metadata, bold_nifti_json_fmriprep)
@@ -268,7 +279,7 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent):
)
bold_cifti_json_fmriprep = os.path.join(
func_dir_fmriprep,
- f"{subses_ents}_{task_ent}_{dir_ent}_space-fsLR_den-91k_bold.dtseries.json",
+ f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_space-fsLR_den-91k_bold.dtseries.json",
)
write_json(bold_metadata, bold_cifti_json_fmriprep)
@@ -276,7 +287,7 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent):
collect_confounds(
task_dir_orig,
func_dir_fmriprep,
- f"{subses_ents}_{task_ent}_{dir_ent}",
+ f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}",
work_dir=work_dir,
bold_file=bold_nifti_orig,
brainmask_file=os.path.join(task_dir_orig, "brainmask_fs.2.nii.gz"),
@@ -289,7 +300,7 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent):
os.makedirs(figdir, exist_ok=True)
bbref_fig_fmriprep = os.path.join(
figdir,
- f"{subses_ents}_{task_ent}_{dir_ent}_desc-bbregister_bold.svg",
+ f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_desc-bbregister_bold.svg",
)
t1w = os.path.join(anat_dir_orig, "T1w.nii.gz")
ribbon = os.path.join(anat_dir_orig, "ribbon.nii.gz")
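
To illustrate the new run parsing, a quick sketch of what the regex extracts from typical HCP folder names (the example names here are assumptions, not taken from the diff):

import re

for base_task_name in ["rfMRI_REST1_LR", "tfMRI_EMOTION2_RL"]:
    _, base_task_id, dir_id = base_task_name.split("_")
    match = re.match(r"([A-Za-z0-9]+[a-zA-Z]+)(\d+)$", base_task_id)
    task_id, run_id = match.group(1).lower(), int(match.group(2))
    print(f"task-{task_id}_dir-{dir_id}_run-{run_id}")
# task-rest_dir-LR_run-1
# task-emotion_dir-RL_run-2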
diff --git a/xcp_d/utils/qcmetrics.py b/xcp_d/utils/qcmetrics.py
index 842996ff0..9619b8b1a 100644
--- a/xcp_d/utils/qcmetrics.py
+++ b/xcp_d/utils/qcmetrics.py
@@ -46,15 +46,15 @@ def compute_registration_qc(bold2t1w_mask, anat_brainmask, bold2template_mask, t
template_mask_arr = nb.load(template_mask).get_fdata()
reg_qc = {
- "coregDice": [dice(bold2t1w_mask_arr, t1w_mask_arr)],
- "coregPearson": [pearson(bold2t1w_mask_arr, t1w_mask_arr)],
- "coregCoverage": [overlap(bold2t1w_mask_arr, t1w_mask_arr)],
- "normDice": [dice(bold2template_mask_arr, template_mask_arr)],
- "normPearson": [pearson(bold2template_mask_arr, template_mask_arr)],
- "normCoverage": [overlap(bold2template_mask_arr, template_mask_arr)],
+ "coreg_dice": [dice(bold2t1w_mask_arr, t1w_mask_arr)],
+ "coreg_correlation": [pearson(bold2t1w_mask_arr, t1w_mask_arr)],
+ "coreg_overlap": [overlap(bold2t1w_mask_arr, t1w_mask_arr)],
+ "norm_dice": [dice(bold2template_mask_arr, template_mask_arr)],
+ "norm_correlation": [pearson(bold2template_mask_arr, template_mask_arr)],
+ "norm_overlap": [overlap(bold2template_mask_arr, template_mask_arr)],
}
qc_metadata = {
- "coregDice": {
+ "coreg_dice": {
"LongName": "Coregistration Sørensen-Dice Coefficient",
"Description": (
"The Sørensen-Dice coefficient calculated between the binary brain masks from the "
@@ -64,7 +64,7 @@ def compute_registration_qc(bold2t1w_mask, anat_brainmask, bold2template_mask, t
),
"Term URL": "https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient",
},
- "coregPearson": {
+ "coreg_correlation": {
"LongName": "Coregistration Pearson Correlation",
"Description": (
"The Pearson correlation coefficient calculated between the binary brain masks "
@@ -74,7 +74,7 @@ def compute_registration_qc(bold2t1w_mask, anat_brainmask, bold2template_mask, t
),
"Term URL": "https://en.wikipedia.org/wiki/Pearson_correlation_coefficient",
},
- "coregCoverage": {
+ "coreg_overlap": {
"LongName": "Coregistration Coverage Metric",
"Description": (
"The Szymkiewicz-Simpson overlap coefficient calculated between the binary brain "
@@ -83,7 +83,7 @@ def compute_registration_qc(bold2t1w_mask, anat_brainmask, bold2template_mask, t
),
"Term URL": "https://en.wikipedia.org/wiki/Overlap_coefficient",
},
- "normDice": {
+ "norm_dice": {
"LongName": "Normalization Sørensen-Dice Coefficient",
"Description": (
"The Sørensen-Dice coefficient calculated between the binary brain masks from the "
@@ -93,7 +93,7 @@ def compute_registration_qc(bold2t1w_mask, anat_brainmask, bold2template_mask, t
),
"Term URL": "https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient",
},
- "normPearson": {
+ "norm_correlation": {
"LongName": "Normalization Pearson Correlation",
"Description": (
"The Pearson correlation coefficient calculated between the binary brain masks "
@@ -103,7 +103,7 @@ def compute_registration_qc(bold2t1w_mask, anat_brainmask, bold2template_mask, t
),
"Term URL": "https://en.wikipedia.org/wiki/Pearson_correlation_coefficient",
},
- "normCoverage": {
+ "norm_overlap": {
"LongName": "Normalization Overlap Coefficient",
"Description": (
"The Szymkiewicz-Simpson overlap coefficient calculated between the binary brain "
diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py
index 06397ab3c..a1662b6c0 100644
--- a/xcp_d/utils/utils.py
+++ b/xcp_d/utils/utils.py
@@ -464,6 +464,37 @@ def denoise_with_nilearn(
t_r=TR,
)
+ # Replace any high-motion volumes at the beginning or end of the run with the closest
+ # low-motion volume's data.
+ outlier_idx = list(np.where(~sample_mask)[0])
+ if outlier_idx:
+ # Use https://stackoverflow.com/a/48106843/2589328 to group consecutive blocks of outliers.
+ gaps = [[s, e] for s, e in zip(outlier_idx, outlier_idx[1:]) if s + 1 < e]
+ edges = iter(outlier_idx[:1] + sum(gaps, []) + outlier_idx[-1:])
+ consecutive_outliers_idx = list(zip(edges, edges))
+ first_outliers = consecutive_outliers_idx[0]
+ last_outliers = consecutive_outliers_idx[-1]
+
+ # Replace outliers at beginning of run
+ if first_outliers[0] == 0:
+ LOGGER.warning(
+ f"Outlier volumes at beginning of run ({first_outliers[0]}-{first_outliers[1]}) "
+ "will be replaced with first non-outlier volume's values."
+ )
+ interpolated_unfiltered_bold[
+ : first_outliers[1] + 1, :
+ ] = interpolated_unfiltered_bold[first_outliers[1] + 1, :]
+
+ # Replace outliers at end of run
+ if last_outliers[1] == n_volumes - 1:
+ LOGGER.warning(
+ f"Outlier volumes at end of run ({last_outliers[0]}-{last_outliers[1]}) "
+ "will be replaced with last non-outlier volume's values."
+ )
+ interpolated_unfiltered_bold[last_outliers[0] :, :] = interpolated_unfiltered_bold[
+ last_outliers[0] - 1, :
+ ]
+
# Now apply the bandpass filter to the interpolated, denoised data
if low_pass is not None and high_pass is not None:
# TODO: Replace with nilearn.signal.butterworth once 0.10.1 is released.
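
The edge-replacement logic above leans on a compact idiom for grouping consecutive indices; here it is in isolation, on a made-up outlier index list:

# Standalone demo of the consecutive-block grouping used above
# (https://stackoverflow.com/a/48106843/2589328).
outlier_idx = [0, 1, 5, 6, 7, 19]

gaps = [[s, e] for s, e in zip(outlier_idx, outlier_idx[1:]) if s + 1 < e]
edges = iter(outlier_idx[:1] + sum(gaps, []) + outlier_idx[-1:])
consecutive_outliers_idx = list(zip(edges, edges))
print(consecutive_outliers_idx)  # [(0, 1), (5, 7), (19, 19)]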
diff --git a/xcp_d/workflows/base.py b/xcp_d/workflows/base.py
index 7573d57ca..3c80040fa 100644
--- a/xcp_d/workflows/base.py
+++ b/xcp_d/workflows/base.py
@@ -635,7 +635,7 @@ def init_subject_wf(
)
n_runs = len(preproc_files)
- preproc_files = group_across_runs(preproc_files)
+ preproc_files = group_across_runs(preproc_files) # group files across runs and directions
run_counter = 0
for ent_set, task_files in enumerate(preproc_files):
# Assuming TR is constant across runs for a given combination of entities.
diff --git a/xcp_d/workflows/concatenation.py b/xcp_d/workflows/concatenation.py
index 29b4755c2..1e4a95b2a 100644
--- a/xcp_d/workflows/concatenation.py
+++ b/xcp_d/workflows/concatenation.py
@@ -28,7 +28,7 @@ def init_concatenate_data_wf(
dcan_qc,
name="concatenate_data_wf",
):
- """Concatenate postprocessed data.
+ """Concatenate postprocessed data across runs and directions.
Workflow Graph
.. workflow::
@@ -99,7 +99,7 @@ def init_concatenate_data_wf(
workflow = Workflow(name=name)
workflow.__desc__ = """
-Postprocessing derivatives from multi-run tasks were then concatenated across runs.
+Postprocessing derivatives from multi-run tasks were then concatenated across runs and directions.
"""
inputnode = pe.Node(