diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 075d7f23c..fb5501f19 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -32,6 +32,6 @@ jobs: run: python -m pip install \ flake8 flake8-absolute-import flake8-black flake8-docstrings \ flake8-isort flake8-pyproject flake8-unused-arguments \ - flake8-use-fstring flake8-warnings pep8-naming + flake8-use-fstring pep8-naming - name: Check xcp_d run: python -m flake8 xcp_d diff --git a/docs/workflows.rst b/docs/workflows.rst index 9f7cfce97..4d6b68aa9 100644 --- a/docs/workflows.rst +++ b/docs/workflows.rst @@ -277,13 +277,16 @@ Denoising ========= :class:`~xcp_d.interfaces.nilearn.DenoiseNifti`, :class:`~xcp_d.interfaces.nilearn.DenoiseCifti` -Temporal censoring ------------------- +Temporal censoring [OPTIONAL] +----------------------------- Prior to confound regression, high-motion volumes will be removed from the BOLD data. These volumes will also be removed from the nuisance regressors. Please refer to :footcite:t:`power2012spurious` for more information. +.. tip:: + Censoring can be disabled by setting ``--fd-thresh 0``. + Confound regression ------------------- @@ -341,6 +344,17 @@ An interpolated version of the ``denoised BOLD`` is then created by filling in t outlier volumes with cubic spline interpolated data, as implemented in ``Nilearn``. The resulting ``interpolated, denoised BOLD`` is primarily used for bandpass filtering. +.. warning:: + In versions 0.4.0rc2 - 0.5.0, XCP-D used cubic spline interpolation, + followed by bandpass filtering. + + However, cubic spline interpolation can introduce large spikes and drops in the signal + when the censored volumes are at the beginning or end of the run, + which are then propagated to the filtered data. + + To address this, XCP-D now replaces interpolated volumes at the edges of the run with the + closest non-outlier volume's data, as of 0.5.1. + Bandpass filtering [OPTIONAL] ----------------------------- diff --git a/pyproject.toml b/pyproject.toml index 5c75e2284..b0dbb10e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "matplotlib ~= 3.4.2", "networkx ~= 2.8.8", # nipype needs networkx, but 3+ isn't compatible with nipype 1.8.5 "nibabel >= 3.2.1", - "nilearn ~= 0.10.0", + "nilearn == 0.10.1", # 0.10.2 raises error with compcor from fMRIPrep 23 "nipype ~= 1.8.5", "niworkflows == 1.7.3", "num2words", # for boilerplates @@ -74,7 +74,6 @@ tests = [ "flake8-pyproject", "flake8-unused-arguments", "flake8-use-fstring", - "flake8-warnings", "pep8-naming", "pytest", "pytest-cov", diff --git a/xcp_d/data/executive_summary_templates/executive_summary.html.jinja b/xcp_d/data/executive_summary_templates/executive_summary.html.jinja index 52d4dd0c4..4570bbdf7 100644 --- a/xcp_d/data/executive_summary_templates/executive_summary.html.jinja +++ b/xcp_d/data/executive_summary_templates/executive_summary.html.jinja @@ -138,16 +138,15 @@ #} {% include "anatomical_registration_plots.html.jinja" %} - {# Carpet/line plot for pre- and post-regression, concatenate across runs. #} - {% include "concatenated_task_static_plots.html.jinja" %} - {# - Task static plots. One section per run of each task. - 1. Task in T1 - 2. T1 in Task - 3. BOLD mean(?) image on the left - 4. BOLD reference image on the left - 3/4. Pre and post regression carpet/line plots on right. + "Functional Data" section, with BOLD figures. + 1. Concatenated resting-state carpet plots. + 2. One section per run of each task. + 1. Task in T1 + 2. T1 in Task + 3. BOLD mean(?) image on the left + 4. BOLD reference image on the left + 3/4. Pre and post regression carpet/line plots on right. #} {% include "task_static_plots.html.jinja" %} diff --git a/xcp_d/data/executive_summary_templates/task_static_plots.html.jinja b/xcp_d/data/executive_summary_templates/task_static_plots.html.jinja index df985f533..e0b9fa49b 100644 --- a/xcp_d/data/executive_summary_templates/task_static_plots.html.jinja +++ b/xcp_d/data/executive_summary_templates/task_static_plots.html.jinja @@ -2,8 +2,9 @@ Start the tasks section and put in the column headings for the task-specific data. Inputs: - - task_files[]{"task"} - - task_files[]{"run"} + - concatenated_rest_files{"preproc_carpet"} + - concatenated_rest_files{"postproc_carpet"} + - task_files[]{"key"} - task_files[]{"registration_files"} - task_files[]{"registration_titles"} - task_files[]{"bold"} @@ -22,11 +23,14 @@

Functional Data

+ + {# Carpet/line plot for pre- and post-regression, concatenate across runs. #} + {% include "concatenated_task_static_plots.html.jinja" %} +
{% for run_dict in task_files %} - {% set task = run_dict["task"] %} - {% set run = run_dict["run"] %} + {% set key = run_dict["key"] %} {% set registration_files = run_dict["registration_files"] %} {% set registration_titles = run_dict["registration_titles"] %} {% set bold = run_dict["bold"] %} @@ -38,7 +42,7 @@ Add the task name for the next few rows. #}
-
task-{{ task }} run-{{ run }}:
+
{{ key }}:
{# Full rows for registration files #} diff --git a/xcp_d/interfaces/execsummary.py b/xcp_d/interfaces/execsummary.py index 6f8bb3c42..2dccd93c6 100644 --- a/xcp_d/interfaces/execsummary.py +++ b/xcp_d/interfaces/execsummary.py @@ -4,6 +4,8 @@ import re from pathlib import Path +import numpy as np +import pandas as pd from bids.layout import BIDSLayout, Query from bs4 import BeautifulSoup from jinja2 import Environment, FileSystemLoader, Markup @@ -144,6 +146,24 @@ def collect_inputs(self): self.structural_files_ = structural_files + # Collect figures for concatenated resting-state data (if any) + concatenated_rest_files = {} + + query = { + "subject": self.subject_id, + "task": "rest", + "run": Query.NONE, + "desc": "preprocESQC", + "suffix": "bold", + "extension": ".svg", + } + concatenated_rest_files["preproc_carpet"] = self._get_bids_file(query) + + query["desc"] = "postprocESQC" + concatenated_rest_files["postproc_carpet"] = self._get_bids_file(query) + + self.concatenated_rest_files_ = concatenated_rest_files + # Determine the unique entity-sets for the task data. postproc_files = self.layout.get( subject=self.subject_id, @@ -166,34 +186,42 @@ def collect_inputs(self): task_entity_sets = [] for entity_set in unique_entity_sets: for entity in ORDERING: - entity_set[entity] = entity_set.get(entity, Query.NONE) + entity_set[entity] = entity_set.get(entity, np.nan) task_entity_sets.append(entity_set) - concatenated_rest_files = {} + # Now sort the entity sets by each entity + task_entity_sets = pd.DataFrame(task_entity_sets) + task_entity_sets = task_entity_sets.sort_values(by=task_entity_sets.columns.tolist()) + task_entity_sets = task_entity_sets.fillna(Query.NONE) - query = { - "subject": self.subject_id, - "task": "rest", - "run": Query.NONE, - "desc": "preprocESQC", - "suffix": "bold", - "extension": ".svg", - } - concatenated_rest_files["preproc_carpet"] = self._get_bids_file(query) - - query["desc"] = "postcarpetplot" - concatenated_rest_files["postproc_carpet"] = self._get_bids_file(query) + # Extract entities with variability + # This lets us name the sections based on multiple entities (not just task and run) + nunique = task_entity_sets.nunique() + nunique.loc["task"] = 2 # ensure we keep task + nunique.loc["run"] = 2 # ensure we keep run + cols_to_drop = nunique[nunique == 1].index + task_entity_namer = task_entity_sets.drop(cols_to_drop, axis=1) - self.concatenated_rest_files_ = concatenated_rest_files + # Convert back to dictionary + task_entity_sets = task_entity_sets.to_dict(orient="records") + task_entity_namer = task_entity_namer.to_dict(orient="records") task_files = [] - for task_entity_set in task_entity_sets: - task_file_figures = task_entity_set.copy() - task_file_figures[ - "key" - ] = f"task-{task_entity_set['task']}_run-{task_entity_set.get('run', 0)}" + for i_set, task_entity_set in enumerate(task_entity_sets): + task_file_figures = {} + + # Convert any floats in the name to ints + temp_dict = {} + for k, v in task_entity_namer[i_set].items(): + try: + temp_dict[k] = int(v) + except (ValueError, TypeError): + temp_dict[k] = v + + # String used for subsection headers + task_file_figures["key"] = " ".join([f"{k}-{v}" for k, v in temp_dict.items()]) query = { "subject": self.subject_id, @@ -222,10 +250,12 @@ def collect_inputs(self): task_file_figures["registration_files"].append(found_file) - task_files.append(task_file_figures) + # If there no mean BOLD figure, then the "run" was made by the concatenation workflow. + # Skip the concatenated resting-state scan, since it has its own section. + if query["task"] == "rest" and not task_file_figures["bold"]: + continue - # Sort the files by the desired key - task_files = sorted(task_files, key=lambda d: d["key"]) + task_files.append(task_file_figures) self.task_files_ = task_files diff --git a/xcp_d/interfaces/plotting.py b/xcp_d/interfaces/plotting.py index e0bb072d3..0d1d39322 100644 --- a/xcp_d/interfaces/plotting.py +++ b/xcp_d/interfaces/plotting.py @@ -277,7 +277,7 @@ class QCPlots(SimpleInterface): output_spec = _QCPlotsOutputSpec def _run_interface(self, runtime): - # Load confound matrix and load motion with motion filtering + # Load confound matrix and load motion without motion filtering confounds_df = pd.read_table(self.inputs.fmriprep_confounds_file) preproc_motion_df = load_motion( confounds_df.copy(), @@ -297,6 +297,7 @@ def _run_interface(self, runtime): censoring_df = pd.read_table(self.inputs.temporal_mask) tmask_arr = censoring_df["framewise_displacement"].values num_censored_volumes = int(tmask_arr.sum()) + num_retained_volumes = int((tmask_arr == 0).sum()) # Apply temporal mask to interpolated/full data rmsd_censored = rmsd[tmask_arr == 0] @@ -382,40 +383,57 @@ def _run_interface(self, runtime): # Calculate QC measures mean_fd = np.mean(preproc_fd_timeseries) - mean_rms = np.nanmean(rmsd_censored) # first value can be NaN if no dummy scans + mean_fd_post_censoring = np.mean(postproc_fd_timeseries) + mean_relative_rms = np.nanmean(rmsd_censored) # first value can be NaN if no dummy scans mean_dvars_before_processing = np.mean(dvars_before_processing) mean_dvars_after_processing = np.mean(dvars_after_processing) - motionDVCorrInit = np.corrcoef(preproc_fd_timeseries, dvars_before_processing)[0][1] - motionDVCorrFinal = np.corrcoef(postproc_fd_timeseries, dvars_after_processing)[0][1] + fd_dvars_correlation_initial = np.corrcoef(preproc_fd_timeseries, dvars_before_processing)[ + 0, 1 + ] + fd_dvars_correlation_final = np.corrcoef(postproc_fd_timeseries, dvars_after_processing)[ + 0, 1 + ] rmsd_max_value = np.nanmax(rmsd_censored) # A summary of all the values qc_values_dict.update( { - "meanFD": [mean_fd], - "relMeansRMSMotion": [mean_rms], - "relMaxRMSMotion": [rmsd_max_value], - "meanDVInit": [mean_dvars_before_processing], - "meanDVFinal": [mean_dvars_after_processing], + "mean_fd": [mean_fd], + "mean_fd_post_censoring": [mean_fd_post_censoring], + "mean_relative_rms": [mean_relative_rms], + "max_relative_rms": [rmsd_max_value], + "mean_dvars_initial": [mean_dvars_before_processing], + "mean_dvars_final": [mean_dvars_after_processing], + "num_dummy_volumes": [dummy_scans], "num_censored_volumes": [num_censored_volumes], - "nVolsRemoved": [dummy_scans], - "motionDVCorrInit": [motionDVCorrInit], - "motionDVCorrFinal": [motionDVCorrFinal], + "num_retained_volumes": [num_retained_volumes], + "fd_dvars_correlation_initial": [fd_dvars_correlation_initial], + "fd_dvars_correlation_final": [fd_dvars_correlation_final], } ) qc_metadata = { - "meanFD": { + "mean_fd": { "LongName": "Mean Framewise Displacement", "Description": ( "Average framewise displacement without any motion parameter filtering. " "This value includes high-motion outliers, but not dummy volumes. " "FD is calculated according to the Power definition." ), - "Units": "mm", + "Units": "mm / volume", + "Term URL": "https://doi.org/10.1016/j.neuroimage.2011.10.018", + }, + "mean_fd_post_censoring": { + "LongName": "Mean Framewise Displacement After Censoring", + "Description": ( + "Average framewise displacement without any motion parameter filtering. " + "This value does not include high-motion outliers or dummy volumes. " + "FD is calculated according to the Power definition." + ), + "Units": "mm / volume", "Term URL": "https://doi.org/10.1016/j.neuroimage.2011.10.018", }, - "relMeansRMSMotion": { + "mean_relative_rms": { "LongName": "Mean Relative Root Mean Squared", "Description": ( "Average relative root mean squared calculated from motion parameters, " @@ -424,7 +442,7 @@ def _run_interface(self, runtime): ), "Units": "arbitrary", }, - "relMaxRMSMotion": { + "max_relative_rms": { "LongName": "Maximum Relative Root Mean Squared", "Description": ( "Maximum relative root mean squared calculated from motion parameters, " @@ -433,7 +451,7 @@ def _run_interface(self, runtime): ), "Units": "arbitrary", }, - "meanDVInit": { + "mean_dvars_initial": { "LongName": "Mean DVARS Before Postprocessing", "Description": ( "Average DVARS (temporal derivative of root mean squared variance over " @@ -441,7 +459,7 @@ def _run_interface(self, runtime): ), "TermURL": "https://doi.org/10.1016/j.neuroimage.2011.02.073", }, - "meanDVFinal": { + "mean_dvars_final": { "LongName": "Mean DVARS After Postprocessing", "Description": ( "Average DVARS (temporal derivative of root mean squared variance over " @@ -449,6 +467,12 @@ def _run_interface(self, runtime): ), "TermURL": "https://doi.org/10.1016/j.neuroimage.2011.02.073", }, + "num_dummy_volumes": { + "LongName": "Number of Dummy Volumes", + "Description": ( + "The number of non-steady state volumes removed from the time series by XCP-D." + ), + }, "num_censored_volumes": { "LongName": "Number of Censored Volumes", "Description": ( @@ -456,13 +480,14 @@ def _run_interface(self, runtime): "This does not include dummy volumes." ), }, - "nVolsRemoved": { - "LongName": "Number of Dummy Volumes", + "num_retained_volumes": { + "LongName": "Number of Retained Volumes", "Description": ( - "The number of non-steady state volumes removed from the time series by XCP-D." + "The number of volumes retained in the denoised dataset. " + "This does not include dummy volumes or high-motion outliers." ), }, - "motionDVCorrInit": { + "fd_dvars_correlation_initial": { "LongName": "FD-DVARS Correlation Before Postprocessing", "Description": ( "The Pearson correlation coefficient between framewise displacement and DVARS " @@ -470,7 +495,7 @@ def _run_interface(self, runtime): "after removal of dummy volumes, but before removal of high-motion outliers." ), }, - "motionDVCorrFinal": { + "fd_dvars_correlation_final": { "LongName": "FD-DVARS Correlation After Postprocessing", "Description": ( "The Pearson correlation coefficient between framewise displacement and DVARS " diff --git a/xcp_d/interfaces/report.py b/xcp_d/interfaces/report.py index 54ed64bf4..e960cac8f 100644 --- a/xcp_d/interfaces/report.py +++ b/xcp_d/interfaces/report.py @@ -30,12 +30,13 @@ \t\t
    \t\t\t
  • BOLD volume space: {space}
  • \t\t\t
  • Repetition Time (TR): {TR:.03g}
  • -\t\t\t
  • Mean Framewise Displacement: {meanFD}
  • -\t\t\t
  • Mean Relative RMS Motion: {meanRMS}
  • -\t\t\t
  • Max Relative RMS Motion: {maxRMS}
  • +\t\t\t
  • Mean Framewise Displacement: {mean_fd}
  • +\t\t\t
  • Mean Relative RMS Motion: {mean_relative_rms}
  • +\t\t\t
  • Max Relative RMS Motion: {max_relative_rms}
  • \t\t\t
  • DVARS Before and After Processing : {dvars_before_after}
  • -\t\t\t
  • Correlation between DVARS and FD Before and After Processing : {corrfddv}
  • -\t\t\t
  • Number of Volumes Censored : {volcensored}
  • +\t\t\t
  • Correlation between DVARS and FD Before and After Processing : +{fd_dvars_correlation}
  • +\t\t\t
  • Number of Volumes Censored : {num_vols_censored}
  • \t\t
""" @@ -145,25 +146,28 @@ class FunctionalSummary(SummaryInterface): def _generate_segment(self): space = get_entity(self.inputs.bold_file, "space") qcfile = pd.read_csv(self.inputs.qc_file) - meanFD = str(round(qcfile["meanFD"][0], 4)) - meanRMS = str(round(qcfile["relMeansRMSMotion"][0], 4)) - maxRMS = str(round(qcfile["relMaxRMSMotion"][0], 4)) - dvars = f"{round(qcfile['meanDVInit'][0], 4)}, {round(qcfile['meanDVFinal'][0], 4)}" + mean_fd = str(round(qcfile["mean_fd"][0], 4)) + mean_relative_rms = str(round(qcfile["mean_relative_rms"][0], 4)) + max_relative_rms = str(round(qcfile["max_relative_rms"][0], 4)) + dvars = ( + f"{round(qcfile['mean_dvars_initial'][0], 4)}, " + f"{round(qcfile['mean_dvars_final'][0], 4)}" + ) fd_dvars_correlation = ( - f"{round(qcfile['motionDVCorrInit'][0], 4)}, " - f"{round(qcfile['motionDVCorrFinal'][0], 4)}" + f"{round(qcfile['fd_dvars_correlation_initial'][0], 4)}, " + f"{round(qcfile['fd_dvars_correlation_final'][0], 4)}" ) num_vols_censored = str(round(qcfile["num_censored_volumes"][0], 4)) return QC_TEMPLATE.format( space=space, TR=self.inputs.TR, - meanFD=meanFD, - meanRMS=meanRMS, - maxRMS=maxRMS, + mean_fd=mean_fd, + mean_relative_rms=mean_relative_rms, + max_relative_rms=max_relative_rms, dvars_before_after=dvars, - corrfddv=fd_dvars_correlation, - volcensored=num_vols_censored, + fd_dvars_correlation=fd_dvars_correlation, + num_vols_censored=num_vols_censored, ) diff --git a/xcp_d/tests/test_cli.py b/xcp_d/tests/test_cli.py index d919d8c7d..35a52a006 100644 --- a/xcp_d/tests/test_cli.py +++ b/xcp_d/tests/test_cli.py @@ -5,6 +5,7 @@ import numpy as np import pandas as pd import pytest +from nipype import logging from pkg_resources import resource_filename as pkgrf from xcp_d.cli import combineqc, parser_utils, run @@ -17,6 +18,8 @@ run_command, ) +LOGGER = logging.getLogger("nipype.utils") + @pytest.mark.ds001419_nifti def test_ds001419_nifti(data_dir, output_dir, working_dir): @@ -210,6 +213,17 @@ def test_pnc_cifti(data_dir, output_dir, working_dir): test_data_dir = get_test_data_path() filter_file = os.path.join(test_data_dir, "pnc_cifti_filter.json") + # Make the last few volumes outliers to check https://github.com/PennLINC/xcp_d/issues/949 + motion_file = os.path.join( + dataset_dir, + "sub-1648798153/ses-PNC1/func/" + "sub-1648798153_ses-PNC1_task-rest_acq-singleband_desc-confounds_timeseries.tsv", + ) + motion_df = pd.read_table(motion_file) + motion_df.loc[56:, "trans_x"] = np.arange(1, 5) * 20 + motion_df.to_csv(motion_file, sep="\t", index=False) + LOGGER.warning(f"Overwrote confounds file at {motion_file}.") + parameters = [ dataset_dir, out_dir, @@ -218,6 +232,7 @@ def test_pnc_cifti(data_dir, output_dir, working_dir): "--nthreads=2", "--omp-nthreads=2", f"--bids-filter-file={filter_file}", + "--min-time=60", "--nuisance-regressors=acompcor_gsr", "--despike", "--head_radius=40", diff --git a/xcp_d/tests/test_utils_bids.py b/xcp_d/tests/test_utils_bids.py index 1c31851e1..f7560f5fc 100644 --- a/xcp_d/tests/test_utils_bids.py +++ b/xcp_d/tests/test_utils_bids.py @@ -240,3 +240,52 @@ def test_get_entity(datasets): ) with pytest.raises(ValueError, match="Unknown space"): xbids.get_entity(fname, "space") + + +def test_group_across_runs(): + """Test group_across_runs.""" + in_files = [ + "/path/sub-01_task-axcpt_run-03_bold.nii.gz", + "/path/sub-01_task-rest_run-03_bold.nii.gz", + "/path/sub-01_task-rest_run-01_bold.nii.gz", + "/path/sub-01_task-axcpt_run-02_bold.nii.gz", + "/path/sub-01_task-rest_run-02_bold.nii.gz", + "/path/sub-01_task-axcpt_run-01_bold.nii.gz", + ] + grouped_files = xbids.group_across_runs(in_files) + assert isinstance(grouped_files, list) + assert len(grouped_files[0]) == 3 + assert grouped_files[0] == [ + "/path/sub-01_task-axcpt_run-01_bold.nii.gz", + "/path/sub-01_task-axcpt_run-02_bold.nii.gz", + "/path/sub-01_task-axcpt_run-03_bold.nii.gz", + ] + assert len(grouped_files[1]) == 3 + assert grouped_files[1] == [ + "/path/sub-01_task-rest_run-01_bold.nii.gz", + "/path/sub-01_task-rest_run-02_bold.nii.gz", + "/path/sub-01_task-rest_run-03_bold.nii.gz", + ] + + in_files = [ + "/path/sub-01_task-rest_dir-LR_run-2_bold.nii.gz", + "/path/sub-01_task-rest_dir-RL_run-1_bold.nii.gz", + "/path/sub-01_task-axcpt_dir-LR_bold.nii.gz", + "/path/sub-01_task-rest_dir-RL_run-2_bold.nii.gz", + "/path/sub-01_task-rest_dir-LR_run-1_bold.nii.gz", + "/path/sub-01_task-axcpt_dir-RL_bold.nii.gz", + ] + grouped_files = xbids.group_across_runs(in_files) + assert isinstance(grouped_files, list) + assert len(grouped_files[0]) == 2 + assert grouped_files[0] == [ + "/path/sub-01_task-axcpt_dir-LR_bold.nii.gz", + "/path/sub-01_task-axcpt_dir-RL_bold.nii.gz", + ] + assert len(grouped_files[1]) == 4 + assert grouped_files[1] == [ + "/path/sub-01_task-rest_dir-LR_run-1_bold.nii.gz", + "/path/sub-01_task-rest_dir-RL_run-1_bold.nii.gz", + "/path/sub-01_task-rest_dir-LR_run-2_bold.nii.gz", + "/path/sub-01_task-rest_dir-RL_run-2_bold.nii.gz", + ] diff --git a/xcp_d/tests/test_utils_utils.py b/xcp_d/tests/test_utils_utils.py index ddac086d2..33043d09a 100644 --- a/xcp_d/tests/test_utils_utils.py +++ b/xcp_d/tests/test_utils_utils.py @@ -152,6 +152,37 @@ def test_denoise_with_nilearn(ds001419_data, tmp_path_factory): assert uncensored_denoised_bold.shape == (n_volumes, n_voxels) assert interpolated_filtered_bold.shape == (n_volumes, n_voxels) + # Ensure that interpolation + filtering doesn't cause problems at beginning/end of scan + # Create an updated censoring file with outliers at first and last two volumes + censoring_df = confounds_df[["framewise_displacement"]] + censoring_df["framewise_displacement"] = False + censoring_df.iloc[:2]["framewise_displacement"] = True + censoring_df.iloc[-2:]["framewise_displacement"] = True + n_censored_volumes = censoring_df["framewise_displacement"].sum() + assert n_censored_volumes == 4 + temporal_mask = os.path.join(tmpdir, "censoring.tsv") + censoring_df.to_csv(temporal_mask, sep="\t", index=False) + + # Run without denoising or filtering + _, interpolated_filtered_bold = utils.denoise_with_nilearn( + preprocessed_bold=preprocessed_bold_arr, + confounds_file=None, + temporal_mask=temporal_mask, + low_pass=None, + high_pass=None, + filter_order=0, + TR=TR, + ) + assert interpolated_filtered_bold.shape == (n_volumes, n_voxels) + # The first two volumes should be the same as the third (first non-outlier) volume + assert np.array_equal(interpolated_filtered_bold[0, :], interpolated_filtered_bold[2, :]) + assert np.array_equal(interpolated_filtered_bold[1, :], interpolated_filtered_bold[2, :]) + assert not np.array_equal(interpolated_filtered_bold[2, :], interpolated_filtered_bold[3, :]) + # The last volume should be the same as the third-to-last (last non-outlier) volume + assert np.array_equal(interpolated_filtered_bold[-1, :], interpolated_filtered_bold[-3, :]) + assert np.array_equal(interpolated_filtered_bold[-2, :], interpolated_filtered_bold[-3, :]) + assert not np.array_equal(interpolated_filtered_bold[-3, :], interpolated_filtered_bold[-4, :]) + def test_list_to_str(): """Test the list_to_str function.""" diff --git a/xcp_d/utils/bids.py b/xcp_d/utils/bids.py index 5e38fe999..64613dd90 100644 --- a/xcp_d/utils/bids.py +++ b/xcp_d/utils/bids.py @@ -874,7 +874,11 @@ def get_entity(filename, entity): def group_across_runs(in_files): - """Group preprocessed BOLD files by unique sets of entities, ignoring run. + """Group preprocessed BOLD files by unique sets of entities, ignoring run and direction. + + We only ignore direction for the sake of HCP. + This may lead to small problems for non-HCP datasets that differentiate scans based on + both run and direction. Parameters ---------- @@ -891,20 +895,31 @@ def group_across_runs(in_files): # First, extract run information and sort the input files by the runs, # so that any cases where files are not already in ascending run order get fixed. - run_numbers = [] + run_numbers, directions = [], [] for in_file in in_files: run = get_entity(in_file, "run") if run is None: run = 0 + direction = get_entity(in_file, "dir") + if direction is None: + direction = "none" + run_numbers.append(int(run)) + directions.append(direction) + + # Combine the three lists into a list of tuples + combined_data = list(zip(run_numbers, directions, in_files)) + + # Sort the list of tuples first by run and then by direction + sorted_data = sorted(combined_data, key=lambda x: (x[0], x[1], x[2])) - # Sort the files by the run numbers. - zipped_pairs = zip(run_numbers, in_files) - sorted_in_files = [x for _, x in sorted(zipped_pairs)] + # Sort the file list + sorted_in_files = [item[2] for item in sorted_data] - # Extract the unique sets of entities (i.e., the filename, minus the run entity). + # Extract the unique sets of entities (i.e., the filename, minus the run and dir entities). unique_filenames = [re.sub("_run-[0-9]+_", "_", os.path.basename(f)) for f in sorted_in_files] + unique_filenames = [re.sub("_dir-[0-9a-zA-Z]+_", "_", f) for f in unique_filenames] # Assign each in_file to a group of files with the same entities, except run. out_files, grouped_unique_filenames = [], [] diff --git a/xcp_d/utils/hcp2fmriprep.py b/xcp_d/utils/hcp2fmriprep.py index a11ae1829..21db0fe0e 100644 --- a/xcp_d/utils/hcp2fmriprep.py +++ b/xcp_d/utils/hcp2fmriprep.py @@ -3,6 +3,7 @@ """Functions for converting HCP-format data to fMRIPrep format.""" import glob import os +import re import nibabel as nb import pandas as pd @@ -199,8 +200,12 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent): for base_task_name in task_names: LOGGER.info(f"Processing {base_task_name}") # NOTE: What is the first element in the folder name? - _, task_id, dir_id = base_task_name.split("_") + _, base_task_id, dir_id = base_task_name.split("_") + match = re.match(r"([A-Za-z0-9]+[a-zA-Z]+)(\d+)$", base_task_id) + task_id = match.group(1).lower() + run_id = int(match.group(2)) task_ent = f"task-{task_id}" + run_ent = f"run-{run_id}" dir_ent = f"dir-{dir_id}" task_dir_orig = os.path.join(func_dir_orig, base_task_name) @@ -209,7 +214,10 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent): sbref_orig = os.path.join(task_dir_orig, "SBRef_dc.nii.gz") boldref_fmriprep = os.path.join( func_dir_fmriprep, - f"{subses_ents}_{task_ent}_{dir_ent}_{volspace_ent}_{RES_ENT}_boldref.nii.gz", + ( + f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_{volspace_ent}_{RES_ENT}_" + f"boldref.nii.gz" + ), ) copy_dictionary[sbref_orig] = [boldref_fmriprep] @@ -217,7 +225,7 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent): bold_nifti_fmriprep = os.path.join( func_dir_fmriprep, ( - f"{subses_ents}_{task_ent}_{dir_ent}_{volspace_ent}_{RES_ENT}_" + f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_{volspace_ent}_{RES_ENT}_" "desc-preproc_bold.nii.gz" ), ) @@ -229,20 +237,20 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent): ) bold_cifti_fmriprep = os.path.join( func_dir_fmriprep, - f"{subses_ents}_{task_ent}_{dir_ent}_space-fsLR_den-91k_bold.dtseries.nii", + f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_space-fsLR_den-91k_bold.dtseries.nii", ) copy_dictionary[bold_cifti_orig] = [bold_cifti_fmriprep] # More identity transforms native_to_t1w_fmriprep = os.path.join( func_dir_fmriprep, - f"{subses_ents}_{task_ent}_{dir_ent}_from-scanner_to-T1w_mode-image_xfm.txt", + f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_from-scanner_to-T1w_mode-image_xfm.txt", ) copy_dictionary[identity_xfm].append(native_to_t1w_fmriprep) t1w_to_native_fmriprep = os.path.join( func_dir_fmriprep, - f"{subses_ents}_{task_ent}_{dir_ent}_from-T1w_to-scanner_mode-image_xfm.txt", + f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_from-T1w_to-scanner_mode-image_xfm.txt", ) copy_dictionary[identity_xfm].append(t1w_to_native_fmriprep) @@ -253,7 +261,10 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent): } bold_nifti_json_fmriprep = os.path.join( func_dir_fmriprep, - f"{subses_ents}_{task_ent}_{dir_ent}_{volspace_ent}_{RES_ENT}_desc-preproc_bold.json", + ( + f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_{volspace_ent}_{RES_ENT}" + "_desc-preproc_bold.json" + ), ) write_json(bold_metadata, bold_nifti_json_fmriprep) @@ -268,7 +279,7 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent): ) bold_cifti_json_fmriprep = os.path.join( func_dir_fmriprep, - f"{subses_ents}_{task_ent}_{dir_ent}_space-fsLR_den-91k_bold.dtseries.json", + f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_space-fsLR_den-91k_bold.dtseries.json", ) write_json(bold_metadata, bold_cifti_json_fmriprep) @@ -276,7 +287,7 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent): collect_confounds( task_dir_orig, func_dir_fmriprep, - f"{subses_ents}_{task_ent}_{dir_ent}", + f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}", work_dir=work_dir, bold_file=bold_nifti_orig, brainmask_file=os.path.join(task_dir_orig, "brainmask_fs.2.nii.gz"), @@ -289,7 +300,7 @@ def convert_hcp_to_bids_single_subject(in_dir, out_dir, sub_ent): os.makedirs(figdir, exist_ok=True) bbref_fig_fmriprep = os.path.join( figdir, - f"{subses_ents}_{task_ent}_{dir_ent}_desc-bbregister_bold.svg", + f"{subses_ents}_{task_ent}_{dir_ent}_{run_ent}_desc-bbregister_bold.svg", ) t1w = os.path.join(anat_dir_orig, "T1w.nii.gz") ribbon = os.path.join(anat_dir_orig, "ribbon.nii.gz") diff --git a/xcp_d/utils/qcmetrics.py b/xcp_d/utils/qcmetrics.py index 842996ff0..9619b8b1a 100644 --- a/xcp_d/utils/qcmetrics.py +++ b/xcp_d/utils/qcmetrics.py @@ -46,15 +46,15 @@ def compute_registration_qc(bold2t1w_mask, anat_brainmask, bold2template_mask, t template_mask_arr = nb.load(template_mask).get_fdata() reg_qc = { - "coregDice": [dice(bold2t1w_mask_arr, t1w_mask_arr)], - "coregPearson": [pearson(bold2t1w_mask_arr, t1w_mask_arr)], - "coregCoverage": [overlap(bold2t1w_mask_arr, t1w_mask_arr)], - "normDice": [dice(bold2template_mask_arr, template_mask_arr)], - "normPearson": [pearson(bold2template_mask_arr, template_mask_arr)], - "normCoverage": [overlap(bold2template_mask_arr, template_mask_arr)], + "coreg_dice": [dice(bold2t1w_mask_arr, t1w_mask_arr)], + "coreg_correlation": [pearson(bold2t1w_mask_arr, t1w_mask_arr)], + "coreg_overlap": [overlap(bold2t1w_mask_arr, t1w_mask_arr)], + "norm_dice": [dice(bold2template_mask_arr, template_mask_arr)], + "norm_correlation": [pearson(bold2template_mask_arr, template_mask_arr)], + "norm_overlap": [overlap(bold2template_mask_arr, template_mask_arr)], } qc_metadata = { - "coregDice": { + "coreg_dice": { "LongName": "Coregistration Sørensen-Dice Coefficient", "Description": ( "The Sørensen-Dice coefficient calculated between the binary brain masks from the " @@ -64,7 +64,7 @@ def compute_registration_qc(bold2t1w_mask, anat_brainmask, bold2template_mask, t ), "Term URL": "https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient", }, - "coregPearson": { + "coreg_correlation": { "LongName": "Coregistration Pearson Correlation", "Description": ( "The Pearson correlation coefficient calculated between the binary brain masks " @@ -74,7 +74,7 @@ def compute_registration_qc(bold2t1w_mask, anat_brainmask, bold2template_mask, t ), "Term URL": "https://en.wikipedia.org/wiki/Pearson_correlation_coefficient", }, - "coregCoverage": { + "coreg_overlap": { "LongName": "Coregistration Coverage Metric", "Description": ( "The Szymkiewicz-Simpson overlap coefficient calculated between the binary brain " @@ -83,7 +83,7 @@ def compute_registration_qc(bold2t1w_mask, anat_brainmask, bold2template_mask, t ), "Term URL": "https://en.wikipedia.org/wiki/Overlap_coefficient", }, - "normDice": { + "norm_dice": { "LongName": "Normalization Sørensen-Dice Coefficient", "Description": ( "The Sørensen-Dice coefficient calculated between the binary brain masks from the " @@ -93,7 +93,7 @@ def compute_registration_qc(bold2t1w_mask, anat_brainmask, bold2template_mask, t ), "Term URL": "https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient", }, - "normPearson": { + "norm_correlation": { "LongName": "Normalization Pearson Correlation", "Description": ( "The Pearson correlation coefficient calculated between the binary brain masks " @@ -103,7 +103,7 @@ def compute_registration_qc(bold2t1w_mask, anat_brainmask, bold2template_mask, t ), "Term URL": "https://en.wikipedia.org/wiki/Pearson_correlation_coefficient", }, - "normCoverage": { + "norm_overlap": { "LongName": "Normalization Overlap Coefficient", "Description": ( "The Szymkiewicz-Simpson overlap coefficient calculated between the binary brain " diff --git a/xcp_d/utils/utils.py b/xcp_d/utils/utils.py index 06397ab3c..a1662b6c0 100644 --- a/xcp_d/utils/utils.py +++ b/xcp_d/utils/utils.py @@ -464,6 +464,37 @@ def denoise_with_nilearn( t_r=TR, ) + # Replace any high-motion volumes at the beginning or end of the run with the closest + # low-motion volume's data. + outlier_idx = list(np.where(~sample_mask)[0]) + if outlier_idx: + # Use https://stackoverflow.com/a/48106843/2589328 to group consecutive blocks of outliers. + gaps = [[s, e] for s, e in zip(outlier_idx, outlier_idx[1:]) if s + 1 < e] + edges = iter(outlier_idx[:1] + sum(gaps, []) + outlier_idx[-1:]) + consecutive_outliers_idx = list(zip(edges, edges)) + first_outliers = consecutive_outliers_idx[0] + last_outliers = consecutive_outliers_idx[-1] + + # Replace outliers at beginning of run + if first_outliers[0] == 0: + LOGGER.warning( + f"Outlier volumes at beginning of run ({first_outliers[0]}-{first_outliers[1]}) " + "will be replaced with first non-outlier volume's values." + ) + interpolated_unfiltered_bold[ + : first_outliers[1] + 1, : + ] = interpolated_unfiltered_bold[first_outliers[1] + 1, :] + + # Replace outliers at end of run + if last_outliers[1] == n_volumes - 1: + LOGGER.warning( + f"Outlier volumes at end of run ({last_outliers[0]}-{last_outliers[1]}) " + "will be replaced with last non-outlier volume's values." + ) + interpolated_unfiltered_bold[last_outliers[0] :, :] = interpolated_unfiltered_bold[ + last_outliers[0] - 1, : + ] + # Now apply the bandpass filter to the interpolated, denoised data if low_pass is not None and high_pass is not None: # TODO: Replace with nilearn.signal.butterworth once 0.10.1 is released. diff --git a/xcp_d/workflows/base.py b/xcp_d/workflows/base.py index 7573d57ca..3c80040fa 100644 --- a/xcp_d/workflows/base.py +++ b/xcp_d/workflows/base.py @@ -635,7 +635,7 @@ def init_subject_wf( ) n_runs = len(preproc_files) - preproc_files = group_across_runs(preproc_files) + preproc_files = group_across_runs(preproc_files) # group files across runs and directions run_counter = 0 for ent_set, task_files in enumerate(preproc_files): # Assuming TR is constant across runs for a given combination of entities. diff --git a/xcp_d/workflows/concatenation.py b/xcp_d/workflows/concatenation.py index 29b4755c2..1e4a95b2a 100644 --- a/xcp_d/workflows/concatenation.py +++ b/xcp_d/workflows/concatenation.py @@ -28,7 +28,7 @@ def init_concatenate_data_wf( dcan_qc, name="concatenate_data_wf", ): - """Concatenate postprocessed data. + """Concatenate postprocessed data across runs and directions. Workflow Graph .. workflow:: @@ -99,7 +99,7 @@ def init_concatenate_data_wf( workflow = Workflow(name=name) workflow.__desc__ = """ -Postprocessing derivatives from multi-run tasks were then concatenated across runs. +Postprocessing derivatives from multi-run tasks were then concatenated across runs and directions. """ inputnode = pe.Node(