
Commit

Concatenate across directions as well as runs (PennLINC#965)
tsalo authored Oct 10, 2023
1 parent 273fa8c commit ac5a1b0
Showing 4 changed files with 73 additions and 9 deletions.
49 changes: 49 additions & 0 deletions xcp_d/tests/test_utils_bids.py
@@ -245,3 +245,52 @@ def test_get_entity(datasets):
)
with pytest.raises(ValueError, match="Unknown space"):
xbids.get_entity(fname, "space")


def test_group_across_runs():
"""Test group_across_runs."""
in_files = [
"/path/sub-01_task-axcpt_run-03_bold.nii.gz",
"/path/sub-01_task-rest_run-03_bold.nii.gz",
"/path/sub-01_task-rest_run-01_bold.nii.gz",
"/path/sub-01_task-axcpt_run-02_bold.nii.gz",
"/path/sub-01_task-rest_run-02_bold.nii.gz",
"/path/sub-01_task-axcpt_run-01_bold.nii.gz",
]
grouped_files = xbids.group_across_runs(in_files)
assert isinstance(grouped_files, list)
assert len(grouped_files[0]) == 3
assert grouped_files[0] == [
"/path/sub-01_task-axcpt_run-01_bold.nii.gz",
"/path/sub-01_task-axcpt_run-02_bold.nii.gz",
"/path/sub-01_task-axcpt_run-03_bold.nii.gz",
]
assert len(grouped_files[1]) == 3
assert grouped_files[1] == [
"/path/sub-01_task-rest_run-01_bold.nii.gz",
"/path/sub-01_task-rest_run-02_bold.nii.gz",
"/path/sub-01_task-rest_run-03_bold.nii.gz",
]

in_files = [
"/path/sub-01_task-rest_dir-LR_run-2_bold.nii.gz",
"/path/sub-01_task-rest_dir-RL_run-1_bold.nii.gz",
"/path/sub-01_task-axcpt_dir-LR_bold.nii.gz",
"/path/sub-01_task-rest_dir-RL_run-2_bold.nii.gz",
"/path/sub-01_task-rest_dir-LR_run-1_bold.nii.gz",
"/path/sub-01_task-axcpt_dir-RL_bold.nii.gz",
]
grouped_files = xbids.group_across_runs(in_files)
assert isinstance(grouped_files, list)
assert len(grouped_files[0]) == 2
assert grouped_files[0] == [
"/path/sub-01_task-axcpt_dir-LR_bold.nii.gz",
"/path/sub-01_task-axcpt_dir-RL_bold.nii.gz",
]
assert len(grouped_files[1]) == 4
assert grouped_files[1] == [
"/path/sub-01_task-rest_dir-LR_run-1_bold.nii.gz",
"/path/sub-01_task-rest_dir-RL_run-1_bold.nii.gz",
"/path/sub-01_task-rest_dir-LR_run-2_bold.nii.gz",
"/path/sub-01_task-rest_dir-RL_run-2_bold.nii.gz",
]
27 changes: 21 additions & 6 deletions xcp_d/utils/bids.py
@@ -896,7 +896,11 @@ def get_entity(filename, entity):


def group_across_runs(in_files):
"""Group preprocessed BOLD files by unique sets of entities, ignoring run.
"""Group preprocessed BOLD files by unique sets of entities, ignoring run and direction.
We only ignore direction for the sake of HCP.
This may lead to small problems for non-HCP datasets that differentiate scans based on
both run and direction.
Parameters
----------
@@ -913,20 +917,31 @@ def group_across_runs(in_files):

# First, extract run information and sort the input files by the runs,
# so that any cases where files are not already in ascending run order get fixed.
-    run_numbers = []
+    run_numbers, directions = [], []
for in_file in in_files:
run = get_entity(in_file, "run")
if run is None:
run = 0

direction = get_entity(in_file, "dir")
if direction is None:
direction = "none"

run_numbers.append(int(run))
directions.append(direction)

    # Combine the run numbers, directions, and filenames into a list of tuples.
    combined_data = list(zip(run_numbers, directions, in_files))

    # Sort the tuples first by run, then by direction, then by filename.
    sorted_data = sorted(combined_data, key=lambda x: (x[0], x[1], x[2]))

-    # Sort the files by the run numbers.
-    zipped_pairs = zip(run_numbers, in_files)
-    sorted_in_files = [x for _, x in sorted(zipped_pairs)]
+    # Extract the filenames from the sorted tuples.
+    sorted_in_files = [item[2] for item in sorted_data]

-    # Extract the unique sets of entities (i.e., the filename, minus the run entity).
+    # Extract the unique sets of entities (i.e., the filename, minus the run and dir entities).
    unique_filenames = [re.sub("_run-[0-9]+_", "_", os.path.basename(f)) for f in sorted_in_files]
+    unique_filenames = [re.sub("_dir-[0-9a-zA-Z]+_", "_", f) for f in unique_filenames]

    # Assign each in_file to a group of files with the same entities, except run and direction.
out_files, grouped_unique_filenames = [], []
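For readers skimming the diff, a minimal self-contained sketch of the new sort-then-group logic follows. The _entity helper is a hypothetical stand-in for xcp_d's get_entity, so treat this as an illustration of the approach rather than the package's exact implementation.

import os
import re


def _entity(fname, key):
    # Hypothetical stand-in for xcp_d's get_entity: pull "<key>-<value>" from a BIDS filename.
    match = re.search(f"_{key}-([0-9a-zA-Z]+)", os.path.basename(fname))
    return match.group(1) if match else None


def group_across_runs_sketch(in_files):
    # Sort by (run, direction, filename); missing entities sort as run 0 / direction "none".
    sorted_files = sorted(
        in_files,
        key=lambda f: (int(_entity(f, "run") or 0), _entity(f, "dir") or "none", f),
    )
    # Strip the run and dir entities to build a grouping key, mirroring the two re.sub calls above.
    groups = {}
    for fname in sorted_files:
        group_key = re.sub("_run-[0-9]+_", "_", os.path.basename(fname))
        group_key = re.sub("_dir-[0-9a-zA-Z]+_", "_", group_key)
        groups.setdefault(group_key, []).append(fname)
    return list(groups.values())

Applied to the second case in test_group_across_runs above, this sketch should produce the same two groups the test asserts: the two run-less axcpt files, then the four rest files ordered by run and direction.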
2 changes: 1 addition & 1 deletion xcp_d/workflows/base.py
@@ -635,7 +635,7 @@ def init_subject_wf(
)

n_runs = len(preproc_files)
-    preproc_files = group_across_runs(preproc_files)
+    preproc_files = group_across_runs(preproc_files)  # group files across runs and directions
run_counter = 0
for ent_set, task_files in enumerate(preproc_files):
# Assuming TR is constant across runs for a given combination of entities.
4 changes: 2 additions & 2 deletions xcp_d/workflows/concatenation.py
@@ -28,7 +28,7 @@ def init_concatenate_data_wf(
dcan_qc,
name="concatenate_data_wf",
):
"""Concatenate postprocessed data.
"""Concatenate postprocessed data across runs and directions.
Workflow Graph
.. workflow::
@@ -99,7 +99,7 @@
workflow = Workflow(name=name)

workflow.__desc__ = """
-Postprocessing derivatives from multi-run tasks were then concatenated across runs.
+Postprocessing derivatives from multi-run tasks were then concatenated across runs and directions.
"""

inputnode = pe.Node(
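As a rough illustration of what "concatenated across runs and directions" means for the BOLD data itself, here is a hedged nibabel/numpy sketch that stacks one group's runs along the time axis. The function name and scope are illustrative; xcp_d's actual concatenation interfaces also handle CIFTI/GIFTI files, confounds, and metadata.

import nibabel as nb
import numpy as np


def concatenate_bold_sketch(task_files, out_file):
    # task_files is one group from group_across_runs, already sorted by (run, direction).
    imgs = [nb.load(f) for f in task_files]
    # Preprocessed BOLD NIfTIs are 4D, with time as the last axis (axis 3).
    data = np.concatenate([img.get_fdata() for img in imgs], axis=3)
    nb.Nifti1Image(data, imgs[0].affine, imgs[0].header).to_filename(out_file)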
