From 6763e0979f0cab3d83d90a65d5d7bd6ae5e691c9 Mon Sep 17 00:00:00 2001
From: Alyssa Dai
Date: Mon, 23 Oct 2023 15:26:56 -0400
Subject: [PATCH] [REF] Refactor phenotypic/BIDS subjects check and add more intermediate stdout for `bagel bids` (#227)

* add utility for getting subject list from BIDS directory names

* add test for get_bids_subjects_simple()

* refactor initial subject IDs check and add more intermediate messages for BIDS command

* print provided inputs before initial bagel bids checks

* print first message from bagel bids even earlier
---
 bagel/bids_utils.py         | 13 +++++++++++++
 bagel/cli.py                | 26 +++++++++++++++-----------
 bagel/tests/test_utility.py | 16 ++++++++++++++++
 3 files changed, 44 insertions(+), 11 deletions(-)

diff --git a/bagel/bids_utils.py b/bagel/bids_utils.py
index 1cdc331..713b6b8 100644
--- a/bagel/bids_utils.py
+++ b/bagel/bids_utils.py
@@ -11,6 +11,19 @@ def map_term_to_namespace(term: str, namespace: dict) -> str:
     return namespace.get(term, False)
 
 
+def get_bids_subjects_simple(bids_dir: Path) -> list:
+    """Returns list of subject IDs (in format of sub-<SUBJECT>) for a BIDS directory inferred from the names of non-empty subdirectories."""
+    bids_subject_list = []
+    for path in bids_dir.iterdir():
+        if (
+            path.name.startswith("sub-")
+            and path.is_dir()
+            and any(path.iterdir())
+        ):
+            bids_subject_list.append(path.name)
+    return bids_subject_list
+
+
 def check_unique_bids_subjects(pheno_subjects: list, bids_subjects: list):
     """Raises informative error if subject IDs exist that are found only in the BIDS directory."""
     unique_bids_subjects = set(bids_subjects).difference(pheno_subjects)
diff --git a/bagel/cli.py b/bagel/cli.py
index 534ebd9..9c1c5aa 100644
--- a/bagel/cli.py
+++ b/bagel/cli.py
@@ -201,13 +201,17 @@ def bids(
     # Check if output file already exists
     check_overwrite(output, overwrite)
 
-    jsonld = load_json(jsonld_path)
-    layout = BIDSLayout(bids_dir, validate=True)
+    space = 32
+    print(
+        "Running initial checks of inputs...\n"
+        f" {'Phenotypic .jsonld to augment:' : <{space}} {jsonld_path}\n"
+        f" {'BIDS dataset directory:' : <{space}} {bids_dir}"
+    )
 
+    jsonld = load_json(jsonld_path)
     # Strip and store context to be added back later, since it's not part of
     # (and can't be easily added) to the existing data model
     context = {"@context": jsonld.pop("@context")}
-
     try:
         pheno_dataset = models.Dataset.parse_obj(jsonld)
     except ValidationError as err:
@@ -217,21 +221,21 @@ def bids(
         pheno_subject.hasLabel: pheno_subject
         for pheno_subject in getattr(pheno_dataset, "hasSamples")
     }
-    bids_subject_list = ["sub-" + sub_id for sub_id in layout.get_subjects()]
 
+    # TODO: Revert to using Layout.get_subjects() to get BIDS subjects once pybids performance is improved
     butil.check_unique_bids_subjects(
         pheno_subjects=pheno_subject_dict.keys(),
-        bids_subjects=bids_subject_list,
+        bids_subjects=butil.get_bids_subjects_simple(bids_dir),
     )
+    print("Initial checks of inputs passed.\n")
+
+    print("Parsing and validating BIDS dataset. This may take a while...")
+    layout = BIDSLayout(bids_dir, validate=True)
+    print("BIDS parsing completed.\n")
 
-    # Display validated input paths to user
-    space = 32
     print(
-        "Parsing BIDS metadata to be merged with phenotypic annotations:\n"
-        f" {'Phenotypic .jsonld to augment:' : <{space}} {jsonld_path}\n"
-        f" {'BIDS dataset directory:' : <{space}} {bids_dir}\n"
+        "Merging subject-level BIDS metadata with the phenotypic annotations...\n"
     )
-
     for bids_sub_id in layout.get_subjects():
         pheno_subject = pheno_subject_dict.get(f"sub-{bids_sub_id}")
         session_list = []
diff --git a/bagel/tests/test_utility.py b/bagel/tests/test_utility.py
index 5b7c5f1..52f5789 100644
--- a/bagel/tests/test_utility.py
+++ b/bagel/tests/test_utility.py
@@ -336,6 +336,22 @@ def test_generate_context(get_test_context, model, attributes):
         assert attribute in get_test_context["@context"]
 
 
+@pytest.mark.parametrize(
+    "bids_dir",
+    ["synthetic", "ds000248"],
+)
+def test_get_bids_subjects_simple(bids_path, bids_dir):
+    """Test that get_bids_subjects_simple() correctly extracts subject IDs from a BIDS directory."""
+    bids_subject_list = butil.get_bids_subjects_simple(bids_path / bids_dir)
+    expected_subjects = [
+        f"sub-{sub_id}"
+        for sub_id in BIDSLayout(
+            bids_path / bids_dir, validate=True
+        ).get_subjects()
+    ]
+    assert sorted(bids_subject_list) == sorted(expected_subjects)
+
+
 @pytest.mark.parametrize(
     "bids_list, expectation",
     [