From c230792a05ba7f86357de2e73c6f7906a0734383 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 4 Oct 2024 15:32:21 -0400 Subject: [PATCH 01/21] Fix issue #283: Rename Key Group to Entity Set --- HISTORY.rst | 2 +- cubids/cli.py | 4 +- cubids/constants.py | 4 +- cubids/cubids.py | 170 +++++++++--------- cubids/data/config.yml | 4 +- cubids/metadata_merge.py | 10 +- cubids/tests/data/BIDS_Dataset/README | 2 +- .../CuBIDS_Toy_Dataset_Curation/v0_files.csv | 40 ++--- .../v0_summary.csv | 22 +-- .../CuBIDS_Toy_Dataset_Curation/v1_files.csv | 2 +- .../v1_summary.csv | 2 +- .../CuBIDS_Toy_Dataset_Curation/v2_files.csv | 2 +- .../v2_summary.csv | 2 +- cubids/tests/data/inconsistent/README | 2 +- cubids/tests/test_bond.py | 86 ++++----- cubids/tests/test_cubids.py | 28 +-- cubids/tests/utils.py | 8 +- docs/_static/PNC_example_edited.csv | 16 +- docs/_static/PNC_example_unedited.csv | 16 +- docs/_static/PNC_post_apply_summary.csv | 14 +- .../PNC_pre_apply_summary_dwi_run1.csv | 12 +- ...NC_pre_apply_summary_dwi_run1_deletion.csv | 12 +- docs/_static/v0_edited_summary.csv | 22 +-- docs/_static/v0_summary.csv | 2 +- docs/about.rst | 8 +- docs/example.rst | 6 +- docs/glossary.rst | 16 +- docs/notebooks/HTML_param_groups.ipynb | 46 ++--- docs/notebooks/Key_and_Param_Groups.ipynb | 30 ++-- docs/notebooks/keyparamgrouptest.ipynb | 32 ++-- docs/notebooks/workwithtestdata.ipynb | 26 +-- docs/usage.rst | 46 ++--- 32 files changed, 347 insertions(+), 347 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index db648f65b..dd7a50641 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -45,7 +45,7 @@ New Contributors * updated _update_json to no longer use pybids by @scovitz in https://github.com/PennLINC/CuBIDS/pull/232 * Minor tune ups: codespell'ing (fixes + tox + CI (github actions)), remove of unintended to be committed 2 files by @yarikoptic in https://github.com/PennLINC/CuBIDS/pull/239 * ENH: Make "NumVolumes" an integer for 3D images by @cookpa in https://github.com/PennLINC/CuBIDS/pull/211 -* adding note about fmap renamekeygroups by @megardn in https://github.com/PennLINC/CuBIDS/pull/140 +* adding note about fmap renameentitysets by @megardn in https://github.com/PennLINC/CuBIDS/pull/140 * Update usage.rst by @megardn in https://github.com/PennLINC/CuBIDS/pull/138 * printing erroneous jsons and only rounding float parameters by @scovitz in https://github.com/PennLINC/CuBIDS/pull/257 diff --git a/cubids/cli.py b/cubids/cli.py index 6fde0885e..f87ffc6c4 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -230,7 +230,7 @@ def _parse_apply(): action="store", help=( "path to the _summary.tsv that has been edited " - "in the MergeInto and RenameKeyGroup columns. If the " + "in the MergeInto and RenameEntitySet columns. If the " " summary table is located in the code/CuBIDS " "directory, then users can just pass the summary tsv " "filename instead of the full path to the tsv" @@ -242,7 +242,7 @@ def _parse_apply(): action="store", help=( "path to the _files.tsv that has been edited " - "in the MergeInto and RenameKeyGroup columns. If the " + "in the MergeInto and RenameEntitySet columns. If the " "files table is located in the code/CuBIDS " "directory, then users can just pass the files tsv " "filename instead of the full path to the tsv" diff --git a/cubids/constants.py b/cubids/constants.py index ec24b6691..dfbc2072b 100644 --- a/cubids/constants.py +++ b/cubids/constants.py @@ -1,9 +1,9 @@ """Constants for CuBIDS.""" # Names of identifier variables. 
-# Used to place KeyGroup and ParamGroup at the beginning of a dataframe, +# Used to place EntitySet and ParamGroup at the beginning of a dataframe, # but both are hardcoded in the relevant function. -ID_VARS = set(["KeyGroup", "ParamGroup", "FilePath"]) +ID_VARS = set(["EntitySet", "ParamGroup", "FilePath"]) # Entities that should not be used to group parameter sets NON_KEY_ENTITIES = set(["subject", "session", "extension"]) # Multi-dimensional keys SliceTiming XXX: what is this line about? diff --git a/cubids/cubids.py b/cubids/cubids.py index 9195163aa..f056f8fd2 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -55,7 +55,7 @@ class CuBIDS(object): _layout : :obj:`bids.layout.BIDSLayout` The BIDSLayout object. keys_files : :obj:`dict` - A dictionary of key groups and the files that belong to them. + A dictionary of entity sets and the files that belong to them. fieldmaps_cached : :obj:`bool` If True, the fieldmaps have been cached. datalad_ready : :obj:`bool` @@ -100,7 +100,7 @@ def __init__( self.fieldmaps_cached = False self.datalad_ready = False self.datalad_handle = None - self.old_filenames = [] # files whose key groups changed + self.old_filenames = [] # files whose entity sets changed self.new_filenames = [] # new filenames for files to change self.IF_rename_paths = [] # fmap jsons with rename intended fors self.grouping_config = load_config(grouping_config) @@ -309,7 +309,7 @@ def add_nifti_info(self): def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=True): """Apply changes documented in the edited summary tsv and generate the new tsv files. - This function looks at the RenameKeyGroup and MergeInto + This function looks at the RenameEntitySet and MergeInto columns and modifies the bids dataset according to the specified changs. 
@@ -346,8 +346,8 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T merge_commands = [] for source_id, dest_id in ok_merges: - dest_files = files_df.loc[(files_df[["ParamGroup", "KeyGroup"]] == dest_id).all(1)] - source_files = files_df.loc[(files_df[["ParamGroup", "KeyGroup"]] == source_id).all(1)] + dest_files = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == dest_id).all(1)] + source_files = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == source_id).all(1)] # Get a source json file img_full_path = self.path + source_files.iloc[0].FilePath @@ -361,7 +361,7 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T # delete_commands = [] to_remove = [] for rm_id in deletions: - files_to_rm = files_df.loc[(files_df[["ParamGroup", "KeyGroup"]] == rm_id).all(1)] + files_to_rm = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)] for rm_me in files_to_rm.FilePath: if Path(self.path + rm_me).exists(): @@ -372,21 +372,21 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T self._purge_associations(to_remove) # Now do the file renaming - change_keys_df = summary_df[summary_df.RenameKeyGroup.notnull()] + change_keys_df = summary_df[summary_df.RenameEntitySet.notnull()] move_ops = [] # return if nothing to change if len(change_keys_df) > 0: - key_groups = {} + entity_sets = {} for i in range(len(change_keys_df)): - new_key = change_keys_df.iloc[i]["RenameKeyGroup"] + new_key = change_keys_df.iloc[i]["RenameEntitySet"] old_key_param = change_keys_df.iloc[i]["KeyParamGroup"] # add to dictionary - key_groups[old_key_param] = new_key + entity_sets[old_key_param] = new_key - # orig key/param tuples that will have new key group - to_change = list(key_groups.keys()) + # orig key/param tuples that will have new entity set + to_change = list(entity_sets.keys()) for row in range(len(files_df)): file_path = self.path + files_df.loc[row, "FilePath"] @@ -396,11 +396,11 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T if key_param_group in to_change: orig_key_param = files_df.loc[row, "KeyParamGroup"] - new_key = key_groups[orig_key_param] + new_key = entity_sets[orig_key_param] - new_entities = _key_group_to_entities(new_key) + new_entities = _entity_set_to_entities(new_key) - # generate new filenames according to new key group + # generate new filenames according to new entity set self.change_filename(file_path, new_entities) # create string of mv command ; mv command for dlapi.run @@ -451,17 +451,17 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T subprocess.run(["rm", "-rf", "renames"]) def change_filename(self, filepath, entities): - """Apply changes to a filename based on the renamed key groups. + """Apply changes to a filename based on the renamed entity sets. - This function takes into account the new key group names - and renames all files whose key group names changed. + This function takes into account the new entity set names + and renames all files whose entity set names changed. Parameters ---------- filepath : :obj:`str` - Path prefix to a file in the affected key group change. + Path prefix to a file in the affected entity set change. entities : :obj:`dict` - A pybids dictionary of entities parsed from the new key group name. + A pybids dictionary of entities parsed from the new entity set name. 
Notes ----- @@ -904,13 +904,13 @@ def _cache_fieldmaps(self): # no intended for found return misfits - def get_param_groups_from_key_group(self, key_group): - """Split key groups into param groups based on json metadata. + def get_param_groups_from_entity_set(self, entity_set): + """Split entity sets into param groups based on json metadata. Parameters ---------- - key_group : str - Key group name. + entity_set : str + Entity set name. Returns ------- @@ -921,7 +921,7 @@ def get_param_groups_from_key_group(self, key_group): """ if not self.fieldmaps_cached: raise Exception("Fieldmaps must be cached to find parameter groups.") - key_entities = _key_group_to_entities(key_group) + key_entities = _entity_set_to_entities(entity_set) key_entities["extension"] = ".nii[.gz]*" matching_files = self.layout.get( @@ -932,12 +932,12 @@ def get_param_groups_from_key_group(self, key_group): # entities do not also get added to matching_files to_include = [] for filepath in matching_files: - f_key_group = _file_to_key_group(filepath) + f_entity_set = _file_to_entity_set(filepath) - if f_key_group == key_group: + if f_entity_set == entity_set: to_include.append(filepath) - # get the modality associated with the key group + # get the modality associated with the entity set modalities = ["/dwi/", "/anat/", "/func/", "/perf/", "/fmap/"] modality = "" for mod in modalities: @@ -951,7 +951,7 @@ def get_param_groups_from_key_group(self, key_group): ret = _get_param_groups( to_include, self.fieldmap_lookup, - key_group, + entity_set, self.grouping_config, modality, self.keys_files, @@ -996,8 +996,8 @@ def create_data_dictionary(self): self.data_dict["Notes"]["Description"] = desc2 desc31 = "Auto-generated suggested rename of Non-Domiannt Groups" desc32 = " based on variant scanning parameters" - self.data_dict["RenameKeyGroup"] = {} - self.data_dict["RenameKeyGroup"]["Description"] = desc31 + desc32 + self.data_dict["RenameEntitySet"] = {} + self.data_dict["RenameEntitySet"]["Description"] = desc31 + desc32 desc4 = "Number of Files in the Parameter Group" self.data_dict["Counts"] = {} self.data_dict["Counts"]["Description"] = desc4 @@ -1008,19 +1008,19 @@ def create_data_dictionary(self): self.data_dict["MergeInto"]["Description"] = desc5 self.data_dict["FilePath"] = {} self.data_dict["FilePath"]["Description"] = "Location of file" - desc6 = "Number of participants in a Key Group" - self.data_dict["KeyGroupCount"] = {} - self.data_dict["KeyGroupCount"]["Description"] = desc6 + desc6 = "Number of participants in a Entity Set" + self.data_dict["EntitySetCount"] = {} + self.data_dict["EntitySetCount"]["Description"] = desc6 desc71 = "A set of scans whose filenames share all BIDS filename" desc72 = " key-value pairs, excluding subject and session" - self.data_dict["KeyGroup"] = {} - self.data_dict["KeyGroup"]["Description"] = desc71 + desc72 + self.data_dict["EntitySet"] = {} + self.data_dict["EntitySet"]["Description"] = desc71 + desc72 desc81 = "The set of scans with identical metadata parameters in their" - desc82 = " sidecars (defined within a Key Group and denoted" + desc82 = " sidecars (defined within a Entity Set and denoted" desc83 = " numerically)" self.data_dict["ParamGroup"] = {} self.data_dict["ParamGroup"]["Description"] = desc81 + desc82 + desc83 - desc91 = "Key Group name and Param Group number separated by a double" + desc91 = "Entity Set name and Param Group number separated by a double" desc92 = " underscore" self.data_dict["KeyParamGroup"] = {} self.data_dict["KeyParamGroup"]["Description"] = desc91 
+ desc92 @@ -1068,16 +1068,16 @@ def get_data_dictionary(self, df): def get_param_groups_dataframes(self): """Create DataFrames of files x param groups and a summary.""" - key_groups = self.get_key_groups() + entity_sets = self.get_entity_sets() labeled_files = [] param_group_summaries = [] - for key_group in key_groups: + for entity_set in entity_sets: try: ( labeled_file_params, param_summary, modality, - ) = self.get_param_groups_from_key_group(key_group) + ) = self.get_param_groups_from_entity_set(entity_set) except Exception: continue if labeled_file_params is None: @@ -1095,20 +1095,20 @@ def get_param_groups_dataframes(self): summary = _order_columns(pd.concat(param_group_summaries, ignore_index=True)) # create new col that strings key and param group together - summary["KeyParamGroup"] = summary["KeyGroup"] + "__" + summary["ParamGroup"].map(str) + summary["KeyParamGroup"] = summary["EntitySet"] + "__" + summary["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = summary.pop("KeyParamGroup") summary.insert(0, "KeyParamGroup", key_param_col) # do the same for the files df - big_df["KeyParamGroup"] = big_df["KeyGroup"] + "__" + big_df["ParamGroup"].map(str) + big_df["KeyParamGroup"] = big_df["EntitySet"] + "__" + big_df["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = big_df.pop("KeyParamGroup") big_df.insert(0, "KeyParamGroup", key_param_col) - summary.insert(0, "RenameKeyGroup", np.nan) + summary.insert(0, "RenameEntitySet", np.nan) summary.insert(0, "MergeInto", np.nan) summary.insert(0, "ManualCheck", np.nan) summary.insert(0, "Notes", np.nan) @@ -1122,7 +1122,7 @@ def get_param_groups_dataframes(self): relational = self.grouping_config.get("relational_params") # list of columns names that we account for in suggested renaming - summary["RenameKeyGroup"] = summary["RenameKeyGroup"].apply(str) + summary["RenameEntitySet"] = summary["RenameEntitySet"].apply(str) rename_cols = [] tolerance_cols = [] @@ -1160,7 +1160,7 @@ def get_param_groups_dataframes(self): if str(summary.loc[row, "ParamGroup"]) == "1": val = {} # grab col, all vals send to dict - key = summary.loc[row, "KeyGroup"] + key = summary.loc[row, "EntitySet"] for col in rename_cols: summary[col] = summary[col].apply(str) val[col] = summary.loc[row, col] @@ -1170,8 +1170,8 @@ def get_param_groups_dataframes(self): for row in range(len(summary)): # check to see if renaming has already happened renamed = False - entities = _key_group_to_entities(summary.loc[row, "KeyGroup"]) - if "VARIANT" in summary.loc[row, "KeyGroup"]: + entities = _entity_set_to_entities(summary.loc[row, "EntitySet"]) + if "VARIANT" in summary.loc[row, "EntitySet"]: renamed = True # if NumVolumes is nan, set to 1.0 @@ -1183,7 +1183,7 @@ def get_param_groups_dataframes(self): acq_str = "VARIANT" # now we know we have a deviant param group # check if TR is same as param group 1 - key = summary.loc[row, "KeyGroup"] + key = summary.loc[row, "EntitySet"] for col in rename_cols: summary[col] = summary[col].apply(str) if summary.loc[row, col] != dom_dict[key][col]: @@ -1206,20 +1206,20 @@ def get_param_groups_dataframes(self): if "acquisition" in entities.keys(): acq = f"acquisition-{entities['acquisition'] + acq_str}" - new_name = summary.loc[row, "KeyGroup"].replace( + new_name = summary.loc[row, "EntitySet"].replace( f"acquisition-{entities['acquisition']}", acq, ) else: acq = f"acquisition-{acq_str}" - new_name = acq + "_" + summary.loc[row, "KeyGroup"] + new_name = acq + "_" + 
summary.loc[row, "EntitySet"] - summary.at[row, "RenameKeyGroup"] = new_name + summary.at[row, "RenameEntitySet"] = new_name # convert all "nan" to empty str # so they don't show up in the summary tsv - if summary.loc[row, "RenameKeyGroup"] == "nan": - summary.at[row, "RenameKeyGroup"] = "" + if summary.loc[row, "RenameEntitySet"] == "nan": + summary.at[row, "RenameEntitySet"] = "" for col in rename_cols: if summary.loc[row, col] == "nan": @@ -1251,8 +1251,8 @@ def get_tsvs(self, path_prefix): big_df, summary = self.get_param_groups_dataframes() - summary = summary.sort_values(by=["Modality", "KeyGroupCount"], ascending=[True, False]) - big_df = big_df.sort_values(by=["Modality", "KeyGroupCount"], ascending=[True, False]) + summary = summary.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False]) + big_df = big_df.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False]) # Create json dictionaries for summary and files tsvs self.create_data_dictionary() @@ -1275,12 +1275,12 @@ def get_tsvs(self, path_prefix): print(f"CuBIDS detected {len(summary)} Parameter Groups.") - def get_key_groups(self): - """Identify the key groups for the bids dataset.""" + def get_entity_sets(self): + """Identify the entity sets for the bids dataset.""" # reset self.keys_files self.keys_files = {} - key_groups = set() + entity_sets = set() for path in Path(self.path).rglob("sub-*/**/*.*"): # ignore all dot directories @@ -1288,17 +1288,17 @@ def get_key_groups(self): continue if str(path).endswith(".nii") or str(path).endswith(".nii.gz"): - key_groups.update((_file_to_key_group(path),)) + entity_sets.update((_file_to_entity_set(path),)) - # Fill the dictionary of key group, list of filenames pairrs - ret = _file_to_key_group(path) + # Fill the dictionary of entity set, list of filenames pairrs + ret = _file_to_entity_set(path) if ret not in self.keys_files.keys(): self.keys_files[ret] = [] self.keys_files[ret].append(path) - return sorted(key_groups) + return sorted(entity_sets) def change_metadata(self, filters, metadata): """Change metadata. @@ -1394,21 +1394,21 @@ def _update_json(json_file, metadata): print("INVALID JSON DATA") -def _key_group_to_entities(key_group): - """Split a key_group name into a pybids dictionary of entities.""" - return dict([group.split("-") for group in key_group.split("_")]) +def _entity_set_to_entities(entity_set): + """Split a entity_set name into a pybids dictionary of entities.""" + return dict([group.split("-") for group in entity_set.split("_")]) -def _entities_to_key_group(entities): - """Convert a pybids entities dictionary into a key group name.""" +def _entities_to_entity_set(entities): + """Convert a pybids entities dictionary into a entity set name.""" group_keys = sorted(entities.keys() - NON_KEY_ENTITIES) return "_".join([f"{key}-{entities[key]}" for key in group_keys]) -def _file_to_key_group(filename): - """Identify and return the key group of a bids valid filename.""" +def _file_to_entity_set(filename): + """Identify and return the entity set of a bids valid filename.""" entities = parse_file_entities(str(filename)) - return _entities_to_key_group(entities) + return _entities_to_entity_set(entities) def _get_intended_for_reference(scan): @@ -1418,7 +1418,7 @@ def _get_intended_for_reference(scan): def _get_param_groups( files, fieldmap_lookup, - key_group_name, + entity_set_name, grouping_config, modality, keys_files, @@ -1447,7 +1447,7 @@ def _get_param_groups( A data frame with param group summaries. 
""" if not files: - print("WARNING: no files for", key_group_name) + print("WARNING: no files for", entity_set_name) return None, None # Split the config into separate parts @@ -1475,12 +1475,12 @@ def _get_param_groups( wanted_keys = metadata.keys() & imaging_params example_data = {key: metadata[key] for key in wanted_keys} - example_data["KeyGroup"] = key_group_name + example_data["EntitySet"] = entity_set_name # Get the fieldmaps out and add their types if "FieldmapKey" in relational_params: fieldmap_types = sorted( - [_file_to_key_group(fmap.path) for fmap in fieldmap_lookup[path]] + [_file_to_entity_set(fmap.path) for fmap in fieldmap_lookup[path]] ) # check if config says columns or bool @@ -1499,21 +1499,21 @@ def _get_param_groups( example_data["FilePath"] = path - # If it's a fieldmap, see what key group it's intended to correct + # If it's a fieldmap, see what entity set it's intended to correct if "IntendedForKey" in relational_params: - intended_key_groups = sorted( - [_file_to_key_group(intention) for intention in intentions] + intended_entity_sets = sorted( + [_file_to_entity_set(intention) for intention in intentions] ) # check if config says columns or bool if relational_params["IntendedForKey"]["display_mode"] == "bool": - if len(intended_key_groups) > 0: + if len(intended_entity_sets) > 0: example_data["UsedAsFieldmap"] = True else: example_data["UsedAsFieldmap"] = False else: - for intention_num, intention_key_group in enumerate(intended_key_groups): - example_data[f"IntendedForKey{intention_num:02d}"] = intention_key_group + for intention_num, intention_entity_set in enumerate(intended_entity_sets): + example_data[f"IntendedForKey{intention_num:02d}"] = intention_entity_set dfs.append(example_data) @@ -1544,8 +1544,8 @@ def _get_param_groups( # add the modality as a column deduped["Modality"] = modality - # add key group count column (will delete later) - deduped["KeyGroupCount"] = len(keys_files[key_group_name]) + # add entity set count column (will delete later) + deduped["EntitySetCount"] = len(keys_files[entity_set_name]) # Add the ParamGroup to the whole list of files labeled_files = pd.merge(df, deduped, on=check_cols) @@ -1684,7 +1684,7 @@ def format_params(param_group_df, config, modality): def _order_columns(df): """Organize columns of the summary and files DataFrames. - This ensures that KeyGroup and ParamGroup are the first two columns, + This ensures that EntitySet and ParamGroup are the first two columns, FilePath is the last, and the others are sorted alphabetically. 
Notes @@ -1695,7 +1695,7 @@ def _order_columns(df): """ cols = set(df.columns.to_list()) non_id_cols = cols - ID_VARS - new_columns = ["KeyGroup", "ParamGroup"] + sorted(non_id_cols) + new_columns = ["EntitySet", "ParamGroup"] + sorted(non_id_cols) if "FilePath" in cols: new_columns.append("FilePath") diff --git a/cubids/data/config.yml b/cubids/data/config.yml index eb442399f..9ebffc8e9 100644 --- a/cubids/data/config.yml +++ b/cubids/data/config.yml @@ -177,13 +177,13 @@ relational_params: FieldmapKey: # can be # "bool": a single column with true if an IntendedFor is present - # "columns": List all intended key groups in separate columns (IntendedForXX) + # "columns": List all intended entity sets in separate columns (IntendedForXX) display_mode: bool suggest_variant_rename: yes IntendedForKey: # can be # "bool": a single column with true if an IntendedFor is present - # "columns": List all intended key groups in separate columns (IntendedForXX) + # "columns": List all intended entity sets in separate columns (IntendedForXX) display_mode: bool suggest_variant_rename: yes # BIDS fields to directly include in the Parameter Groupings diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index 5bd3c9579..2087d009c 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -54,10 +54,10 @@ def _check_sdc_cols(meta1, meta2): needs_merge = actions[np.isfinite(actions["MergeInto"])] for _, row_needs_merge in needs_merge.iterrows(): - source_param_key = tuple(row_needs_merge[["MergeInto", "KeyGroup"]]) - dest_param_key = tuple(row_needs_merge[["ParamGroup", "KeyGroup"]]) + source_param_key = tuple(row_needs_merge[["MergeInto", "EntitySet"]]) + dest_param_key = tuple(row_needs_merge[["ParamGroup", "EntitySet"]]) dest_metadata = row_needs_merge.to_dict() - source_row = actions.loc[(actions[["ParamGroup", "KeyGroup"]] == source_param_key).all(1)] + source_row = actions.loc[(actions[["ParamGroup", "EntitySet"]] == source_param_key).all(1)] if source_param_key[0] == 0: print("going to delete ", dest_param_key) @@ -299,10 +299,10 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): if acq_group_level == "subject": acq_id = (file_entities.get("subject"), file_entities.get("session")) - acq_groups[acq_id].append((row.KeyGroup, row.ParamGroup)) + acq_groups[acq_id].append((row.EntitySet, row.ParamGroup)) else: acq_id = (file_entities.get("subject"), None) - acq_groups[acq_id].append((row.KeyGroup, row.ParamGroup, file_entities.get("session"))) + acq_groups[acq_id].append((row.EntitySet, row.ParamGroup, file_entities.get("session"))) # Map the contents to a list of subjects/sessions contents_to_subjects = defaultdict(list) diff --git a/cubids/tests/data/BIDS_Dataset/README b/cubids/tests/data/BIDS_Dataset/README index f96d811a8..42be407d1 100644 --- a/cubids/tests/data/BIDS_Dataset/README +++ b/cubids/tests/data/BIDS_Dataset/README @@ -1,7 +1,7 @@ Heavily downsampled CS-DSI testing datasets This data has had some parameters changed so that there are different -parameter groups in the same key groups. +parameter groups in the same entity sets. 
## Changes diff --git a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_files.csv b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_files.csv index 21717ed22..68ea3dc61 100644 --- a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_files.csv +++ b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_files.csv @@ -1,21 +1,21 @@ -KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3,FilePath -datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/anat/sub-01_ses-phdiff_T1w.nii.gz -datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/anat/sub-02_ses-phdiff_T1w.nii.gz -datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/anat/sub-03_ses-phdiff_T1w.nii.gz -acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,1,36,43,36,0.04,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz -acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/dwi/sub-02_ses-phdiff_acq-HASC55AP_dwi.nii.gz -acquisition-HASC55AP_datatype-dwi_suffix-dwi__3,acquisition-HASC55AP_datatype-dwi_suffix-dwi,3,1,36,43,36,0.089,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/dwi/sub-03_ses-phdiff_acq-HASC55AP_dwi.nii.gz -acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_acq-v4_magnitude1.nii.gz -acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_acq-v4_magnitude1.nii.gz -acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_acq-v4_magnitude1.nii.gz -acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_acq-v4_magnitude2.nii.gz 
-acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_acq-v4_magnitude2.nii.gz -acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_acq-v4_magnitude2.nii.gz -acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_acq-v4_phasediff.nii.gz -acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_acq-v4_phasediff.nii.gz -acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_acq-v4_phasediff.nii.gz -datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,2,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_dir-PA_epi.nii.gz -datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,2,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_dir-PA_epi.nii.gz -datatype-fmap_direction-PA_fmap-epi_suffix-epi__2,datatype-fmap_direction-PA_fmap-epi_suffix-epi,2,1,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_dir-PA_epi.nii.gz -datatype-func_suffix-bold_task-rest__1,datatype-func_suffix-bold_task-rest,1,1,36,43,36,0.089,0.00069,90.0,FALSE,2,func,36,61.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/func/sub-03_ses-phdiff_task-rest_bold.nii.gz +KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3,FilePath +datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/anat/sub-01_ses-phdiff_T1w.nii.gz +datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/anat/sub-02_ses-phdiff_T1w.nii.gz 
+datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/anat/sub-03_ses-phdiff_T1w.nii.gz +acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,1,36,43,36,0.04,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz +acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/dwi/sub-02_ses-phdiff_acq-HASC55AP_dwi.nii.gz +acquisition-HASC55AP_datatype-dwi_suffix-dwi__3,acquisition-HASC55AP_datatype-dwi_suffix-dwi,3,1,36,43,36,0.089,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/dwi/sub-03_ses-phdiff_acq-HASC55AP_dwi.nii.gz +acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_acq-v4_magnitude1.nii.gz +acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_acq-v4_magnitude1.nii.gz +acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_acq-v4_magnitude1.nii.gz +acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_acq-v4_magnitude2.nii.gz +acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_acq-v4_magnitude2.nii.gz +acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_acq-v4_magnitude2.nii.gz +acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_acq-v4_phasediff.nii.gz +acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_acq-v4_phasediff.nii.gz 
+acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_acq-v4_phasediff.nii.gz +datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,2,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/fmap/sub-01_ses-phdiff_dir-PA_epi.nii.gz +datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,2,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/fmap/sub-02_ses-phdiff_dir-PA_epi.nii.gz +datatype-fmap_direction-PA_fmap-epi_suffix-epi__2,datatype-fmap_direction-PA_fmap-epi_suffix-epi,2,1,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,TRUE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/fmap/sub-03_ses-phdiff_dir-PA_epi.nii.gz +datatype-func_suffix-bold_task-rest__1,datatype-func_suffix-bold_task-rest,1,1,36,43,36,0.089,0.00069,90.0,FALSE,2,func,36,61.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/func/sub-03_ses-phdiff_task-rest_bold.nii.gz datatype-func_suffix-bold_task-rest__2,datatype-func_suffix-bold_task-rest,2,1,36,43,36,0.089,0.00069,90.0,TRUE,2,func,36,10.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/func/sub-01_ses-phdiff_task-rest_bold.nii.gz \ No newline at end of file diff --git a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_summary.csv b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_summary.csv index d308a33ba..7f77a4c5e 100644 --- a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_summary.csv +++ b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v0_summary.csv @@ -1,12 +1,12 @@ -Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 -,,,,datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0 -,,,,acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,1,36,43,36,0.04,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 -,,,acquisition-HASC55APVARIANTEchoTimeFlipAngle_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 -,,,acquisition-HASC55APVARIANTEchoTime_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__3,acquisition-HASC55AP_datatype-dwi_suffix-dwi,3,1,36,43,36,0.089,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 -,,,,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 
-,,,,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 -,,,,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0 -,,,,datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,2,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0 -,,,acquisition-VARIANTIsUsed_datatype-fmap_direction-PA_fmap-epi_suffix-epi,datatype-fmap_direction-PA_fmap-epi_suffix-epi__2,datatype-fmap_direction-PA_fmap-epi_suffix-epi,2,1,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,TRUE,5.0,5.0,5.0 -,,,,datatype-func_suffix-bold_task-rest__1,datatype-func_suffix-bold_task-rest,1,1,36,43,36,0.089,0.00069,90.0,FALSE,2,func,36,61.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0 +Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +,,,,datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,FALSE,3,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0 +,,,,acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,1,36,43,36,0.04,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 +,,,acquisition-HASC55APVARIANTEchoTimeFlipAngle_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 +,,,acquisition-HASC55APVARIANTEchoTime_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__3,acquisition-HASC55AP_datatype-dwi_suffix-dwi,3,1,36,43,36,0.089,0.00069,90.0,TRUE,3,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 +,,,,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,3,36,43,36,0.004,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 +,,,,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,3,36,43,36,0.006,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 +,,,,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,3,36,43,36,,,60.0,FALSE,3,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0 +,,,,datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,2,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0 +,,,acquisition-VARIANTIsUsed_datatype-fmap_direction-PA_fmap-epi_suffix-epi,datatype-fmap_direction-PA_fmap-epi_suffix-epi__2,datatype-fmap_direction-PA_fmap-epi_suffix-epi,2,1,36,43,36,0.089,0.00069,90.0,FALSE,3,fmap,36,1.0,FALSE,,j-,4.2,0.072,TRUE,5.0,5.0,5.0 +,,,,datatype-func_suffix-bold_task-rest__1,datatype-func_suffix-bold_task-rest,1,1,36,43,36,0.089,0.00069,90.0,FALSE,2,func,36,61.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0 
,,,acquisition-VARIANTNumVolumesHasFmap_datatype-func_suffix-bold_task-rest,datatype-func_suffix-bold_task-rest__2,datatype-func_suffix-bold_task-rest,2,1,36,43,36,0.089,0.00069,90.0,TRUE,2,func,36,10.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0 \ No newline at end of file diff --git a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_files.csv b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_files.csv index 06c1da069..047809e3a 100644 --- a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_files.csv +++ b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_files.csv @@ -1,4 +1,4 @@ -KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3,FilePath +KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3,FilePath datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/anat/sub-01_ses-phdiff_T1w.nii.gz datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/anat/sub-02_ses-phdiff_T1w.nii.gz datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/anat/sub-03_ses-phdiff_T1w.nii.gz diff --git a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_summary.csv b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_summary.csv index a551d9017..000b64fa2 100644 --- a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_summary.csv +++ b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v1_summary.csv @@ -1,4 +1,4 @@ -Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 ,,,,datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0 ,,,,acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,1,36,43,36,0.04,0.00069,90.0,True,3,dwi,36,61.0,False,,j,4.2,0.072,False,5.0,5.0,5.0 ,,,acquisition-HASC55APVARIANTEchoTimeFlipAngle_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,,True,3,dwi,36,61.0,False,,j,4.2,0.072,False,5.0,5.0,5.0 diff --git a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_files.csv 
b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_files.csv index ec5445c3c..dfde4aebe 100644 --- a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_files.csv +++ b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_files.csv @@ -1,4 +1,4 @@ -KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3,FilePath +KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3,FilePath datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-01/ses-phdiff/anat/sub-01_ses-phdiff_T1w.nii.gz datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-02/ses-phdiff/anat/sub-02_ses-phdiff_T1w.nii.gz datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0,/Users/scovitz/BIDS_Dataset_Datalad/sub-03/ses-phdiff/anat/sub-03_ses-phdiff_T1w.nii.gz diff --git a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_summary.csv b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_summary.csv index 5af0eb838..a2a1c74c4 100644 --- a/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_summary.csv +++ b/cubids/tests/data/CuBIDS_Toy_Dataset_Curation/v2_summary.csv @@ -1,4 +1,4 @@ -Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 ,,,,datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,3,180,216,180,0.0029,,8.0,False,3,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0 ,,,,acquisition-HASC55APVARIANTEchoTimeFlipAngle_datatype-dwi_suffix-dwi__1,acquisition-HASC55APVARIANTEchoTimeFlipAngle_datatype-dwi_suffix-dwi,1,1,36,43,36,0.089,0.00069,,True,1,dwi,36,61.0,False,,j,4.2,0.072,False,5.0,5.0,5.0 ,,,,acquisition-HASC55APVARIANTEchoTime_datatype-dwi_suffix-dwi__1,acquisition-HASC55APVARIANTEchoTime_datatype-dwi_suffix-dwi,1,1,36,43,36,0.089,0.00069,90.0,True,1,dwi,36,61.0,False,,j,4.2,0.072,False,5.0,5.0,5.0 diff --git a/cubids/tests/data/inconsistent/README b/cubids/tests/data/inconsistent/README index f96d811a8..42be407d1 100644 --- a/cubids/tests/data/inconsistent/README +++ b/cubids/tests/data/inconsistent/README @@ -1,7 +1,7 @@ Heavily downsampled CS-DSI testing datasets This data has had some parameters changed so that there are different -parameter groups in the same key groups. 
+parameter groups in the same entity sets. ## Changes diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 867f22cd2..28211cc2d 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -97,12 +97,12 @@ def test_get_param_groups(tmp_path): """Test get_param_groups.""" data_root = get_data(tmp_path) bod = CuBIDS(data_root / "inconsistent", use_datalad=True) - key_groups = bod.get_key_groups() + entity_sets = bod.get_entity_sets() bod._cache_fieldmaps() - for key_group in key_groups: - ret = bod.get_param_groups_from_key_group(key_group) - assert sum(ret[1].Counts) == ret[1].loc[0, "KeyGroupCount"] + for entity_set in entity_sets: + ret = bod.get_param_groups_from_entity_set(entity_set) + assert sum(ret[1].Counts) == ret[1].loc[0, "EntitySetCount"] def test_copy_exemplars(tmp_path): @@ -365,16 +365,16 @@ def test_tsv_merge_no_datalad(tmp_path): summary_df = pd.read_table(original_summary_tsv) (fa_nan_dwi_row,) = np.flatnonzero( np.isnan(summary_df.FlipAngle) - & summary_df.KeyGroup.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + & summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") ) # Find the dwi with and EchoTime == (complete_dwi_row,) = np.flatnonzero( - summary_df.KeyGroup.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime > 0.05) ) (cant_merge_echotime_dwi_row,) = np.flatnonzero( - summary_df.KeyGroup.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime < 0.05) ) @@ -421,10 +421,10 @@ def test_tsv_merge_changes(tmp_path): # give tsv with no changes (make sure it does nothing except rename) bod.apply_tsv_changes(original_summary_tsv, original_files_tsv, str(tmp_path / "unmodified")) orig = pd.read_table(original_summary_tsv) - # TEST RenameKeyGroup column got populated CORRECTLY + # TEST RenameEntitySet column got populated CORRECTLY for row in range(len(orig)): if orig.loc[row, "ParamGroup"] != 1: - assert str(orig.loc[row, "RenameKeyGroup"]) != "nan" + assert str(orig.loc[row, "RenameEntitySet"]) != "nan" # TESTING RENAMES GOT APPLIED applied = pd.read_table(str(tmp_path / "unmodified_summary.tsv")) @@ -451,14 +451,14 @@ def test_tsv_merge_changes(tmp_path): assert len(orig) == len(applied) renamed = True - new_keys = applied["KeyGroup"].tolist() + new_keys = applied["EntitySet"].tolist() for row in range(len(orig)): if orig.loc[row, "Modality"] != "fmap": if ( - str(orig.loc[row, "RenameKeyGroup"]) != "nan" - and str(orig.loc[row, "RenameKeyGroup"]) not in new_keys + str(orig.loc[row, "RenameEntitySet"]) != "nan" + and str(orig.loc[row, "RenameEntitySet"]) not in new_keys ): - print(orig.loc[row, "RenameKeyGroup"]) + print(orig.loc[row, "RenameEntitySet"]) renamed = False assert renamed @@ -470,16 +470,16 @@ def test_tsv_merge_changes(tmp_path): summary_df = pd.read_table(original_summary_tsv) (fa_nan_dwi_row,) = np.flatnonzero( np.isnan(summary_df.FlipAngle) - & summary_df.KeyGroup.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + & summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") ) # Find the dwi with and EchoTime == (complete_dwi_row,) = np.flatnonzero( - summary_df.KeyGroup.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + 
summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime > 0.05) ) (cant_merge_echotime_dwi_row,) = np.flatnonzero( - summary_df.KeyGroup.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime < 0.05) ) @@ -539,9 +539,9 @@ def test_merge_without_overwrite(): """Test merge_without_overwrite.""" meta1 = { "ManualCheck": 1.0, - "RenameKeyGroup": np.nan, + "RenameEntitySet": np.nan, "MergeInto": 2.0, - "KeyGroup": "datatype-func_suffix-bold_task-rest", + "EntitySet": "datatype-func_suffix-bold_task-rest", "ParamGroup": 12, "Counts": 2, "DwellTime": 2.6e-06, @@ -602,8 +602,8 @@ def test_merge_without_overwrite(): assert not bad_slice_merge -def test_keygroups(tmp_path): - """Test keygroups.""" +def test_entitysets(tmp_path): + """Test entitysets.""" data_root = get_data(tmp_path) # Test the complete data @@ -612,22 +612,22 @@ def test_keygroups(tmp_path): # There should be no unpaired fieldmaps assert len(complete_misfit_fmaps) == 0 - # Test that the correct key groups are found - key_groups = complete_bod.get_key_groups() - assert key_groups == COMPLETE_KEY_GROUPS + # Test that the correct entity sets are found + entity_sets = complete_bod.get_entity_sets() + assert entity_sets == COMPLETE_KEY_GROUPS # Test the incomplete ibod = CuBIDS(data_root / "inconsistent") inc_misfit_fmaps = ibod._cache_fieldmaps() assert len(inc_misfit_fmaps) == 1 - # There will still be the same number of key groups - ikey_groups = ibod.get_key_groups() - assert ikey_groups == COMPLETE_KEY_GROUPS + # There will still be the same number of entity sets + ientity_sets = ibod.get_entity_sets() + assert ientity_sets == COMPLETE_KEY_GROUPS def test_tsv_creation(tmp_path): - """Test the Key Group and Parameter Group creation on sample data.""" + """Test the Entity Set and Parameter Group creation on sample data.""" data_root = get_data(tmp_path) # Test the complete data @@ -636,9 +636,9 @@ def test_tsv_creation(tmp_path): # There should be no unpaired fieldmaps assert len(complete_misfit_fmaps) == 0 - # Test that the correct key groups are found - key_groups = complete_bod.get_key_groups() - assert key_groups == COMPLETE_KEY_GROUPS + # Test that the correct entity sets are found + entity_sets = complete_bod.get_entity_sets() + assert entity_sets == COMPLETE_KEY_GROUPS # Get the tsvs from the complete data cfiles_df, csummary_df = complete_bod.get_param_groups_dataframes() @@ -647,7 +647,7 @@ def test_tsv_creation(tmp_path): assert cfiles_df.shape[0] == 21 # This data should have the same number of param - # groups as key groups + # groups as entity sets assert csummary_df.shape[0] == len(COMPLETE_KEY_GROUPS) # check IntendedForXX and FieldmapKeyXX are boolean now @@ -668,9 +668,9 @@ def test_tsv_creation(tmp_path): inc_misfit_fmaps = ibod._cache_fieldmaps() assert len(inc_misfit_fmaps) == 1 - # There will still be the same number of key groups - ikey_groups = ibod.get_key_groups() - assert ikey_groups == COMPLETE_KEY_GROUPS + # There will still be the same number of entity sets + ientity_sets = ibod.get_entity_sets() + assert ientity_sets == COMPLETE_KEY_GROUPS # Get the tsvs from the inconsistent data ifiles_df, isummary_df = ibod.get_param_groups_dataframes() @@ -686,8 +686,8 @@ def test_tsv_creation(tmp_path): for i, (_, row) in enumerate(isummary_df.iterrows()): if i == len(isummary_df) - 1: 
break - # if key groups in rows i and i+1 are the same - if isummary_df.iloc[i]["KeyGroup"] == isummary_df.iloc[i + 1]["KeyGroup"]: + # if entity sets in rows i and i+1 are the same + if isummary_df.iloc[i]["EntitySet"] == isummary_df.iloc[i + 1]["EntitySet"]: # param group i = param group i+1 assert isummary_df.iloc[i]["ParamGroup"] == isummary_df.iloc[i + 1]["ParamGroup"] - 1 # and count i < count i + 1 @@ -697,8 +697,8 @@ def test_tsv_creation(tmp_path): for i, (_, row) in enumerate(ifiles_df.iterrows()): if i == len(ifiles_df) - 1: break - # if key groups in rows i and i+1 are the same - if ifiles_df.iloc[i]["KeyGroup"] == ifiles_df.iloc[i + 1]["KeyGroup"]: + # if entity sets in rows i and i+1 are the same + if ifiles_df.iloc[i]["EntitySet"] == ifiles_df.iloc[i + 1]["EntitySet"]: # param group i = param group i+1 assert ifiles_df.iloc[i]["ParamGroup"] <= ifiles_df.iloc[i + 1]["ParamGroup"] @@ -707,9 +707,9 @@ def test_apply_tsv_changes(tmp_path): """Test apply_tsv_changes.""" # set up like narrative of user using this # similar to test tsv creation - # open the tsv, rename a key group + # open the tsv, rename a entity set # save tsv - # call change key groups + # call change entity sets # give tsv with no changes (make sure it does nothing) # make sure files you wanted to rename exist in the bids dir @@ -746,7 +746,7 @@ def test_apply_tsv_changes(tmp_path): assert og_content == mod1_content - # edit the tsv, add a RenameKeyGroup + # edit the tsv, add a RenameEntitySet # _edit_tsv(str(tmp_path / "originals_summary.tsv")) complete_cubids.apply_tsv_changes( @@ -830,9 +830,9 @@ def test_session_apply(tmp_path): """Test session_apply.""" # set up like narrative of user using this # similar to test tsv creation - # open the tsv, rename a key group + # open the tsv, rename a entity set # save tsv - # call change key groups + # call change entity sets # give tsv with no changes (make sure it does nothing) # make sure files you wanted to rename exist in the bids dir diff --git a/cubids/tests/test_cubids.py b/cubids/tests/test_cubids.py index 7e985de4d..6ab847fd5 100644 --- a/cubids/tests/test_cubids.py +++ b/cubids/tests/test_cubids.py @@ -101,9 +101,9 @@ def _test__cache_fieldmaps(cubids_instance): # Add assertions here -def _test_get_param_groups_from_key_group(cubids_instance): - key_group = "group-01" - param_groups = cubids_instance.get_param_groups_from_key_group(key_group) +def _test_get_param_groups_from_entity_set(cubids_instance): + entity_set = "group-01" + param_groups = cubids_instance.get_param_groups_from_entity_set(entity_set) # Add assertions here @@ -129,8 +129,8 @@ def _test_get_tsvs(cubids_instance): # Add assertions here -def _test_get_key_groups(cubids_instance): - key_groups = cubids_instance.get_key_groups() +def _test_get_entity_sets(cubids_instance): + entity_sets = cubids_instance.get_entity_sets() # Add assertions here @@ -179,21 +179,21 @@ def _test__update_json(cubids_instance): # Add assertions here -def _test__key_group_to_entities(cubids_instance): - key_group = "group-01" - entities = cubids_instance._key_group_to_entities(key_group) +def _test__entity_set_to_entities(cubids_instance): + entity_set = "group-01" + entities = cubids_instance._entity_set_to_entities(entity_set) # Add assertions here -def _test__entities_to_key_group(cubids_instance): +def _test__entities_to_entity_set(cubids_instance): entities = {"subject": "sub-01", "session": "ses-01"} - key_group = cubids_instance._entities_to_key_group(entities) + entity_set = 
cubids_instance._entities_to_entity_set(entities) # Add assertions here -def _test__file_to_key_group(cubids_instance): +def _test__file_to_entity_set(cubids_instance): filename = "sub-01_ses-01_task-rest_bold.nii.gz" - key_group = cubids_instance._file_to_key_group(filename) + entity_set = cubids_instance._file_to_entity_set(filename) # Add assertions here @@ -206,12 +206,12 @@ def _test__get_intended_for_reference(cubids_instance): def _test__get_param_groups(cubids_instance): files = ["sub-01_ses-01_task-rest_bold.nii.gz", "sub-02_ses-01_task-rest_bold.nii.gz"] fieldmap_lookup = {"sub-01_ses-01_task-rest_bold.nii.gz": "fieldmap.nii.gz"} - key_group_name = "group-01" + entity_set_name = "group-01" grouping_config = {"group-01": {"modality": "bold"}} modality = "bold" keys_files = {"group-01": ["sub-01_ses-01_task-rest_bold.nii.gz"]} param_groups = cubids_instance._get_param_groups( - files, fieldmap_lookup, key_group_name, grouping_config, modality, keys_files + files, fieldmap_lookup, entity_set_name, grouping_config, modality, keys_files ) # Add assertions here diff --git a/cubids/tests/utils.py b/cubids/tests/utils.py index 22263f9ba..c64da3727 100644 --- a/cubids/tests/utils.py +++ b/cubids/tests/utils.py @@ -53,12 +53,12 @@ def _add_deletion(summary_tsv): # def _edit_tsv(summary_tsv): # df = pd.read_table(summary_tsv) -# df['RenameKeyGroup'] = df['RenameKeyGroup'].apply(str) -# df['KeyGroup'] = df['KeyGroup'].apply(str) +# df['RenameEntitySet'] = df['RenameEntitySet'].apply(str) +# df['EntitySet'] = df['EntitySet'].apply(str) # for row in range(len(df)): -# if df.loc[row, 'KeyGroup'] == \ +# if df.loc[row, 'EntitySet'] == \ # "acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1": -# df.at[row, 'RenameKeyGroup'] = \ +# df.at[row, 'RenameEntitySet'] = \ # "acquisition-v5_datatype-fmap_fmap-magnitude1_suffix-magnitude1" # df.to_csv(summary_tsv) diff --git a/docs/_static/PNC_example_edited.csv b/docs/_static/PNC_example_edited.csv index a0e9b545e..0de481748 100644 --- a/docs/_static/PNC_example_edited.csv +++ b/docs/_static/PNC_example_edited.csv @@ -1,9 +1,9 @@ -RenameKeyGroup,MergeInto,KeyGroup,ParamGroup,Counts,FieldmapKey00,NSliceTimes,RepetitionTime -,,datatype-dwi_run-1_suffix-dwi,1,1361,datatype-fmap_fmap-phase1_suffix-phase1,70,8.1 -acquisition-VariantTr_datatype-dwi_run-1_suffix-dwi,,datatype-dwi_run-1_suffix-dwi,2,1,datatype-fmap_fmap-phase1_suffix-phase1,70,8.4 -,,datatype-dwi_run-1_suffix-dwi,3,15,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 -acquisition-VariantTr_datatype-dwi_run-1_suffix-dwi,,datatype-dwi_run-1_suffix-dwi,4,1,datatype-fmap_fmap-phase1_suffix-phase1,70,9 -,3,datatype-dwi_run-1_suffix-dwi,5,2,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 -acquisition-NoSDC_datatype-dwi_run-1_suffix-dwi,,datatype-dwi_run-1_suffix-dwi,6,16,,70,8.1 -,0,datatype-dwi_run-1_suffix-dwi,7,2,datatype-fmap_fmap-phase1_suffix-phase1,46,8.1 +RenameEntitySet,MergeInto,EntitySet,ParamGroup,Counts,FieldmapKey00,NSliceTimes,RepetitionTime +,,datatype-dwi_run-1_suffix-dwi,1,1361,datatype-fmap_fmap-phase1_suffix-phase1,70,8.1 +acquisition-VariantTr_datatype-dwi_run-1_suffix-dwi,,datatype-dwi_run-1_suffix-dwi,2,1,datatype-fmap_fmap-phase1_suffix-phase1,70,8.4 +,,datatype-dwi_run-1_suffix-dwi,3,15,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 +acquisition-VariantTr_datatype-dwi_run-1_suffix-dwi,,datatype-dwi_run-1_suffix-dwi,4,1,datatype-fmap_fmap-phase1_suffix-phase1,70,9 +,3,datatype-dwi_run-1_suffix-dwi,5,2,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 
+acquisition-NoSDC_datatype-dwi_run-1_suffix-dwi,,datatype-dwi_run-1_suffix-dwi,6,16,,70,8.1 +,0,datatype-dwi_run-1_suffix-dwi,7,2,datatype-fmap_fmap-phase1_suffix-phase1,46,8.1 acquisition-VariantTr_datatype-dwi_run-1_suffix-dwi,,datatype-dwi_run-1_suffix-dwi,8,1,datatype-fmap_fmap-phase1_suffix-phase1,70,12.3 \ No newline at end of file diff --git a/docs/_static/PNC_example_unedited.csv b/docs/_static/PNC_example_unedited.csv index 1ca5aa678..4fdfb436b 100644 --- a/docs/_static/PNC_example_unedited.csv +++ b/docs/_static/PNC_example_unedited.csv @@ -1,9 +1,9 @@ -RenameKeyGroup,MergeInto,KeyGroup,ParamGroup,Counts,FieldmapKey00,NSliceTimes,RepetitionTime -,,datatype-dwi_run-1_suffix-dwi,1,1361,datatype-fmap_fmap-phase1_suffix-phase1,70,8.1 -,,datatype-dwi_run-1_suffix-dwi,2,1,datatype-fmap_fmap-phase1_suffix-phase1,70,8.4 -,,datatype-dwi_run-1_suffix-dwi,3,15,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 -,,datatype-dwi_run-1_suffix-dwi,4,1,datatype-fmap_fmap-phase1_suffix-phase1,70,9 -,,datatype-dwi_run-1_suffix-dwi,5,2,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 -,,datatype-dwi_run-1_suffix-dwi,6,16,,70,8.1 -,,datatype-dwi_run-1_suffix-dwi,7,2,datatype-fmap_fmap-phase1_suffix-phase1,46,8.1 +RenameEntitySet,MergeInto,EntitySet,ParamGroup,Counts,FieldmapKey00,NSliceTimes,RepetitionTime +,,datatype-dwi_run-1_suffix-dwi,1,1361,datatype-fmap_fmap-phase1_suffix-phase1,70,8.1 +,,datatype-dwi_run-1_suffix-dwi,2,1,datatype-fmap_fmap-phase1_suffix-phase1,70,8.4 +,,datatype-dwi_run-1_suffix-dwi,3,15,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 +,,datatype-dwi_run-1_suffix-dwi,4,1,datatype-fmap_fmap-phase1_suffix-phase1,70,9 +,,datatype-dwi_run-1_suffix-dwi,5,2,datatype-fmap_fmap-phasediff_suffix-phasediff,70,8.1 +,,datatype-dwi_run-1_suffix-dwi,6,16,,70,8.1 +,,datatype-dwi_run-1_suffix-dwi,7,2,datatype-fmap_fmap-phase1_suffix-phase1,46,8.1 ,,datatype-dwi_run-1_suffix-dwi,8,1,datatype-fmap_fmap-phase1_suffix-phase1,70,12.3 \ No newline at end of file diff --git a/docs/_static/PNC_post_apply_summary.csv b/docs/_static/PNC_post_apply_summary.csv index c91f417c9..f4a30b562 100644 --- a/docs/_static/PNC_post_apply_summary.csv +++ b/docs/_static/PNC_post_apply_summary.csv @@ -1,8 +1,8 @@ -Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,ParallelReductionFactorInPlane,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 -,,,,datatype-dwi_run-1_suffix-dwi__1,datatype-dwi_run-1_suffix-dwi,1,1388,128,128,70,0.082,0.000267,90,TRUE,1388,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 -,,,,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi,1,25,128,128,70,0.082,0.000267,90,FALSE,25,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 -,,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,1,6,128,128,70,0.082,0.000267,90,TRUE,9,dwi,70,35.0,FALSE,3.0,0.75,j-,9.0,0.034,FALSE,1.875,1.875,2.0 -,,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi__2,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,2,3,128,128,70,0.082,0.000267,90,TRUE,9,dwi,70,35.0,FALSE,3.0,0.75,j-,9.8,0.034,FALSE,1.875,1.875,2.0 
-,,,,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi,1,2,128,128,46,0.082,0.000267,90,TRUE,2,dwi,46,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,3.0 -,,,,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi,1,1,128,128,70,0.102,0.0008,90,TRUE,1,dwi,70,35.0,FALSE,3.0,0.75,j-,12.3,0.102,FALSE,1.875,1.875,2.0 +Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,ParallelReductionFactorInPlane,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +,,,,datatype-dwi_run-1_suffix-dwi__1,datatype-dwi_run-1_suffix-dwi,1,1388,128,128,70,0.082,0.000267,90,TRUE,1388,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 +,,,,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi,1,25,128,128,70,0.082,0.000267,90,FALSE,25,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 +,,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,1,6,128,128,70,0.082,0.000267,90,TRUE,9,dwi,70,35.0,FALSE,3.0,0.75,j-,9.0,0.034,FALSE,1.875,1.875,2.0 +,,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi__2,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,2,3,128,128,70,0.082,0.000267,90,TRUE,9,dwi,70,35.0,FALSE,3.0,0.75,j-,9.8,0.034,FALSE,1.875,1.875,2.0 +,,,,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi,1,2,128,128,46,0.082,0.000267,90,TRUE,2,dwi,46,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,3.0 +,,,,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi,1,1,128,128,70,0.102,0.0008,90,TRUE,1,dwi,70,35.0,FALSE,3.0,0.75,j-,12.3,0.102,FALSE,1.875,1.875,2.0 ,,,,acquisition-VARIANTObliquity_datatype-dwi_run-1_suffix-dwi__1,acquisition-VARIANTObliquity_datatype-dwi_run-1_suffix-dwi,1,1,128,128,70,0.082,0.000267,90,TRUE,1,dwi,70,35.0,TRUE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 \ No newline at end of file diff --git a/docs/_static/PNC_pre_apply_summary_dwi_run1.csv b/docs/_static/PNC_pre_apply_summary_dwi_run1.csv index ad0cae9b7..cb02df295 100644 --- a/docs/_static/PNC_pre_apply_summary_dwi_run1.csv +++ b/docs/_static/PNC_pre_apply_summary_dwi_run1.csv @@ -1,7 +1,7 @@ -Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,ParallelReductionFactorInPlane,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 -,,,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__2,datatype-dwi_run-1_suffix-dwi,2,25,128,128,70,0.082,0.000267,90,FALSE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 
-,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__3,datatype-dwi_run-1_suffix-dwi,3,6,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.0,0.034,FALSE,1.875,1.875,2.0 -,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__4,datatype-dwi_run-1_suffix-dwi,4,3,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.8,0.034,FALSE,1.875,1.875,2.0 -,,,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__5,datatype-dwi_run-1_suffix-dwi,5,2,128,128,46,0.082,0.000267,90,TRUE,1426,dwi,46,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,3.0 -,,,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__6,datatype-dwi_run-1_suffix-dwi,6,1,128,128,70,0.102,0.0008,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,12.3,0.102,FALSE,1.875,1.875,2.0 +Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,ParallelReductionFactorInPlane,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +,,,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__2,datatype-dwi_run-1_suffix-dwi,2,25,128,128,70,0.082,0.000267,90,FALSE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 +,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__3,datatype-dwi_run-1_suffix-dwi,3,6,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.0,0.034,FALSE,1.875,1.875,2.0 +,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__4,datatype-dwi_run-1_suffix-dwi,4,3,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.8,0.034,FALSE,1.875,1.875,2.0 +,,,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__5,datatype-dwi_run-1_suffix-dwi,5,2,128,128,46,0.082,0.000267,90,TRUE,1426,dwi,46,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,3.0 +,,,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__6,datatype-dwi_run-1_suffix-dwi,6,1,128,128,70,0.102,0.0008,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,12.3,0.102,FALSE,1.875,1.875,2.0 ,,,acquisition-VARIANTObliquity_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__7,datatype-dwi_run-1_suffix-dwi,7,1,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,TRUE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 \ No newline at end of file diff --git a/docs/_static/PNC_pre_apply_summary_dwi_run1_deletion.csv b/docs/_static/PNC_pre_apply_summary_dwi_run1_deletion.csv index 15608fc77..b304633a5 100644 --- a/docs/_static/PNC_pre_apply_summary_dwi_run1_deletion.csv +++ b/docs/_static/PNC_pre_apply_summary_dwi_run1_deletion.csv @@ -1,7 +1,7 @@ -Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,ParallelReductionFactorInPlane,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 
-,,,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__2,datatype-dwi_run-1_suffix-dwi,2,25,128,128,70,0.082,0.000267,90,FALSE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 -,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__3,datatype-dwi_run-1_suffix-dwi,3,6,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.0,0.034,FALSE,1.875,1.875,2.0 -,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__4,datatype-dwi_run-1_suffix-dwi,4,3,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.8,0.034,FALSE,1.875,1.875,2.0 -,,,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__5,datatype-dwi_run-1_suffix-dwi,5,2,128,128,46,0.082,0.000267,90,TRUE,1426,dwi,46,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,3.0 -,,0,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__6,datatype-dwi_run-1_suffix-dwi,6,1,128,128,70,0.102,0.0008,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,12.3,0.102,FALSE,1.875,1.875,2.0 +Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,ParallelReductionFactorInPlane,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +,,,acquisition-VARIANTNoFmap_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__2,datatype-dwi_run-1_suffix-dwi,2,25,128,128,70,0.082,0.000267,90,FALSE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 +,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__3,datatype-dwi_run-1_suffix-dwi,3,6,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.0,0.034,FALSE,1.875,1.875,2.0 +,,,acquisition-VARIANTRepetitionTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__4,datatype-dwi_run-1_suffix-dwi,4,3,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,9.8,0.034,FALSE,1.875,1.875,2.0 +,,,acquisition-VARIANTDim3SizeVoxelSizeDim3_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__5,datatype-dwi_run-1_suffix-dwi,5,2,128,128,46,0.082,0.000267,90,TRUE,1426,dwi,46,35.0,FALSE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,3.0 +,,0,acquisition-VARIANTEchoTimeEffectiveEchoSpacingRepetitionTimeTotalReadoutTime_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__6,datatype-dwi_run-1_suffix-dwi,6,1,128,128,70,0.102,0.0008,90,TRUE,1426,dwi,70,35.0,FALSE,3.0,0.75,j-,12.3,0.102,FALSE,1.875,1.875,2.0 ,,,acquisition-VARIANTObliquity_datatype-dwi_run-1_suffix-dwi,datatype-dwi_run-1_suffix-dwi__7,datatype-dwi_run-1_suffix-dwi,7,1,128,128,70,0.082,0.000267,90,TRUE,1426,dwi,70,35.0,TRUE,3.0,0.75,j-,8.1,0.034,FALSE,1.875,1.875,2.0 \ No newline at end of file diff --git a/docs/_static/v0_edited_summary.csv b/docs/_static/v0_edited_summary.csv index 82bbd39ae..bad6107ea 100644 --- a/docs/_static/v0_edited_summary.csv +++ b/docs/_static/v0_edited_summary.csv @@ -1,12 +1,12 @@ 
-Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 -,,,,datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,4,180,216,180,0.0029,,8,FALSE,4,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0 -,,,,acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,2,36,43,36,0.089,0.00069,90,TRUE,4,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 -,,0,acquisition-HASC55APVARIANTNumVolumes_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,90,TRUE,4,dwi,36,10.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 -,,,acquisition-HASC55APVARIANTEchoTime_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__3,acquisition-HASC55AP_datatype-dwi_suffix-dwi,3,1,36,43,36,0.07,0.00069,90,TRUE,4,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 -,,,,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,4,36,43,36,0.004,,60,FALSE,4,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 -,,,,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,4,36,43,36,0.006,,60,FALSE,4,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 -,,,,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,4,36,43,36,,,60,FALSE,4,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0 -,,,,datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,3,36,43,36,0.089,0.00069,90,FALSE,4,fmap,36,1.0,FALSE,,j-,4.2,0.072,TRUE,5.0,5.0,5.0 -,,,acquisition-VARIANTUnused_datatype-fmap_direction-PA_fmap-epi_suffix-epi,datatype-fmap_direction-PA_fmap-epi_suffix-epi__2,datatype-fmap_direction-PA_fmap-epi_suffix-epi,2,1,36,43,36,0.089,0.00069,90,FALSE,4,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0 -,,,,datatype-func_suffix-bold_task-rest__1,datatype-func_suffix-bold_task-rest,1,2,36,43,36,0.089,0.00069,70,TRUE,3,func,36,61.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0 +Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +,,,,datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,4,180,216,180,0.0029,,8,FALSE,4,anat,0,1.0,FALSE,,i-,2.5,,FALSE,1.0,1.0,1.0 +,,,,acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,2,36,43,36,0.089,0.00069,90,TRUE,4,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 +,,0,acquisition-HASC55APVARIANTNumVolumes_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,90,TRUE,4,dwi,36,10.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 
+,,,acquisition-HASC55APVARIANTEchoTime_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__3,acquisition-HASC55AP_datatype-dwi_suffix-dwi,3,1,36,43,36,0.07,0.00069,90,TRUE,4,dwi,36,61.0,FALSE,,j,4.2,0.072,FALSE,5.0,5.0,5.0 +,,,,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1__1,acquisition-v4_datatype-fmap_fmap-magnitude1_suffix-magnitude1,1,4,36,43,36,0.004,,60,FALSE,4,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 +,,,,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2__1,acquisition-v4_datatype-fmap_fmap-magnitude2_suffix-magnitude2,1,4,36,43,36,0.006,,60,FALSE,4,fmap,0,1.0,FALSE,0.75,j-,1.5,,FALSE,5.0,5.0,5.0 +,,,,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff__1,acquisition-v4_datatype-fmap_fmap-phasediff_suffix-phasediff,1,4,36,43,36,,,60,FALSE,4,fmap,0,1.0,FALSE,0.75,j-,1.5,,TRUE,5.0,5.0,5.0 +,,,,datatype-fmap_direction-PA_fmap-epi_suffix-epi__1,datatype-fmap_direction-PA_fmap-epi_suffix-epi,1,3,36,43,36,0.089,0.00069,90,FALSE,4,fmap,36,1.0,FALSE,,j-,4.2,0.072,TRUE,5.0,5.0,5.0 +,,,acquisition-VARIANTUnused_datatype-fmap_direction-PA_fmap-epi_suffix-epi,datatype-fmap_direction-PA_fmap-epi_suffix-epi__2,datatype-fmap_direction-PA_fmap-epi_suffix-epi,2,1,36,43,36,0.089,0.00069,90,FALSE,4,fmap,36,1.0,FALSE,,j-,4.2,0.072,FALSE,5.0,5.0,5.0 +,,,,datatype-func_suffix-bold_task-rest__1,datatype-func_suffix-bold_task-rest,1,2,36,43,36,0.089,0.00069,70,TRUE,3,func,36,61.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0 ,,,acquisition-VARIANTFlipAngle_datatype-func_suffix-bold_task-rest,datatype-func_suffix-bold_task-rest__2,datatype-func_suffix-bold_task-rest,2,1,36,43,36,0.089,0.00069,90,TRUE,3,func,36,61.0,FALSE,,j,1.0,0.072,FALSE,5.0,5.0,5.0 \ No newline at end of file diff --git a/docs/_static/v0_summary.csv b/docs/_static/v0_summary.csv index 92fb53676..d19cc94ed 100644 --- a/docs/_static/v0_summary.csv +++ b/docs/_static/v0_summary.csv @@ -1,4 +1,4 @@ -Notes,ManualCheck,MergeInto,RenameKeyGroup,KeyParamGroup,KeyGroup,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,KeyGroupCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 +Notes,ManualCheck,MergeInto,RenameEntitySet,KeyParamGroup,EntitySet,ParamGroup,Counts,Dim1Size,Dim2Size,Dim3Size,EchoTime,EffectiveEchoSpacing,FlipAngle,HasFieldmap,EntitySetCount,Modality,NSliceTimes,NumVolumes,Obliquity,PartialFourier,PhaseEncodingDirection,RepetitionTime,TotalReadoutTime,UsedAsFieldmap,VoxelSizeDim1,VoxelSizeDim2,VoxelSizeDim3 ,,,,datatype-anat_suffix-T1w__1,datatype-anat_suffix-T1w,1,4,180,216,180,0.0029,,8,False,4,anat,0,1.0,False,,i-,2.5,,False,1.0,1.0,1.0 ,,,,acquisition-HASC55AP_datatype-dwi_suffix-dwi__1,acquisition-HASC55AP_datatype-dwi_suffix-dwi,1,2,36,43,36,0.089,0.00069,90,True,4,dwi,36,61.0,False,,j,4.2,0.072,False,5.0,5.0,5.0 ,,,acquisition-HASC55APVARIANTNumVolumes_datatype-dwi_suffix-dwi,acquisition-HASC55AP_datatype-dwi_suffix-dwi__2,acquisition-HASC55AP_datatype-dwi_suffix-dwi,2,1,36,43,36,0.089,0.00069,90,True,4,dwi,36,10.0,False,,j,4.2,0.072,False,5.0,5.0,5.0 diff --git a/docs/about.rst b/docs/about.rst index 80e55f950..4dd44b46f 100644 --- a/docs/about.rst +++ b/docs/about.rst @@ -23,7 +23,7 @@ and **budget their computational time and resources** effectively. 
``CuBIDS`` is designed to facilitate the curation of large, neuroimaging datasets so that users can infer useful information from descriptive and accurate BIDS labels before running pipelines *en masse*. -``CuBIDS`` accomplishes this by summarizing BIDS data using :ref:`keygroup`, +``CuBIDS`` accomplishes this by summarizing BIDS data using :ref:`entityset`, :ref:`paramgroup`, and :ref:`acquisitiongroup` categorizations in your data (we'll explain what these are in more detail in the next section). @@ -56,15 +56,15 @@ Examples Dominant Group resting state BOLD: * Example Filename: ``sub-01_ses-A_task-rest_acq-singleband_bold.nii.gz`` - * Key Group: ``acquisition-singleband_datatype-func_suffix-bold_task-rest`` + * Entity Set: ``acquisition-singleband_datatype-func_suffix-bold_task-rest`` * Param Group: ``1`` (Dominant Group) Variant Group resting state BOLD (all scans in this Param Group are missing a fieldmap) * Example Filename: ``sub-02_ses-A_task-rest_acq-singleband_bold.nii.gz`` - * Key Group: ``acquisition-singleband_datatype-func_suffix-bold_task-rest`` + * Entity Set: ``acquisition-singleband_datatype-func_suffix-bold_task-rest`` * Param Group: ``2`` (Variant Group) - * Rename Key Group: ``acquisition-singlebandVARIANTNoFmap_datatype-func_suffix-bold_task-rest`` + * Rename Entity Set: ``acquisition-singlebandVARIANTNoFmap_datatype-func_suffix-bold_task-rest`` These definitions are described in more detail in :doc:`glossary` and :doc:`usage`. diff --git a/docs/example.rst b/docs/example.rst index c7826f46c..0f50ca803 100644 --- a/docs/example.rst +++ b/docs/example.rst @@ -152,7 +152,7 @@ Note that it is best practice to provide a detailed commit message with each cha Adding NIfTI Information to JSON Sidecars ----------------------------------------- -Next, we seek to add more image parameters to our sidecars so that we can better define our Key Groups. +Next, we seek to add more image parameters to our sidecars so that we can better define our Entity Sets. Historically, only a subset of parameters in the NIfTI image header have been included in a BIDS sidecar... Parameters such as image dimensions, number of volumes, image obliquity, and voxel sizes — all important data that can change how our pipelines will eventually run! @@ -328,7 +328,7 @@ contains only one scan (see "Counts" column) with only 10 volumes (see "NumVolumes" column). Since the majority of DWI scans in this dataset have 61 volumes, ``CuBIDS`` assigns this single scan to a "Variant" (i.e. non-dominant) Parameter Group, -and automatically populates that Parameter Group's "RenameKeyGroup" column in ``v0_summary.tsv`` +and automatically populates that Parameter Group's "RenameEntitySet" column in ``v0_summary.tsv`` with a suggested name: ``acquisition-HASC55APVARIANTNumVolumes_datatype-dwi_suffix-dwi``. This time, though, we elect to remove this scan because it does not have enough volumes to be usable for most analyses. @@ -351,7 +351,7 @@ Applying changes Now that all metadata issues have been addressed — both validation and ``CuBIDS`` summary — -we are ready to rename our files based on their RenameKeyGroup values and +we are ready to rename our files based on their RenameEntitySet values and apply the requested deletion in ``v0_edited_summary.tsv``. 
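For those following along in Python rather than at the command line, a rough sketch of this apply step using the ``CuBIDS`` class is shown below; the dataset path, TSV names, and ``v1`` prefix are illustrative placeholders rather than the exact paths used in this walkthrough:

.. code-block:: python

    from cubids.cubids import CuBIDS

    # Illustrative paths; substitute the DataLad-tracked dataset and the
    # edited TSVs produced earlier in this walkthrough.
    bod = CuBIDS(data_root="/path/to/BIDS_Dataset_DataLad", use_datalad=True)
    bod.apply_tsv_changes(
        summary_tsv="v0_edited_summary.tsv",
        files_tsv="v0_files.tsv",
        new_prefix="v1",
    )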
The ``cubids apply`` function renames scans in each Variant Parameter Group according to the metadata parameters with a flag “VARIANT”, diff --git a/docs/glossary.rst b/docs/glossary.rst index 8a2b3a4f3..7c024461b 100644 --- a/docs/glossary.rst +++ b/docs/glossary.rst @@ -5,27 +5,27 @@ Glossary .. glossary:: - Key Group + Entity Set A set of scans whose filenames share all `BIDS filename key-value pairs`_, excluding subject and session. - The key group is derived from the common BIDS filename elements. + The entity set is derived from the common BIDS filename elements. For example, ``acquisition-*_datatype-*_run-*_task-*_suffix``. Parameter Group A set of scans with identical metadata parameters in their sidecars. - Defined within a Key Group. - Numerically identified, meaning that each Key Group will have *n* Param Groups, - where *n* is the number of unique sets of scanning parameters present in that Key Group + Defined within a Entity Set. + Numerically identified, meaning that each Entity Set will have *n* Param Groups, + where *n* is the number of unique sets of scanning parameters present in that Entity Set (e.g., 1, 2, etc.). Dominant Group - The Param Group that contains the most scans in its Key Group. + The Param Group that contains the most scans in its Entity Set. Variant Group Any Param Group that is non-dominant. - Rename Key Group - Auto-generated, recommended new Key Group name for Variant Groups. + Rename Entity Set + Auto-generated, recommended new Entity Set name for Variant Groups. Based on the metadata parameters that cause scans in Variant Groups to vary from those in their respective Dominant Groups. diff --git a/docs/notebooks/HTML_param_groups.ipynb b/docs/notebooks/HTML_param_groups.ipynb index e9854a736..7889c13c3 100644 --- a/docs/notebooks/HTML_param_groups.ipynb +++ b/docs/notebooks/HTML_param_groups.ipynb @@ -244,17 +244,17 @@ "\n", "# ISSUE! 
Grouping by char!\n", "\n", - "#param_group = cubids_obj.get_param_groups(key_group)\n", - "# print(key_group)\n", + "#param_group = cubids_obj.get_param_groups(entity_set)\n", + "# print(entity_set)\n", "# #print(param_group)\n", "\n", "NON_KEY_ENTITIES = set([\"subject\", \"session\", \"extension\"])\n", "\n", - "def key_group_to_entities(key_group):\n", - " return dict([group.split(\"-\") for group in key_group.split(\"_\")])\n", + "def entity_set_to_entities(entity_set):\n", + " return dict([group.split(\"-\") for group in entity_set.split(\"_\")])\n", "\n", "\n", - "def entities_to_key_group(entities):\n", + "def entities_to_entity_set(entities):\n", " group_keys = sorted(entities.keys())\n", " return \"_\".join(\n", " [\"{}-{}\".format(key, entities[key]) for key in group_keys])\n", @@ -263,13 +263,13 @@ " entities = parse_file_entities(str(filename))\n", " return entities\n", "\n", - "def file_to_key_group(filename):\n", + "def file_to_entity_set(filename):\n", " entities = parse_file_entities(str(filename))\n", - " keys = entities_to_key_group(entities)\n", + " keys = entities_to_entity_set(entities)\n", " return keys\n", "\n", - "def key_group_to_entities(key_group):\n", - " return dict([group.split(\"-\") for group in key_group.split(\"_\")])\n", + "def entity_set_to_entities(entity_set):\n", + " return dict([group.split(\"-\") for group in entity_set.split(\"_\")])\n", "\n", "def get_file_params(files):\n", " \"\"\"Finds a list of *parameter groups* from a list of files.\n", @@ -314,8 +314,8 @@ " return files_params\n", "\n", "\n", - "#def get_param_groups(key_group, path):\n", - "# key_entities = key_group_to_entities(key_group)\n", + "#def get_param_groups(entity_set, path):\n", + "# key_entities = entity_set_to_entities(entity_set)\n", "# path = Path(path)\n", "# matching_files = path.layout.get(return_type=\"file\", scope=\"self\",\n", "# **key_entities)\n", @@ -327,14 +327,14 @@ "path = \"/Users/Covitz/CuBIDS/cubids/testdata/complete/\"\n", "ret_entities = file_to_entities(filename)\n", "print(ret_entities)\n", - "key_group = entities_to_key_group(ret_entities)\n", - "print(key_group)\n", + "entity_set = entities_to_entity_set(ret_entities)\n", + "print(entity_set)\n", "\n", - "entities = key_group_to_entities(key_group)\n", + "entities = entity_set_to_entities(entity_set)\n", "print(\"Entities: \", entities)\n", "\n", - "key_groups = file_to_key_group(filename)\n", - "print(\"F_2_K: \", key_groups)\n", + "entity_sets = file_to_entity_set(filename)\n", + "print(\"F_2_K: \", entity_sets)\n", "\n", "\n", "\n", @@ -374,8 +374,8 @@ "\n", "\n", "cubids_obj = CuBIDS(\"/Users/Covitz/CuBIDS/cubids/testdata/complete/\")\n", - "key_group = cubids_obj.get_key_groups()\n", - "key_group" + "entity_set = cubids_obj.get_entity_sets()\n", + "entity_set" ] }, { @@ -428,20 +428,20 @@ "\n", "\n", "\n", - "def _entities_to_key_group(entities):\n", + "def _entities_to_entity_set(entities):\n", " group_keys = sorted(entities.keys() - NON_KEY_ENTITIES)\n", " return \"_\".join(\n", " [\"{}-{}\".format(key, entities[key]) for key in group_keys])\n", "\n", "\n", - "def _file_to_key_group(filename):\n", + "def _file_to_entity_set(filename):\n", " entities = parse_file_entities(str(filename))\n", - " return _entities_to_key_group(entities)\n", + " return _entities_to_entity_set(entities)\n", "\n", "\n", "def html_groups(bids_dir):\n", - " # get key groups using cubids.get_key_groups\n", - " # use key_group_to_entities to get entities\n", + " # get entity sets using cubids.get_entity_sets\n", + " # use 
entity_set_to_entities to get entities\n", " # get param groups for each entity\n", "\n", "\n", diff --git a/docs/notebooks/Key_and_Param_Groups.ipynb b/docs/notebooks/Key_and_Param_Groups.ipynb index 3970ce6df..9985a4856 100644 --- a/docs/notebooks/Key_and_Param_Groups.ipynb +++ b/docs/notebooks/Key_and_Param_Groups.ipynb @@ -58,13 +58,13 @@ ], "source": [ "###############################\n", - "# TESTING change_key_groups!!!!\n", + "# TESTING change_entity_sets!!!!\n", "###############################\n", "\n", "\n", "bod = CuBIDS(data_root2)\n", "\n", - "out = bod.change_key_groups('/Users/scovitz/CuBIDS/notebooks/NewTests', '/Users/scovitz/CuBIDS/notebooks/newtsvs')\n", + "out = bod.change_entity_sets('/Users/scovitz/CuBIDS/notebooks/NewTests', '/Users/scovitz/CuBIDS/notebooks/newtsvs')\n", "\n", "\n", "print(out)\n" @@ -148,8 +148,8 @@ "outputs": [], "source": [ "bod = CuBIDS(data_root2)\n", - "key_groups = bod.get_key_groups()\n", - "print(key_groups)" + "entity_sets = bod.get_entity_sets()\n", + "print(entity_sets)" ] }, { @@ -171,22 +171,22 @@ "outputs": [], "source": [ "\n", - "for key_group in key_groups:\n", - " print(key_group)\n", - " output = bod.get_file_params(key_group)\n", + "for entity_set in entity_sets:\n", + " print(entity_set)\n", + " output = bod.get_file_params(entity_set)\n", " print(len(output))\n", " #print(output)\n", "\n", "\n", " #print(output)\n", - " #output2 = bod.get_param_groups(key_group)\n", + " #output2 = bod.get_param_groups(entity_set)\n", " #print(output2[1])\n", " #print(output[1])\n", " #print(output[0])\n", " #print(len(output[0]))\n", - " #output.to_csv('Tests/' + key_group + '.tsv')\n", + " #output.to_csv('Tests/' + entity_set + '.tsv')\n", " # export the line above to a tsv\n", - " # data = data.append(pd.DataFrame(DwellTime: key_group.\n", + " # data = data.append(pd.DataFrame(DwellTime: entity_set.\n", "#print(output['/Users/Covitz/Downloads/RBC_growupCCNP_BIDS/sub-colornest195/ses-1/func/sub-colornest195_ses-1_task-rest_run-02_bold.nii.gz'])\n", "#print(output)\n" ] @@ -197,8 +197,8 @@ "metadata": {}, "outputs": [], "source": [ - "key_group = 'datatype-func_run-2_suffix-bold_task-rest'\n", - "file_params_dict = bod.get_file_params(key_group)\n", + "entity_set = 'datatype-func_run-2_suffix-bold_task-rest'\n", + "file_params_dict = bod.get_file_params(entity_set)\n", "print(len(file_params_dict))\n", "print(file_params_dict)\n", "#print(output[1])\n", @@ -226,7 +226,7 @@ "# print(filenames_size)\n", "# print(filenames[0])\n", "\n", - "# param_groups = bod.get_param_groups(key_group)\n", + "# param_groups = bod.get_param_groups(entity_set)\n", "# print(len(param_groups))\n" ] }, @@ -237,11 +237,11 @@ "outputs": [], "source": [ "# TESTING THE change_filename method!\n", - "key_group = 'datatype-func_run-2_suffix-bold_task-rest'\n", + "entity_set = 'datatype-func_run-2_suffix-bold_task-rest'\n", "split_params = {'FlipAngle': 80, 'EffectiveEchoSpacing': 0.000510002, 'PartialFourier': 1, 'TotalReadoutTime': 0.0362102, 'RepetitionTime': 2.5, 'EchoTime': 0.03, 'PhaseEncodingDirection': 'j-', 'DwellTime': 3.1e-06, 'SliceTime000': 1.2, 'SliceTime001': 0, 'SliceTime002': 1.3, 'SliceTime003': 0.1, 'SliceTime004': 1.4, 'SliceTime005': 0.1, 'SliceTime006': 1.4, 'SliceTime007': 0.2, 'SliceTime008': 1.5, 'SliceTime009': 0.3, 'SliceTime010': 1.6, 'SliceTime011': 0.3, 'SliceTime012': 1.6, 'SliceTime013': 0.4, 'SliceTime014': 1.7, 'SliceTime015': 0.5, 'SliceTime016': 1.8, 'SliceTime017': 0.5, 'SliceTime018': 1.8, 'SliceTime019': 0.6, 'SliceTime020': 1.9, 
'SliceTime021': 0.7, 'SliceTime022': 2.0, 'SliceTime023': 0.7, 'SliceTime024': 2.0, 'SliceTime025': 0.8, 'SliceTime026': 2.1, 'SliceTime027': 0.9, 'SliceTime028': 2.2, 'SliceTime029': 0.9, 'SliceTime030': 2.2, 'SliceTime031': 1.0, 'SliceTime032': 2.3, 'SliceTime033': 1.0, 'SliceTime034': 2.4, 'SliceTime035': 1.1, 'SliceTime036': 2.4, 'SliceTime037': 1.2}\n", "pattern = \"task\"\n", "replacement = \"TASK\"\n", - "ret = bod.change_filenames(key_group, split_params, pattern, replacement)\n", + "ret = bod.change_filenames(entity_set, split_params, pattern, replacement)\n", "\n", "changed_paths = ret\n", "\n", diff --git a/docs/notebooks/keyparamgrouptest.ipynb b/docs/notebooks/keyparamgrouptest.ipynb index 150eb3df6..09f0f02b2 100644 --- a/docs/notebooks/keyparamgrouptest.ipynb +++ b/docs/notebooks/keyparamgrouptest.ipynb @@ -51,8 +51,8 @@ } ], "source": [ - "key_groups = bod.get_key_groups()\n", - "print(key_groups)" + "entity_sets = bod.get_entity_sets()\n", + "print(entity_sets)" ] }, { @@ -90,7 +90,7 @@ " PartialFourier\n", " PhaseEncodingDirection\n", " TotalReadoutTime\n", - " key_group\n", + " entity_set\n", " FieldmapKey00\n", " FieldmapKey01\n", " NSliceTimes\n", @@ -168,7 +168,7 @@ "1 2 1 j- \n", "2 2 1 j- \n", "\n", - " TotalReadoutTime key_group \\\n", + " TotalReadoutTime entity_set \\\n", "0 0.042744 acquisition-64dir_datatype-dwi_suffix-dwi \n", "1 0.042744 acquisition-64dir_datatype-dwi_suffix-dwi \n", "2 0.042744 acquisition-64dir_datatype-dwi_suffix-dwi \n", @@ -195,7 +195,7 @@ } ], "source": [ - "bod.get_param_groups_from_key_group('acquisition-64dir_datatype-dwi_suffix-dwi')" + "bod.get_param_groups_from_entity_set('acquisition-64dir_datatype-dwi_suffix-dwi')" ] }, { @@ -244,7 +244,7 @@ " PartialFourier\n", " PhaseEncodingDirection\n", " TotalReadoutTime\n", - " key_group\n", + " entity_set\n", " ...\n", " FilePath\n", " ParamGroup\n", @@ -1023,7 +1023,7 @@ "27 NaN 1 j- \n", "28 NaN 1 j- \n", "\n", - " TotalReadoutTime key_group ... \\\n", + " TotalReadoutTime entity_set ... \\\n", "0 0.042744 acquisition-64dir_datatype-dwi_suffix-dwi ... \n", "1 0.042744 acquisition-64dir_datatype-dwi_suffix-dwi ... \n", "2 0.042744 acquisition-64dir_datatype-dwi_suffix-dwi ... 
\n", @@ -1342,7 +1342,7 @@ " \n", " \n", " \n", - " key_group\n", + " entity_set\n", " ParamGroup\n", " Count\n", " \n", @@ -1449,7 +1449,7 @@ "" ], "text/plain": [ - " key_group ParamGroup Count\n", + " entity_set ParamGroup Count\n", "0 acquisition-64dir_datatype-dwi_suffix-dwi 1 3\n", "1 acquisition-HCP_datatype-anat_suffix-T1w 1 3\n", "2 acquisition-HCP_datatype-anat_suffix-T2w 1 1\n", @@ -1474,7 +1474,7 @@ } ], "source": [ - "summary_df[[\"key_group\", \"ParamGroup\", \"Count\"]]" + "summary_df[[\"entity_set\", \"ParamGroup\", \"Count\"]]" ] }, { @@ -1487,7 +1487,7 @@ "param_group_cols = list(set(df.columns.to_list()) - set([\"FilePath\"]))\n", "uniques = df.drop_duplicates(param_group_cols, ignore_index=True)\n", "print(uniques.shape)\n", - "counts = df.groupby([\"key_group\", \"ParamGroup\"]).size().reset_index(name='Count')\n", + "counts = df.groupby([\"entity_set\", \"ParamGroup\"]).size().reset_index(name='Count')\n", "print(counts.shape)\n", "\n", "params_and_counts = pd.merge(uniques, counts)\n", @@ -1507,7 +1507,7 @@ "metadata": {}, "outputs": [], "source": [ - "no_paths[[\"key_group\", \"ParamGroup\"]].groupby([\"key_group\", \"ParamGroup\"]).count()" + "no_paths[[\"entity_set\", \"ParamGroup\"]].groupby([\"entity_set\", \"ParamGroup\"]).count()" ] }, { @@ -1530,7 +1530,7 @@ "metadata": {}, "outputs": [], "source": [ - "keyparam_df.groupby([\"key_group\", \"ParamGroup\"]).size().reset_index(name='Count')" + "keyparam_df.groupby([\"entity_set\", \"ParamGroup\"]).size().reset_index(name='Count')" ] }, { @@ -1548,7 +1548,7 @@ "metadata": {}, "outputs": [], "source": [ - "bod.get_key_groups()" + "bod.get_entity_sets()" ] }, { @@ -1650,13 +1650,13 @@ "\n", "dfs = []\n", "fieldmap_lookup = bod.fieldmap_lookup\n", - "key_group_name = \"test\"\n", + "entity_set_name = \"test\"\n", "# path needs to be relative to the root with no leading prefix\n", "for path in files:\n", " metadata = bod.layout.get_metadata(path)\n", " wanted_keys = metadata.keys() & IMAGING_PARAMS\n", " example_data = {key: metadata[key] for key in wanted_keys}\n", - " example_data[\"key_group\"] = key_group_name\n", + " example_data[\"entity_set\"] = entity_set_name\n", "\n", " # Get the fieldmaps out and add their types\n", " print(fieldmap_lookup[path])\n", diff --git a/docs/notebooks/workwithtestdata.ipynb b/docs/notebooks/workwithtestdata.ipynb index 1270bcc97..7f4bd3a82 100644 --- a/docs/notebooks/workwithtestdata.ipynb +++ b/docs/notebooks/workwithtestdata.ipynb @@ -108,8 +108,8 @@ } ], "source": [ - "key_groups = bod.get_key_groups()\n", - "print(key_groups)" + "entity_sets = bod.get_entity_sets()\n", + "print(entity_sets)" ] }, { @@ -147,7 +147,7 @@ "metadata": {}, "outputs": [], "source": [ - "ikey_groups = ibod.get_key_groups()" + "ientity_sets = ibod.get_entity_sets()" ] }, { @@ -167,7 +167,7 @@ } ], "source": [ - "ikey_groups == key_groups" + "ientity_sets == entity_sets" ] }, { @@ -270,7 +270,7 @@ " \n", " \n", " \n", - " key_group\n", + " entity_set\n", " ParamGroup\n", " Count\n", " \n", @@ -377,7 +377,7 @@ "" ], "text/plain": [ - " key_group ParamGroup Count\n", + " entity_set ParamGroup Count\n", "0 acquisition-64dir_datatype-dwi_suffix-dwi 1 3\n", "1 acquisition-HCP_datatype-anat_suffix-T1w 1 3\n", "2 acquisition-HCP_datatype-anat_suffix-T2w 1 1\n", @@ -402,7 +402,7 @@ } ], "source": [ - "summary_df[[\"key_group\", \"ParamGroup\", \"Count\"]]" + "summary_df[[\"entity_set\", \"ParamGroup\", \"Count\"]]" ] }, { @@ -415,7 +415,7 @@ "param_group_cols = list(set(df.columns.to_list()) - 
set([\"FilePath\"]))\n", "uniques = df.drop_duplicates(param_group_cols, ignore_index=True)\n", "print(uniques.shape)\n", - "counts = df.groupby([\"key_group\", \"ParamGroup\"]).size().reset_index(name='Count')\n", + "counts = df.groupby([\"entity_set\", \"ParamGroup\"]).size().reset_index(name='Count')\n", "print(counts.shape)\n", "\n", "params_and_counts = pd.merge(uniques, counts)\n", @@ -435,7 +435,7 @@ "metadata": {}, "outputs": [], "source": [ - "no_paths[[\"key_group\", \"ParamGroup\"]].groupby([\"key_group\", \"ParamGroup\"]).count()" + "no_paths[[\"entity_set\", \"ParamGroup\"]].groupby([\"entity_set\", \"ParamGroup\"]).count()" ] }, { @@ -458,7 +458,7 @@ "metadata": {}, "outputs": [], "source": [ - "keyparam_df.groupby([\"key_group\", \"ParamGroup\"]).size().reset_index(name='Count')" + "keyparam_df.groupby([\"entity_set\", \"ParamGroup\"]).size().reset_index(name='Count')" ] }, { @@ -476,7 +476,7 @@ "metadata": {}, "outputs": [], "source": [ - "bod.get_key_groups()" + "bod.get_entity_sets()" ] }, { @@ -578,13 +578,13 @@ "\n", "dfs = []\n", "fieldmap_lookup = bod.fieldmap_lookup\n", - "key_group_name = \"test\"\n", + "entity_set_name = \"test\"\n", "# path needs to be relative to the root with no leading prefix\n", "for path in files:\n", " metadata = bod.layout.get_metadata(path)\n", " wanted_keys = metadata.keys() & IMAGING_PARAMS\n", " example_data = {key: metadata[key] for key in wanted_keys}\n", - " example_data[\"key_group\"] = key_group_name\n", + " example_data[\"entity_set\"] = entity_set_name\n", "\n", " # Get the fieldmaps out and add their types\n", " print(fieldmap_lookup[path])\n", diff --git a/docs/usage.rst b/docs/usage.rst index f86b4e386..70fefc71d 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -10,12 +10,12 @@ More definitions ---------------- -.. _keygroup: +.. _entityset: -Key Group +Entity Set ~~~~~~~~~ -A :term:`Key Group` is a unique set of BIDS key-value pairs, +A :term:`Entity Set` is a unique set of BIDS key-value pairs, excluding identifiers such as subject and session. For example, the files:: @@ -23,18 +23,18 @@ For example, the files:: bids-root/sub-1/ses-2/func/sub-1_ses-2_acq-mb_dir_PA_task-rest_bold.nii.gz bids-root/sub-2/ses-1/func/sub-2_ses-1_acq-mb_dir-PA_task-rest_bold.nii.gz -Would all share the same Key Group. +Would all share the same Entity Set. If these scans were all acquired as a part of the same study on the same scanner with exactly the same acquisition parameters, this naming convention would suffice. However, in large multi-scanner, multi-site, or longitudinal studies where acquisition parameters change over time, -it's possible that the same Key Group could contain scans that differ in important ways. +it's possible that the same Entity Set could contain scans that differ in important ways. -``CuBIDS`` examines all acquisitions within a Key Group to see if there are any images +``CuBIDS`` examines all acquisitions within a Entity Set to see if there are any images that differ in a set of important acquisition parameters. -The subsets of consistent acquisition parameter sets within a Key Group are called a :ref:`paramgroup`. +The subsets of consistent acquisition parameter sets within a Entity Set are called a :ref:`paramgroup`. .. 
_paramgroup: @@ -42,17 +42,17 @@ The subsets of consistent acquisition parameter sets within a Key Group are call Parameter Group ~~~~~~~~~~~~~~~ -A :term:`Parameter Group` is a subset of a Key Group that contains images with the same +A :term:`Parameter Group` is a subset of an Entity Set that contains images with the same acquisition parameters. -Even though two images may belong to the same Key Group and are valid BIDS, +Even though two images may belong to the same Entity Set and are valid BIDS, they may have images with different acquisition parameters. There is nothing fundamentally wrong with this — the ``bids-validator`` will often simply flag these differences with a ``Warning``, but not necessarily suggest changes. That being said, there can be detrimental consequences downstream if the different parameters cause the -same preprocessing pipelines to configure differently to images of the same Key Group. +same preprocessing pipelines to configure differently to images of the same Entity Set. .. _acquisitiongroup: @@ -62,7 +62,7 @@ Acquisition Group We define an :term:`Acquisition Group` as a collection of sessions across participants that contain the exact same set of Key and Parameter Groups. -Since Key Groups are based on the BIDS filenames— +Since Entity Sets are based on the BIDS filenames— and therefore both MRI image type and acquisition specific— each BIDS session directory contains images that belong to a set of Parameter Groups. CuBIDS assigns each session, or set of Parameter Groups, @@ -75,7 +75,7 @@ if a BIDS App runs successfully on a single subject from each Acquisition Group, one can be confident that it will handle all combinations of scanning parameters in the entire dataset. The Acquisition Groups that subjects belong to are listed in ``_AcqGrouping.csv``, -while the Key Groups and Parameter Groups that define each Acquisition Group are noted in +while the Entity Sets and Parameter Groups that define each Acquisition Group are noted in ``_AcqGroupingInfo.txt``. @@ -84,10 +84,10 @@ while the Key Groups and Parameter Groups that define each Acquisition Group are The ``_summary.tsv`` File ~~~~~~~~~~~~~~~~~~~~~~~~~ -This file contains all the detected Key Groups and Parameter Groups. +This file contains all the detected Entity Sets and Parameter Groups. It provides an opportunity to evaluate your data and decide how to handle heterogeneity. -Below is an example ``_summary.tsv`` of the run-1 DWI Key Group in the PNC [#f1]_. +Below is an example ``_summary.tsv`` of the run-1 DWI Entity Set in the PNC [#f1]_. This reflects the original data that has been converted to BIDS using a heuristic. It is similar to what you will see when you first use this functionality: @@ -112,9 +112,9 @@ but it keeps track of every file's assignment to Key and Parameter Groups. Modifying Key and Parameter Group Assignments --------------------------------------------- -Sometimes we see that there are important differences in acquisition parameters within a Key Group. +Sometimes we see that there are important differences in acquisition parameters within an Entity Set. If these differences impact how a pipeline will process the data, -it makes sense to assign the scans in that Parameter Group to a different Key Group +it makes sense to assign the scans in that Parameter Group to a different Entity Set (i.e., assign them a different BIDS name). This can be accomplished by editing the empty columns in the `_summary.csv` file produced by ``cubids group``.
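As a sketch of how those columns might be filled programmatically rather than in a spreadsheet (the file names and the particular Variant Group below are illustrative), the summary table can be edited with pandas before handing it to ``cubids apply``:

.. code-block:: python

    import pandas as pd

    # Illustrative file names; start from the summary TSV written by ``cubids group``.
    summary = pd.read_table("v0_summary.tsv")

    # Send one Variant Group to a new Entity Set by filling in RenameEntitySet.
    variant = (summary["EntitySet"] == "datatype-func_suffix-bold_task-rest") & (
        summary["ParamGroup"] == 2
    )
    summary.loc[variant, "RenameEntitySet"] = (
        "acquisition-VARIANTFlipAngle_datatype-func_suffix-bold_task-rest"
    )

    # Save the edited table to pass to ``cubids apply``.
    summary.to_csv("v0_edited_summary.tsv", sep="\t", index=False)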
@@ -127,7 +127,7 @@ Once the columns have been edited you can apply the changes to BIDS data using The changes in ``keyparam_edited_summary.csv`` will be applied to the BIDS data in ``/bids/dir`` and the new Key and Parameter groups will be saved to csv files starting with ``new_keyparam_prefix``. -Note: fieldmaps keygroups with variant parameters will be identified but not renamed. +Note: fieldmaps entitysets with variant parameters will be identified but not renamed. The ``_AcqGrouping.tsv`` file @@ -142,14 +142,14 @@ Acquisition Group number. The ``_AcqGroupInfo.txt`` file ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The ``_AcqGroupInfo.txt`` file lists all Key Groups that belong to a given Acquisition Group +The ``_AcqGroupInfo.txt`` file lists all Entity Sets that belong to a given Acquisition Group along with the number of sessions each group possesses. Visualizing and summarizing metadata heterogeneity -------------------------------------------------- -Use ``cubids group`` to generate your dataset's Key Groups and Parameter Groups: +Use ``cubids group`` to generate your dataset's Entity Sets and Parameter Groups: .. code-block:: console @@ -174,10 +174,10 @@ Detecting Variant Groups ~~~~~~~~~~~~~~~~~~~~~~~~ Additionally, ``cubids apply`` can automatically rename files in :term:`Variant Groups ` -based on their scanning parameters that vary from those in their Key Groups' +based on their scanning parameters that vary from those in their Entity Sets' :term:`Dominant Parameter Groups `. Renaming is automatically suggested when the summary.tsv is generated from a ``cubids group`` run, -with the suggested new name listed in the tsv's :term:`Rename Key Group` column. +with the suggested new name listed in the tsv's :term:`Rename Entity Set` column. CuBIDS populates this column for all Variant Groups (e.g., every Parameter Group except the Dominant one). Specifically, CuBIDS will suggest renaming all non-dominant Parameter Group to include ``VARIANT*`` @@ -188,7 +188,7 @@ the one present in the Dominant Group, it will automatically suggest renaming all scans in that Variant Group to include ``acquisition-VARIANTRepetitionTime`` in their filenames. When the user runs ``cubids apply``, -filenames will get renamed according to the auto-generated names in the “Rename Key Group” column +filenames will get renamed according to the auto-generated names in the “Rename Entity Set” column in the summary.tsv @@ -233,7 +233,7 @@ Customizable configuration This file can be passed as an argument to ``cubids group`` and ``cubids apply`` using the ``--config`` flag and allows users to customize grouping settings based on MRI image type and parameter. -Each ``Key Group`` is associated with one (and only one) MRI image type, +Each ``Entity Set`` is associated with one (and only one) MRI image type, as BIDS filenames include MRI image type-specific values as their suffixes. This easy-to-modify configuration file provides several benefits to curation. 
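A minimal sketch of the same grouping workflow through the Python API is shown below; the dataset path is a placeholder, and it assumes the returned summary table carries the same columns as the TSVs shown above:

.. code-block:: python

    from cubids.cubids import CuBIDS

    # Placeholder path to a BIDS dataset.
    bod = CuBIDS(data_root="/path/to/bids", use_datalad=False)

    # Entity Sets detected from the filenames,
    # e.g. "acquisition-HASC55AP_datatype-dwi_suffix-dwi".
    print(bod.get_entity_sets())

    # One table with a row per file and one with a row per Parameter Group;
    # these back the _files.tsv and _summary.tsv written by ``cubids group``.
    files_df, summary_df = bod.get_param_groups_dataframes()

    # Variant Groups are the rows that carry a suggested rename.
    variants = summary_df.loc[summary_df["RenameEntitySet"].notna()]
    print(variants[["EntitySet", "ParamGroup", "Counts", "RenameEntitySet"]])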
From 68b496270b9ef4dbb78c6d3b277d9e644b76b064 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 4 Oct 2024 16:40:55 -0400 Subject: [PATCH 02/21] Fix linting issues --- cubids/cubids.py | 4 +++- cubids/metadata_merge.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index f056f8fd2..44c57fdce 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -347,7 +347,9 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T merge_commands = [] for source_id, dest_id in ok_merges: dest_files = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == dest_id).all(1)] - source_files = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == source_id).all(1)] + source_files = files_df.loc[ + (files_df[["ParamGroup", "EntitySet"]] == source_id).all(1) + ] # Get a source json file img_full_path = self.path + source_files.iloc[0].FilePath diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index 2087d009c..6562f35b7 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -302,7 +302,9 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): acq_groups[acq_id].append((row.EntitySet, row.ParamGroup)) else: acq_id = (file_entities.get("subject"), None) - acq_groups[acq_id].append((row.EntitySet, row.ParamGroup, file_entities.get("session"))) + acq_groups[acq_id].append( + (row.EntitySet, row.ParamGroup, file_entities.get("session")) + ) # Map the contents to a list of subjects/sessions contents_to_subjects = defaultdict(list) From a3ce6fcc8622ab608ee5418b5358dd12a6f2e79e Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Wed, 9 Oct 2024 08:01:30 -0400 Subject: [PATCH 03/21] Update to ubuntu 2404:2024.08.1 and bids-validator@1.14.14-dev.0 For bids-validator@1.14.14-dev.0, data_path argument is before all other arguments --- .circleci/config.yml | 6 +++--- cubids/validator.py | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3dc05bf82..3ed256861 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -9,7 +9,7 @@ jobs: type: string default: "3.8" machine: - image: ubuntu-2004:202201-02 + image: ubuntu-2404:2024.08.1 working_directory: /home/circleci/src/CuBIDS steps: - checkout: @@ -34,7 +34,7 @@ jobs: # Add nodejs and the validator conda install nodejs npm install -g yarn && \ - npm install -g bids-validator + npm install -g bids-validator@1.14.14-dev.0 # Install CuBIDS pip install -e .[tests] @@ -83,7 +83,7 @@ jobs: deploy_pypi: machine: - image: ubuntu-2004:202201-02 + image: ubuntu-2404:2024.08.1 working_directory: /home/circleci/src/CuBIDS steps: - checkout: diff --git a/cubids/validator.py b/cubids/validator.py index 01dad11c8..414f19ada 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -16,7 +16,8 @@ def build_validator_call(path, ignore_headers=False): """Build a subprocess command to the bids validator.""" # build docker call # CuBIDS automatically ignores subject consistency. 
- command = ["bids-validator", "--verbose", "--json", "--ignoreSubjectConsistency"] + command = ["bids-validator", path, "--verbose", "--json", "--ignoreSubjectConsistency"] + if ignore_headers: command.append("--ignoreNiftiHeaders") From 701e667cda0a328035cb5fc2d90aa3523d5ced9a Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Wed, 9 Oct 2024 08:46:48 -0400 Subject: [PATCH 04/21] debug circleci after updating bids-validator to 1.14.14-dev.0 --- cubids/tests/test_bond.py | 4 ++++ cubids/validator.py | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 28211cc2d..8fe2fb1a5 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -988,6 +988,10 @@ def test_validator(tmp_path): call = build_validator_call(str(data_root) + "/complete") ret = run_validator(call) + # DEBUG: + print(ret.stdin.decode('UTF-8')) + print(ret.stdout.decode('UTF-8')) + assert ret.returncode == 0 parsed = parse_validator_output(ret.stdout.decode("UTF-8")) diff --git a/cubids/validator.py b/cubids/validator.py index 414f19ada..f3685249b 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -18,7 +18,6 @@ def build_validator_call(path, ignore_headers=False): # CuBIDS automatically ignores subject consistency. command = ["bids-validator", path, "--verbose", "--json", "--ignoreSubjectConsistency"] - if ignore_headers: command.append("--ignoreNiftiHeaders") From c1b69953c6931fec159fe615afa9965b2d79ca23 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Wed, 9 Oct 2024 09:13:14 -0400 Subject: [PATCH 05/21] fix build_validator_call in cubids/validator.py --- cubids/tests/test_bond.py | 4 ---- cubids/validator.py | 2 -- 2 files changed, 6 deletions(-) diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 8fe2fb1a5..28211cc2d 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -988,10 +988,6 @@ def test_validator(tmp_path): call = build_validator_call(str(data_root) + "/complete") ret = run_validator(call) - # DEBUG: - print(ret.stdin.decode('UTF-8')) - print(ret.stdout.decode('UTF-8')) - assert ret.returncode == 0 parsed = parse_validator_output(ret.stdout.decode("UTF-8")) diff --git a/cubids/validator.py b/cubids/validator.py index f3685249b..d7e52fe4e 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -21,8 +21,6 @@ def build_validator_call(path, ignore_headers=False): if ignore_headers: command.append("--ignoreNiftiHeaders") - command.append(path) - return command From bee52cdfe3c9266dea04e85e1fb61040ace38b2a Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Wed, 9 Oct 2024 11:47:49 -0400 Subject: [PATCH 06/21] Revert changes in HISTORY.rst so that changelog is not modified --- HISTORY.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HISTORY.rst b/HISTORY.rst index dd7a50641..db648f65b 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -45,7 +45,7 @@ New Contributors * updated _update_json to no longer use pybids by @scovitz in https://github.com/PennLINC/CuBIDS/pull/232 * Minor tune ups: codespell'ing (fixes + tox + CI (github actions)), remove of unintended to be committed 2 files by @yarikoptic in https://github.com/PennLINC/CuBIDS/pull/239 * ENH: Make "NumVolumes" an integer for 3D images by @cookpa in https://github.com/PennLINC/CuBIDS/pull/211 -* adding note about fmap renameentitysets by @megardn in https://github.com/PennLINC/CuBIDS/pull/140 +* adding note about fmap renamekeygroups by @megardn in https://github.com/PennLINC/CuBIDS/pull/140 * Update usage.rst by 
@megardn in https://github.com/PennLINC/CuBIDS/pull/138 * printing erroneous jsons and only rounding float parameters by @scovitz in https://github.com/PennLINC/CuBIDS/pull/257 From 87fcefc7de2b12a1b520aa8ac7a4e541531cc30e Mon Sep 17 00:00:00 2001 From: Tien Tong <35613222+tientong98@users.noreply.github.com> Date: Wed, 9 Oct 2024 13:06:03 -0400 Subject: [PATCH 07/21] Update docs/usage.rst Co-authored-by: Matt Cieslak --- docs/usage.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.rst b/docs/usage.rst index 70fefc71d..1696b66b9 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -13,7 +13,7 @@ More definitions .. _entityset: Entity Set -~~~~~~~~~ +~~~~~~~~~~~ A :term:`Entity Set` is a unique set of BIDS key-value pairs, excluding identifiers such as subject and session. From cb73eb1b03969a6fb0df1bb80285b984cd62611a Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Thu, 31 Oct 2024 10:10:04 -0400 Subject: [PATCH 08/21] logger not printing metadata so use print instead --- cubids/workflows.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cubids/workflows.py b/cubids/workflows.py index e01ccc78f..1fc29e539 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -895,7 +895,8 @@ def print_metadata_fields(bids_dir, container): if container is None: bod = CuBIDS(data_root=str(bids_dir), use_datalad=False) fields = bod.get_all_metadata_fields() - logger.info("\n".join(fields)) + print("\n".join(fields)) # logger not printing + # logger.info("\n".join(fields)) sys.exit(0) # Run it through a container From b322dd0f9d7c3334eaa4a725ac4189694bb36425 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Thu, 31 Oct 2024 10:19:46 -0400 Subject: [PATCH 09/21] fix linting issue --- cubids/workflows.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cubids/workflows.py b/cubids/workflows.py index 1fc29e539..6cbc1e425 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -895,7 +895,7 @@ def print_metadata_fields(bids_dir, container): if container is None: bod = CuBIDS(data_root=str(bids_dir), use_datalad=False) fields = bod.get_all_metadata_fields() - print("\n".join(fields)) # logger not printing + print("\n".join(fields)) # logger not printing # logger.info("\n".join(fields)) sys.exit(0) From aa5ddd199fedc2dde1fc4dc3f2d9e7d73c6c91e7 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Mon, 18 Nov 2024 09:31:46 -0500 Subject: [PATCH 10/21] replace legacy with schema validator also change cubids print-metadata-fields to account for json file errors due to not been validated yet --- cubids/cubids.py | 16 ++++++-- cubids/validator.py | 91 +++++++++++++++------------------------------ 2 files changed, 43 insertions(+), 64 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index 44c57fdce..27f632e3a 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -1336,9 +1336,19 @@ def get_all_metadata_fields(self): found_fields = set() for json_file in Path(self.path).rglob("*.json"): if ".git" not in str(json_file): - with open(json_file, "r") as jsonr: - metadata = json.load(jsonr) - found_fields.update(metadata.keys()) + # add this in case `print-metadata-fields` is run before validate + try: + with open(json_file, "r", encoding="utf-8") as jsonr: + content = jsonr.read().strip() + if not content: + print(f"Empty file: {json_file}") + continue + metadata = json.loads(content) + found_fields.update(metadata.keys()) + except json.JSONDecodeError as e: + print(f"Error decoding JSON in {json_file}: {e}") + except Exception as e: + 
print(f"Unexpected error with file {json_file}: {e}") return sorted(found_fields) def remove_metadata_fields(self, fields_to_remove): diff --git a/cubids/validator.py b/cubids/validator.py index d7e52fe4e..7fba81385 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -14,9 +14,9 @@ def build_validator_call(path, ignore_headers=False): """Build a subprocess command to the bids validator.""" - # build docker call - # CuBIDS automatically ignores subject consistency. - command = ["bids-validator", path, "--verbose", "--json", "--ignoreSubjectConsistency"] + # New schema BIDS validator doesn't have option to ignore subject consistency. + # Build the deno command to run the BIDS validator. + command = ["deno", "run", "-A", "jsr:@bids/validator", path, "--verbose", "--json"] if ignore_headers: command.append("--ignoreNiftiHeaders") @@ -87,32 +87,6 @@ def parse_validator_output(output): Dataframe of validator output. """ - def get_nested(dct, *keys): - """Get a nested value from a dictionary. - - Parameters - ---------- - dct : :obj:`dict` - Dictionary to get value from. - keys : :obj:`list` - List of keys to get value from. - - Returns - ------- - :obj:`dict` - The nested value. - """ - for key in keys: - try: - dct = dct[key] - except (KeyError, TypeError): - return None - return dct - - data = json.loads(output) - - issues = data["issues"] - def parse_issue(issue_dict): """Parse a single issue from the validator output. @@ -126,30 +100,27 @@ def parse_issue(issue_dict): return_dict : :obj:`dict` Dictionary of parsed issue. """ - return_dict = {} - return_dict["files"] = [ - get_nested(x, "file", "relativePath") for x in issue_dict.get("files", "") - ] - return_dict["type"] = issue_dict.get("key", "") - return_dict["severity"] = issue_dict.get("severity", "") - return_dict["description"] = issue_dict.get("reason", "") - return_dict["code"] = issue_dict.get("code", "") - return_dict["url"] = issue_dict.get("helpUrl", "") - - return return_dict - - df = pd.DataFrame() - - for warn in issues["warnings"]: - parsed = parse_issue(warn) - parsed = pd.DataFrame(parsed) - df = pd.concat([df, parsed], ignore_index=True) - - for err in issues["errors"]: - parsed = parse_issue(err) - parsed = pd.DataFrame(parsed) - df = pd.concat([df, parsed], ignore_index=True) + return { + "location": issue_dict.get("location", ""), + "code": issue_dict.get("code", ""), + "subCode": issue_dict.get("subCode", ""), + "severity": issue_dict.get("severity", ""), + "rule": issue_dict.get("rule", ""), + } + + # Load JSON data + data = json.loads(output) + + # Extract issues + issues = data.get("issues", {}).get("issues", []) + if not issues: + return pd.DataFrame(columns=["location", "code", "subCode", "severity", "rule"]) + + # Parse all issues + parsed_issues = [parse_issue(issue) for issue in issues] + # Convert to DataFrame + df = pd.DataFrame(parsed_issues) return df @@ -161,12 +132,10 @@ def get_val_dictionary(): val_dict : dict Dictionary of values. 
""" - val_dict = {} - val_dict["files"] = {"Description": "File with warning orerror"} - val_dict["type"] = {"Description": "BIDS validation warning or error"} - val_dict["severity"] = {"Description": "gravity of problem (warning/error"} - val_dict["description"] = {"Description": "Description of warning/error"} - val_dict["code"] = {"Description": "BIDS validator issue code number"} - val_dict["url"] = {"Description": "Link to the issue's neurostars thread"} - - return val_dict + return { + "location": {"Description": "File with the validation issue."}, + "code": {"Description": "Code of the validation issue."}, + "subCode": {"Description": "Subcode providing additional issue details."}, + "severity": {"Description": "Severity of the issue (e.g., warning, error)."}, + "rule": {"Description": "Validation rule that triggered the issue."}, + } From e98ca1786cde49ff1a7b043babde75ddbe7231ed Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Mon, 18 Nov 2024 11:49:15 -0500 Subject: [PATCH 11/21] circleci install deno --- .circleci/config.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 3ed256861..c73d8af84 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -31,10 +31,8 @@ jobs: source activate cubids conda install -c conda-forge -y datalad - # Add nodejs and the validator - conda install nodejs - npm install -g yarn && \ - npm install -g bids-validator@1.14.14-dev.0 + # Add deno to run the schema validator + conda install deno # Install CuBIDS pip install -e .[tests] From 5ba0c717da66cdb2258a119d33be3260624c4d9d Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Mon, 18 Nov 2024 12:00:22 -0500 Subject: [PATCH 12/21] add deno installation instruction --- docs/installation.rst | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index d55b84a40..b02e734ec 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -16,7 +16,7 @@ initialize a new conda environment (for example, named ``cubids``) as follows: .. code-block:: console - $ conda create -n cubids python=3.8 pip + $ conda create -n cubids python=3.12 pip $ conda activate cubids You are now ready to install CuBIDS. @@ -44,23 +44,16 @@ Once you have a copy of the source, you can install it with: $ pip install -e . We will now need to install some dependencies of ``CuBIDS``. -To do this, we first must install nodejs. +To do this, we first must install deno to run `bids-validator`. We can accomplish this using the following command: .. code-block:: console - $ conda install nodejs + $ conda install deno -Now that we have npm installed, we can install ``bids-validator`` using the following command: +The new schema ``bids-validator`` doesn't need to be installed +and will be implemented automatically when `cubids validate` is called -.. code-block:: console - - $ npm install -g bids-validator@1.7.2 - -In our example walkthrough, -we use ``bids-validator`` v1.7.2. using a different version of the -validator may result in slightly different validation tsv printouts, -but ``CuBIDS`` is compatible with all versions of the validator at or above v1.6.2. We also recommend using ``CuBIDS`` with the optional ``DataLad`` version control capabilities. 
We use ``DataLad`` throughout our walkthrough of the CuBIDS Workflow on From 38cf11bd8415c955ce9c5a2a4f51548dc9504100 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Thu, 12 Dec 2024 10:41:55 -0500 Subject: [PATCH 13/21] Add a function to save bids validator and schema version --- cubids/cli.py | 36 ++++++++++++ cubids/validator.py | 134 +++++++++++++++++++++++++++++++++++++++++++- cubids/workflows.py | 69 +++++++++++++++++++++++ 3 files changed, 238 insertions(+), 1 deletion(-) diff --git a/cubids/cli.py b/cubids/cli.py index f87ffc6c4..d2f8a9201 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -107,6 +107,41 @@ def _enter_validate(argv=None): workflows.validate(**args) +def _parse_bids_version(): + parser = argparse.ArgumentParser( + description="cubids bids-version: Get BIDS Validator and Schema version", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + PathExists = partial(_path_exists, parser=parser) + + parser.add_argument( + "bids_dir", + type=PathExists, + action="store", + help=( + "the root of a BIDS dataset. It should contain " + "sub-X directories and dataset_description.json" + ), + ) + parser.add_argument( + "--write", + action="store_true", + default=False, + help=( + "Save the validator and schema version to 'dataset_description.json' " + "when using `cubids bids-version /bids/path --write`. " + "By default, `cubids bids-version /bids/path` prints to the terminal." + ), + ) + return parser + + +def _enter_bids_version(argv=None): + options = _parse_bids_version().parse_args(argv) + args = vars(options).copy() + workflows.bids_version(**args) + + def _parse_bids_sidecar_merge(): parser = argparse.ArgumentParser( description=("bids-sidecar-merge: merge critical keys from one sidecar to another"), @@ -655,6 +690,7 @@ def _enter_print_metadata_fields(argv=None): COMMANDS = [ ("validate", _parse_validate, workflows.validate), + ("bids-version", _parse_bids_version, workflows.bids_version), ("sidecar-merge", _parse_bids_sidecar_merge, workflows.bids_sidecar_merge), ("group", _parse_group, workflows.group), ("apply", _parse_apply, workflows.apply), diff --git a/cubids/validator.py b/cubids/validator.py index 7fba81385..fe0e08ef1 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -6,6 +6,7 @@ import os import pathlib import subprocess +import re import pandas as pd @@ -24,6 +25,22 @@ def build_validator_call(path, ignore_headers=False): return command +def get_bids_validator_version(): + """Get the version of the BIDS validator. + + Returns + ------- + version : :obj:`str` + Version of the BIDS validator. 
+ """ + command = ["deno", "run", "-A", "jsr:@bids/validator", "--version"] + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + output = result.stdout.decode("utf-8").strip() + version = output.split()[-1] + clean_ver = re.sub(r'\x1b\[[0-9;]*m', '', version) # Remove ANSI color codes + return {"ValidatorVersion": clean_ver} + + def build_subject_paths(bids_dir): """Build a list of BIDS dirs with 1 subject each.""" bids_dir = str(bids_dir) @@ -52,6 +69,26 @@ def build_subject_paths(bids_dir): return subjects_dict +def build_first_subject_path(bids_dir, subject): + """Build a list of BIDS dirs with 1 subject each.""" + bids_dir = str(bids_dir) + if not bids_dir.endswith("/"): + bids_dir += "/" + + root_files = [x for x in glob.glob(bids_dir + "*") if os.path.isfile(x)] + + subject_dict = {} + + purepath = pathlib.PurePath(subject) + sub_label = purepath.name + + files = [x for x in glob.glob(subject + "**", recursive=True) if os.path.isfile(x)] + files.extend(root_files) + subject_dict[sub_label] = files + + return subject_dict + + def run_validator(call): """Run the validator with subprocess. @@ -103,6 +140,7 @@ def parse_issue(issue_dict): return { "location": issue_dict.get("location", ""), "code": issue_dict.get("code", ""), + "issueMessage": issue_dict.get("issueMessage", ""), "subCode": issue_dict.get("subCode", ""), "severity": issue_dict.get("severity", ""), "rule": issue_dict.get("rule", ""), @@ -114,7 +152,9 @@ def parse_issue(issue_dict): # Extract issues issues = data.get("issues", {}).get("issues", []) if not issues: - return pd.DataFrame(columns=["location", "code", "subCode", "severity", "rule"]) + return pd.DataFrame( + columns=["location", "code", "issueMessage", "subCode", "severity", "rule"] + ) # Parse all issues parsed_issues = [parse_issue(issue) for issue in issues] @@ -135,7 +175,99 @@ def get_val_dictionary(): return { "location": {"Description": "File with the validation issue."}, "code": {"Description": "Code of the validation issue."}, + "issueMessage": {"Description": "Validation issue message."}, "subCode": {"Description": "Subcode providing additional issue details."}, "severity": {"Description": "Severity of the issue (e.g., warning, error)."}, "rule": {"Description": "Validation rule that triggered the issue."}, } + + +def extract_summary_info(output): + """Extract summary information from the JSON output. + + Parameters + ---------- + output : str + JSON string of BIDS validator output. + + Returns + ------- + dict + Dictionary containing SchemaVersion and other summary info. + """ + try: + data = json.loads(output) + except json.JSONDecodeError as e: + raise ValueError("Invalid JSON provided to get SchemaVersion.") from e + + summary = data.get("summary", {}) + + return {"SchemaVersion": summary.get("schemaVersion", "")} + + +def update_dataset_description(path, new_info): + """Update or append information to dataset_description.json. + + Parameters + ---------- + path : :obj:`str` + Path to the dataset. + new_info : :obj:`dict` + Information to add or update. 
+ """ + description_path = os.path.join(path, "dataset_description.json") + + # Load existing data if the file exists + if os.path.exists(description_path): + with open(description_path, "r") as f: + existing_data = json.load(f) + else: + existing_data = {} + + # Update the existing data with the new info + existing_data.update(new_info) + + # Write the updated data back to the file + with open(description_path, "w") as f: + json.dump(existing_data, f, indent=4) + print(f"Updated dataset_description.json at: {description_path}") + + # Check if .datalad directory exists before running the DataLad save command + datalad_dir = os.path.join(path, ".datalad") + if os.path.exists(datalad_dir) and os.path.isdir(datalad_dir): + try: + subprocess.run( + ["datalad", "save", "-m", + "Save BIDS validator and schema version to dataset_description", + description_path], + check=True + ) + print("Changes saved with DataLad.") + except subprocess.CalledProcessError as e: + print(f"Error running DataLad save: {e}") + + +def bids_validator_version(output, path, write=False): + """Save BIDS validator and schema version. + + Parameters + ---------- + output : :obj:`str` + Path to JSON file of BIDS validator output. + path : :obj:`str` + Path to the dataset. + write : :obj:`bool` + If True, write to dataset_description.json. If False, print to terminal. + """ + # Get the BIDS validator version + validator_version = get_bids_validator_version() + # Extract schemaVersion + summary_info = extract_summary_info(output) + + combined_info = {**validator_version, **summary_info} + + if write: + # Update the dataset_description.json file + update_dataset_description(path, combined_info) + elif not write: + print(combined_info) \ No newline at end of file diff --git a/cubids/workflows.py b/cubids/workflows.py index 6cbc1e425..69bed501f 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -22,6 +22,8 @@ get_val_dictionary, parse_validator_output, run_validator, + build_first_subject_path, + bids_validator_version, ) warnings.simplefilter(action="ignore", category=FutureWarning) @@ -258,6 +260,73 @@ def validate( sys.exit(proc.returncode) +def bids_version( + bids_dir, + write=False +): + """Get BIDS validator and schema version. + + Parameters + ---------- + bids_dir : :obj:`pathlib.Path` + Path to the BIDS directory. + write : :obj:`bool` + If True, write to dataset_description.json. If False, print to terminal. + """ + # Need to run validator to get output with schema version + # Copy code from `validate --sequential` + + try: # return first subject + # Get all folders that start with "sub-" + sub_folders = [ + name + for name in os.listdir(bids_dir) + if os.path.isdir(os.path.join(bids_dir, name)) and name.startswith("sub-") + ] + if not sub_folders: + raise ValueError("No folders starting with 'sub-' found. 
Please provide a valid BIDS.") + subject = sub_folders[0] + except FileNotFoundError: + raise FileNotFoundError(f"The directory {bids_dir} does not exist.") + except ValueError as ve: + raise ve + + # build a dictionary with {SubjectLabel: [List of files]} + # run first subject only + subject_dict = build_first_subject_path(bids_dir, subject) + + # iterate over the dictionary + for subject, files_list in subject_dict.items(): + # logger.info(" ".join(["Processing subject:", subject])) + # create a temporary directory and symlink the data + with tempfile.TemporaryDirectory() as tmpdirname: + for fi in files_list: + # cut the path down to the subject label + bids_start = fi.find(subject) + + # maybe it's a single file + if bids_start < 1: + bids_folder = tmpdirname + fi_tmpdir = tmpdirname + + else: + bids_folder = Path(fi[bids_start:]).parent + fi_tmpdir = tmpdirname + "/" + str(bids_folder) + + if not os.path.exists(fi_tmpdir): + os.makedirs(fi_tmpdir) + output = fi_tmpdir + "/" + str(Path(fi).name) + shutil.copy2(fi, output) + + # run the validator + call = build_validator_call(tmpdirname) + ret = run_validator(call) + + # Get BIDS validator and schema version + decoded = ret.stdout.decode("UTF-8") + bids_validator_version(decoded, bids_dir, write=write) + + def bids_sidecar_merge(from_json, to_json): """Merge critical keys from one sidecar to another.""" merge_status = merge_json_into_json(from_json, to_json, raise_on_error=False) From 6f85a1f1c5f8adc1e8d78b6641197e5e7b852ce7 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Thu, 12 Dec 2024 10:54:32 -0500 Subject: [PATCH 14/21] Fix linting issue --- cubids/cli.py | 27 ++++++---- cubids/cubids.py | 105 ++++++++++++++++++++++++------------ cubids/metadata_merge.py | 15 ++++-- cubids/tests/test_bond.py | 75 +++++++++++++++++--------- cubids/tests/test_cli.py | 6 ++- cubids/tests/test_cubids.py | 15 ++++-- cubids/tests/utils.py | 6 ++- cubids/validator.py | 45 ++++++++++------ cubids/workflows.py | 44 ++++++++------- 9 files changed, 220 insertions(+), 118 deletions(-) diff --git a/cubids/cli.py b/cubids/cli.py index d2f8a9201..ea78680cb 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -27,7 +27,8 @@ def _is_file(path, parser): """Ensure a given path exists and it is a file.""" path = _path_exists(path, parser) if not path.is_file(): - raise parser.error(f"Path should point to a file (or symlink of file): <{path}>.") + raise parser.error( + f"Path should point to a file (or symlink of file): <{path}>.") return path @@ -144,7 +145,8 @@ def _enter_bids_version(argv=None): def _parse_bids_sidecar_merge(): parser = argparse.ArgumentParser( - description=("bids-sidecar-merge: merge critical keys from one sidecar to another"), + description=( + "bids-sidecar-merge: merge critical keys from one sidecar to another"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) IsFile = partial(_is_file, parser=parser) @@ -216,7 +218,8 @@ def _parse_group(): default="subject", choices=["subject", "session"], action="store", - help=("Level at which acquisition groups are created options: 'subject' or 'session'"), + help=( + "Level at which acquisition groups are created options: 'subject' or 'session'"), ) parser.add_argument( "--config", @@ -244,7 +247,8 @@ def _enter_group(argv=None): def _parse_apply(): parser = argparse.ArgumentParser( - description=("cubids-apply: apply the changes specified in a tsv to a BIDS directory"), + description=( + "cubids-apply: apply the changes specified in a tsv to a BIDS directory"), 
formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) PathExists = partial(_path_exists, parser=parser) @@ -312,7 +316,8 @@ def _parse_apply(): default="subject", choices=["subject", "session"], action="store", - help=("Level at which acquisition groups are created options: 'subject' or 'session'"), + help=( + "Level at which acquisition groups are created options: 'subject' or 'session'"), ) parser.add_argument( "--config", @@ -341,7 +346,8 @@ def _enter_apply(argv=None): def _parse_datalad_save(): parser = argparse.ArgumentParser( - description=("cubids-datalad-save: perform a DataLad save on a BIDS directory"), + description=( + "cubids-datalad-save: perform a DataLad save on a BIDS directory"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) PathExists = partial(_path_exists, parser=parser) @@ -699,8 +705,10 @@ def _enter_print_metadata_fields(argv=None): ("copy-exemplars", _parse_copy_exemplars, workflows.copy_exemplars), ("undo", _parse_undo, workflows.undo), ("datalad-save", _parse_datalad_save, workflows.datalad_save), - ("print-metadata-fields", _parse_print_metadata_fields, workflows.print_metadata_fields), - ("remove-metadata-fields", _parse_remove_metadata_fields, workflows.remove_metadata_fields), + ("print-metadata-fields", _parse_print_metadata_fields, + workflows.print_metadata_fields), + ("remove-metadata-fields", _parse_remove_metadata_fields, + workflows.remove_metadata_fields), ] @@ -709,7 +717,8 @@ def _get_parser(): from cubids import __version__ parser = argparse.ArgumentParser(prog="cubids") - parser.add_argument("-v", "--version", action="version", version=__version__) + parser.add_argument("-v", "--version", + action="version", version=__version__) subparsers = parser.add_subparsers(help="CuBIDS commands") for command, parser_func, run_func in COMMANDS: diff --git a/cubids/cubids.py b/cubids/cubids.py index 27f632e3a..4de83826f 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -149,9 +149,11 @@ def reset_bids_layout(self, validate=False): re.compile(r"/\."), ] - indexer = bids.BIDSLayoutIndexer(validate=validate, ignore=ignores, index_metadata=False) + indexer = bids.BIDSLayoutIndexer( + validate=validate, ignore=ignores, index_metadata=False) - self._layout = bids.BIDSLayout(self.path, validate=validate, indexer=indexer) + self._layout = bids.BIDSLayout( + self.path, validate=validate, indexer=indexer) def create_cubids_code_dir(self): """Create CuBIDS code directory. @@ -201,7 +203,8 @@ def datalad_save(self, message=None): Commit message to use with datalad save. """ if not self.datalad_ready: - raise Exception("DataLad has not been initialized. use datalad_init()") + raise Exception( + "DataLad has not been initialized. use datalad_init()") statuses = self.datalad_handle.save(message=message or "CuBIDS Save") saved_status = set([status["status"] for status in statuses]) @@ -223,7 +226,8 @@ def is_datalad_clean(self): """ if not self.datalad_ready: raise Exception("Datalad not initialized, can't determine status") - statuses = set([status["state"] for status in self.datalad_handle.status()]) + statuses = set([status["state"] + for status in self.datalad_handle.status()]) return statuses == set(["clean"]) def datalad_undo_last_commit(self): @@ -237,8 +241,10 @@ def datalad_undo_last_commit(self): If there are untracked changes in the datalad dataset. """ if not self.is_datalad_clean(): - raise Exception("Untracked changes present. 
Run clear_untracked_changes first") - reset_proc = subprocess.run(["git", "reset", "--hard", "HEAD~1"], cwd=self.path) + raise Exception( + "Untracked changes present. Run clear_untracked_changes first") + reset_proc = subprocess.run( + ["git", "reset", "--hard", "HEAD~1"], cwd=self.path) reset_proc.check_returncode() def add_nifti_info(self): @@ -342,11 +348,13 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T files_df = pd.read_table(files_tsv) # Check that the MergeInto column only contains valid merges - ok_merges, deletions = check_merging_operations(summary_tsv, raise_on_error=raise_on_error) + ok_merges, deletions = check_merging_operations( + summary_tsv, raise_on_error=raise_on_error) merge_commands = [] for source_id, dest_id in ok_merges: - dest_files = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == dest_id).all(1)] + dest_files = files_df.loc[( + files_df[["ParamGroup", "EntitySet"]] == dest_id).all(1)] source_files = files_df.loc[ (files_df[["ParamGroup", "EntitySet"]] == source_id).all(1) ] @@ -357,13 +365,15 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T for dest_nii in dest_files.FilePath: dest_json = img_to_new_ext(self.path + dest_nii, ".json") if Path(dest_json).exists() and Path(source_json).exists(): - merge_commands.append(f"bids-sidecar-merge {source_json} {dest_json}") + merge_commands.append( + f"bids-sidecar-merge {source_json} {dest_json}") # Get the delete commands # delete_commands = [] to_remove = [] for rm_id in deletions: - files_to_rm = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)] + files_to_rm = files_df.loc[( + files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)] for rm_me in files_to_rm.FilePath: if Path(self.path + rm_me).exists(): @@ -436,7 +446,8 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T rename_commit = s1 + s2 - self.datalad_handle.run(cmd=["bash", renames], message=rename_commit) + self.datalad_handle.run( + cmd=["bash", renames], message=rename_commit) else: subprocess.run( ["bash", renames], @@ -476,7 +487,8 @@ def change_filename(self, filepath, entities): entity_file_keys = [] # Entities that may be in the filename? - file_keys = ["task", "acquisition", "direction", "reconstruction", "run"] + file_keys = ["task", "acquisition", + "direction", "reconstruction", "run"] for key in file_keys: if key in list(entities.keys()): @@ -490,7 +502,8 @@ def change_filename(self, filepath, entities): # XXX: This adds an extra leading zero to run. entities["run"] = "0" + str(entities["run"]) - filename = "_".join([f"{key}-{entities[key]}" for key in entity_file_keys]) + filename = "_".join( + [f"{key}-{entities[key]}" for key in entity_file_keys]) filename = ( filename.replace("acquisition", "acq") .replace("direction", "dir") @@ -499,7 +512,8 @@ def change_filename(self, filepath, entities): if len(filename) > 0: filename = sub_ses + "_" + filename + "_" + suffix + old_ext else: - raise ValueError(f"Could not construct new filename for {filepath}") + raise ValueError( + f"Could not construct new filename for {filepath}") # CHECK TO SEE IF DATATYPE CHANGED # datatype may be overridden/changed if the original file is located in the wrong folder. 
@@ -517,7 +531,8 @@ def change_filename(self, filepath, entities): dtype_new = dtype_orig # Construct the new filename - new_path = str(self.path) + "/" + sub + "/" + ses + "/" + dtype_new + "/" + filename + new_path = str(self.path) + "/" + sub + "/" + \ + ses + "/" + dtype_new + "/" + filename # Add the scan path + new path to the lists of old, new filenames self.old_filenames.append(filepath) @@ -536,7 +551,8 @@ def change_filename(self, filepath, entities): # ensure assoc not an IntendedFor reference if ".nii" not in str(assoc_path): self.old_filenames.append(assoc_path) - new_ext_path = img_to_new_ext(new_path, "".join(Path(assoc_path).suffixes)) + new_ext_path = img_to_new_ext( + new_path, "".join(Path(assoc_path).suffixes)) self.new_filenames.append(new_ext_path) # MAKE SURE THESE AREN'T COVERED BY get_associations!!! @@ -609,7 +625,8 @@ def change_filename(self, filepath, entities): if Path(old_labeling).exists(): self.old_filenames.append(old_labeling) new_scan_end = "_" + suffix + old_ext - new_labeling = new_path.replace(new_scan_end, "_asllabeling.jpg") + new_labeling = new_path.replace( + new_scan_end, "_asllabeling.jpg") self.new_filenames.append(new_labeling) # RENAME INTENDED FORS! @@ -635,7 +652,8 @@ def change_filename(self, filepath, entities): # remove old filename data["IntendedFor"].remove(item) # add new filename - data["IntendedFor"].append(_get_intended_for_reference(new_path)) + data["IntendedFor"].append( + _get_intended_for_reference(new_path)) # update the json with the new data dictionary _update_json(filename_with_if, data) @@ -808,7 +826,8 @@ def _purge_associations(self, scans): if "/func/" in str(path): # add tsvs - tsv = img_to_new_ext(str(path), ".tsv").replace("_bold", "_events") + tsv = img_to_new_ext(str(path), ".tsv").replace( + "_bold", "_events") if Path(tsv).exists(): to_remove.append(tsv) # add tsv json (if exists) @@ -922,7 +941,8 @@ def get_param_groups_from_entity_set(self, entity_set): 2. 
A data frame with param group summaries """ if not self.fieldmaps_cached: - raise Exception("Fieldmaps must be cached to find parameter groups.") + raise Exception( + "Fieldmaps must be cached to find parameter groups.") key_entities = _entity_set_to_entities(entity_set) key_entities["extension"] = ".nii[.gz]*" @@ -975,7 +995,8 @@ def create_data_dictionary(self): mod_dict = sidecar_params[mod] for s_param in mod_dict.keys(): if s_param not in self.data_dict.keys(): - self.data_dict[s_param] = {"Description": "Scanning Parameter"} + self.data_dict[s_param] = { + "Description": "Scanning Parameter"} relational_params = self.grouping_config.get("relational_params") for r_param in relational_params.keys(): @@ -987,7 +1008,8 @@ def create_data_dictionary(self): mod_dict = derived_params[mod] for d_param in mod_dict.keys(): if d_param not in self.data_dict.keys(): - self.data_dict[d_param] = {"Description": "NIfTI Header Parameter"} + self.data_dict[d_param] = { + "Description": "NIfTI Header Parameter"} # Manually add non-sidecar columns/descriptions to data_dict desc1 = "Column where users mark groups to manually check" @@ -1094,17 +1116,20 @@ def get_param_groups_dataframes(self): long_name = big_df.loc[row, "FilePath"] big_df.loc[row, "FilePath"] = long_name.replace(self.path, "") - summary = _order_columns(pd.concat(param_group_summaries, ignore_index=True)) + summary = _order_columns( + pd.concat(param_group_summaries, ignore_index=True)) # create new col that strings key and param group together - summary["KeyParamGroup"] = summary["EntitySet"] + "__" + summary["ParamGroup"].map(str) + summary["KeyParamGroup"] = summary["EntitySet"] + \ + "__" + summary["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = summary.pop("KeyParamGroup") summary.insert(0, "KeyParamGroup", key_param_col) # do the same for the files df - big_df["KeyParamGroup"] = big_df["EntitySet"] + "__" + big_df["ParamGroup"].map(str) + big_df["KeyParamGroup"] = big_df["EntitySet"] + \ + "__" + big_df["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = big_df.pop("KeyParamGroup") @@ -1253,8 +1278,10 @@ def get_tsvs(self, path_prefix): big_df, summary = self.get_param_groups_dataframes() - summary = summary.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False]) - big_df = big_df.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False]) + summary = summary.sort_values( + by=["Modality", "EntitySetCount"], ascending=[True, False]) + big_df = big_df.sort_values( + by=["Modality", "EntitySetCount"], ascending=[True, False]) # Create json dictionaries for summary and files tsvs self.create_data_dictionary() @@ -1273,7 +1300,8 @@ def get_tsvs(self, path_prefix): summary.to_csv(f"{path_prefix}_summary.tsv", sep="\t", index=False) # Calculate the acq groups - group_by_acquisition_sets(f"{path_prefix}_files.tsv", path_prefix, self.acq_group_level) + group_by_acquisition_sets( + f"{path_prefix}_files.tsv", path_prefix, self.acq_group_level) print(f"CuBIDS detected {len(summary)} Parameter Groups.") @@ -1492,7 +1520,8 @@ def _get_param_groups( # Get the fieldmaps out and add their types if "FieldmapKey" in relational_params: fieldmap_types = sorted( - [_file_to_entity_set(fmap.path) for fmap in fieldmap_lookup[path]] + [_file_to_entity_set(fmap.path) + for fmap in fieldmap_lookup[path]] ) # check if config says columns or bool @@ -1514,7 +1543,8 @@ def _get_param_groups( # If it's a fieldmap, see what entity set it's intended to 
correct if "IntendedForKey" in relational_params: intended_entity_sets = sorted( - [_file_to_entity_set(intention) for intention in intentions] + [_file_to_entity_set(intention) + for intention in intentions] ) # check if config says columns or bool @@ -1568,11 +1598,14 @@ def _get_param_groups( {"Counts": value_counts.to_numpy(), "ParamGroup": value_counts.index.to_numpy()} ) - param_groups_with_counts = pd.merge(deduped, param_group_counts, on=["ParamGroup"]) + param_groups_with_counts = pd.merge( + deduped, param_group_counts, on=["ParamGroup"]) # Sort by counts and relabel the param groups - param_groups_with_counts.sort_values(by=["Counts"], inplace=True, ascending=False) - param_groups_with_counts["ParamGroup"] = np.arange(param_groups_with_counts.shape[0]) + 1 + param_groups_with_counts.sort_values( + by=["Counts"], inplace=True, ascending=False) + param_groups_with_counts["ParamGroup"] = np.arange( + param_groups_with_counts.shape[0]) + 1 # Send the new, ordered param group ids to the files list ordered_labeled_files = pd.merge( @@ -1580,13 +1613,15 @@ def _get_param_groups( ) # sort ordered_labeled_files by param group - ordered_labeled_files.sort_values(by=["Counts"], inplace=True, ascending=False) + ordered_labeled_files.sort_values( + by=["Counts"], inplace=True, ascending=False) # now get rid of cluster cols from deduped and df for col in list(ordered_labeled_files.columns): if col.startswith("Cluster_"): ordered_labeled_files = ordered_labeled_files.drop(col, axis=1) - param_groups_with_counts = param_groups_with_counts.drop(col, axis=1) + param_groups_with_counts = param_groups_with_counts.drop( + col, axis=1) if col.endswith("_x"): ordered_labeled_files = ordered_labeled_files.drop(col, axis=1) diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index 6562f35b7..ddaa585f1 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -57,7 +57,8 @@ def _check_sdc_cols(meta1, meta2): source_param_key = tuple(row_needs_merge[["MergeInto", "EntitySet"]]) dest_param_key = tuple(row_needs_merge[["ParamGroup", "EntitySet"]]) dest_metadata = row_needs_merge.to_dict() - source_row = actions.loc[(actions[["ParamGroup", "EntitySet"]] == source_param_key).all(1)] + source_row = actions.loc[( + actions[["ParamGroup", "EntitySet"]] == source_param_key).all(1)] if source_param_key[0] == 0: print("going to delete ", dest_param_key) @@ -298,7 +299,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): file_entities = parse_file_entities(row.FilePath) if acq_group_level == "subject": - acq_id = (file_entities.get("subject"), file_entities.get("session")) + acq_id = (file_entities.get("subject"), + file_entities.get("session")) acq_groups[acq_id].append((row.EntitySet, row.ParamGroup)) else: acq_id = (file_entities.get("subject"), None) @@ -325,7 +327,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): acq_group_info = [] for groupnum, content_id_row in enumerate(descending_order, start=1): content_id = content_ids[content_id_row] - acq_group_info.append((groupnum, content_id_counts[content_id_row]) + content_id) + acq_group_info.append( + (groupnum, content_id_counts[content_id_row]) + content_id) for subject, session in contents_to_subjects[content_id]: grouped_sub_sess.append( {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum} @@ -333,7 +336,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): # Write the mapping of subject/session to acq_group_df = 
pd.DataFrame(grouped_sub_sess) - acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False) + acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", + sep="\t", index=False) # Create data dictionary for acq group tsv acq_dict = get_acq_dictionary() @@ -342,7 +346,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): # Write the summary of acq groups to a text file with open(output_prefix + "_AcqGroupInfo.txt", "w") as infotxt: - infotxt.write("\n".join([" ".join(map(str, line)) for line in acq_group_info])) + infotxt.write("\n".join([" ".join(map(str, line)) + for line in acq_group_info])) # Create and save AcqGroupInfo data dictionary header_dict = {} diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 28211cc2d..4c2266e16 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -88,7 +88,8 @@ def test_ok_json_merge_cli(tmp_path): assert os.path.isfile(source_json) assert os.path.isfile(dest_json) - merge_proc = subprocess.run(["bids-sidecar-merge", str(source_json), str(dest_json)]) + merge_proc = subprocess.run( + ["bids-sidecar-merge", str(source_json), str(dest_json)]) assert merge_proc.returncode == 0 assert not _get_json_string(dest_json) == orig_dest_json_content @@ -143,7 +144,8 @@ def test_purge_no_datalad(tmp_path): / "sub-03_ses-phdiff_task-rest_bold.json" ) scans.append(scan_name) - scans.append("sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz") + scans.append( + "sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz") # create and save .txt with list of scans purge_path = str(tmp_path / "purge_scans.txt") @@ -276,7 +278,8 @@ def test_bad_json_merge_cli(tmp_path): / "sub-01_ses-phdiff_acq-HASC55AP_dwi.json" ) - merge_proc = subprocess.run(["bids-sidecar-merge", str(invalid_source_json), str(dest_json)]) + merge_proc = subprocess.run( + ["bids-sidecar-merge", str(invalid_source_json), str(dest_json)]) assert merge_proc.returncode > 0 assert _get_json_string(dest_json) == orig_dest_json_content @@ -356,10 +359,12 @@ def test_tsv_merge_no_datalad(tmp_path): original_files_tsv = tsv_prefix + "_files.tsv" # give tsv with no changes (make sure it does nothing) - bod.apply_tsv_changes(original_summary_tsv, original_files_tsv, str(tmp_path / "unmodified")) + bod.apply_tsv_changes(original_summary_tsv, + original_files_tsv, str(tmp_path / "unmodified")) # these will not actually be equivalent because of the auto renames - assert file_hash(original_summary_tsv) != file_hash(tmp_path / "unmodified_summary.tsv") + assert file_hash(original_summary_tsv) != file_hash( + tmp_path / "unmodified_summary.tsv") # Find the dwi with no FlipAngle summary_df = pd.read_table(original_summary_tsv) @@ -369,28 +374,33 @@ def test_tsv_merge_no_datalad(tmp_path): ) # Find the dwi with and EchoTime == (complete_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch( + "acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime > 0.05) ) (cant_merge_echotime_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch( + "acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime < 0.05) ) # Set a legal MergeInto value. 
This effectively fills in data # where there was previously as missing FlipAngle - summary_df.loc[fa_nan_dwi_row, "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] + summary_df.loc[fa_nan_dwi_row, + "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] valid_tsv_file = tsv_prefix + "_valid_summary.tsv" summary_df.to_csv(valid_tsv_file, sep="\t", index=False) # about to apply merges! - bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, str(tmp_path / "ok_modified")) + bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, + str(tmp_path / "ok_modified")) - assert not file_hash(original_summary_tsv) == file_hash(tmp_path / "ok_modified_summary.tsv") + assert not file_hash(original_summary_tsv) == file_hash( + tmp_path / "ok_modified_summary.tsv") # Add an illegal merge to MergeInto summary_df.loc[cant_merge_echotime_dwi_row, "MergeInto"] = summary_df.ParamGroup[ @@ -401,7 +411,8 @@ def test_tsv_merge_no_datalad(tmp_path): with pytest.raises(Exception): bod.apply_tsv_changes( - invalid_tsv_file, str(tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") + invalid_tsv_file, str( + tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") ) @@ -419,7 +430,8 @@ def test_tsv_merge_changes(tmp_path): original_files_tsv = tsv_prefix + "_files.tsv" # give tsv with no changes (make sure it does nothing except rename) - bod.apply_tsv_changes(original_summary_tsv, original_files_tsv, str(tmp_path / "unmodified")) + bod.apply_tsv_changes(original_summary_tsv, + original_files_tsv, str(tmp_path / "unmodified")) orig = pd.read_table(original_summary_tsv) # TEST RenameEntitySet column got populated CORRECTLY for row in range(len(orig)): @@ -446,7 +458,8 @@ def test_tsv_merge_changes(tmp_path): applied_f.loc[row, "KeyParamGroup"] ) else: - occurrences[applied_f.loc[row, "FilePath"]] = [applied_f.loc[row, "KeyParamGroup"]] + occurrences[applied_f.loc[row, "FilePath"]] = [ + applied_f.loc[row, "KeyParamGroup"]] assert len(orig) == len(applied) @@ -464,7 +477,8 @@ def test_tsv_merge_changes(tmp_path): assert renamed # will no longer be equal because of auto rename! - assert file_hash(original_summary_tsv) != file_hash(tmp_path / "unmodified_summary.tsv") + assert file_hash(original_summary_tsv) != file_hash( + tmp_path / "unmodified_summary.tsv") # Find the dwi with no FlipAngle summary_df = pd.read_table(original_summary_tsv) @@ -474,27 +488,32 @@ def test_tsv_merge_changes(tmp_path): ) # Find the dwi with and EchoTime == (complete_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch( + "acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime > 0.05) ) (cant_merge_echotime_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch( + "acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime < 0.05) ) # Set a legal MergeInto value. 
This effectively fills in data # where there was previously as missing FlipAngle - summary_df.loc[fa_nan_dwi_row, "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] + summary_df.loc[fa_nan_dwi_row, + "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] valid_tsv_file = tsv_prefix + "_valid_summary.tsv" summary_df.to_csv(valid_tsv_file, sep="\t", index=False) # about to merge - bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, str(tmp_path / "ok_modified")) + bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, + str(tmp_path / "ok_modified")) - assert not file_hash(original_summary_tsv) == file_hash(tmp_path / "ok_modified_summary.tsv") + assert not file_hash(original_summary_tsv) == file_hash( + tmp_path / "ok_modified_summary.tsv") # Add an illegal merge to MergeInto summary_df.loc[cant_merge_echotime_dwi_row, "MergeInto"] = summary_df.ParamGroup[ @@ -505,7 +524,8 @@ def test_tsv_merge_changes(tmp_path): with pytest.raises(Exception): bod.apply_tsv_changes( - invalid_tsv_file, str(tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") + invalid_tsv_file, str( + tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") ) # Make sure MergeInto == 0 deletes the param group and all associations @@ -689,7 +709,8 @@ def test_tsv_creation(tmp_path): # if entity sets in rows i and i+1 are the same if isummary_df.iloc[i]["EntitySet"] == isummary_df.iloc[i + 1]["EntitySet"]: # param group i = param group i+1 - assert isummary_df.iloc[i]["ParamGroup"] == isummary_df.iloc[i + 1]["ParamGroup"] - 1 + assert isummary_df.iloc[i]["ParamGroup"] == isummary_df.iloc[i + + 1]["ParamGroup"] - 1 # and count i < count i + 1 assert isummary_df.iloc[i]["Counts"] >= isummary_df.iloc[i + 1]["Counts"] @@ -801,11 +822,13 @@ def test_apply_tsv_changes(tmp_path): for f in deleted_f: assert Path(str(data_root / "complete") + f).exists() - assert Path(str(data_root / "complete") + f.replace("nii.gz", "json")).exists() + assert Path(str(data_root / "complete") + + f.replace("nii.gz", "json")).exists() # apply deletion complete_cubids.apply_tsv_changes( - mod2_path, str(tmp_path / "modified2_files.tsv"), str(tmp_path / "deleted") + mod2_path, str( + tmp_path / "modified2_files.tsv"), str(tmp_path / "deleted") ) # make sure deleted_keyparam gone from files_tsv @@ -838,7 +861,8 @@ def test_session_apply(tmp_path): data_root = get_data(tmp_path) - ses_cubids = CuBIDS(data_root / "inconsistent", acq_group_level="session", use_datalad=True) + ses_cubids = CuBIDS(data_root / "inconsistent", + acq_group_level="session", use_datalad=True) ses_cubids.get_tsvs(str(tmp_path / "originals")) @@ -1039,7 +1063,8 @@ def test_docker(): """ try: return_status = 1 - ret = subprocess.run(["docker", "version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = subprocess.run(["docker", "version"], + stdout=subprocess.PIPE, stderr=subprocess.PIPE) except OSError as e: from errno import ENOENT diff --git a/cubids/tests/test_cli.py b/cubids/tests/test_cli.py index a0e9066ac..78e15501a 100644 --- a/cubids/tests/test_cli.py +++ b/cubids/tests/test_cli.py @@ -14,9 +14,10 @@ """ import argparse + import pytest -from cubids.cli import _path_exists, _is_file, _get_parser, _main +from cubids.cli import _get_parser, _is_file, _main, _path_exists def _test_path_exists(): @@ -27,7 +28,8 @@ def _test_path_exists(): It asserts that the function returns the expected path when the path exists, and raises an `argparse.ArgumentTypeError` when the path does not exist. 
""" - assert _path_exists("/path/to/existing/file", None) == "/path/to/existing/file" + assert _path_exists("/path/to/existing/file", + None) == "/path/to/existing/file" with pytest.raises(argparse.ArgumentTypeError): _path_exists("/path/to/nonexistent/file", None) diff --git a/cubids/tests/test_cubids.py b/cubids/tests/test_cubids.py index 6ab847fd5..ca70d21ad 100644 --- a/cubids/tests/test_cubids.py +++ b/cubids/tests/test_cubids.py @@ -74,7 +74,8 @@ def _test_copy_exemplars(cubids_instance): exemplars_dir = "/path/to/exemplars" exemplars_tsv = "/path/to/exemplars.tsv" min_group_size = 2 - cubids_instance.copy_exemplars(exemplars_dir, exemplars_tsv, min_group_size) + cubids_instance.copy_exemplars( + exemplars_dir, exemplars_tsv, min_group_size) # Add assertions here @@ -204,8 +205,10 @@ def _test__get_intended_for_reference(cubids_instance): def _test__get_param_groups(cubids_instance): - files = ["sub-01_ses-01_task-rest_bold.nii.gz", "sub-02_ses-01_task-rest_bold.nii.gz"] - fieldmap_lookup = {"sub-01_ses-01_task-rest_bold.nii.gz": "fieldmap.nii.gz"} + files = ["sub-01_ses-01_task-rest_bold.nii.gz", + "sub-02_ses-01_task-rest_bold.nii.gz"] + fieldmap_lookup = { + "sub-01_ses-01_task-rest_bold.nii.gz": "fieldmap.nii.gz"} entity_set_name = "group-01" grouping_config = {"group-01": {"modality": "bold"}} modality = "bold" @@ -220,7 +223,8 @@ def _test_round_params(cubids_instance): param_group_df = pd.DataFrame({"param": [0.123456789]}) config = {"param": {"round": 3}} modality = "bold" - rounded_params = cubids_instance.round_params(param_group_df, config, modality) + rounded_params = cubids_instance.round_params( + param_group_df, config, modality) # Add assertions here @@ -234,7 +238,8 @@ def _test_format_params(cubids_instance): param_group_df = pd.DataFrame({"param": [0.123456789]}) config = {"param": {"format": "{:.2f}"}} modality = "bold" - formatted_params = cubids_instance.format_params(param_group_df, config, modality) + formatted_params = cubids_instance.format_params( + param_group_df, config, modality) # Add assertions here diff --git a/cubids/tests/utils.py b/cubids/tests/utils.py index c64da3727..9bf6bda1c 100644 --- a/cubids/tests/utils.py +++ b/cubids/tests/utils.py @@ -27,7 +27,8 @@ def _remove_a_json(json_file): def _edit_a_nifti(nifti_file): img = nb.load(nifti_file) - new_img = nb.Nifti1Image(np.random.rand(*img.shape), affine=img.affine, header=img.header) + new_img = nb.Nifti1Image(np.random.rand( + *img.shape), affine=img.affine, header=img.header) new_img.to_filename(nifti_file) @@ -76,7 +77,8 @@ def _add_ext_files(img_path): if "/dwi/" in img_path: # add bval and bvec for ext in dwi_exts: - dwi_ext_file = img_path.replace(".nii.gz", "").replace(".nii", "") + ext + dwi_ext_file = img_path.replace( + ".nii.gz", "").replace(".nii", "") + ext Path(dwi_ext_file).touch() if "bold" in img_path: no_suffix = img_path.rpartition("_")[0] diff --git a/cubids/validator.py b/cubids/validator.py index fe0e08ef1..bb7212126 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -5,8 +5,8 @@ import logging import os import pathlib -import subprocess import re +import subprocess import pandas as pd @@ -17,7 +17,8 @@ def build_validator_call(path, ignore_headers=False): """Build a subprocess command to the bids validator.""" # New schema BIDS validator doesn't have option to ignore subject consistency. # Build the deno command to run the BIDS validator. 
- command = ["deno", "run", "-A", "jsr:@bids/validator", path, "--verbose", "--json"] + command = ["deno", "run", "-A", "jsr:@bids/validator", + path, "--verbose", "--json"] if ignore_headers: command.append("--ignoreNiftiHeaders") @@ -34,10 +35,12 @@ def get_bids_validator_version(): Version of the BIDS validator. """ command = ["deno", "run", "-A", "jsr:@bids/validator", "--version"] - result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + result = subprocess.run( + command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output = result.stdout.decode("utf-8").strip() version = output.split()[-1] - clean_ver = re.sub(r'\x1b\[[0-9;]*m', '', version) # Remove ANSI color codes + # Remove ANSI color codes + clean_ver = re.sub(r"\x1b\[[0-9;]*m", "", version) return {"ValidatorVersion": clean_ver} @@ -54,7 +57,8 @@ def build_subject_paths(bids_dir): subjects = glob.glob(bids_dir) if len(subjects) < 1: - raise ValueError("Couldn't find any subjects in the specified directory:\n" + bids_dir) + raise ValueError( + "Couldn't find any subjects in the specified directory:\n" + bids_dir) subjects_dict = {} @@ -62,7 +66,8 @@ def build_subject_paths(bids_dir): purepath = pathlib.PurePath(sub) sub_label = purepath.name - files = [x for x in glob.glob(sub + "**", recursive=True) if os.path.isfile(x)] + files = [x for x in glob.glob( + sub + "**", recursive=True) if os.path.isfile(x)] files.extend(root_files) subjects_dict[sub_label] = files @@ -82,7 +87,8 @@ def build_first_subject_path(bids_dir, subject): purepath = pathlib.PurePath(subject) sub_label = purepath.name - files = [x for x in glob.glob(subject + "**", recursive=True) if os.path.isfile(x)] + files = [x for x in glob.glob( + subject + "**", recursive=True) if os.path.isfile(x)] files.extend(root_files) subject_dict[sub_label] = files @@ -153,8 +159,9 @@ def parse_issue(issue_dict): issues = data.get("issues", {}).get("issues", []) if not issues: return pd.DataFrame( - columns=["location", "code", "issueMessage", "subCode", "severity", "rule"] - ) + columns=["location", "code", "issueMessage", + "subCode", "severity", "rule"] + ) # Parse all issues parsed_issues = [parse_issue(issue) for issue in issues] @@ -229,7 +236,7 @@ def update_dataset_description(path, new_info): # Write the updated data back to the file with open(description_path, "w") as f: - json.dump(existing_data, f, indent=4) + json.dump(existing_data, f, indent=4) print(f"Updated dataset_description.json at: {description_path}") # Check if .datalad directory exists before running the DataLad save command @@ -237,10 +244,14 @@ def update_dataset_description(path, new_info): if os.path.exists(datalad_dir) and os.path.isdir(datalad_dir): try: subprocess.run( - ["datalad", "save", "-m", - "Save BIDS validator and schema version to dataset_description", - description_path], - check=True + [ + "datalad", + "save", + "-m", + "Save BIDS validator and schema version to dataset_description", + description_path, + ], + check=True, ) print("Changes saved with DataLad.") except subprocess.CalledProcessError as e: @@ -263,11 +274,11 @@ def bids_validator_version(output, path, write=False): validator_version = get_bids_validator_version() # Extract schemaVersion summary_info = extract_summary_info(output) - + combined_info = {**validator_version, **summary_info} if write: - # Update the dataset_description.json file + # Update the dataset_description.json file update_dataset_description(path, combined_info) elif not write: - print(combined_info) \ No newline at end 
of file + print(combined_info) diff --git a/cubids/workflows.py b/cubids/workflows.py index 69bed501f..a28b61a30 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -17,13 +17,13 @@ from cubids.metadata_merge import merge_json_into_json from cubids.utils import _get_container_type from cubids.validator import ( + bids_validator_version, + build_first_subject_path, build_subject_paths, build_validator_call, get_val_dictionary, parse_validator_output, run_validator, - build_first_subject_path, - bids_validator_version, ) warnings.simplefilter(action="ignore", category=FutureWarning) @@ -82,7 +82,8 @@ def validate( # parse the string output parsed = parse_validator_output(ret.stdout.decode("UTF-8")) if parsed.shape[1] < 1: - logger.info("No issues/warnings parsed, your dataset is BIDS valid.") + logger.info( + "No issues/warnings parsed, your dataset is BIDS valid.") sys.exit(0) else: logger.info("BIDS issues/warnings found in the dataset") @@ -129,7 +130,8 @@ def validate( subjects_dict = { k: v for k, v in subjects_dict.items() if k in sequential_subjects } - assert len(list(subjects_dict.keys())) > 1, "No subjects found in filter" + assert len(list(subjects_dict.keys()) + ) > 1, "No subjects found in filter" for subject, files_list in tqdm.tqdm(subjects_dict.items()): # logger.info(" ".join(["Processing subject:", subject])) # create a temporary directory and symlink the data @@ -158,7 +160,8 @@ def validate( ret = run_validator(call) # parse output if ret.returncode != 0: - logger.error("Errors returned from validator run, parsing now") + logger.error( + "Errors returned from validator run, parsing now") # parse the output and add to list if it returns a df decoded = ret.stdout.decode("UTF-8") @@ -169,7 +172,8 @@ def validate( # concatenate the parsed data and exit if len(parsed) < 1: - logger.info("No issues/warnings parsed, your dataset is BIDS valid.") + logger.info( + "No issues/warnings parsed, your dataset is BIDS valid.") sys.exit(0) else: @@ -260,10 +264,7 @@ def validate( sys.exit(proc.returncode) -def bids_version( - bids_dir, - write=False -): +def bids_version(bids_dir, write=False): """Get BIDS validator and schema version. Parameters @@ -284,13 +285,14 @@ def bids_version( if os.path.isdir(os.path.join(bids_dir, name)) and name.startswith("sub-") ] if not sub_folders: - raise ValueError("No folders starting with 'sub-' found. Please provide a valid BIDS.") + raise ValueError( + "No folders starting with 'sub-' found. 
Please provide a valid BIDS.") subject = sub_folders[0] except FileNotFoundError: raise FileNotFoundError(f"The directory {bids_dir} does not exist.") except ValueError as ve: raise ve - + # build a dictionary with {SubjectLabel: [List of files]} # run first subject only subject_dict = build_first_subject_path(bids_dir, subject) @@ -329,7 +331,8 @@ def bids_version( def bids_sidecar_merge(from_json, to_json): """Merge critical keys from one sidecar to another.""" - merge_status = merge_json_into_json(from_json, to_json, raise_on_error=False) + merge_status = merge_json_into_json( + from_json, to_json, raise_on_error=False) sys.exit(merge_status) @@ -368,7 +371,8 @@ def group(bids_dir, container, acq_group_level, config, output_prefix): apply_config = config is not None if apply_config: - input_config_dir_link = str(config.parent.absolute()) + ":/in_config:ro" + input_config_dir_link = str( + config.parent.absolute()) + ":/in_config:ro" linked_input_config = "/in_config/" + config.name linked_output_prefix = "/tsv/" + output_prefix.name @@ -475,14 +479,18 @@ def apply( # Run it through a container container_type = _get_container_type(container) bids_dir_link = str(bids_dir.absolute()) + ":/bids" - input_summary_tsv_dir_link = str(edited_summary_tsv.parent.absolute()) + ":/in_summary_tsv:ro" - input_files_tsv_dir_link = str(edited_summary_tsv.parent.absolute()) + ":/in_files_tsv:ro" - output_tsv_dir_link = str(new_tsv_prefix.parent.absolute()) + ":/out_tsv:rw" + input_summary_tsv_dir_link = str( + edited_summary_tsv.parent.absolute()) + ":/in_summary_tsv:ro" + input_files_tsv_dir_link = str( + edited_summary_tsv.parent.absolute()) + ":/in_files_tsv:ro" + output_tsv_dir_link = str( + new_tsv_prefix.parent.absolute()) + ":/out_tsv:rw" # FROM BOND-GROUP apply_config = config is not None if apply_config: - input_config_dir_link = str(config.parent.absolute()) + ":/in_config:ro" + input_config_dir_link = str( + config.parent.absolute()) + ":/in_config:ro" linked_input_config = "/in_config/" + config.name linked_output_prefix = "/tsv/" + new_tsv_prefix.name From 75bf86eac9ed257f8f1390e2736eb82e751a180c Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Thu, 12 Dec 2024 10:59:22 -0500 Subject: [PATCH 15/21] Still fixing lint issues --- cubids/cli.py | 27 ++++------ cubids/cubids.py | 105 ++++++++++++------------------------ cubids/metadata_merge.py | 15 ++---- cubids/tests/test_bond.py | 75 +++++++++----------------- cubids/tests/test_cli.py | 3 +- cubids/tests/test_cubids.py | 15 ++---- cubids/tests/utils.py | 6 +-- cubids/validator.py | 18 +++---- cubids/workflows.py | 33 ++++-------- 9 files changed, 99 insertions(+), 198 deletions(-) diff --git a/cubids/cli.py b/cubids/cli.py index ea78680cb..d2f8a9201 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -27,8 +27,7 @@ def _is_file(path, parser): """Ensure a given path exists and it is a file.""" path = _path_exists(path, parser) if not path.is_file(): - raise parser.error( - f"Path should point to a file (or symlink of file): <{path}>.") + raise parser.error(f"Path should point to a file (or symlink of file): <{path}>.") return path @@ -145,8 +144,7 @@ def _enter_bids_version(argv=None): def _parse_bids_sidecar_merge(): parser = argparse.ArgumentParser( - description=( - "bids-sidecar-merge: merge critical keys from one sidecar to another"), + description=("bids-sidecar-merge: merge critical keys from one sidecar to another"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) IsFile = partial(_is_file, parser=parser) @@ -218,8 +216,7 @@ def 
_parse_group(): default="subject", choices=["subject", "session"], action="store", - help=( - "Level at which acquisition groups are created options: 'subject' or 'session'"), + help=("Level at which acquisition groups are created options: 'subject' or 'session'"), ) parser.add_argument( "--config", @@ -247,8 +244,7 @@ def _enter_group(argv=None): def _parse_apply(): parser = argparse.ArgumentParser( - description=( - "cubids-apply: apply the changes specified in a tsv to a BIDS directory"), + description=("cubids-apply: apply the changes specified in a tsv to a BIDS directory"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) PathExists = partial(_path_exists, parser=parser) @@ -316,8 +312,7 @@ def _parse_apply(): default="subject", choices=["subject", "session"], action="store", - help=( - "Level at which acquisition groups are created options: 'subject' or 'session'"), + help=("Level at which acquisition groups are created options: 'subject' or 'session'"), ) parser.add_argument( "--config", @@ -346,8 +341,7 @@ def _enter_apply(argv=None): def _parse_datalad_save(): parser = argparse.ArgumentParser( - description=( - "cubids-datalad-save: perform a DataLad save on a BIDS directory"), + description=("cubids-datalad-save: perform a DataLad save on a BIDS directory"), formatter_class=argparse.ArgumentDefaultsHelpFormatter, ) PathExists = partial(_path_exists, parser=parser) @@ -705,10 +699,8 @@ def _enter_print_metadata_fields(argv=None): ("copy-exemplars", _parse_copy_exemplars, workflows.copy_exemplars), ("undo", _parse_undo, workflows.undo), ("datalad-save", _parse_datalad_save, workflows.datalad_save), - ("print-metadata-fields", _parse_print_metadata_fields, - workflows.print_metadata_fields), - ("remove-metadata-fields", _parse_remove_metadata_fields, - workflows.remove_metadata_fields), + ("print-metadata-fields", _parse_print_metadata_fields, workflows.print_metadata_fields), + ("remove-metadata-fields", _parse_remove_metadata_fields, workflows.remove_metadata_fields), ] @@ -717,8 +709,7 @@ def _get_parser(): from cubids import __version__ parser = argparse.ArgumentParser(prog="cubids") - parser.add_argument("-v", "--version", - action="version", version=__version__) + parser.add_argument("-v", "--version", action="version", version=__version__) subparsers = parser.add_subparsers(help="CuBIDS commands") for command, parser_func, run_func in COMMANDS: diff --git a/cubids/cubids.py b/cubids/cubids.py index 4de83826f..27f632e3a 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -149,11 +149,9 @@ def reset_bids_layout(self, validate=False): re.compile(r"/\."), ] - indexer = bids.BIDSLayoutIndexer( - validate=validate, ignore=ignores, index_metadata=False) + indexer = bids.BIDSLayoutIndexer(validate=validate, ignore=ignores, index_metadata=False) - self._layout = bids.BIDSLayout( - self.path, validate=validate, indexer=indexer) + self._layout = bids.BIDSLayout(self.path, validate=validate, indexer=indexer) def create_cubids_code_dir(self): """Create CuBIDS code directory. @@ -203,8 +201,7 @@ def datalad_save(self, message=None): Commit message to use with datalad save. """ if not self.datalad_ready: - raise Exception( - "DataLad has not been initialized. use datalad_init()") + raise Exception("DataLad has not been initialized. 
use datalad_init()") statuses = self.datalad_handle.save(message=message or "CuBIDS Save") saved_status = set([status["status"] for status in statuses]) @@ -226,8 +223,7 @@ def is_datalad_clean(self): """ if not self.datalad_ready: raise Exception("Datalad not initialized, can't determine status") - statuses = set([status["state"] - for status in self.datalad_handle.status()]) + statuses = set([status["state"] for status in self.datalad_handle.status()]) return statuses == set(["clean"]) def datalad_undo_last_commit(self): @@ -241,10 +237,8 @@ def datalad_undo_last_commit(self): If there are untracked changes in the datalad dataset. """ if not self.is_datalad_clean(): - raise Exception( - "Untracked changes present. Run clear_untracked_changes first") - reset_proc = subprocess.run( - ["git", "reset", "--hard", "HEAD~1"], cwd=self.path) + raise Exception("Untracked changes present. Run clear_untracked_changes first") + reset_proc = subprocess.run(["git", "reset", "--hard", "HEAD~1"], cwd=self.path) reset_proc.check_returncode() def add_nifti_info(self): @@ -348,13 +342,11 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T files_df = pd.read_table(files_tsv) # Check that the MergeInto column only contains valid merges - ok_merges, deletions = check_merging_operations( - summary_tsv, raise_on_error=raise_on_error) + ok_merges, deletions = check_merging_operations(summary_tsv, raise_on_error=raise_on_error) merge_commands = [] for source_id, dest_id in ok_merges: - dest_files = files_df.loc[( - files_df[["ParamGroup", "EntitySet"]] == dest_id).all(1)] + dest_files = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == dest_id).all(1)] source_files = files_df.loc[ (files_df[["ParamGroup", "EntitySet"]] == source_id).all(1) ] @@ -365,15 +357,13 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T for dest_nii in dest_files.FilePath: dest_json = img_to_new_ext(self.path + dest_nii, ".json") if Path(dest_json).exists() and Path(source_json).exists(): - merge_commands.append( - f"bids-sidecar-merge {source_json} {dest_json}") + merge_commands.append(f"bids-sidecar-merge {source_json} {dest_json}") # Get the delete commands # delete_commands = [] to_remove = [] for rm_id in deletions: - files_to_rm = files_df.loc[( - files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)] + files_to_rm = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)] for rm_me in files_to_rm.FilePath: if Path(self.path + rm_me).exists(): @@ -446,8 +436,7 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T rename_commit = s1 + s2 - self.datalad_handle.run( - cmd=["bash", renames], message=rename_commit) + self.datalad_handle.run(cmd=["bash", renames], message=rename_commit) else: subprocess.run( ["bash", renames], @@ -487,8 +476,7 @@ def change_filename(self, filepath, entities): entity_file_keys = [] # Entities that may be in the filename? - file_keys = ["task", "acquisition", - "direction", "reconstruction", "run"] + file_keys = ["task", "acquisition", "direction", "reconstruction", "run"] for key in file_keys: if key in list(entities.keys()): @@ -502,8 +490,7 @@ def change_filename(self, filepath, entities): # XXX: This adds an extra leading zero to run. 
entities["run"] = "0" + str(entities["run"]) - filename = "_".join( - [f"{key}-{entities[key]}" for key in entity_file_keys]) + filename = "_".join([f"{key}-{entities[key]}" for key in entity_file_keys]) filename = ( filename.replace("acquisition", "acq") .replace("direction", "dir") @@ -512,8 +499,7 @@ def change_filename(self, filepath, entities): if len(filename) > 0: filename = sub_ses + "_" + filename + "_" + suffix + old_ext else: - raise ValueError( - f"Could not construct new filename for {filepath}") + raise ValueError(f"Could not construct new filename for {filepath}") # CHECK TO SEE IF DATATYPE CHANGED # datatype may be overridden/changed if the original file is located in the wrong folder. @@ -531,8 +517,7 @@ def change_filename(self, filepath, entities): dtype_new = dtype_orig # Construct the new filename - new_path = str(self.path) + "/" + sub + "/" + \ - ses + "/" + dtype_new + "/" + filename + new_path = str(self.path) + "/" + sub + "/" + ses + "/" + dtype_new + "/" + filename # Add the scan path + new path to the lists of old, new filenames self.old_filenames.append(filepath) @@ -551,8 +536,7 @@ def change_filename(self, filepath, entities): # ensure assoc not an IntendedFor reference if ".nii" not in str(assoc_path): self.old_filenames.append(assoc_path) - new_ext_path = img_to_new_ext( - new_path, "".join(Path(assoc_path).suffixes)) + new_ext_path = img_to_new_ext(new_path, "".join(Path(assoc_path).suffixes)) self.new_filenames.append(new_ext_path) # MAKE SURE THESE AREN'T COVERED BY get_associations!!! @@ -625,8 +609,7 @@ def change_filename(self, filepath, entities): if Path(old_labeling).exists(): self.old_filenames.append(old_labeling) new_scan_end = "_" + suffix + old_ext - new_labeling = new_path.replace( - new_scan_end, "_asllabeling.jpg") + new_labeling = new_path.replace(new_scan_end, "_asllabeling.jpg") self.new_filenames.append(new_labeling) # RENAME INTENDED FORS! @@ -652,8 +635,7 @@ def change_filename(self, filepath, entities): # remove old filename data["IntendedFor"].remove(item) # add new filename - data["IntendedFor"].append( - _get_intended_for_reference(new_path)) + data["IntendedFor"].append(_get_intended_for_reference(new_path)) # update the json with the new data dictionary _update_json(filename_with_if, data) @@ -826,8 +808,7 @@ def _purge_associations(self, scans): if "/func/" in str(path): # add tsvs - tsv = img_to_new_ext(str(path), ".tsv").replace( - "_bold", "_events") + tsv = img_to_new_ext(str(path), ".tsv").replace("_bold", "_events") if Path(tsv).exists(): to_remove.append(tsv) # add tsv json (if exists) @@ -941,8 +922,7 @@ def get_param_groups_from_entity_set(self, entity_set): 2. 
A data frame with param group summaries """ if not self.fieldmaps_cached: - raise Exception( - "Fieldmaps must be cached to find parameter groups.") + raise Exception("Fieldmaps must be cached to find parameter groups.") key_entities = _entity_set_to_entities(entity_set) key_entities["extension"] = ".nii[.gz]*" @@ -995,8 +975,7 @@ def create_data_dictionary(self): mod_dict = sidecar_params[mod] for s_param in mod_dict.keys(): if s_param not in self.data_dict.keys(): - self.data_dict[s_param] = { - "Description": "Scanning Parameter"} + self.data_dict[s_param] = {"Description": "Scanning Parameter"} relational_params = self.grouping_config.get("relational_params") for r_param in relational_params.keys(): @@ -1008,8 +987,7 @@ def create_data_dictionary(self): mod_dict = derived_params[mod] for d_param in mod_dict.keys(): if d_param not in self.data_dict.keys(): - self.data_dict[d_param] = { - "Description": "NIfTI Header Parameter"} + self.data_dict[d_param] = {"Description": "NIfTI Header Parameter"} # Manually add non-sidecar columns/descriptions to data_dict desc1 = "Column where users mark groups to manually check" @@ -1116,20 +1094,17 @@ def get_param_groups_dataframes(self): long_name = big_df.loc[row, "FilePath"] big_df.loc[row, "FilePath"] = long_name.replace(self.path, "") - summary = _order_columns( - pd.concat(param_group_summaries, ignore_index=True)) + summary = _order_columns(pd.concat(param_group_summaries, ignore_index=True)) # create new col that strings key and param group together - summary["KeyParamGroup"] = summary["EntitySet"] + \ - "__" + summary["ParamGroup"].map(str) + summary["KeyParamGroup"] = summary["EntitySet"] + "__" + summary["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = summary.pop("KeyParamGroup") summary.insert(0, "KeyParamGroup", key_param_col) # do the same for the files df - big_df["KeyParamGroup"] = big_df["EntitySet"] + \ - "__" + big_df["ParamGroup"].map(str) + big_df["KeyParamGroup"] = big_df["EntitySet"] + "__" + big_df["ParamGroup"].map(str) # move this column to the front of the dataframe key_param_col = big_df.pop("KeyParamGroup") @@ -1278,10 +1253,8 @@ def get_tsvs(self, path_prefix): big_df, summary = self.get_param_groups_dataframes() - summary = summary.sort_values( - by=["Modality", "EntitySetCount"], ascending=[True, False]) - big_df = big_df.sort_values( - by=["Modality", "EntitySetCount"], ascending=[True, False]) + summary = summary.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False]) + big_df = big_df.sort_values(by=["Modality", "EntitySetCount"], ascending=[True, False]) # Create json dictionaries for summary and files tsvs self.create_data_dictionary() @@ -1300,8 +1273,7 @@ def get_tsvs(self, path_prefix): summary.to_csv(f"{path_prefix}_summary.tsv", sep="\t", index=False) # Calculate the acq groups - group_by_acquisition_sets( - f"{path_prefix}_files.tsv", path_prefix, self.acq_group_level) + group_by_acquisition_sets(f"{path_prefix}_files.tsv", path_prefix, self.acq_group_level) print(f"CuBIDS detected {len(summary)} Parameter Groups.") @@ -1520,8 +1492,7 @@ def _get_param_groups( # Get the fieldmaps out and add their types if "FieldmapKey" in relational_params: fieldmap_types = sorted( - [_file_to_entity_set(fmap.path) - for fmap in fieldmap_lookup[path]] + [_file_to_entity_set(fmap.path) for fmap in fieldmap_lookup[path]] ) # check if config says columns or bool @@ -1543,8 +1514,7 @@ def _get_param_groups( # If it's a fieldmap, see what entity set it's intended to 
correct if "IntendedForKey" in relational_params: intended_entity_sets = sorted( - [_file_to_entity_set(intention) - for intention in intentions] + [_file_to_entity_set(intention) for intention in intentions] ) # check if config says columns or bool @@ -1598,14 +1568,11 @@ def _get_param_groups( {"Counts": value_counts.to_numpy(), "ParamGroup": value_counts.index.to_numpy()} ) - param_groups_with_counts = pd.merge( - deduped, param_group_counts, on=["ParamGroup"]) + param_groups_with_counts = pd.merge(deduped, param_group_counts, on=["ParamGroup"]) # Sort by counts and relabel the param groups - param_groups_with_counts.sort_values( - by=["Counts"], inplace=True, ascending=False) - param_groups_with_counts["ParamGroup"] = np.arange( - param_groups_with_counts.shape[0]) + 1 + param_groups_with_counts.sort_values(by=["Counts"], inplace=True, ascending=False) + param_groups_with_counts["ParamGroup"] = np.arange(param_groups_with_counts.shape[0]) + 1 # Send the new, ordered param group ids to the files list ordered_labeled_files = pd.merge( @@ -1613,15 +1580,13 @@ def _get_param_groups( ) # sort ordered_labeled_files by param group - ordered_labeled_files.sort_values( - by=["Counts"], inplace=True, ascending=False) + ordered_labeled_files.sort_values(by=["Counts"], inplace=True, ascending=False) # now get rid of cluster cols from deduped and df for col in list(ordered_labeled_files.columns): if col.startswith("Cluster_"): ordered_labeled_files = ordered_labeled_files.drop(col, axis=1) - param_groups_with_counts = param_groups_with_counts.drop( - col, axis=1) + param_groups_with_counts = param_groups_with_counts.drop(col, axis=1) if col.endswith("_x"): ordered_labeled_files = ordered_labeled_files.drop(col, axis=1) diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index ddaa585f1..6562f35b7 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -57,8 +57,7 @@ def _check_sdc_cols(meta1, meta2): source_param_key = tuple(row_needs_merge[["MergeInto", "EntitySet"]]) dest_param_key = tuple(row_needs_merge[["ParamGroup", "EntitySet"]]) dest_metadata = row_needs_merge.to_dict() - source_row = actions.loc[( - actions[["ParamGroup", "EntitySet"]] == source_param_key).all(1)] + source_row = actions.loc[(actions[["ParamGroup", "EntitySet"]] == source_param_key).all(1)] if source_param_key[0] == 0: print("going to delete ", dest_param_key) @@ -299,8 +298,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): file_entities = parse_file_entities(row.FilePath) if acq_group_level == "subject": - acq_id = (file_entities.get("subject"), - file_entities.get("session")) + acq_id = (file_entities.get("subject"), file_entities.get("session")) acq_groups[acq_id].append((row.EntitySet, row.ParamGroup)) else: acq_id = (file_entities.get("subject"), None) @@ -327,8 +325,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): acq_group_info = [] for groupnum, content_id_row in enumerate(descending_order, start=1): content_id = content_ids[content_id_row] - acq_group_info.append( - (groupnum, content_id_counts[content_id_row]) + content_id) + acq_group_info.append((groupnum, content_id_counts[content_id_row]) + content_id) for subject, session in contents_to_subjects[content_id]: grouped_sub_sess.append( {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum} @@ -336,8 +333,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): # Write the mapping of subject/session to acq_group_df = 
pd.DataFrame(grouped_sub_sess) - acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", - sep="\t", index=False) + acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False) # Create data dictionary for acq group tsv acq_dict = get_acq_dictionary() @@ -346,8 +342,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): # Write the summary of acq groups to a text file with open(output_prefix + "_AcqGroupInfo.txt", "w") as infotxt: - infotxt.write("\n".join([" ".join(map(str, line)) - for line in acq_group_info])) + infotxt.write("\n".join([" ".join(map(str, line)) for line in acq_group_info])) # Create and save AcqGroupInfo data dictionary header_dict = {} diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 4c2266e16..28211cc2d 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -88,8 +88,7 @@ def test_ok_json_merge_cli(tmp_path): assert os.path.isfile(source_json) assert os.path.isfile(dest_json) - merge_proc = subprocess.run( - ["bids-sidecar-merge", str(source_json), str(dest_json)]) + merge_proc = subprocess.run(["bids-sidecar-merge", str(source_json), str(dest_json)]) assert merge_proc.returncode == 0 assert not _get_json_string(dest_json) == orig_dest_json_content @@ -144,8 +143,7 @@ def test_purge_no_datalad(tmp_path): / "sub-03_ses-phdiff_task-rest_bold.json" ) scans.append(scan_name) - scans.append( - "sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz") + scans.append("sub-01/ses-phdiff/dwi/sub-01_ses-phdiff_acq-HASC55AP_dwi.nii.gz") # create and save .txt with list of scans purge_path = str(tmp_path / "purge_scans.txt") @@ -278,8 +276,7 @@ def test_bad_json_merge_cli(tmp_path): / "sub-01_ses-phdiff_acq-HASC55AP_dwi.json" ) - merge_proc = subprocess.run( - ["bids-sidecar-merge", str(invalid_source_json), str(dest_json)]) + merge_proc = subprocess.run(["bids-sidecar-merge", str(invalid_source_json), str(dest_json)]) assert merge_proc.returncode > 0 assert _get_json_string(dest_json) == orig_dest_json_content @@ -359,12 +356,10 @@ def test_tsv_merge_no_datalad(tmp_path): original_files_tsv = tsv_prefix + "_files.tsv" # give tsv with no changes (make sure it does nothing) - bod.apply_tsv_changes(original_summary_tsv, - original_files_tsv, str(tmp_path / "unmodified")) + bod.apply_tsv_changes(original_summary_tsv, original_files_tsv, str(tmp_path / "unmodified")) # these will not actually be equivalent because of the auto renames - assert file_hash(original_summary_tsv) != file_hash( - tmp_path / "unmodified_summary.tsv") + assert file_hash(original_summary_tsv) != file_hash(tmp_path / "unmodified_summary.tsv") # Find the dwi with no FlipAngle summary_df = pd.read_table(original_summary_tsv) @@ -374,33 +369,28 @@ def test_tsv_merge_no_datalad(tmp_path): ) # Find the dwi with and EchoTime == (complete_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch( - "acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime > 0.05) ) (cant_merge_echotime_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch( - "acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime < 0.05) ) # Set a legal MergeInto value. 
This effectively fills in data # where there was previously as missing FlipAngle - summary_df.loc[fa_nan_dwi_row, - "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] + summary_df.loc[fa_nan_dwi_row, "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] valid_tsv_file = tsv_prefix + "_valid_summary.tsv" summary_df.to_csv(valid_tsv_file, sep="\t", index=False) # about to apply merges! - bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, - str(tmp_path / "ok_modified")) + bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, str(tmp_path / "ok_modified")) - assert not file_hash(original_summary_tsv) == file_hash( - tmp_path / "ok_modified_summary.tsv") + assert not file_hash(original_summary_tsv) == file_hash(tmp_path / "ok_modified_summary.tsv") # Add an illegal merge to MergeInto summary_df.loc[cant_merge_echotime_dwi_row, "MergeInto"] = summary_df.ParamGroup[ @@ -411,8 +401,7 @@ def test_tsv_merge_no_datalad(tmp_path): with pytest.raises(Exception): bod.apply_tsv_changes( - invalid_tsv_file, str( - tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") + invalid_tsv_file, str(tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") ) @@ -430,8 +419,7 @@ def test_tsv_merge_changes(tmp_path): original_files_tsv = tsv_prefix + "_files.tsv" # give tsv with no changes (make sure it does nothing except rename) - bod.apply_tsv_changes(original_summary_tsv, - original_files_tsv, str(tmp_path / "unmodified")) + bod.apply_tsv_changes(original_summary_tsv, original_files_tsv, str(tmp_path / "unmodified")) orig = pd.read_table(original_summary_tsv) # TEST RenameEntitySet column got populated CORRECTLY for row in range(len(orig)): @@ -458,8 +446,7 @@ def test_tsv_merge_changes(tmp_path): applied_f.loc[row, "KeyParamGroup"] ) else: - occurrences[applied_f.loc[row, "FilePath"]] = [ - applied_f.loc[row, "KeyParamGroup"]] + occurrences[applied_f.loc[row, "FilePath"]] = [applied_f.loc[row, "KeyParamGroup"]] assert len(orig) == len(applied) @@ -477,8 +464,7 @@ def test_tsv_merge_changes(tmp_path): assert renamed # will no longer be equal because of auto rename! - assert file_hash(original_summary_tsv) != file_hash( - tmp_path / "unmodified_summary.tsv") + assert file_hash(original_summary_tsv) != file_hash(tmp_path / "unmodified_summary.tsv") # Find the dwi with no FlipAngle summary_df = pd.read_table(original_summary_tsv) @@ -488,32 +474,27 @@ def test_tsv_merge_changes(tmp_path): ) # Find the dwi with and EchoTime == (complete_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch( - "acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime > 0.05) ) (cant_merge_echotime_dwi_row,) = np.flatnonzero( - summary_df.EntitySet.str.fullmatch( - "acquisition-HASC55AP_datatype-dwi_suffix-dwi") + summary_df.EntitySet.str.fullmatch("acquisition-HASC55AP_datatype-dwi_suffix-dwi") & (summary_df.FlipAngle == 90.0) & (summary_df.EchoTime < 0.05) ) # Set a legal MergeInto value. 
This effectively fills in data # where there was previously as missing FlipAngle - summary_df.loc[fa_nan_dwi_row, - "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] + summary_df.loc[fa_nan_dwi_row, "MergeInto"] = summary_df.ParamGroup[complete_dwi_row] valid_tsv_file = tsv_prefix + "_valid_summary.tsv" summary_df.to_csv(valid_tsv_file, sep="\t", index=False) # about to merge - bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, - str(tmp_path / "ok_modified")) + bod.apply_tsv_changes(valid_tsv_file, original_files_tsv, str(tmp_path / "ok_modified")) - assert not file_hash(original_summary_tsv) == file_hash( - tmp_path / "ok_modified_summary.tsv") + assert not file_hash(original_summary_tsv) == file_hash(tmp_path / "ok_modified_summary.tsv") # Add an illegal merge to MergeInto summary_df.loc[cant_merge_echotime_dwi_row, "MergeInto"] = summary_df.ParamGroup[ @@ -524,8 +505,7 @@ def test_tsv_merge_changes(tmp_path): with pytest.raises(Exception): bod.apply_tsv_changes( - invalid_tsv_file, str( - tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") + invalid_tsv_file, str(tmp_path / "originals_files.tsv"), str(tmp_path / "ok_modified") ) # Make sure MergeInto == 0 deletes the param group and all associations @@ -709,8 +689,7 @@ def test_tsv_creation(tmp_path): # if entity sets in rows i and i+1 are the same if isummary_df.iloc[i]["EntitySet"] == isummary_df.iloc[i + 1]["EntitySet"]: # param group i = param group i+1 - assert isummary_df.iloc[i]["ParamGroup"] == isummary_df.iloc[i + - 1]["ParamGroup"] - 1 + assert isummary_df.iloc[i]["ParamGroup"] == isummary_df.iloc[i + 1]["ParamGroup"] - 1 # and count i < count i + 1 assert isummary_df.iloc[i]["Counts"] >= isummary_df.iloc[i + 1]["Counts"] @@ -822,13 +801,11 @@ def test_apply_tsv_changes(tmp_path): for f in deleted_f: assert Path(str(data_root / "complete") + f).exists() - assert Path(str(data_root / "complete") + - f.replace("nii.gz", "json")).exists() + assert Path(str(data_root / "complete") + f.replace("nii.gz", "json")).exists() # apply deletion complete_cubids.apply_tsv_changes( - mod2_path, str( - tmp_path / "modified2_files.tsv"), str(tmp_path / "deleted") + mod2_path, str(tmp_path / "modified2_files.tsv"), str(tmp_path / "deleted") ) # make sure deleted_keyparam gone from files_tsv @@ -861,8 +838,7 @@ def test_session_apply(tmp_path): data_root = get_data(tmp_path) - ses_cubids = CuBIDS(data_root / "inconsistent", - acq_group_level="session", use_datalad=True) + ses_cubids = CuBIDS(data_root / "inconsistent", acq_group_level="session", use_datalad=True) ses_cubids.get_tsvs(str(tmp_path / "originals")) @@ -1063,8 +1039,7 @@ def test_docker(): """ try: return_status = 1 - ret = subprocess.run(["docker", "version"], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + ret = subprocess.run(["docker", "version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) except OSError as e: from errno import ENOENT diff --git a/cubids/tests/test_cli.py b/cubids/tests/test_cli.py index 78e15501a..06d3af2a2 100644 --- a/cubids/tests/test_cli.py +++ b/cubids/tests/test_cli.py @@ -28,8 +28,7 @@ def _test_path_exists(): It asserts that the function returns the expected path when the path exists, and raises an `argparse.ArgumentTypeError` when the path does not exist. 
""" - assert _path_exists("/path/to/existing/file", - None) == "/path/to/existing/file" + assert _path_exists("/path/to/existing/file", None) == "/path/to/existing/file" with pytest.raises(argparse.ArgumentTypeError): _path_exists("/path/to/nonexistent/file", None) diff --git a/cubids/tests/test_cubids.py b/cubids/tests/test_cubids.py index ca70d21ad..6ab847fd5 100644 --- a/cubids/tests/test_cubids.py +++ b/cubids/tests/test_cubids.py @@ -74,8 +74,7 @@ def _test_copy_exemplars(cubids_instance): exemplars_dir = "/path/to/exemplars" exemplars_tsv = "/path/to/exemplars.tsv" min_group_size = 2 - cubids_instance.copy_exemplars( - exemplars_dir, exemplars_tsv, min_group_size) + cubids_instance.copy_exemplars(exemplars_dir, exemplars_tsv, min_group_size) # Add assertions here @@ -205,10 +204,8 @@ def _test__get_intended_for_reference(cubids_instance): def _test__get_param_groups(cubids_instance): - files = ["sub-01_ses-01_task-rest_bold.nii.gz", - "sub-02_ses-01_task-rest_bold.nii.gz"] - fieldmap_lookup = { - "sub-01_ses-01_task-rest_bold.nii.gz": "fieldmap.nii.gz"} + files = ["sub-01_ses-01_task-rest_bold.nii.gz", "sub-02_ses-01_task-rest_bold.nii.gz"] + fieldmap_lookup = {"sub-01_ses-01_task-rest_bold.nii.gz": "fieldmap.nii.gz"} entity_set_name = "group-01" grouping_config = {"group-01": {"modality": "bold"}} modality = "bold" @@ -223,8 +220,7 @@ def _test_round_params(cubids_instance): param_group_df = pd.DataFrame({"param": [0.123456789]}) config = {"param": {"round": 3}} modality = "bold" - rounded_params = cubids_instance.round_params( - param_group_df, config, modality) + rounded_params = cubids_instance.round_params(param_group_df, config, modality) # Add assertions here @@ -238,8 +234,7 @@ def _test_format_params(cubids_instance): param_group_df = pd.DataFrame({"param": [0.123456789]}) config = {"param": {"format": "{:.2f}"}} modality = "bold" - formatted_params = cubids_instance.format_params( - param_group_df, config, modality) + formatted_params = cubids_instance.format_params(param_group_df, config, modality) # Add assertions here diff --git a/cubids/tests/utils.py b/cubids/tests/utils.py index 9bf6bda1c..c64da3727 100644 --- a/cubids/tests/utils.py +++ b/cubids/tests/utils.py @@ -27,8 +27,7 @@ def _remove_a_json(json_file): def _edit_a_nifti(nifti_file): img = nb.load(nifti_file) - new_img = nb.Nifti1Image(np.random.rand( - *img.shape), affine=img.affine, header=img.header) + new_img = nb.Nifti1Image(np.random.rand(*img.shape), affine=img.affine, header=img.header) new_img.to_filename(nifti_file) @@ -77,8 +76,7 @@ def _add_ext_files(img_path): if "/dwi/" in img_path: # add bval and bvec for ext in dwi_exts: - dwi_ext_file = img_path.replace( - ".nii.gz", "").replace(".nii", "") + ext + dwi_ext_file = img_path.replace(".nii.gz", "").replace(".nii", "") + ext Path(dwi_ext_file).touch() if "bold" in img_path: no_suffix = img_path.rpartition("_")[0] diff --git a/cubids/validator.py b/cubids/validator.py index bb7212126..a7225ba0f 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -17,8 +17,7 @@ def build_validator_call(path, ignore_headers=False): """Build a subprocess command to the bids validator.""" # New schema BIDS validator doesn't have option to ignore subject consistency. # Build the deno command to run the BIDS validator. 
- command = ["deno", "run", "-A", "jsr:@bids/validator", - path, "--verbose", "--json"] + command = ["deno", "run", "-A", "jsr:@bids/validator", path, "--verbose", "--json"] if ignore_headers: command.append("--ignoreNiftiHeaders") @@ -35,8 +34,7 @@ def get_bids_validator_version(): Version of the BIDS validator. """ command = ["deno", "run", "-A", "jsr:@bids/validator", "--version"] - result = subprocess.run( - command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output = result.stdout.decode("utf-8").strip() version = output.split()[-1] # Remove ANSI color codes @@ -57,8 +55,7 @@ def build_subject_paths(bids_dir): subjects = glob.glob(bids_dir) if len(subjects) < 1: - raise ValueError( - "Couldn't find any subjects in the specified directory:\n" + bids_dir) + raise ValueError("Couldn't find any subjects in the specified directory:\n" + bids_dir) subjects_dict = {} @@ -66,8 +63,7 @@ def build_subject_paths(bids_dir): purepath = pathlib.PurePath(sub) sub_label = purepath.name - files = [x for x in glob.glob( - sub + "**", recursive=True) if os.path.isfile(x)] + files = [x for x in glob.glob(sub + "**", recursive=True) if os.path.isfile(x)] files.extend(root_files) subjects_dict[sub_label] = files @@ -87,8 +83,7 @@ def build_first_subject_path(bids_dir, subject): purepath = pathlib.PurePath(subject) sub_label = purepath.name - files = [x for x in glob.glob( - subject + "**", recursive=True) if os.path.isfile(x)] + files = [x for x in glob.glob(subject + "**", recursive=True) if os.path.isfile(x)] files.extend(root_files) subject_dict[sub_label] = files @@ -159,8 +154,7 @@ def parse_issue(issue_dict): issues = data.get("issues", {}).get("issues", []) if not issues: return pd.DataFrame( - columns=["location", "code", "issueMessage", - "subCode", "severity", "rule"] + columns=["location", "code", "issueMessage", "subCode", "severity", "rule"] ) # Parse all issues diff --git a/cubids/workflows.py b/cubids/workflows.py index a28b61a30..11931c5b0 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -82,8 +82,7 @@ def validate( # parse the string output parsed = parse_validator_output(ret.stdout.decode("UTF-8")) if parsed.shape[1] < 1: - logger.info( - "No issues/warnings parsed, your dataset is BIDS valid.") + logger.info("No issues/warnings parsed, your dataset is BIDS valid.") sys.exit(0) else: logger.info("BIDS issues/warnings found in the dataset") @@ -130,8 +129,7 @@ def validate( subjects_dict = { k: v for k, v in subjects_dict.items() if k in sequential_subjects } - assert len(list(subjects_dict.keys()) - ) > 1, "No subjects found in filter" + assert len(list(subjects_dict.keys())) > 1, "No subjects found in filter" for subject, files_list in tqdm.tqdm(subjects_dict.items()): # logger.info(" ".join(["Processing subject:", subject])) # create a temporary directory and symlink the data @@ -160,8 +158,7 @@ def validate( ret = run_validator(call) # parse output if ret.returncode != 0: - logger.error( - "Errors returned from validator run, parsing now") + logger.error("Errors returned from validator run, parsing now") # parse the output and add to list if it returns a df decoded = ret.stdout.decode("UTF-8") @@ -172,8 +169,7 @@ def validate( # concatenate the parsed data and exit if len(parsed) < 1: - logger.info( - "No issues/warnings parsed, your dataset is BIDS valid.") + logger.info("No issues/warnings parsed, your dataset is BIDS valid.") sys.exit(0) else: @@ -285,8 +281,7 @@ def 
bids_version(bids_dir, write=False): if os.path.isdir(os.path.join(bids_dir, name)) and name.startswith("sub-") ] if not sub_folders: - raise ValueError( - "No folders starting with 'sub-' found. Please provide a valid BIDS.") + raise ValueError("No folders starting with 'sub-' found. Please provide a valid BIDS.") subject = sub_folders[0] except FileNotFoundError: raise FileNotFoundError(f"The directory {bids_dir} does not exist.") @@ -331,8 +326,7 @@ def bids_version(bids_dir, write=False): def bids_sidecar_merge(from_json, to_json): """Merge critical keys from one sidecar to another.""" - merge_status = merge_json_into_json( - from_json, to_json, raise_on_error=False) + merge_status = merge_json_into_json(from_json, to_json, raise_on_error=False) sys.exit(merge_status) @@ -371,8 +365,7 @@ def group(bids_dir, container, acq_group_level, config, output_prefix): apply_config = config is not None if apply_config: - input_config_dir_link = str( - config.parent.absolute()) + ":/in_config:ro" + input_config_dir_link = str(config.parent.absolute()) + ":/in_config:ro" linked_input_config = "/in_config/" + config.name linked_output_prefix = "/tsv/" + output_prefix.name @@ -479,18 +472,14 @@ def apply( # Run it through a container container_type = _get_container_type(container) bids_dir_link = str(bids_dir.absolute()) + ":/bids" - input_summary_tsv_dir_link = str( - edited_summary_tsv.parent.absolute()) + ":/in_summary_tsv:ro" - input_files_tsv_dir_link = str( - edited_summary_tsv.parent.absolute()) + ":/in_files_tsv:ro" - output_tsv_dir_link = str( - new_tsv_prefix.parent.absolute()) + ":/out_tsv:rw" + input_summary_tsv_dir_link = str(edited_summary_tsv.parent.absolute()) + ":/in_summary_tsv:ro" + input_files_tsv_dir_link = str(edited_summary_tsv.parent.absolute()) + ":/in_files_tsv:ro" + output_tsv_dir_link = str(new_tsv_prefix.parent.absolute()) + ":/out_tsv:rw" # FROM BOND-GROUP apply_config = config is not None if apply_config: - input_config_dir_link = str( - config.parent.absolute()) + ":/in_config:ro" + input_config_dir_link = str(config.parent.absolute()) + ":/in_config:ro" linked_input_config = "/in_config/" + config.name linked_output_prefix = "/tsv/" + new_tsv_prefix.name From 1736b514ac118044093d22a57bc201948ac26a22 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Thu, 12 Dec 2024 16:31:23 -0500 Subject: [PATCH 16/21] add unit test for cubids bids-version --- cubids/cli.py | 16 ++++++------ cubids/cubids.py | 5 ++-- cubids/tests/test_bond.py | 52 ++++++++++++++++++++++++++++++++++++++- cubids/validator.py | 3 ++- docs/installation.rst | 2 +- pyproject.toml | 1 + 6 files changed, 66 insertions(+), 13 deletions(-) diff --git a/cubids/cli.py b/cubids/cli.py index d2f8a9201..cf48cf9ab 100644 --- a/cubids/cli.py +++ b/cubids/cli.py @@ -43,7 +43,7 @@ def _parse_validate(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -119,7 +119,7 @@ def _parse_bids_version(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -188,7 +188,7 @@ def _parse_group(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. 
It should contain " "sub-X directories and dataset_description.json" ), ) @@ -255,7 +255,7 @@ def _parse_apply(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -351,7 +351,7 @@ def _parse_datalad_save(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -393,7 +393,7 @@ def _parse_undo(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -617,7 +617,7 @@ def _parse_remove_metadata_fields(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) @@ -663,7 +663,7 @@ def _parse_print_metadata_fields(): type=PathExists, action="store", help=( - "the root of a BIDS dataset. It should contain " + "The root of a BIDS dataset. It should contain " "sub-X directories and dataset_description.json" ), ) diff --git a/cubids/cubids.py b/cubids/cubids.py index 27f632e3a..817815965 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -1346,9 +1346,10 @@ def get_all_metadata_fields(self): metadata = json.loads(content) found_fields.update(metadata.keys()) except json.JSONDecodeError as e: - print(f"Error decoding JSON in {json_file}: {e}") + warnings.warn(f"Error decoding JSON in {json_file}: {e}") except Exception as e: - print(f"Unexpected error with file {json_file}: {e}") + warnings.warn(f"Unexpected error with file {json_file}: {e}") + return sorted(found_fields) def remove_metadata_fields(self, fields_to_remove): diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 28211cc2d..068ee4fd4 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -9,6 +9,7 @@ import numpy as np import pandas as pd import pytest +from packaging.version import Version from cubids.cubids import CuBIDS from cubids.metadata_merge import merge_json_into_json, merge_without_overwrite @@ -22,7 +23,15 @@ file_hash, get_data, ) -from cubids.validator import build_validator_call, parse_validator_output, run_validator +from cubids.validator import ( + build_validator_call, + parse_validator_output, + run_validator, + get_bids_validator_version, + extract_summary_info, + update_dataset_description, + bids_validator_version, +) COMPLETE_KEY_GROUPS = [ "acquisition-HASC55AP_datatype-dwi_suffix-dwi", @@ -1028,6 +1037,47 @@ def test_validator(tmp_path): assert isinstance(parsed, pd.DataFrame) +def bids_validator_version(tmp_path): + """Test validator.bids_validator_version.""" + # Get the BIDS validator version + validator_version = get_bids_validator_version() + # Extract schemaVersion + summary_info = extract_summary_info(decoded) + + +def test_bids_version(tmp_path): + """Test workflows.bids_version.""" + data_root = get_data(tmp_path) + bids_dir = Path(data_root) / "complete" + + # Ensure the test directory exists + assert bids_dir.exists() + + # test the validator in valid dataset + call = build_validator_call(bids_dir) + ret = run_validator(call) + + assert ret.returncode == 0 + + decoded = ret.stdout.decode("UTF-8") + + # Get the BIDS validator version + validator_version = 
Version(get_bids_validator_version()["ValidatorVersion"]) + # Extract schemaVersion + schema_version = Version(extract_summary_info(decoded)["SchemaVersion"]) + + # Set baseline versions to compare against + min_validator_version = Version("2.0.0") + min_schema_version = Version("0.11.3") + + assert ( + validator_version >= min_validator_version + ), f"Validator version {validator_version} is less than minimum {min_validator_version}" + assert ( + schema_version >= min_schema_version + ), f"Schema version {schema_version} is less than minimum {min_schema_version}" + + def test_docker(): """Verify that docker is installed and the user has permission to run docker images. diff --git a/cubids/validator.py b/cubids/validator.py index a7225ba0f..a4feeba5b 100644 --- a/cubids/validator.py +++ b/cubids/validator.py @@ -7,6 +7,7 @@ import pathlib import re import subprocess +import warnings import pandas as pd @@ -249,7 +250,7 @@ def update_dataset_description(path, new_info): ) print("Changes saved with DataLad.") except subprocess.CalledProcessError as e: - print(f"Error running DataLad save: {e}") + warnings.warn(f"Error running DataLad save: {e}") def bids_validator_version(output, path, write=False): diff --git a/docs/installation.rst b/docs/installation.rst index b02e734ec..b6ebda3bb 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -51,7 +51,7 @@ We can accomplish this using the following command: $ conda install deno -The new schema ``bids-validator`` doesn't need to be installed +The new schema-based ``bids-validator`` doesn't need to be installed and will be implemented automatically when `cubids validate` is called diff --git a/pyproject.toml b/pyproject.toml index 6e3c151c1..79d8e1f69 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,6 +86,7 @@ cubids-copy-exemplars = "cubids.cli:_enter_copy_exemplars" cubids-undo = "cubids.cli:_enter_undo" bids-sidecar-merge = "cubids.cli:_enter_bids_sidecar_merge" cubids-validate = "cubids.cli:_enter_validate" +cubids-bids-version = "cubids.cli:_enter_bids_version" cubids-datalad-save = "cubids.cli:_enter_datalad_save" cubids-print-metadata-fields = "cubids.cli:_enter_print_metadata_fields" cubids-remove-metadata-fields = "cubids.cli:_enter_remove_metadata_fields" From d972d7b1611be06e80bddfe65c348659f55b1129 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Thu, 12 Dec 2024 16:43:33 -0500 Subject: [PATCH 17/21] Remove junk test code --- cubids/tests/test_bond.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 068ee4fd4..d33107bec 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -1037,14 +1037,6 @@ def test_validator(tmp_path): assert isinstance(parsed, pd.DataFrame) -def bids_validator_version(tmp_path): - """Test validator.bids_validator_version.""" - # Get the BIDS validator version - validator_version = get_bids_validator_version() - # Extract schemaVersion - summary_info = extract_summary_info(decoded) - - def test_bids_version(tmp_path): """Test workflows.bids_version.""" data_root = get_data(tmp_path) From 64e3392deebd814babb2a60fb9c8bfa960436ee5 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 13 Dec 2024 11:04:53 -0500 Subject: [PATCH 18/21] Try to fix codecov token issue --- .circleci/config.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c73d8af84..be6ce31d5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,6 +1,6 @@ version: 2.1 
orbs: - codecov: codecov/codecov@1.0.5 + codecov: codecov/codecov@3.2.4 jobs: run_pytests: @@ -62,7 +62,7 @@ jobs: # We need curl for the codecov upload apt-get update - apt-get install -yqq curl + apt-get install -y -qq curl cd /home/circleci/src/coverage/ echo "Merge coverage files" From 445533a879eb4547d82000e8ae04f41221d7670f Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 13 Dec 2024 11:13:44 -0500 Subject: [PATCH 19/21] Still trying to fix codecov token issue --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index be6ce31d5..1fe2e7791 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -63,6 +63,7 @@ jobs: # We need curl for the codecov upload apt-get update apt-get install -y -qq curl + apt-get install -y gnupg cd /home/circleci/src/coverage/ echo "Merge coverage files" From 280f3c413fafaa4ac767b7750374de0fc4650275 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 13 Dec 2024 13:31:51 -0500 Subject: [PATCH 20/21] Update scikit-learn requirement from <=1.5.1 to <=1.6.0 (#339) Updates the requirements on [scikit-learn](https://github.com/scikit-learn/scikit-learn) to permit the latest version. - [Release notes](https://github.com/scikit-learn/scikit-learn/releases) - [Commits](https://github.com/scikit-learn/scikit-learn/compare/0.1-beta...1.6.0) --- updated-dependencies: - dependency-name: scikit-learn dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 79d8e1f69..87da13302 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,7 +26,7 @@ dependencies = [ "pandas<=2.2.2", "pybids<=0.17.0", "pyyaml", - "scikit-learn<=1.5.1", + "scikit-learn<=1.6.0", "tqdm", ] dynamic = ["version"] From 7cb7f67a8255504b3ac9455c7b33d2e2052f237d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 13 Dec 2024 14:04:32 -0500 Subject: [PATCH 21/21] Update pandas requirement from <=2.2.2 to <=2.2.3 (#328) Updates the requirements on [pandas](https://github.com/pandas-dev/pandas) to permit the latest version. - [Release notes](https://github.com/pandas-dev/pandas/releases) - [Commits](https://github.com/pandas-dev/pandas/compare/0.3.0...v2.2.3) --- updated-dependencies: - dependency-name: pandas dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 87da13302..47942ccc6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ requires-python = ">=3.8" dependencies = [ "datalad>=0.13.5,!=0.17.3,!=0.17.0,!=0.16.1", "numpy<=2.1.0", - "pandas<=2.2.2", + "pandas<=2.2.3", "pybids<=0.17.0", "pyyaml", "scikit-learn<=1.6.0",