From 8e701c00e5a6fbec638798b10ad4ed5ed815228c Mon Sep 17 00:00:00 2001 From: Ziv Yaniv Date: Fri, 19 Jul 2024 13:21:33 -0400 Subject: [PATCH 1/2] Update the black version in .pre-commit configuration file. Move to a newer version of black (from 22.12.0 to 24.4.2). --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 79c1d3d..bc95f60 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: - id: check-json - id: check-yaml - repo: https://github.com/psf/black - rev: 22.12.0 + rev: 24.4.2 hooks: # check conformance to black formatting - id: black args: ['--check'] # if run without arguments, will fail and will format the files From 82994f8b3b2f6622b8bd3024be02713b171a25e3 Mon Sep 17 00:00:00 2001 From: Ziv Yaniv Date: Fri, 19 Jul 2024 13:24:02 -0400 Subject: [PATCH 2/2] Adding validation of vendor entries in reagent_resources.csv. A vendor name in the reagent_resources.csv "Vendor" column has to be one of the vendors specified in the vendor_urls.csv file. --- CHANGELOG.md | 5 ++++ pyproject.toml | 2 +- .../validate_reagent_resources.py | 28 +++++++++++++++---- tests/test_scripts.py | 12 ++++++-- 4 files changed, 38 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ec11b0..26471a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,11 @@ needed. This is equivalent to summarizing all activity on a feature branch versu ## Unreleased +## v0.8.2 + +### Changed +* validate_reagent_resources - Check that the vendor listed in the reagent_resources.csv is from the list of vendors found in the vendor_urls.csv. 
+ ## v0.8.1 ### Fixed diff --git a/pyproject.toml b/pyproject.toml index e73fed4..cc880ff 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "ibex_imaging_knowledge_base_utilities" -version = "0.8.1" +version = "0.8.2" authors = [{ name="Ziv Yaniv", email="zivyaniv@nih.gov" }, ] description = "Utility scripts used for managing the IBEX Imaging Community Knowledge-Base" diff --git a/src/ibex_imaging_knowledge_base_utilities/data_validation/validate_reagent_resources.py b/src/ibex_imaging_knowledge_base_utilities/data_validation/validate_reagent_resources.py index d86c13f..6cc1646 100644 --- a/src/ibex_imaging_knowledge_base_utilities/data_validation/validate_reagent_resources.py +++ b/src/ibex_imaging_knowledge_base_utilities/data_validation/validate_reagent_resources.py @@ -25,6 +25,7 @@ from ibex_imaging_knowledge_base_utilities.argparse_types import ( file_path_endswith, dir_path, + csv_path, ) from .utilities import validate_df @@ -47,6 +48,7 @@ def validate_reagent_resources( csv_file_name, json_config_file_name, zenodo_json_file_name, + vendors_csv_file_name, supporting_material_root_dir, ): MAX_ORCID_ENTRIES = 5 @@ -66,10 +68,18 @@ def validate_reagent_resources( zenodo_dict = json.load(fp) orcids = [data["orcid"].strip() for data in zenodo_dict["creators"]] + ["NA"] + # Get the list of vendor names from the vendor_urls.csv file name, column + # titled "Vendor" + vendor_names = pd.read_csv(vendors_csv_file_name)["Vendor"].to_list() + + # Add the ORCIDs and vendor names to the configuration dictionary to + # enforce column content to be in a set of values if "column_is_in" in configuration_dict: configuration_dict["column_is_in"]["Contributor"] = orcids + configuration_dict["column_is_in"]["Vendor"] = vendor_names else: configuration_dict["column_is_in"] = {"Contributor": orcids} + configuration_dict["column_is_in"] = {"Vendor": vendor_names} if "multi_value_column_is_in" in 
configuration_dict: configuration_dict["multi_value_column_is_in"]["Agree"] = orcids @@ -101,7 +111,7 @@ def validate_reagent_resources( # Check that the Contributor ORCID appears in the Agree or Disagree column. # In most cases it will be in the Agree column. When the recommendation # is refuted (multiple validators disagreed with the original contributor) - # and changed to from Yes to No or vice versa the ORCIDs from the Agree and + # and changed from Yes to No or vice versa the ORCIDs from the Agree and # Disagree columns are swapped and the original contributors ORCID will appear # in the Disagree column. df["Agree"] = df["Agree"].apply( @@ -159,7 +169,7 @@ def validate_reagent_resources( unique_target_conjugate = df[ ["Target Name / Protein Biomarker", "Conjugate"] ].drop_duplicates() - # md_file_paths_from_csv, status = unique_target_conjugate.apply( + res = unique_target_conjugate.apply( lambda target_conjugate: validate_supporting_material( target_conjugate, df, supporting_material_root_dir @@ -314,10 +324,10 @@ def validate_supporting_material( ] = supporting_orcid_configurations["Disagree"].apply( lambda x: frozenset([s[1:-1] for s in re.findall(orcid_pattern, x)]) ) - supporting_orcid_configurations[ - "Contributor" - ] = supporting_orcid_configurations["Contributor"].apply( - lambda x: re.findall(orcid_pattern, x)[0][1:-1] + supporting_orcid_configurations["Contributor"] = ( + supporting_orcid_configurations["Contributor"].apply( + lambda x: re.findall(orcid_pattern, x)[0][1:-1] + ) ) # Compare the configuration data from the supporting material to that from the reagent_resources file. 
# We don't use DataFrame.equal because that assumes the order of the columns and indexes is the same, @@ -362,6 +372,11 @@ def main(argv=None): type=lambda x: file_path_endswith(x, ".json"), help=".zenodo.json file which contains the ORCIDs of all contributors.", ) + parser.add_argument( + "vendors_csv_file", + type=lambda x: csv_path(x, required_columns={"Vendor"}), + help="csv file containing all valid vendor names in a column titled 'Vendor'.", + ) parser.add_argument( "supporting_material_root_dir", type=dir_path, @@ -373,6 +388,7 @@ def main(argv=None): args.csv_file, args.json_config_file, args.zenodo_json_file, + args.vendors_csv_file, args.supporting_material_root_dir, ) diff --git a/tests/test_scripts.py b/tests/test_scripts.py index 0a6d26a..9f920bd 100644 --- a/tests/test_scripts.py +++ b/tests/test_scripts.py @@ -449,25 +449,33 @@ def test_validate_image_resources( class TestReagentResourcesValidation(BaseTest): @pytest.mark.parametrize( - "json_config, input_csv, zenodo_json, supporting_material_root_dir, result", + "json_config, input_csv, zenodo_json, vendor_csv, supporting_material_root_dir, result", [ ( "reagent_resources.json", "reagent_resources.csv", "zenodo.json", + "vendors_and_urls.csv", "supporting_material", 0, ), ], ) def test_validate_reagent_resources( - self, json_config, input_csv, zenodo_json, supporting_material_root_dir, result + self, + json_config, + input_csv, + zenodo_json, + vendor_csv, + supporting_material_root_dir, + result, ): assert ( validate_reagent_resources( str(self.data_path / input_csv), str(self.data_path / json_config), str(self.data_path / zenodo_json), + str(self.data_path / vendor_csv), str(self.data_path / supporting_material_root_dir), ) == result