From 8e701c00e5a6fbec638798b10ad4ed5ed815228c Mon Sep 17 00:00:00 2001
From: Ziv Yaniv <zivyaniv@nih.gov>
Date: Fri, 19 Jul 2024 13:21:33 -0400
Subject: [PATCH 1/2] Update the black version in .pre-commit configuration
 file.

Move to a newer version of black (from 22.12.0 to 24.4.2).
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 79c1d3d..bc95f60 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -15,7 +15,7 @@ repos:
    - id: check-json
    - id: check-yaml
 - repo: https://github.com/psf/black
-  rev: 22.12.0
+  rev: 24.4.2
   hooks: # check conformance to black formatting
    - id: black 
      args: ['--check'] # if run without arguments, will fail and will format the files

From 82994f8b3b2f6622b8bd3024be02713b171a25e3 Mon Sep 17 00:00:00 2001
From: Ziv Yaniv <zivyaniv@nih.gov>
Date: Fri, 19 Jul 2024 13:24:02 -0400
Subject: [PATCH 2/2] Adding validation of vendor entries in
 reagent_resources.csv.

A vendor name in the reagent_resources.csv "Vendor" column has to be
one of the vendors specified in the vendor_urls.csv file.
---
 CHANGELOG.md                                  |  5 ++++
 pyproject.toml                                |  2 +-
 .../validate_reagent_resources.py             | 28 +++++++++++++++----
 tests/test_scripts.py                         | 12 ++++++--
 4 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3ec11b0..26471a5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,11 @@ needed. This is equivalent to summarizing all activity on a feature branch versu
 
 ## Unreleased
 
+## v0.8.2
+
+### Changed
+* validate_reagent_resources - Check that the vendor listed in the reagent_resources.csv is from the list of vendors found in the vendor_urls.csv.
+
 ## v0.8.1
 
 ### Fixed
diff --git a/pyproject.toml b/pyproject.toml
index e73fed4..cc880ff 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "ibex_imaging_knowledge_base_utilities"
-version = "0.8.1"
+version = "0.8.2"
 authors = [{ name="Ziv Yaniv", email="zivyaniv@nih.gov" },
 ]
 description = "Utility scripts used for managing the IBEX Imaging Community Knowledge-Base"
diff --git a/src/ibex_imaging_knowledge_base_utilities/data_validation/validate_reagent_resources.py b/src/ibex_imaging_knowledge_base_utilities/data_validation/validate_reagent_resources.py
index d86c13f..6cc1646 100644
--- a/src/ibex_imaging_knowledge_base_utilities/data_validation/validate_reagent_resources.py
+++ b/src/ibex_imaging_knowledge_base_utilities/data_validation/validate_reagent_resources.py
@@ -25,6 +25,7 @@
 from ibex_imaging_knowledge_base_utilities.argparse_types import (
     file_path_endswith,
     dir_path,
+    csv_path,
 )
 from .utilities import validate_df
 
@@ -47,6 +48,7 @@ def validate_reagent_resources(
     csv_file_name,
     json_config_file_name,
     zenodo_json_file_name,
+    vendors_csv_file_name,
     supporting_material_root_dir,
 ):
     MAX_ORCID_ENTRIES = 5
@@ -66,10 +68,18 @@ def validate_reagent_resources(
         zenodo_dict = json.load(fp)
     orcids = [data["orcid"].strip() for data in zenodo_dict["creators"]] + ["NA"]
 
+    # Get the list of vendor names from the vendor_urls.csv file name, column
+    # titled "Vendor"
+    vendor_names = pd.read_csv(vendors_csv_file_name)["Vendor"].to_list()
+
+    # Add the ORCIDs and vendor names to the configuration dictionary to
+    # enforce column content to be in a set of values
     if "column_is_in" in configuration_dict:
         configuration_dict["column_is_in"]["Contributor"] = orcids
+        configuration_dict["column_is_in"]["Vendor"] = vendor_names
     else:
         configuration_dict["column_is_in"] = {"Contributor": orcids}
+        configuration_dict["column_is_in"]["Vendor"] = vendor_names
 
     if "multi_value_column_is_in" in configuration_dict:
         configuration_dict["multi_value_column_is_in"]["Agree"] = orcids
@@ -101,7 +111,7 @@ def validate_reagent_resources(
     # Check that the Contributor ORCID appears in the Agree or Disagree column.
     # In most cases it will be in the Agree column. When the recommendation
     # is refuted (multiple validators disagreed with the original contributor)
-    # and changed to from Yes to No or vice versa the ORCIDs from the Agree and
+    # and changed from Yes to No or vice versa the ORCIDs from the Agree and
     # Disagree columns are swapped and the original contributors ORCID will appear
     # in the Disagree column.
     df["Agree"] = df["Agree"].apply(
@@ -159,7 +169,7 @@ def validate_reagent_resources(
     unique_target_conjugate = df[
         ["Target Name / Protein Biomarker", "Conjugate"]
     ].drop_duplicates()
-    # md_file_paths_from_csv, status = unique_target_conjugate.apply(
+
     res = unique_target_conjugate.apply(
         lambda target_conjugate: validate_supporting_material(
             target_conjugate, df, supporting_material_root_dir
@@ -314,10 +324,10 @@ def validate_supporting_material(
             ] = supporting_orcid_configurations["Disagree"].apply(
                 lambda x: frozenset([s[1:-1] for s in re.findall(orcid_pattern, x)])
             )
-            supporting_orcid_configurations[
-                "Contributor"
-            ] = supporting_orcid_configurations["Contributor"].apply(
-                lambda x: re.findall(orcid_pattern, x)[0][1:-1]
+            supporting_orcid_configurations["Contributor"] = (
+                supporting_orcid_configurations["Contributor"].apply(
+                    lambda x: re.findall(orcid_pattern, x)[0][1:-1]
+                )
             )
             # Compare the configuration data from the supporting material to that from the reagent_resources file.
             # We don't use DataFrame.equal because that assumes the order of the columns and indexes is the same,
@@ -362,6 +372,11 @@ def main(argv=None):
         type=lambda x: file_path_endswith(x, ".json"),
         help=".zenodo.json file which contains the ORCIDs of all contributors.",
     )
+    parser.add_argument(
+        "vendors_csv_file",
+        type=lambda x: csv_path(x, required_columns={"Vendor"}),
+        help="csv file containing all valid vendor names in a column titled 'Vendor'.",
+    )
     parser.add_argument(
         "supporting_material_root_dir",
         type=dir_path,
@@ -373,6 +388,7 @@ def main(argv=None):
         args.csv_file,
         args.json_config_file,
         args.zenodo_json_file,
+        args.vendors_csv_file,
         args.supporting_material_root_dir,
     )
 
diff --git a/tests/test_scripts.py b/tests/test_scripts.py
index 0a6d26a..9f920bd 100644
--- a/tests/test_scripts.py
+++ b/tests/test_scripts.py
@@ -449,25 +449,33 @@ def test_validate_image_resources(
 
 class TestReagentResourcesValidation(BaseTest):
     @pytest.mark.parametrize(
-        "json_config, input_csv, zenodo_json, supporting_material_root_dir, result",
+        "json_config, input_csv, zenodo_json, vendor_csv, supporting_material_root_dir, result",
         [
             (
                 "reagent_resources.json",
                 "reagent_resources.csv",
                 "zenodo.json",
+                "vendors_and_urls.csv",
                 "supporting_material",
                 0,
             ),
         ],
     )
     def test_validate_reagent_resources(
-        self, json_config, input_csv, zenodo_json, supporting_material_root_dir, result
+        self,
+        json_config,
+        input_csv,
+        zenodo_json,
+        vendor_csv,
+        supporting_material_root_dir,
+        result,
     ):
         assert (
             validate_reagent_resources(
                 str(self.data_path / input_csv),
                 str(self.data_path / json_config),
                 str(self.data_path / zenodo_json),
+                str(self.data_path / vendor_csv),
                 str(self.data_path / supporting_material_root_dir),
             )
             == result