Skip to content

Commit

Permalink
Merge pull request #35 from zivy/improveReagentResourcesValidation
Browse files Browse the repository at this point in the history
Improve reagent resources validation
  • Loading branch information
zivy authored Jul 19, 2024
2 parents 50c4b65 + 82994f8 commit 5d3b3d0
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 10 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ repos:
- id: check-json
- id: check-yaml
- repo: https://github.com/psf/black
rev: 22.12.0
rev: 24.4.2
hooks: # check conformance to black formatting
- id: black
args: ['--check'] # if run without arguments, will fail and will format the files
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,11 @@ needed. This is equivalent to summarizing all activity on a feature branch versu

## Unreleased

## v0.8.2

### Changed
* validate_reagent_resources - Check that the vendor listed in the reagent_resources.csv is in the list of vendors found in the vendor_urls.csv.

## v0.8.1

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "ibex_imaging_knowledge_base_utilities"
version = "0.8.1"
version = "0.8.2"
authors = [{ name="Ziv Yaniv", email="[email protected]" },
]
description = "Utility scripts used for managing the IBEX Imaging Community Knowledge-Base"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from ibex_imaging_knowledge_base_utilities.argparse_types import (
file_path_endswith,
dir_path,
csv_path,
)
from .utilities import validate_df

Expand All @@ -47,6 +48,7 @@ def validate_reagent_resources(
csv_file_name,
json_config_file_name,
zenodo_json_file_name,
vendors_csv_file_name,
supporting_material_root_dir,
):
MAX_ORCID_ENTRIES = 5
Expand All @@ -66,10 +68,18 @@ def validate_reagent_resources(
zenodo_dict = json.load(fp)
orcids = [data["orcid"].strip() for data in zenodo_dict["creators"]] + ["NA"]

# Get the list of valid vendor names from the vendor_urls.csv file, using
# the column titled "Vendor"
vendor_names = pd.read_csv(vendors_csv_file_name)["Vendor"].to_list()

# Add the ORCIDs and vendor names to the configuration dictionary to
# enforce column content to be in a set of values. Reuse the existing
# "column_is_in" entry when present, otherwise create it, and register both
# constraints in one step. Assigning a fresh dictionary per column would
# replace the dictionary created for the previous column and drop its
# constraint.
configuration_dict.setdefault("column_is_in", {}).update(
    {"Contributor": orcids, "Vendor": vendor_names}
)

if "multi_value_column_is_in" in configuration_dict:
configuration_dict["multi_value_column_is_in"]["Agree"] = orcids
Expand Down Expand Up @@ -101,7 +111,7 @@ def validate_reagent_resources(
# Check that the Contributor ORCID appears in the Agree or Disagree column.
# In most cases it will be in the Agree column. When the recommendation
# is refuted (multiple validators disagreed with the original contributor)
# and changed to from Yes to No or vice versa the ORCIDs from the Agree and
# and changed from Yes to No or vice versa the ORCIDs from the Agree and
# Disagree columns are swapped and the original contributor's ORCID will appear
# in the Disagree column.
df["Agree"] = df["Agree"].apply(
Expand Down Expand Up @@ -159,7 +169,7 @@ def validate_reagent_resources(
unique_target_conjugate = df[
["Target Name / Protein Biomarker", "Conjugate"]
].drop_duplicates()
# md_file_paths_from_csv, status = unique_target_conjugate.apply(

res = unique_target_conjugate.apply(
lambda target_conjugate: validate_supporting_material(
target_conjugate, df, supporting_material_root_dir
Expand Down Expand Up @@ -314,10 +324,10 @@ def validate_supporting_material(
] = supporting_orcid_configurations["Disagree"].apply(
lambda x: frozenset([s[1:-1] for s in re.findall(orcid_pattern, x)])
)
supporting_orcid_configurations[
"Contributor"
] = supporting_orcid_configurations["Contributor"].apply(
lambda x: re.findall(orcid_pattern, x)[0][1:-1]
supporting_orcid_configurations["Contributor"] = (
supporting_orcid_configurations["Contributor"].apply(
lambda x: re.findall(orcid_pattern, x)[0][1:-1]
)
)
# Compare the configuration data from the supporting material to that from the reagent_resources file.
# We don't use DataFrame.equals because that assumes the order of the columns and indexes is the same,
Expand Down Expand Up @@ -362,6 +372,11 @@ def main(argv=None):
type=lambda x: file_path_endswith(x, ".json"),
help=".zenodo.json file which contains the ORCIDs of all contributors.",
)
parser.add_argument(
"vendors_csv_file",
type=lambda x: csv_path(x, required_columns={"Vendor"}),
help="csv file containing all valid vendor names in a column titled 'Vendor'.",
)
parser.add_argument(
"supporting_material_root_dir",
type=dir_path,
Expand All @@ -373,6 +388,7 @@ def main(argv=None):
args.csv_file,
args.json_config_file,
args.zenodo_json_file,
args.vendors_csv_file,
args.supporting_material_root_dir,
)

Expand Down
12 changes: 10 additions & 2 deletions tests/test_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,25 +449,33 @@ def test_validate_image_resources(

class TestReagentResourcesValidation(BaseTest):
@pytest.mark.parametrize(
"json_config, input_csv, zenodo_json, supporting_material_root_dir, result",
"json_config, input_csv, zenodo_json, vendor_csv, supporting_material_root_dir, result",
[
(
"reagent_resources.json",
"reagent_resources.csv",
"zenodo.json",
"vendors_and_urls.csv",
"supporting_material",
0,
),
],
)
def test_validate_reagent_resources(
self, json_config, input_csv, zenodo_json, supporting_material_root_dir, result
self,
json_config,
input_csv,
zenodo_json,
vendor_csv,
supporting_material_root_dir,
result,
):
assert (
validate_reagent_resources(
str(self.data_path / input_csv),
str(self.data_path / json_config),
str(self.data_path / zenodo_json),
str(self.data_path / vendor_csv),
str(self.data_path / supporting_material_root_dir),
)
== result
Expand Down

0 comments on commit 5d3b3d0

Please sign in to comment.