Skip to content

Commit

Permalink
Unittest: check system avus
Browse files Browse the repository at this point in the history
  • Loading branch information
claravox committed Oct 8, 2024
1 parent 1af322a commit 9a59a96
Show file tree
Hide file tree
Showing 5 changed files with 137 additions and 55 deletions.
8 changes: 4 additions & 4 deletions integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,16 +350,16 @@ def _test_folder_secure_func(ctx, func):
"test": lambda ctx: _call_msvc_json_arrayops(ctx, '["a", "b", "c"]', "", "size", 0, 3),
"check": lambda x: x == 3},
{"name": "msvc.json_objops.add_notexist_empty",
"test": lambda ctx: _call_msvc_json_objops(ctx, '', msi.kvpair(ctx, "e", "f"), 'add', 0),
"test": lambda ctx: _call_msvc_json_objops(ctx, '', msi.kvpair(ctx, "e", "f"), 'add', 0),
"check": lambda x: x == '{"e": "f"}'},
{"name": "msvc.json_objops.add_notexist_nonempty",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "f"), 'add', 0),
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "f"), 'add', 0),
"check": lambda x: x == '{"a": "b", "e": "f"}'},
{"name": "msvc.json_objops.add_exist_nonempty",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "g"), 'add', 0),
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b"}', msi.kvpair(ctx, "e", "g"), 'add', 0),
"check": lambda x: x == '{"a": "b", "e": "g"}'},
{"name": "msvc.json_objops.get_exist",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b", "c": "d"}', msi.kvpair(ctx, "c", ""), 'get', 1),
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b", "c": "d"}', msi.kvpair(ctx, "c", ""), 'get', 1),
"check": lambda x: str(x) == "(['c'], ['d'])"},
{"name": "msvc.json_objops.get_notexist",
"test": lambda ctx: _call_msvc_json_objops(ctx, '{"a": "b", "c": "d"}', msi.kvpair(ctx, "e", ""), 'get', 1),
Expand Down
1 change: 0 additions & 1 deletion tools/troubleshoot-published-data.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ def parse_args():
help="If actions should be performed without connecting to external servers (needed for the Yoda team's development setup).")
parser.add_argument("-p", "--package", type=str, required=False,
help="Troubleshoot a specific data package by name (default: troubleshoot all packages)")
# TODO argument to optionally add an avu with json status info: time of check and was it a success
return parser.parse_args()


Expand Down
57 changes: 11 additions & 46 deletions troubleshoot_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,63 +75,27 @@ def find_data_packages(ctx, write_stdout):
return []


def check_data_package_system_avus(ctx, data_package, write_stdout):
def check_print_data_package_system_avus(ctx, data_package, write_stdout):
"""
Checks whether a data package has the expected system AVUs that start with constants.UUORGMETADATAPREFIX (i.e, 'org_').
This function compares the AVUs of the provided data package against a set of ground truth AVUs derived from
a successfully published data package.
This also prints if there are any missing or unexpected results.
:param ctx: Combined type of a callback and rei struct
:param data_package: String representing the data package collection path.
:param write_stdout: A boolean representing whether to write to stdout or rodsLog
:returns: A tuple containing boolean results of checking results
:returns: A 2-tuple of booleans: (no AVUs are missing, no unexpected AVUs are present)
"""
extracted_avus = avu.of_coll(ctx, data_package)
results = misc.check_data_package_system_avus(extracted_avus)

# Fetch AVUs of the data package and filter those starting with 'org_'
extracted_avus = {m.attr for m in avu.of_coll(ctx, data_package) if m.attr.startswith(constants.UUORGMETADATAPREFIX + 'publication_')}

# Define the set of ground truth AVUs
avu_names_suffix = [
'publication_approval_actor', 'publication_randomId',
'publication_versionDOI', 'publication_dataCiteJsonPath', 'publication_license',
'publication_anonymousAccess', 'publication_versionDOIMinted',
'publication_accessRestriction', 'publication_landingPagePath',
'publication_licenseUri', 'publication_publicationDate',
'publication_vaultPackage', 'publication_submission_actor', 'publication_status',
'publication_lastModifiedDateTime', 'publication_combiJsonPath',
'publication_landingPageUploaded', 'publication_oaiUploaded',
'publication_landingPageUrl', 'publication_dataCiteMetadataPosted'
]

# Define set of AVUs with more than one version of publication
avu_names_base_suffix = [
'publication_previous_version', 'publication_baseDOI', 'publication_baseRandomId',
'publication_baseDOIMinted'
]

if constants.UUORGMETADATAPREFIX + 'publication_previous_version' in extracted_avus:
combined_avu_names_suffix = avu_names_base_suffix + avu_names_suffix
ground_truth_avus = {constants.UUORGMETADATAPREFIX + name for name in combined_avu_names_suffix}
else:
ground_truth_avus = {constants.UUORGMETADATAPREFIX + name for name in avu_names_suffix}

# Find missing and unexpected AVUs
missing_avus = ground_truth_avus - extracted_avus
unexpected_avus = extracted_avus - ground_truth_avus

results = {
'no_missing_avus': not bool(missing_avus),
'missing_avus': list(missing_avus),
'no_unexpected_avus': not bool(unexpected_avus),
'unexpected_avus': list(unexpected_avus)
}

if missing_avus:
log.write(ctx, "check_data_package_system_avus: There are some missing AVUs in data package <{}> - {}".format(data_package, list(missing_avus)), write_stdout)
if not results["no_missing_avus"]:
log.write(ctx, "check_data_package_system_avus: There are some missing AVUs in data package <{}> - {}".format(data_package, list(results["missing_avus"])), write_stdout)

if unexpected_avus:
log.write(ctx, "check_data_package_system_avus: There are some unexpected AVUs in data package <{}> - {}".format(data_package, list(unexpected_avus)), write_stdout)
if not results["no_unexpected_avus"]:
log.write(ctx, "check_data_package_system_avus: There are some unexpected AVUs in data package <{}> - {}".format(data_package, list(results["unexpected_avus"])), write_stdout)

return (results["no_missing_avus"], results["no_unexpected_avus"])

Expand Down Expand Up @@ -406,11 +370,12 @@ def batch_troubleshoot_published_data_packages(ctx, requested_package, log_file,
for data_package in data_packages:
log.write(ctx, "Troubleshooting data package: {}".format(data_package), write_stdout)
result = {}
# Cannot check the metadata as technicaladmin
if not api_call:
schema_check_dict = vault_metadata_matches_schema(ctx, data_package, schema_cache, "troubleshoot-publications", write_stdout)
result['schema_check'] = schema_check_dict['match_schema'] if schema_check_dict else False

result['no_missing_AVUs_check'], result['no_unexpected_AVUs_check'] = check_data_package_system_avus(ctx, data_package, write_stdout)
result['no_missing_AVUs_check'], result['no_unexpected_AVUs_check'] = check_print_data_package_system_avus(ctx, data_package, write_stdout)
result['versionDOI_check'], result['baseDOI_check'] = check_datacite_doi_registration(ctx, data_package, offline, write_stdout)
result['landingPage_check'] = check_landingpage(ctx, data_package, offline, api_call)
publication_config = get_publication_config(ctx)
Expand Down
70 changes: 67 additions & 3 deletions unit-tests/test_util_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,83 @@

import sys
import time
from collections import OrderedDict
from collections import namedtuple, OrderedDict
from unittest import TestCase

sys.path.append('../util')

from misc import human_readable_size, last_run_time_acceptable, remove_empty_objects
from misc import check_data_package_system_avus, human_readable_size, last_run_time_acceptable, remove_empty_objects

# Attribute -> value pairs used as the "everything is fine" fixture: the test
# below expects this exact set to yield no missing and no unexpected AVUs.
avs_success_data_package = {
    "org_publication_accessRestriction": "Open - freely retrievable",
    "org_publication_anonymousAccess": "yes",
    "org_publication_approval_actor": "datamanager#tempZone",
    "org_publication_combiJsonPath": "/tempZone/yoda/publication/ICGVFV-combi.json",
    "org_publication_dataCiteJsonPath": "/tempZone/yoda/publication/ICGVFV-dataCite.json",
    "org_publication_dataCiteMetadataPosted": "yes",
    "org_publication_landingPagePath": "/tempZone/yoda/publication/ICGVFV.html",
    "org_publication_landingPageUploaded": "yes",
    "org_publication_landingPageUrl": "https://public.yoda.test/allinone/UU01/ICGVFV.html",
    "org_publication_lastModifiedDateTime": "2024-10-04T15:32:46.000000",
    "org_publication_license": "Creative Commons Attribution 4.0 International Public License",
    "org_publication_licenseUri": "https://creativecommons.org/licenses/by/4.0/legalcode",
    "org_publication_oaiUploaded": "yes",
    "org_publication_publicationDate": "2024-10-04T15:33:17.853806",
    "org_publication_randomId": "ICGVFV",
    "org_publication_status": "OK",
    "org_publication_submission_actor": "researcher#tempZone",
    "org_publication_vaultPackage": "/tempZone/home/vault-default-3/research-default-3[1728048679]",
    "org_publication_versionDOI": "10.00012/UU01-ICGVFV",
    "org_publication_versionDOIMinted": "yes",
}

# Minimal stand-in for an AVU record: a 3-tuple with short field names
# (a, v, u) plus long aliases. The function under test only reads `.attr`.
Avu = namedtuple('Avu', ['a', 'v', 'u'])
Avu.attr, Avu.value, Avu.unit = Avu.a, Avu.v, Avu.u


class UtilMiscTest(TestCase):

def test_check_data_package_system_avus(self):
    """Test detection of missing and unexpected publication AVUs.

    Covers four scenarios in sequence: a complete set, one extra attribute,
    one extra plus one missing attribute, and one missing attribute only.
    """
    unexpected_attr = 'org_publication_userAddedSomethingWeird'
    missing_attr = 'org_publication_landingPagePath'

    # Work on a copy: the scenarios below add and delete keys, and the
    # module-level fixture must stay intact for any other test that uses it.
    avs = dict(avs_success_data_package)

    # Success
    avus_success = [Avu(attr, val, "") for attr, val in avs.items()]
    result = check_data_package_system_avus(avus_success)
    self.assertTrue(result['no_missing_avus'])
    self.assertTrue(result['no_unexpected_avus'])
    self.assertEqual(result['missing_avus'], [])
    self.assertEqual(result['unexpected_avus'], [])

    # Unexpected
    avs[unexpected_attr] = "yodayoda:)"
    avus_unexpected = [Avu(attr, val, "") for attr, val in avs.items()]
    result = check_data_package_system_avus(avus_unexpected)
    self.assertTrue(result['no_missing_avus'])
    self.assertFalse(result['no_unexpected_avus'])
    self.assertEqual(result['missing_avus'], [])
    self.assertEqual(result['unexpected_avus'], [unexpected_attr])

    # Missing and unexpected
    del avs[missing_attr]
    avus_missing_unexpected = [Avu(attr, val, "") for attr, val in avs.items()]
    result = check_data_package_system_avus(avus_missing_unexpected)
    self.assertFalse(result['no_missing_avus'])
    self.assertFalse(result['no_unexpected_avus'])
    self.assertEqual(result['missing_avus'], [missing_attr])
    self.assertEqual(result['unexpected_avus'], [unexpected_attr])

    # Missing
    del avs[unexpected_attr]
    avus_missing = [Avu(attr, val, "") for attr, val in avs.items()]
    result = check_data_package_system_avus(avus_missing)
    self.assertFalse(result['no_missing_avus'])
    self.assertTrue(result['no_unexpected_avus'])
    self.assertEqual(result['missing_avus'], [missing_attr])
    self.assertEqual(result['unexpected_avus'], [])

def test_last_run_time_acceptable(self):
"""Test the last run time for copy to vault"""
# No last run time (job hasn't be tried before)
# No last run time (job hasn't been tried before)
found = False
last_run = 1
self.assertEqual(last_run_time_acceptable("b", found, last_run, 300), True)
Expand Down
56 changes: 55 additions & 1 deletion util/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,60 @@
import time
from collections import OrderedDict

import constants


def check_data_package_system_avus(extracted_avus):
    """
    Checks whether a data package has the expected system publication AVUs.

    Only attributes starting with constants.UUORGMETADATAPREFIX + 'publication_'
    (i.e., 'org_publication_') are considered; all other AVUs are ignored.
    The attribute names found are compared against a set of ground truth AVUs derived from
    a successfully published data package. If the package has a 'publication_previous_version'
    attribute, the base DOI related attributes are expected as well.

    :param extracted_avus: AVUs of the data package (iterable of objects with an `attr` attribute)

    :returns: Dictionary of the results of the check, with keys:
              'no_missing_avus' (bool), 'missing_avus' (sorted list of attribute names),
              'no_unexpected_avus' (bool), 'unexpected_avus' (sorted list of attribute names)
    """
    org_prefix = constants.UUORGMETADATAPREFIX
    publication_prefix = org_prefix + 'publication_'

    # Keep only the attribute names of publication-related system AVUs
    present_attrs = {m.attr for m in extracted_avus if m.attr.startswith(publication_prefix)}

    # Define the set of ground truth AVUs
    avu_names_suffix = [
        'publication_approval_actor', 'publication_randomId',
        'publication_versionDOI', 'publication_dataCiteJsonPath', 'publication_license',
        'publication_anonymousAccess', 'publication_versionDOIMinted',
        'publication_accessRestriction', 'publication_landingPagePath',
        'publication_licenseUri', 'publication_publicationDate',
        'publication_vaultPackage', 'publication_submission_actor', 'publication_status',
        'publication_lastModifiedDateTime', 'publication_combiJsonPath',
        'publication_landingPageUploaded', 'publication_oaiUploaded',
        'publication_landingPageUrl', 'publication_dataCiteMetadataPosted'
    ]

    # Additional AVUs expected when the publication has more than one version
    avu_names_base_suffix = [
        'publication_previous_version', 'publication_baseDOI', 'publication_baseRandomId',
        'publication_baseDOIMinted'
    ]

    expected_suffixes = list(avu_names_suffix)
    if org_prefix + 'publication_previous_version' in present_attrs:
        expected_suffixes = avu_names_base_suffix + expected_suffixes
    ground_truth_avus = {org_prefix + name for name in expected_suffixes}

    # Find missing and unexpected AVUs
    missing_avus = ground_truth_avus - present_attrs
    unexpected_avus = present_attrs - ground_truth_avus

    # Sort the name lists: set iteration order is arbitrary, and callers
    # log these lists, so a stable order keeps output reproducible.
    return {
        'no_missing_avus': not missing_avus,
        'missing_avus': sorted(missing_avus),
        'no_unexpected_avus': not unexpected_avus,
        'unexpected_avus': sorted(unexpected_avus)
    }


def last_run_time_acceptable(coll, found, last_run, config_backoff_time):
"""Return whether the last run time is acceptable to continue with task."""
Expand Down Expand Up @@ -48,5 +102,5 @@ def remove_empty_objects(d):
# Clean lists by filtering out empty objects.
return [remove_empty_objects(item) for item in d if remove_empty_objects(item) not in (None, '', {}, [])]
else:
# Return the value abecause it is not a dict or list.
# Return the value because it is not a dict or list.
return d

0 comments on commit 9a59a96

Please sign in to comment.