Skip to content

Commit

Permalink
Refine system AVUs check and unit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
claravox committed Oct 14, 2024
1 parent 929b73f commit d82bd4c
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 16 deletions.
91 changes: 91 additions & 0 deletions unit-tests/test_util_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from misc import check_data_package_system_avus, human_readable_size, last_run_time_acceptable, remove_empty_objects

# AVs of a successfully published data package, that is the first version of the package
avs_success_data_package = {
"org_publication_accessRestriction": "Open - freely retrievable",
"org_publication_anonymousAccess": "yes",
Expand All @@ -35,6 +36,60 @@
"org_publication_versionDOI": "10.00012/UU01-ICGVFV",
"org_publication_versionDOIMinted": "yes",
}

# AVs of a successfully published data package that is the latest version of a
# publication with multiple versions (it refers to a previous version and
# carries the base DOI attributes, but has no next version)
avs_success_data_package_multiversion = {
"org_publication_accessRestriction": "Open - freely retrievable",
"org_publication_anonymousAccess": "yes",
"org_publication_approval_actor": "datamanager#tempZone",
"org_publication_baseDOI": "10.00012/UU01-X0GU3S",
"org_publication_baseDOIMinted": "yes",
"org_publication_baseRandomId": "X0GU3S",
"org_publication_combiJsonPath": "/tempZone/yoda/publication/YU0JDH-combi.json",
"org_publication_dataCiteJsonPath": "/tempZone/yoda/publication/YU0JDH-dataCite.json",
"org_publication_dataCiteMetadataPosted": "yes",
"org_publication_landingPagePath": "/tempZone/yoda/publication/YU0JDH.html",
"org_publication_landingPageUploaded": "yes",
"org_publication_landingPageUrl": "https://public.yoda.test/allinone/UU01/YU0JDH.html",
"org_publication_lastModifiedDateTime": "2024-10-11T08:49:17.000000",
"org_publication_license": "Custom",
"org_publication_oaiUploaded": "yes",
"org_publication_previous_version": "/tempZone/home/vault-initial1/new-group01[1728550839]",
"org_publication_publicationDate": "2024-10-11T08:50:01.812220",
"org_publication_randomId": "YU0JDH",
"org_publication_status": "OK",
"org_publication_submission_actor": "datamanager#tempZone",
"org_publication_vaultPackage": "/tempZone/home/vault-initial1/new-group01[1728629336]",
"org_publication_versionDOI": "10.00012/UU01-YU0JDH",
"org_publication_versionDOIMinted": "yes"
}

# AVs of a successfully published data package that is the first version of a
# publication that has had later versions (it refers to a next version and
# carries baseDOI / baseRandomId, but has no previous version)
avs_success_data_package_multiversion_first = {
"org_publication_accessRestriction": "Open - freely retrievable",
"org_publication_anonymousAccess": "yes",
"org_publication_approval_actor": "datamanager#tempZone",
"org_publication_baseDOI": "10.00012/UU01-X0GU3S",
"org_publication_baseRandomId": "X0GU3S",
"org_publication_combiJsonPath": "/tempZone/yoda/publication/T8D8QU-combi.json",
"org_publication_dataCiteJsonPath": "/tempZone/yoda/publication/T8D8QU-dataCite.json",
"org_publication_dataCiteMetadataPosted": "yes",
"org_publication_landingPagePath": "/tempZone/yoda/publication/T8D8QU.html",
"org_publication_landingPageUploaded": "yes",
"org_publication_landingPageUrl": "https://public.yoda.test/allinone/UU01/T8D8QU.html",
"org_publication_lastModifiedDateTime": "2024-10-10T09:06:05.000000",
"org_publication_license": "Creative Commons Attribution 4.0 International Public License",
"org_publication_licenseUri": "https://creativecommons.org/licenses/by/4.0/legalcode",
"org_publication_next_version": "/tempZone/home/vault-initial1/new-group01[1728545387]",
"org_publication_oaiUploaded": "yes",
"org_publication_publicationDate": "2024-10-10T09:06:02.177810",
"org_publication_randomId": "T8D8QU",
"org_publication_status": "OK",
"org_publication_submission_actor": "datamanager#tempZone",
"org_publication_vaultPackage": "/tempZone/home/vault-initial1/new-group01[1728543897]",
"org_publication_versionDOI": "10.00012/UU01-T8D8QU",
"org_publication_versionDOIMinted": "yes",
}

# From avu.py
Avu = namedtuple('Avu', list('avu'))
Avu.attr = Avu.a
Avu.value = Avu.v
Expand All @@ -53,6 +108,24 @@ def test_check_data_package_system_avus(self):
self.assertTrue(len(result['missing_avus']) == 0)
self.assertTrue(len(result['unexpected_avus']) == 0)

# Missing license Uri for non-custom license
del avs['org_publication_licenseUri']
avus_missing_license_uri = [Avu(attr, val, "") for attr, val in avs.items()]
result = check_data_package_system_avus(avus_missing_license_uri)
self.assertFalse(result['no_missing_avus'])
self.assertTrue(result['no_unexpected_avus'])
self.assertTrue(len(result['missing_avus']) == 1)
self.assertTrue(len(result['unexpected_avus']) == 0)

# Custom license, no license Uri (happy flow)
avs['org_publication_license'] = "Custom"
avus_custom_license = [Avu(attr, val, "") for attr, val in avs.items()]
result = check_data_package_system_avus(avus_custom_license)
self.assertTrue(result['no_missing_avus'])
self.assertTrue(result['no_unexpected_avus'])
self.assertTrue(len(result['missing_avus']) == 0)
self.assertTrue(len(result['unexpected_avus']) == 0)

# Unexpected
avs['org_publication_userAddedSomethingWeird'] = "yodayoda:)"
avus_unexpected = [Avu(attr, val, "") for attr, val in avs.items()]
Expand Down Expand Up @@ -80,6 +153,24 @@ def test_check_data_package_system_avus(self):
self.assertTrue(len(result['missing_avus']) == 1)
self.assertTrue(len(result['unexpected_avus']) == 0)

# Success, latest version of a publication
avs = avs_success_data_package_multiversion
avus_success = [Avu(attr, val, "") for attr, val in avs.items()]
result = check_data_package_system_avus(avus_success)
self.assertTrue(result['no_missing_avus'])
self.assertTrue(result['no_unexpected_avus'])
self.assertTrue(len(result['missing_avus']) == 0)
self.assertTrue(len(result['unexpected_avus']) == 0)

# Success, first version of a publication that has had other versions
avs = avs_success_data_package_multiversion_first
avus_success = [Avu(attr, val, "") for attr, val in avs.items()]
result = check_data_package_system_avus(avus_success)
self.assertTrue(result['no_missing_avus'])
self.assertTrue(result['no_unexpected_avus'])
self.assertTrue(len(result['missing_avus']) == 0)
self.assertTrue(len(result['unexpected_avus']) == 0)

def test_last_run_time_acceptable(self):
"""Test the last run time for copy to vault"""
# No last run time (job hasn't been tried before)
Expand Down
53 changes: 37 additions & 16 deletions util/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,41 +17,62 @@ def check_data_package_system_avus(extracted_avus):
This function compares the AVUs of the provided data package against a set of ground truth AVUs derived from
a successfully published data package.
:param extracted_avus: AVUs of the data package
:param extracted_avus: AVUs of the data package in AVU form
:returns: Dictionary of the results of the check
"""
# Filter those starting with 'org_'
extracted_avus = {m.attr for m in extracted_avus if m.attr.startswith(constants.UUORGMETADATAPREFIX + 'publication_')}
# Filter those starting with 'org_publication'
extracted_avs = {}
for m in extracted_avus:
if m.attr.startswith(constants.UUORGMETADATAPREFIX + 'publication_'):
extracted_avs[m.attr] = m.value
extracted_attrs = set(extracted_avs.keys())

# Define the set of ground truth AVUs
avu_names_suffix = [
avu_names_suffix = {
'publication_approval_actor', 'publication_randomId',
'publication_versionDOI', 'publication_dataCiteJsonPath', 'publication_license',
'publication_anonymousAccess', 'publication_versionDOIMinted',
'publication_accessRestriction', 'publication_landingPagePath',
'publication_licenseUri', 'publication_publicationDate',
'publication_publicationDate',
'publication_vaultPackage', 'publication_submission_actor', 'publication_status',
'publication_lastModifiedDateTime', 'publication_combiJsonPath',
'publication_landingPageUploaded', 'publication_oaiUploaded',
'publication_landingPageUrl', 'publication_dataCiteMetadataPosted'
]
}

# If the license is not Custom, it must have a licenseUri
if constants.UUORGMETADATAPREFIX + 'publication_license' in extracted_attrs:
if extracted_avs[constants.UUORGMETADATAPREFIX + 'publication_license'] != "Custom":
avu_names_suffix.add('publication_licenseUri')

# Define set of AVUs with more than one version of publication
avu_names_base_suffix = [
# Define the additional set of AVUs expected when the data package has a previous version
avu_names_version_suffix = {
'publication_previous_version', 'publication_baseDOI', 'publication_baseRandomId',
'publication_baseDOIMinted'
]
}

if constants.UUORGMETADATAPREFIX + 'publication_previous_version' in extracted_avus:
combined_avu_names_suffix = avu_names_base_suffix + avu_names_suffix
ground_truth_avus = {constants.UUORGMETADATAPREFIX + name for name in combined_avu_names_suffix}
else:
ground_truth_avus = {constants.UUORGMETADATAPREFIX + name for name in avu_names_suffix}
# Define additional set of AVUs expected for the first version of a publication, when there are multiple versions
avu_names_first_version_suffix = {
'publication_baseRandomId', 'publication_baseDOI', 'publication_next_version'
}

# For an intermediate version (one that has both a previous and a next version),
# all we need is next_version in addition to avu_names_version_suffix
avu_names_previous_version_suffix = {'publication_next_version'}

combined_avu_names_suffix = avu_names_suffix

if constants.UUORGMETADATAPREFIX + 'publication_previous_version' in extracted_attrs:
combined_avu_names_suffix.update(avu_names_version_suffix)
if constants.UUORGMETADATAPREFIX + 'publication_next_version' in extracted_attrs:
combined_avu_names_suffix.update(avu_names_previous_version_suffix)
elif constants.UUORGMETADATAPREFIX + 'publication_next_version' in extracted_attrs:
combined_avu_names_suffix.update(avu_names_first_version_suffix)

ground_truth_avus = {constants.UUORGMETADATAPREFIX + name for name in combined_avu_names_suffix}
# Find missing and unexpected AVUs
missing_avus = ground_truth_avus - extracted_avus
unexpected_avus = extracted_avus - ground_truth_avus
missing_avus = ground_truth_avus - extracted_attrs
unexpected_avus = extracted_attrs - ground_truth_avus

results = {
'no_missing_avus': not bool(missing_avus),
Expand Down

0 comments on commit d82bd4c

Please sign in to comment.