diff --git a/folder.py b/folder.py index 6cb88a9dd..519069e18 100644 --- a/folder.py +++ b/folder.py @@ -205,7 +205,7 @@ def precheck_folder_secure(ctx, coll): found, last_run = get_last_run_time(ctx, coll) if (not correct_copytovault_start_status(ctx, coll) or not correct_copytovault_start_location(coll) - or not misc.last_run_time_acceptable(coll, found, last_run, config.vault_copy_backoff_time)): + or not misc.last_run_time_acceptable(found, last_run, config.vault_copy_backoff_time)): return False return True diff --git a/unit-tests/test_util_misc.py b/unit-tests/test_util_misc.py index 72571daa5..bd311c593 100644 --- a/unit-tests/test_util_misc.py +++ b/unit-tests/test_util_misc.py @@ -13,6 +13,7 @@ from misc import check_data_package_system_avus, human_readable_size, last_run_time_acceptable, remove_empty_objects +# AVs of a successfully published data package, that is the first version of the package avs_success_data_package = { "org_publication_accessRestriction": "Open - freely retrievable", "org_publication_anonymousAccess": "yes", @@ -35,6 +36,60 @@ "org_publication_versionDOI": "10.00012/UU01-ICGVFV", "org_publication_versionDOIMinted": "yes", } + +avs_success_data_package_multiversion = { + "org_publication_accessRestriction": "Open - freely retrievable", + "org_publication_anonymousAccess": "yes", + "org_publication_approval_actor": "datamanager#tempZone", + "org_publication_baseDOI": "10.00012/UU01-X0GU3S", + "org_publication_baseDOIMinted": "yes", + "org_publication_baseRandomId": "X0GU3S", + "org_publication_combiJsonPath": "/tempZone/yoda/publication/YU0JDH-combi.json", + "org_publication_dataCiteJsonPath": "/tempZone/yoda/publication/YU0JDH-dataCite.json", + "org_publication_dataCiteMetadataPosted": "yes", + "org_publication_landingPagePath": "/tempZone/yoda/publication/YU0JDH.html", + "org_publication_landingPageUploaded": "yes", + "org_publication_landingPageUrl": "https://public.yoda.test/allinone/UU01/YU0JDH.html", + "org_publication_lastModifiedDateTime": "2024-10-11T08:49:17.000000", + "org_publication_license": "Custom", + "org_publication_oaiUploaded": "yes", + "org_publication_previous_version": "/tempZone/home/vault-initial1/new-group01[1728550839]", + "org_publication_publicationDate": "2024-10-11T08:50:01.812220", + "org_publication_randomId": "YU0JDH", + "org_publication_status": "OK", + "org_publication_submission_actor": "datamanager#tempZone", + "org_publication_vaultPackage": "/tempZone/home/vault-initial1/new-group01[1728629336]", + "org_publication_versionDOI": "10.00012/UU01-YU0JDH", + "org_publication_versionDOIMinted": "yes" +} + +avs_success_data_package_multiversion_first = { + "org_publication_accessRestriction": "Open - freely retrievable", + "org_publication_anonymousAccess": "yes", + "org_publication_approval_actor": "datamanager#tempZone", + "org_publication_baseDOI": "10.00012/UU01-X0GU3S", + "org_publication_baseRandomId": "X0GU3S", + "org_publication_combiJsonPath": "/tempZone/yoda/publication/T8D8QU-combi.json", + "org_publication_dataCiteJsonPath": "/tempZone/yoda/publication/T8D8QU-dataCite.json", + "org_publication_dataCiteMetadataPosted": "yes", + "org_publication_landingPagePath": "/tempZone/yoda/publication/T8D8QU.html", + "org_publication_landingPageUploaded": "yes", + "org_publication_landingPageUrl": "https://public.yoda.test/allinone/UU01/T8D8QU.html", + "org_publication_lastModifiedDateTime": "2024-10-10T09:06:05.000000", + "org_publication_license": "Creative Commons Attribution 4.0 International Public License", + "org_publication_licenseUri": "https://creativecommons.org/licenses/by/4.0/legalcode", + "org_publication_next_version": "/tempZone/home/vault-initial1/new-group01[1728545387]", + "org_publication_oaiUploaded": "yes", + "org_publication_publicationDate": "2024-10-10T09:06:02.177810", + "org_publication_randomId": "T8D8QU", + "org_publication_status": "OK", + "org_publication_submission_actor": "datamanager#tempZone", + "org_publication_vaultPackage": "/tempZone/home/vault-initial1/new-group01[1728543897]", + "org_publication_versionDOI": "10.00012/UU01-T8D8QU", + "org_publication_versionDOIMinted": "yes", +} + +# From avu.py Avu = namedtuple('Avu', list('avu')) Avu.attr = Avu.a Avu.value = Avu.v @@ -44,6 +99,9 @@ class UtilMiscTest(TestCase): def test_check_data_package_system_avus(self): + # TODO switch to dictionary? + # TODO should I be concerned when there are multiple versions of avus + # Success avs = avs_success_data_package avus_success = [Avu(attr, val, "") for attr, val in avs.items()] @@ -53,6 +111,24 @@ def test_check_data_package_system_avus(self): self.assertTrue(len(result['missing_avus']) == 0) self.assertTrue(len(result['unexpected_avus']) == 0) + # Missing license Uri for non-custom license + del avs['org_publication_licenseUri'] + avus_missing_license_uri = [Avu(attr, val, "") for attr, val in avs.items()] + result = check_data_package_system_avus(avus_missing_license_uri) + self.assertFalse(result['no_missing_avus']) + self.assertTrue(result['no_unexpected_avus']) + self.assertTrue(len(result['missing_avus']) == 1) + self.assertTrue(len(result['unexpected_avus']) == 0) + + # Custom license, no license Uri (happy flow) + avs['org_publication_license'] = "Custom" + avus_custom_license = [Avu(attr, val, "") for attr, val in avs.items()] + result = check_data_package_system_avus(avus_custom_license) + self.assertTrue(result['no_missing_avus']) + self.assertTrue(result['no_unexpected_avus']) + self.assertTrue(len(result['missing_avus']) == 0) + self.assertTrue(len(result['unexpected_avus']) == 0) + # Unexpected avs['org_publication_userAddedSomethingWeird'] = "yodayoda:)" avus_unexpected = [Avu(attr, val, "") for attr, val in avs.items()] @@ -80,25 +156,43 @@ def test_check_data_package_system_avus(self): self.assertTrue(len(result['missing_avus']) == 1) self.assertTrue(len(result['unexpected_avus']) == 0) + # Success, latest version of a publication + avs = avs_success_data_package_multiversion + avus_success = [Avu(attr, val, "") for attr, val in avs.items()] + result = check_data_package_system_avus(avus_success) + self.assertTrue(result['no_missing_avus']) + self.assertTrue(result['no_unexpected_avus']) + self.assertTrue(len(result['missing_avus']) == 0) + self.assertTrue(len(result['unexpected_avus']) == 0) + + # Success, first version of a publication that has had other versions + avs = avs_success_data_package_multiversion_first + avus_success = [Avu(attr, val, "") for attr, val in avs.items()] + result = check_data_package_system_avus(avus_success) + self.assertTrue(result['no_missing_avus']) + self.assertTrue(result['no_unexpected_avus']) + self.assertTrue(len(result['missing_avus']) == 0) + self.assertTrue(len(result['unexpected_avus']) == 0) + def test_last_run_time_acceptable(self): """Test the last run time for copy to vault""" # No last run time (job hasn't been tried before) found = False last_run = 1 - self.assertEqual(last_run_time_acceptable("b", found, last_run, 300), True) + self.assertEqual(last_run_time_acceptable(found, last_run, 300), True) # Last run time greater than the backoff, so can run now = int(time.time()) found = True copy_backoff_time = 300 last_run = now - copy_backoff_time - 1 - self.assertEqual(last_run_time_acceptable("b", found, last_run, copy_backoff_time), True) + self.assertEqual(last_run_time_acceptable(found, last_run, copy_backoff_time), True) # Last run time more recent than the backoff, so should not run found = True copy_backoff_time = 300 last_run = now - self.assertEqual(last_run_time_acceptable("b", found, int(time.time()), copy_backoff_time), False) + self.assertEqual(last_run_time_acceptable(found, int(time.time()), copy_backoff_time), False) def test_human_readable_size(self): output = human_readable_size(0) diff --git a/util/misc.py b/util/misc.py index 57643d623..05696a0ee 100644 --- a/util/misc.py +++ b/util/misc.py @@ -17,41 +17,62 @@ def check_data_package_system_avus(extracted_avus): This function compares the AVUs of the provided data package against a set of ground truth AVUs derived from a successfully published data package. - :param extracted_avus: AVUs of the data package + :param extracted_avus: AVUs of the data package in AVU form :returns: Dictionary of the results of the check """ - # Filter those starting with 'org_' - extracted_avus = {m.attr for m in extracted_avus if m.attr.startswith(constants.UUORGMETADATAPREFIX + 'publication_')} + # Filter those starting with 'org_publication' + extracted_avs = {} + for m in extracted_avus: + if m.attr.startswith(constants.UUORGMETADATAPREFIX + 'publication_'): + extracted_avs[m.attr] = m.value + extracted_attrs = set(extracted_avs.keys()) # Define the set of ground truth AVUs - avu_names_suffix = [ + avu_names_suffix = { 'publication_approval_actor', 'publication_randomId', 'publication_versionDOI', 'publication_dataCiteJsonPath', 'publication_license', 'publication_anonymousAccess', 'publication_versionDOIMinted', 'publication_accessRestriction', 'publication_landingPagePath', - 'publication_licenseUri', 'publication_publicationDate', + 'publication_publicationDate', 'publication_vaultPackage', 'publication_submission_actor', 'publication_status', 'publication_lastModifiedDateTime', 'publication_combiJsonPath', 'publication_landingPageUploaded', 'publication_oaiUploaded', 'publication_landingPageUrl', 'publication_dataCiteMetadataPosted' - ] + } + + # If the license is not Custom, it must have a licenseUri + if constants.UUORGMETADATAPREFIX + 'publication_license' in extracted_attrs: + if extracted_avs[constants.UUORGMETADATAPREFIX + 'publication_license'] != "Custom": + avu_names_suffix.add('publication_licenseUri') - # Define set of AVUs with more than one version of publication - avu_names_base_suffix = [ + # Define additional set of AVUs with more than one version of publication + avu_names_version_suffix = { 'publication_previous_version', 'publication_baseDOI', 'publication_baseRandomId', 'publication_baseDOIMinted' - ] + } - if constants.UUORGMETADATAPREFIX + 'publication_previous_version' in extracted_avus: - combined_avu_names_suffix = avu_names_base_suffix + avu_names_suffix - ground_truth_avus = {constants.UUORGMETADATAPREFIX + name for name in combined_avu_names_suffix} - else: - ground_truth_avus = {constants.UUORGMETADATAPREFIX + name for name in avu_names_suffix} + # Define additional set of AVUs expected for the first version of a publication, when there are multiple versions + avu_names_first_version_suffix = { + 'publication_baseRandomId', 'publication_baseDOI', 'publication_next_version' + } + + # for the second version, all we need is next_version in addition to avu_names_version_suffix + avu_names_previous_version_suffix = {'publication_next_version'} + + combined_avu_names_suffix = avu_names_suffix + + if constants.UUORGMETADATAPREFIX + 'publication_previous_version' in extracted_attrs: + combined_avu_names_suffix.update(avu_names_version_suffix) + if constants.UUORGMETADATAPREFIX + 'publication_next_version' in extracted_attrs: + combined_avu_names_suffix.update(avu_names_previous_version_suffix) + elif constants.UUORGMETADATAPREFIX + 'publication_next_version' in extracted_attrs: + combined_avu_names_suffix.update(avu_names_first_version_suffix) + ground_truth_avus = {constants.UUORGMETADATAPREFIX + name for name in combined_avu_names_suffix} # Find missing and unexpected AVUs - missing_avus = ground_truth_avus - extracted_avus - unexpected_avus = extracted_avus - ground_truth_avus + missing_avus = ground_truth_avus - extracted_attrs + unexpected_avus = extracted_attrs - ground_truth_avus results = { 'no_missing_avus': not bool(missing_avus), @@ -63,7 +84,7 @@ def check_data_package_system_avus(extracted_avus): return results -def last_run_time_acceptable(coll, found, last_run, config_backoff_time): +def last_run_time_acceptable(found, last_run, config_backoff_time): """Return whether the last run time is acceptable to continue with task.""" now = int(time.time())