From b30006cb4373d7e5ded6e719f1a5305e648bcadd Mon Sep 17 00:00:00 2001 From: Harm de Raaff Date: Tue, 29 Aug 2023 12:20:32 +0200 Subject: [PATCH 01/30] YDA-5282: basic embargo lifting implementation --- json_landing_page.py | 12 ++ publication.py | 199 +++++++++++++++++++++++++++++++- templates/landingpage.html.j2 | 2 +- tools/add-embargo-indications.r | 20 ++++ tools/lift-embargos.r | 20 ++++ 5 files changed, 250 insertions(+), 3 deletions(-) create mode 100644 tools/add-embargo-indications.r create mode 100644 tools/lift-embargos.r diff --git a/json_landing_page.py b/json_landing_page.py index c377aa9fa..5b0a5d268 100644 --- a/json_landing_page.py +++ b/json_landing_page.py @@ -4,6 +4,8 @@ __copyright__ = 'Copyright (c) 2019-2023, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from datetime import datetime + import jinja2 from dateutil import parser @@ -82,6 +84,15 @@ def json_landing_page_create_json_landing_page(callback, rodsZone, template_name return landing_page # Gather all metadata. + + # Is this datapackage under embargo? + no_active_embargo = True + + # Datapackage under embargo? + embargo_end_date = dictJsonData.get('Embargo_End_Date', None) + if embargo_end_date is not None and len(embargo_end_date): + no_active_embargo = (datetime.now().strftime('%Y-%m-%d') >= embargo_end_date) + title = dictJsonData['Title'] description = dictJsonData['Description'] @@ -299,6 +310,7 @@ def json_landing_page_create_json_landing_page(callback, rodsZone, template_name # tm.globals['custom_function'] = custom_function tm.globals['persistent_identifier_to_uri'] = persistent_identifier_to_uri landing_page = tm.render( + no_active_embargo=no_active_embargo, title=title, description=description, datatype=datatype, diff --git a/publication.py b/publication.py index 29a8aa737..80a371727 100644 --- a/publication.py +++ b/publication.py @@ -4,6 +4,8 @@ __copyright__ = 'Copyright (c) 2019-2023, Utrecht University' __license__ = 'GPLv3, see LICENSE' +# from datetime import datetime + from datetime import datetime import genquery @@ -21,7 +23,160 @@ __all__ = ['rule_process_publication', 'rule_process_depublication', 'rule_process_republication', - 'rule_update_publication'] + 'rule_update_publication', + 'rule_lift_embargos_on_data_access', + 'rule_add_lift_embargo_indications'] + + +@rule.make() +def rule_add_lift_embargo_indications(ctx): + """ + # PURPOSE: give all published datapackages that are under embargo a lift_embargo_date indication (for further cronjob processing) + + Each indication will hold the actual embargo date. + This will be picked up by a cronjob that will lift the embargo if applicable. 
+    Then the indication will be removed so the datapackage will no longer be picked up.
+
+    """
+    # Check permissions - rodsadmin only
+    if user.user_type(ctx) != 'rodsadmin':
+        log.write(ctx, "User is not a rodsadmin")
+        return 'Insufficient permissions - should only be called by rodsadmin'
+
+    # Select all vault packages with an embargo date - no matter when
+    zone = user.zone(ctx)
+
+    # Find all packages that have an embargo date
+    iter = genquery.row_iterator(
+        "COLL_NAME, META_COLL_ATTR_VALUE",
+        "COLL_NAME like '" + "/{}/home/vault-%".format(zone) + "' AND META_COLL_ATTR_NAME = 'Embargo_End_Date'",
+        genquery.AS_LIST, ctx
+    )
+    for row in iter:
+        vault_package = row[0]
+        embargo_date = row[1]
+
+        # Only look at the PUBLISHED packages, so check first
+        iter2 = genquery.row_iterator(
+            "COLL_NAME, META_COLL_ATTR_VALUE",
+            "COLL_NAME = '" + vault_package + "' AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + "vault_status'",
+            genquery.AS_LIST, ctx
+        )
+
+        for row2 in iter2:
+            # Check whether lift_embargo_date is present already
+            iter3 = genquery.row_iterator(
+                "COLL_NAME, META_COLL_ATTR_VALUE",
+                "COLL_NAME = '" + vault_package + "' AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + "lift_embargo_date'",
+                genquery.AS_LIST, ctx
+            )
+            if not len(list(iter3)) > 0:
+                # Add lift_embargo indication
+                avu.set_on_coll(ctx, vault_package, constants.UUORGMETADATAPREFIX + 'lift_embargo_date', embargo_date)
+
+    return 'OK'
+
+
+@rule.make()
+def rule_lift_embargos_on_data_access(ctx):
+    """
+    PURPOSE: Find vault packages that have a data access embargo that can be lifted because the embargo has expired.
+
+    If lift_embargo_date < now:
+    - new landing page (add access link)
+    - secure copy new landing page to server
+    - set acls
+
+    If all went well => remove the lift_embargo_date indication.
+    If something went wrong, leave lift_embargo_date so it will be dealt with again the next time around.
+
+    :param ctx: Combined type of a callback and rei struct
+
+    :returns:
+    """
+    publication_state = {}
+
+    # Check permissions - rodsadmin only
+    if user.user_type(ctx) != 'rodsadmin':
+        log.write(ctx, "User is not a rodsadmin")
+        return 'Insufficient permissions - should only be called by rodsadmin'
+
+    # Get publication configuration
+    publication_config = get_publication_config(ctx)
+
+    zone = user.zone(ctx)
+
+    # Find all packages that have an embargo date for data access that must be lifted
+    iter = genquery.row_iterator(
+        "COLL_NAME, META_COLL_ATTR_VALUE",
+        "COLL_NAME like '" + "/{}/home/vault-%".format(zone) + "'"
+        " AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + 'lift_embargo_date' + "'"
+        " AND META_COLL_ATTR_VALUE > '{}'".format(datetime.now().strftime('%Y-%m-%d')),
+        genquery.AS_LIST, ctx
+    )
+    for row in iter:
+        vault_package = row[0]
+
+        log.write(ctx, "Lift embargo for package: " + vault_package)
+
+        # Per package (re)initialize publication state
+        publication_state = {}
+        publication_state["status"] = 'OK'
+
+        # For this vault package, which has an embargo date that needs to be lifted, find whether data access restrictions apply
+        iter2 = genquery.row_iterator(
+            "COLL_NAME, META_COLL_ATTR_VALUE",
+            "COLL_NAME = '{}' AND META_COLL_ATTR_NAME = 'Data_Access_Restriction'".format(vault_package),
+            genquery.AS_LIST, ctx
+        )
+        for row2 in iter2:
+            # Check data access restriction - must be open
+            if row2[1].startswith('open'):
+                # Set up publication_state in such a way that the landing page can be newly created and sent to the public host
+
+                # First find the random ID (org_publication_randomId), as everything hinges on that
+                random_id = ''
+                iter3 = genquery.row_iterator(
+                    "COLL_NAME, META_COLL_ATTR_VALUE",
+                    "COLL_NAME = '{}' AND META_COLL_ATTR_NAME = 'org_publication_randomId'".format(vault_package),
+                    genquery.AS_LIST, ctx
+                )
+                for row3 in iter3:
+                    random_id = row3[1]
+
+                publication_state["combiJsonPath"] = '/{}/yoda/publication/{}-combi.json'.format(zone, random_id)
+                publication_state["randomId"] = random_id
+                publication_state["vaultPackage"] = vault_package
+
+                # Adjust landing page: add data access link
+                try:
+                    generate_landing_page(ctx, publication_state, "publish")
+                    # Will hold publication_state["landingPagePath"] as required for secure copy
+                    log.write(ctx, publication_state["landingPagePath"])
+                except Exception:
+                    log.write(ctx, "Error while creating landing page with data access.")
+                    publication_state["status"] = "Unrecoverable"
+
+                if publication_state["status"] == "OK":
+                    # Secure copy to public host
+                    copy_landingpage_to_public_host(ctx, random_id, publication_config, publication_state)
+
+                    if publication_state.get("landingPageUploaded", None) != "yes":
+                        log.write(ctx, 'Could not securely upload adjusted landing page to public host')
+                        publication_state["status"] = "Unrecoverable"
+                    else:
+                        # Adjust ACLs so data can actually be reached
+                        try:
+                            msi.set_acl(ctx, "recursive", "read", "anonymous", vault_package)
+                        except Exception:
+                            log.write(ctx, "Could not set ACLs to read for: " + vault_package)
+                            publication_state["status"] = "Unrecoverable"
+
+        # If all went well, remove the lift embargo attribute so it will not be selected again the next time around
+        if publication_state["status"] == 'OK':
+            # Only remove when the embargo was lifted successfully.
+            # Not removing will ensure the entire process is repeated again next time around.
+            avu.rmw_from_coll(ctx, vault_package, constants.UUORGMETADATAPREFIX + 'lift_embargo_date', '%')
 
 
 def get_publication_config(ctx):
@@ -594,16 +749,56 @@ def copy_metadata_to_moai(ctx, random_id, publication_config, publication_state)
 def set_access_restrictions(ctx, vault_package, publication_state):
     """Set access restriction for vault package.
 
+    This function is called when (re)publishing a vault package.
+    The embargo date of a package is essential in determining access.
+    IF EMBARGO => set the embargo end date in lift_embargo_date so it will be picked up later by the cronjob
+
     :param ctx:               Combined type of a callback and rei struct
     :param vault_package:     Path to the package in the vault
     :param publication_state: Dict with state of the publication process
 
     :returns: None
     """
+    # Embargo handling
+    combiJsonPath = publication_state["combiJsonPath"]
+    dictJsonData = jsonutil.read(ctx, combiJsonPath, want_bytes=False)
+
+    # Remove empty lists, empty dicts, or None elements
+    # to prevent empty fields on landingpage.
+    dictJsonData = jsonutil.remove_empty(dictJsonData)
+
+    active_embargo = False
+
+    # Check whether lift_embargo_date is present already
+    iter = genquery.row_iterator(
+        "COLL_NAME, META_COLL_ATTR_VALUE",
+        "COLL_NAME = '" + vault_package + "' AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + "lift_embargo_date'",
+        genquery.AS_LIST, ctx
+    )
+    for row in iter:
+        # Just get rid of the previous lift_embargo_date.
+        # It will be introduced again below if required; this keeps the code focused on whether lift_embargo_date must be introduced or not.
+        avu.rm_from_coll(ctx, vault_package, constants.UUORGMETADATAPREFIX + 'lift_embargo_date', row[1])
+
+    # Datapackage under embargo?
+ embargo_end_date = dictJsonData.get('Embargo_End_Date', None) + if embargo_end_date is not None and len(embargo_end_date): + # String comparison is possible as both are in same string format YYYY-MM-DD + active_embargo = (datetime.now().strftime('%Y-%m-%d') < embargo_end_date) + access_restriction = publication_state["accessRestriction"] - access_level = "null" + # Lift embargo handling is only interesting when package has open access. if access_restriction.startswith('Open'): + if active_embargo: + # datapackage data is under embargo. + # Add indication to metadata on vault_package so cronjob can pick it up and sets acls when embargo date is passed in the FUTURE + avu.set_on_coll(ctx, vault_package, constants.UUORGMETADATAPREFIX + 'lift_embargo_date', embargo_end_date) + + # Now handle the data access taking possible embargo into account + access_level = "null" + # Only without an active embargo date AND open access is it allowed to read data! + if access_restriction.startswith('Open') and not active_embargo: access_level = "read" try: diff --git a/templates/landingpage.html.j2 b/templates/landingpage.html.j2 index 50ccd0a62..ffb30f8fd 100644 --- a/templates/landingpage.html.j2 +++ b/templates/landingpage.html.j2 @@ -94,7 +94,7 @@ {% endif%}
- {%if data_access_restriction.startswith('Open') %} + {%if no_active_embargo and data_access_restriction.startswith('Open') %} View contents diff --git a/tools/add-embargo-indications.r b/tools/add-embargo-indications.r new file mode 100644 index 000000000..516f52220 --- /dev/null +++ b/tools/add-embargo-indications.r @@ -0,0 +1,20 @@ +# Add Lift embargo inidications so the cron job to lift them can pick up these indicated vault packages +run { + uuGetUserType("$userNameClient#$rodsZoneClient", *usertype); + + if (*usertype != "rodsadmin") { + failmsg(-1, "This script needs to be run by a rodsadmin"); + } + + # Retrieve current timestamp. + msiGetIcatTime(*timestamp, "human"); + writeLine('stdout', '[' ++ *timestamp ++ '] Start adding lift embargo indications to vault packages'); + + *result = rule_add_lift_embargo_indications(); + + writeLine('stdout', 'Status: Finished adding lift embargo indications to vault packages'); + writeLine('stdout', *result); + +} +input null +output ruleExecOut diff --git a/tools/lift-embargos.r b/tools/lift-embargos.r new file mode 100644 index 000000000..94f6d15c8 --- /dev/null +++ b/tools/lift-embargos.r @@ -0,0 +1,20 @@ +# Lift embargo on data access when embargo date is passed +run { + uuGetUserType("$userNameClient#$rodsZoneClient", *usertype); + + if (*usertype != "rodsadmin") { + failmsg(-1, "This script needs to be run by a rodsadmin"); + } + + # Retrieve current timestamp. + msiGetIcatTime(*timestamp, "human"); + writeLine('stdout', '[' ++ *timestamp ++ '] Start finding data access under embargo that must be lifted'); + + *result = rule_lift_embargos_on_data_access(); + + writeLine('stdout', 'Status: Finished finding of data under embargo that must be lifted'); + writeLine('stdout', *result); + +} +input null +output ruleExecOut From 72dbefb18502dbe40b52a06fb1a98b24400e6a4e Mon Sep 17 00:00:00 2001 From: Harm de Raaff Date: Mon, 18 Sep 2023 18:39:27 +0200 Subject: [PATCH 02/30] flake8 --- publication.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/publication.py b/publication.py index 80a371727..19a0a6439 100644 --- a/publication.py +++ b/publication.py @@ -154,8 +154,8 @@ def rule_lift_embargos_on_data_access(ctx): # will hold publication_state["landingPagePath"] as required for secure copy log.write(ctx, publication_state["landingPagePath"]) except Exception: - log.write(ctx, "Error while creating landing page with data access.") - publication_state["status"] = "Unrecoverable" + log.write(ctx, "Error while creating landing page with data access.") + publication_state["status"] = "Unrecoverable" if publication_state["status"] == "OK": # Secure copy to public host @@ -771,9 +771,9 @@ def set_access_restrictions(ctx, vault_package, publication_state): # Check whether lift_embargo_date is present already iter = genquery.row_iterator( - "COLL_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME = '" + vault_package + "' AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + "lift_embargo_date'", - genquery.AS_LIST, ctx + "COLL_NAME, META_COLL_ATTR_VALUE", + "COLL_NAME = '" + vault_package + "' AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + "lift_embargo_date'", + genquery.AS_LIST, ctx ) for row in iter: # Just get rid of the previous lift_embargo_date. From 82afcd5454654e72c3d2cfa39ecfff317b2ea34a Mon Sep 17 00:00:00 2001 From: kaur16 <126662478+kaur16@users.noreply.github.com> Date: Wed, 23 Aug 2023 11:48:12 +0200 Subject: [PATCH 03/30] YDA-5309 - Added transformation script for existing publications. 
(#319) --- tools/transform-existing-publications.r | 36 +++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 tools/transform-existing-publications.r diff --git a/tools/transform-existing-publications.r b/tools/transform-existing-publications.r new file mode 100644 index 000000000..f3cd1ebe6 --- /dev/null +++ b/tools/transform-existing-publications.r @@ -0,0 +1,36 @@ +#!/usr/bin/irule -r irods_rule_engine_plugin-python-instance -F +# +# Transform existing publications according to the new changes in the publication process. +# This script handles converting all the prefixes from yoda to version. +# Additionally, this script will also add prefix version to DOIAvailable and DOI Minted variables. +# +# +import subprocess +import genquery +import session_vars + +def main(rule_args, callback, rei): + zone = session_vars.get_map(rei)['client_user']['irods_zone'] + + # Changing yoda prefix -> version + iter = genquery.row_iterator( + "COLL_NAME, META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE", + "USER_ZONE = '{}' AND META_COLL_ATTR_NAME LIKE 'org_publication_yoda%'".format(zone), + genquery.AS_TUPLE, + callback) + + iter2 = genquery.row_iterator( + "COLL_NAME, META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE", + "USER_ZONE = '{}' AND META_COLL_ATTR_NAME in ('org_publication_DOIAvailable', 'org_publication_DOIMinted')".format(zone), + genquery.AS_TUPLE, + callback) + + for row in iter: + subprocess.call(["imeta", "mod", "-C", row[0], row[1], row[2], "n:{}".format(row[1].replace("yoda", "version")), "v:{}".format(row[2])]) + + for row in iter2: + attr_name = row[1].rsplit('_', 1)[0] + "_version" + row[1].split('_')[-1] + subprocess.call(["imeta", "mod", "-C", row[0], row[1], row[2], "n:{}".format(attr_name), "v:{}".format(row[2])]) + +INPUT null +OUTPUT ruleExecOut \ No newline at end of file From eddf06a9e302ab8c86f6c3c97b7feec23678ccd7 Mon Sep 17 00:00:00 2001 From: Sirjan Kaur Date: Wed, 23 Aug 2023 09:10:42 -0400 Subject: [PATCH 04/30] YDA-5363 - Removed hardcoded code (previously used for backward compatibility) --- publication.py | 26 ++++++++----------------- tools/transform-existing-publications.r | 2 +- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/publication.py b/publication.py index 19a0a6439..07d3a7bb9 100644 --- a/publication.py +++ b/publication.py @@ -237,11 +237,7 @@ def generate_combi_json(ctx, publication_config, publication_state): vaultPackage = publication_state["vaultPackage"] randomId = publication_state["randomId"] combiJsonPath = temp_coll + "/" + randomId + "-combi.json" - - if "versionDOI" in publication_state: - versionDOI = publication_state["versionDOI"] - else: - versionDOI = publication_state['yodaDOI'] + versionDOI = publication_state["versionDOI"] lastModifiedDateTime = publication_state["lastModifiedDateTime"] publicationDate = publication_state["publicationDate"] @@ -276,10 +272,7 @@ def generate_system_json(ctx, publication_state): randomId = publication_state["randomId"] system_json_path = temp_coll + "/" + randomId + "-combi.json" - if "versionDOI" in publication_state: - doi = publication_state["versionDOI"] - else: - doi = publication_state["yodaDOI"] + doi = publication_state["versionDOI"] system_json_data = { "System": { @@ -932,14 +925,14 @@ def process_publication(ctx, vault_package): publication_state["publicationDate"] = get_publication_date(ctx, vault_package) # DOI handling - if "versionDOI" not in publication_state and "yodaDOI" not in publication_state: + if "versionDOI" not in publication_state: if verbose: 
log.write(ctx, "Generating preliminary DOI.") generate_preliminary_DOI(ctx, publication_config, publication_state) save_publication_state(ctx, vault_package, publication_state) - elif "versionDOIAvailable" in publication_state or "DOIAvailable" in publication_state: + elif "versionDOIAvailable" in publication_state: if publication_state["versionDOIAvailable"] == "no": if verbose: log.write(ctx, "Version DOI available: no") @@ -998,7 +991,7 @@ def process_publication(ctx, vault_package): return publication_state["status"] # Check if DOI is in use - if "versionDOIAvailable" not in publication_state and "DOIAvailable" not in publication_state: + if "versionDOIAvailable" not in publication_state: if verbose: log.write(ctx, "Checking whether version DOI is available.") @@ -1019,7 +1012,7 @@ def process_publication(ctx, vault_package): # Determine whether an update ('put') or create ('post') message has to be sent to datacite datacite_action = 'post' try: - if publication_state['versionDOIMinted'] == 'yes' or publication_state['DOIMinted'] == 'yes': + if publication_state['versionDOIMinted'] == 'yes': datacite_action = 'put' except KeyError: pass @@ -1125,7 +1118,7 @@ def process_publication(ctx, vault_package): return publication_state["status"] # Mint DOI with landing page URL. - if "versionDOIMinted" not in publication_state and "DOIMinted" not in publication_state: + if "versionDOIMinted" not in publication_state: if verbose: log.write(ctx, "Minting DOI.") mint_doi(ctx, publication_state, 'version') @@ -1529,10 +1522,7 @@ def update_publication(ctx, vault_package, update_datacite=False, update_landing # Send DataCite JSON to metadata end point try: - if "versionDOI" in publication_state: - post_metadata_to_datacite(ctx, publication_state, publication_state["versionDOI"], 'put') - else: - post_metadata_to_datacite(ctx, publication_state, publication_state["yodaDOI"], 'put') + post_metadata_to_datacite(ctx, publication_state, publication_state["versionDOI"], 'put') if update_base_doi: post_metadata_to_datacite(ctx, publication_state, publication_state["baseDOI"], 'put') except Exception: diff --git a/tools/transform-existing-publications.r b/tools/transform-existing-publications.r index f3cd1ebe6..91526ad75 100644 --- a/tools/transform-existing-publications.r +++ b/tools/transform-existing-publications.r @@ -2,7 +2,7 @@ # # Transform existing publications according to the new changes in the publication process. # This script handles converting all the prefixes from yoda to version. -# Additionally, this script will also add prefix version to DOIAvailable and DOI Minted variables. +# Additionally, this script will add prefix version to DOIAvailable and DOI Minted variables. # # import subprocess From 5574b1e1dda961ddb1b8e7094efaca835c60fd32 Mon Sep 17 00:00:00 2001 From: "Felix A. Croes" Date: Wed, 9 Aug 2023 14:20:39 +0200 Subject: [PATCH 05/30] YDA-5314 Enable indexing on index collection. --- folder.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/folder.py b/folder.py index 07de6bc7f..61e255883 100644 --- a/folder.py +++ b/folder.py @@ -248,7 +248,8 @@ def folder_secure(ctx, coll, target): """ # Enable indexing on vault target. 
if collection_group_name(ctx, coll).startswith("deposit-"): - subprocess.call(["imeta", "add", "-C", target, "irods::indexing::index", "yoda::metadata", "elasticsearch"]) + msi.coll_create(ctx, coll + "/index", "", irods_types.BytesBuf()) + subprocess.call(["imeta", "add", "-C", coll + "/index", "irods::indexing::index", "yoda::metadata", "elasticsearch"]) # Starting point of last part of securing a folder into the vault msi.check_access(ctx, coll, 'modify object', irods_types.BytesBuf()) From d6bb25fef7a8768d106309e2e81adf850f5ad432 Mon Sep 17 00:00:00 2001 From: "Felix A. Croes" Date: Fri, 18 Aug 2023 17:10:50 +0800 Subject: [PATCH 06/30] YDA-5314 Put flat metadata on index subcollection. As well as the irods::indexing::index attribute. --- folder.py | 9 ++++----- meta.py | 35 ++++++++++++++++++----------------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/folder.py b/folder.py index 61e255883..3a9d1687a 100644 --- a/folder.py +++ b/folder.py @@ -246,11 +246,6 @@ def folder_secure(ctx, coll, target): ctx.iiCopyFolderToVault(coll, target) """ - # Enable indexing on vault target. - if collection_group_name(ctx, coll).startswith("deposit-"): - msi.coll_create(ctx, coll + "/index", "", irods_types.BytesBuf()) - subprocess.call(["imeta", "add", "-C", coll + "/index", "irods::indexing::index", "yoda::metadata", "elasticsearch"]) - # Starting point of last part of securing a folder into the vault msi.check_access(ctx, coll, 'modify object', irods_types.BytesBuf()) modify_access = msi.check_access(ctx, coll, 'modify object', irods_types.BytesBuf())['arguments'][2] @@ -263,6 +258,10 @@ def folder_secure(ctx, coll, target): vault.vault_copy_original_metadata_to_vault(ctx, target) vault.vault_write_license(ctx, target) + # Enable indexing on vault target. + if config.enable_open_search and collection_group_name(ctx, coll).startswith("deposit-"): + subprocess.call(["imeta", "add", "-C", target + "/index", "irods::indexing::index", "yoda::metadata", "elasticsearch"]) + # Copy provenance log from research folder to vault package. 
provenance.provenance_copy_log(ctx, coll, target) diff --git a/meta.py b/meta.py index a36ababea..c76388d97 100644 --- a/meta.py +++ b/meta.py @@ -380,67 +380,68 @@ def ingest_metadata_staging(ctx, path): def update_index_metadata(ctx, path, metadata, creation_time, data_package): """Update the index attributes for JSON metadata.""" - ctx.msi_rmw_avu('-d', path, '%', '%', constants.UUFLATINDEX) + msi.coll_create(ctx, path, "", irods_types.BytesBuf()) + ctx.msi_rmw_avu('-C', path, '%', '%', constants.UUFLATINDEX) for creator in metadata['Creator']: name = creator['Name'] if 'Given_Name' in name and 'Family_Name' in name: - ctx.msi_add_avu('-d', path, 'Creator', + ctx.msi_add_avu('-C', path, 'Creator', name['Given_Name'] + ' ' + name['Family_Name'], constants.UUFLATINDEX) if 'Owner_Role' in creator: - ctx.msi_add_avu('-d', path, 'Owner_Role', creator['Owner_Role'], + ctx.msi_add_avu('-C', path, 'Owner_Role', creator['Owner_Role'], constants.UUFLATINDEX) if 'Contributor' in metadata: for contributor in metadata['Contributor']: name = contributor['Name'] if 'Given_Name' in name and 'Family_Name' in name: - ctx.msi_add_avu('-d', path, 'Contributor', + ctx.msi_add_avu('-C', path, 'Contributor', name['Given_Name'] + ' ' + name['Family_Name'], constants.UUFLATINDEX) if 'Tag' in metadata: for tag in metadata['Tag']: - ctx.msi_add_avu('-d', path, 'Tag', tag, + ctx.msi_add_avu('-C', path, 'Tag', tag, constants.UUFLATINDEX) - ctx.msi_add_avu('-d', path, 'Title', metadata['Title'], + ctx.msi_add_avu('-C', path, 'Title', metadata['Title'], constants.UUFLATINDEX) - ctx.msi_add_avu('-d', path, 'Description', metadata['Description'], + ctx.msi_add_avu('-C', path, 'Description', metadata['Description'], constants.UUFLATINDEX) - ctx.msi_add_avu('-d', path, 'Data_Access_Restriction', + ctx.msi_add_avu('-C', path, 'Data_Access_Restriction', metadata['Data_Access_Restriction'], constants.UUFLATINDEX) if 'Research_Group' in metadata: - ctx.msi_add_avu('-d', path, 'Research_Group', + ctx.msi_add_avu('-C', path, 'Research_Group', metadata['Research_Group'], constants.UUFLATINDEX) if 'Collection_Name' in metadata: - ctx.msi_add_avu('-d', path, 'Collection_Name', + ctx.msi_add_avu('-C', path, 'Collection_Name', metadata['Collection_Name'], constants.UUFLATINDEX) if 'Collected' in metadata: if 'Start_Date' in metadata['Collected']: - ctx.msi_add_avu('-d', path, 'Collected_Start_Year', + ctx.msi_add_avu('-C', path, 'Collected_Start_Year', metadata['Collected']['Start_Date'][:4], constants.UUFLATINDEX) if 'End_Date' in metadata['Collected']: - ctx.msi_add_avu('-d', path, 'Collected_End_Year', + ctx.msi_add_avu('-C', path, 'Collected_End_Year', metadata['Collected']['End_Date'][:4], constants.UUFLATINDEX) if 'GeoLocation' in metadata: for geoLocation in metadata['GeoLocation']: if 'Description_Spatial' in geoLocation: - ctx.msi_add_avu('-d', path, 'Description_Spatial', geoLocation['Description_Spatial'], + ctx.msi_add_avu('-C', path, 'Description_Spatial', geoLocation['Description_Spatial'], constants.UUFLATINDEX) - ctx.msi_add_avu('-d', path, 'Creation_Time', creation_time, + ctx.msi_add_avu('-C', path, 'Creation_Time', creation_time, constants.UUFLATINDEX) - ctx.msi_add_avu('-d', path, 'Creation_Year', + ctx.msi_add_avu('-C', path, 'Creation_Year', str(datetime.fromtimestamp(int(creation_time)).year), constants.UUFLATINDEX) if config.enable_data_package_reference: - ctx.msi_add_avu('-d', path, 'Data_Package_Reference', data_package, + ctx.msi_add_avu('-C', path, 'Data_Package_Reference', data_package, 
constants.UUFLATINDEX) @@ -482,7 +483,7 @@ def ingest_metadata_vault(ctx, path): # Update flat index metadata for OpenSearch. if config.enable_open_search: - update_index_metadata(ctx, path, metadata, creation_time, data_package) + update_index_metadata(ctx, coll + "/index", metadata, creation_time, data_package) # Remove any remaining legacy XML-style AVUs. ctx.iiRemoveAVUs(coll, constants.UUUSERMETADATAPREFIX) From bdfe8aecbbe1d8864605106c940eae166b8ecca7 Mon Sep 17 00:00:00 2001 From: "Felix A. Croes" Date: Fri, 25 Aug 2023 11:44:42 +0800 Subject: [PATCH 07/30] YDA-5314 Hide index collection in the browser. --- browse.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/browse.py b/browse.py index 1455622be..54bf7a8dc 100644 --- a/browse.py +++ b/browse.py @@ -82,7 +82,7 @@ def transform(row): offset=offset, limit=limit, output=AS_DICT) elif space == str(pathutil.Space.VAULT): qcoll = Query(ctx, ccols, - "COLL_PARENT_NAME = '{}' AND COLL_NAME like '/{}/home/%vault-%'".format(coll, zone), + "COLL_PARENT_NAME = '{}' AND COLL_NAME like '/{}/home/%vault-%' AND COLL_NAME not like '/{}/home/%vault-%/%/index'".format(coll, zone, zone), offset=offset, limit=limit, output=AS_DICT) else: qcoll = Query(ctx, ccols, "COLL_PARENT_NAME = '{}'".format(coll), @@ -182,7 +182,7 @@ def transform(row): offset=offset, limit=limit, output=AS_DICT) elif space == str(pathutil.Space.VAULT): qcoll = Query(ctx, ccols, - "COLL_PARENT_NAME = '{}' AND COLL_NAME like '/{}/home/%vault-%'".format(coll, zone), + "COLL_PARENT_NAME = '{}' AND COLL_NAME like '/{}/home/%vault-%' AND COLL_NAME not like '/{}/home/%vault-%/%/index'".format(coll, zone, zone), offset=offset, limit=limit, output=AS_DICT) else: qcoll = Query(ctx, ccols, "COLL_PARENT_NAME = '{}'".format(coll), @@ -280,6 +280,9 @@ def transform(row): status_name, status_value, "/" + zone + "/home" ) + # exclude index collections + where = where + " AND COLL_NAME not like '/{}/home/%vault-%/%/index'".format(zone) + if sort_order == 'desc': cols = [x.replace('ORDER(', 'ORDER_DESC(') for x in cols] From 2572c5a490ca31104a58c8cfef41b9a52958e8f0 Mon Sep 17 00:00:00 2001 From: "Felix A. Croes" Date: Fri, 25 Aug 2023 15:38:45 +0800 Subject: [PATCH 08/30] YDA-5314 Fix maintenance scripts. The opensearch index can be regenerated with: irule -r irods_rule_engine_plugin-irods_rule_language-instance -F tools/index/remove-vault-indexing.r tools/index/init-index.sh http://combined.yoda.test:9200 irule -r irods_rule_engine_plugin-irods_rule_language-instance -F tools/index/add-vault-indexing.r Removing the indexing attribute will not trigger an opensearch update, and will not start any delayed rules. --- folder.py | 4 +-- tools/index/add-vault-indexing.r | 4 ++- tools/index/remove-vault-indexing.r | 4 ++- vault.py | 39 +++++++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 5 deletions(-) diff --git a/folder.py b/folder.py index 3a9d1687a..cdeec1fb6 100644 --- a/folder.py +++ b/folder.py @@ -4,7 +4,6 @@ __copyright__ = 'Copyright (c) 2019-2022, Utrecht University' __license__ = 'GPLv3, see LICENSE' -import subprocess import uuid import genquery @@ -259,8 +258,7 @@ def folder_secure(ctx, coll, target): vault.vault_write_license(ctx, target) # Enable indexing on vault target. 
- if config.enable_open_search and collection_group_name(ctx, coll).startswith("deposit-"): - subprocess.call(["imeta", "add", "-C", target + "/index", "irods::indexing::index", "yoda::metadata", "elasticsearch"]) + vault.vault_enable_indexing(ctx, target) # Copy provenance log from research folder to vault package. provenance.provenance_copy_log(ctx, coll, target) diff --git a/tools/index/add-vault-indexing.r b/tools/index/add-vault-indexing.r index 5b07738cf..d04783560 100755 --- a/tools/index/add-vault-indexing.r +++ b/tools/index/add-vault-indexing.r @@ -9,7 +9,9 @@ addVaultIndexing { *vaultHome = "/" ++ $rodsZoneClient ++ "/home/vault-pilot"; if (uuCollectionExists(*vaultHome)) { foreach (*row in SELECT COLL_NAME WHERE COLL_PARENT_NAME = *vaultHome) { - msiExecCmd("enable-indexing.sh", *row.COLL_NAME, "", "", 0, *out); + *coll = *row.COLL_NAME; + *out = ""; + rule_vault_enable_indexing(*coll, *out); } } } diff --git a/tools/index/remove-vault-indexing.r b/tools/index/remove-vault-indexing.r index d916a0748..52a77a142 100755 --- a/tools/index/remove-vault-indexing.r +++ b/tools/index/remove-vault-indexing.r @@ -9,7 +9,9 @@ removeVaultIndexing { *vaultHome = "/" ++ $rodsZoneClient ++ "/home/vault-pilot"; if (uuCollectionExists(*vaultHome)) { foreach (*row in SELECT COLL_NAME WHERE COLL_PARENT_NAME = *vaultHome) { - msiExecCmd("disable-indexing.sh", *row.COLL_NAME, "", "", 0, *out); + *coll = *row.COLL_NAME; + *out = ""; + rule_vault_disable_indexing(*coll, *out); } } } diff --git a/vault.py b/vault.py index 1b244f88c..4b3268b6b 100644 --- a/vault.py +++ b/vault.py @@ -7,6 +7,7 @@ import itertools import os import re +import subprocess import time from datetime import datetime @@ -31,6 +32,8 @@ 'api_vault_unpreservable_files', 'rule_vault_copy_original_metadata_to_vault', 'rule_vault_write_license', + 'rule_vault_enable_indexing', + 'rule_vault_disable_indexing', 'rule_vault_process_status_transitions', 'api_vault_system_metadata', 'api_vault_collection_details', @@ -368,6 +371,42 @@ def vault_write_license(ctx, vault_pkg_coll): log.write(ctx, "rule_vault_write_license: License URI not available for <{}>".format(license)) +@rule.make(inputs=[0], outputs=[1]) +def rule_vault_enable_indexing(ctx, coll): + vault_enable_indexing(ctx, coll) + return "Success" + + +def vault_enable_indexing(ctx, coll): + if config.enable_open_search and folder.collection_group_name(ctx, coll).startswith("deposit-"): + if not collection.exists(ctx, coll + "/index"): + # index collection does not exist yet + path = meta.get_latest_vault_metadata_path(ctx, coll) + ctx.msi_rmw_avu('-d', path, '%', '%', constants.UUFLATINDEX) + meta.ingest_metadata_vault(ctx, path) + + # add indexing attribute and update opensearch + subprocess.call(["imeta", "add", "-C", coll + "/index", "irods::indexing::index", "yoda::metadata", "elasticsearch"]) + + +@rule.make(inputs=[0], outputs=[1]) +def rule_vault_disable_indexing(ctx, coll): + vault_disable_indexing(ctx, coll) + return "Success" + + +def vault_disable_indexing(ctx, coll): + if config.enable_open_search and folder.collection_group_name(ctx, coll).startswith("deposit-"): + if collection.exists(ctx, coll + "/index"): + coll = coll + "/index" + + # tricky: remove indexing attribute without updating opensearch + try: + msi.mod_avu_metadata(ctx, "-C", coll, "rm", "irods::indexing::index", "yoda::metadata", "elasticsearch") + except Exception: + pass + + @api.make() def api_vault_system_metadata(ctx, coll): """Return system metadata of a vault collection. 
From cfdb3687553108f019ddb446d917d663cef80141 Mon Sep 17 00:00:00 2001 From: "Felix A. Croes" Date: Fri, 25 Aug 2023 16:22:04 +0800 Subject: [PATCH 09/30] YDA-5314 Perform validity checks in the right places. --- folder.py | 3 ++- vault.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/folder.py b/folder.py index cdeec1fb6..c833f1c53 100644 --- a/folder.py +++ b/folder.py @@ -258,7 +258,8 @@ def folder_secure(ctx, coll, target): vault.vault_write_license(ctx, target) # Enable indexing on vault target. - vault.vault_enable_indexing(ctx, target) + if collection_group_name(ctx, coll).startswith("deposit-"): + vault.vault_enable_indexing(ctx, target) # Copy provenance log from research folder to vault package. provenance.provenance_copy_log(ctx, coll, target) diff --git a/vault.py b/vault.py index 4b3268b6b..357490257 100644 --- a/vault.py +++ b/vault.py @@ -378,7 +378,7 @@ def rule_vault_enable_indexing(ctx, coll): def vault_enable_indexing(ctx, coll): - if config.enable_open_search and folder.collection_group_name(ctx, coll).startswith("deposit-"): + if config.enable_open_search: if not collection.exists(ctx, coll + "/index"): # index collection does not exist yet path = meta.get_latest_vault_metadata_path(ctx, coll) @@ -396,7 +396,7 @@ def rule_vault_disable_indexing(ctx, coll): def vault_disable_indexing(ctx, coll): - if config.enable_open_search and folder.collection_group_name(ctx, coll).startswith("deposit-"): + if config.enable_open_search: if collection.exists(ctx, coll + "/index"): coll = coll + "/index" From 65e48247292a7dba49e59775e2f3d26fdecc703a Mon Sep 17 00:00:00 2001 From: Sietse Snel Date: Thu, 31 Aug 2023 16:38:03 +0200 Subject: [PATCH 10/30] YDA-5379: tighten filter for deposit index coll The portal should not display internal index collections in vault deposit groups. This change improves the matching of collection names, so that collections named 'index' in archived data (e.g. /tempZone/home/vault-deposit/deposit[123][456]/original/index) are shown normally , whereas internal index collections (e.g. /tempZone/home/vault-deposit/deposit[123][456]/index) are filtered out. 
--- browse.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/browse.py b/browse.py index 54bf7a8dc..998ac4659 100644 --- a/browse.py +++ b/browse.py @@ -82,13 +82,13 @@ def transform(row): offset=offset, limit=limit, output=AS_DICT) elif space == str(pathutil.Space.VAULT): qcoll = Query(ctx, ccols, - "COLL_PARENT_NAME = '{}' AND COLL_NAME like '/{}/home/%vault-%' AND COLL_NAME not like '/{}/home/%vault-%/%/index'".format(coll, zone, zone), + "COLL_PARENT_NAME = '{}' AND COLL_NAME like '/{}/home/%vault-%'".format(coll, zone), offset=offset, limit=limit, output=AS_DICT) else: qcoll = Query(ctx, ccols, "COLL_PARENT_NAME = '{}'".format(coll), offset=offset, limit=limit, output=AS_DICT) - colls = map(transform, list(qcoll)) + colls = map(transform, [c for c in list(qcoll) if _filter_vault_deposit_index(c)]) qdata = Query(ctx, dcols, "COLL_NAME = '{}'".format(coll), offset=max(0, offset - qcoll.total_rows()), limit=limit - len(colls), output=AS_DICT) @@ -182,13 +182,13 @@ def transform(row): offset=offset, limit=limit, output=AS_DICT) elif space == str(pathutil.Space.VAULT): qcoll = Query(ctx, ccols, - "COLL_PARENT_NAME = '{}' AND COLL_NAME like '/{}/home/%vault-%' AND COLL_NAME not like '/{}/home/%vault-%/%/index'".format(coll, zone, zone), + "COLL_PARENT_NAME = '{}' AND COLL_NAME like '/{}/home/%vault-%'".format(coll, zone), offset=offset, limit=limit, output=AS_DICT) else: qcoll = Query(ctx, ccols, "COLL_PARENT_NAME = '{}'".format(coll), offset=offset, limit=limit, output=AS_DICT) - colls = map(transform, list(qcoll)) + colls = map(transform, [d for d in list(qcoll) if _filter_vault_deposit_index(d)]) if len(colls) == 0: # No results at all? @@ -280,16 +280,29 @@ def transform(row): status_name, status_value, "/" + zone + "/home" ) - # exclude index collections - where = where + " AND COLL_NAME not like '/{}/home/%vault-%/%/index'".format(zone) - if sort_order == 'desc': cols = [x.replace('ORDER(', 'ORDER_DESC(') for x in cols] qdata = Query(ctx, cols, where, offset=max(0, int(offset)), limit=int(limit), case_sensitive=query_is_case_sensitive, output=AS_DICT) - datas = map(transform, list(qdata)) + datas = map(transform, [d for d in list(qdata) if _filter_vault_deposit_index(d)]) return OrderedDict([('total', qdata.total_rows()), ('items', datas)]) + + +def _filter_vault_deposit_index(row): + """This internal function filters out index collections in deposit vault collections. + These collections are used internally by Yoda for indexing data package metadata, and + should not be displayed. + + :param row: row of results data from GenQuery, containing collection name (COLL_NAME) + + :returns: boolean value that indicated whether row should be displayed + """ + # Remove ORDER_BY etc. wrappers from column names. + x = {re.sub('.*\((.*)\)', '\\1', k): v for k, v in row.items()} + # Filter out deposit vault index collection + return not re.match("^/[^/]+/home/vault-[^/]+/deposit-[^/]+/index$", + x['COLL_NAME']) From 7ac5aeba13b804b0d84465fbe10b8c359ef41bb4 Mon Sep 17 00:00:00 2001 From: "Felix A. Croes" Date: Thu, 31 Aug 2023 12:16:10 +0200 Subject: [PATCH 11/30] YDA-5375 Run revisions cleanup in batches. 
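The approach is to gather the revision information once and then hand it to the clean-up rule in fixed-size chunks, so a single rule invocation never has to walk the entire revision store. A simplified sketch of that batching pattern (illustrative names only; the actual driver added below, tools/revision-clean-up.py, passes each JSON chunk to rule_revisions_clean_up via irule):

    import json

    def clean_up_in_batches(revisions_info, process_chunk, batch_size=100):
        # Feed at most batch_size revision entries to each call of process_chunk.
        while len(revisions_info) > batch_size:
            process_chunk(json.dumps(revisions_info[:batch_size]))
            revisions_info = revisions_info[batch_size:]
        return process_chunk(json.dumps(revisions_info))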
--- revisions.py | 63 ++++++++++++++++++++++---------------- tools/revision-clean-up.py | 44 ++++++++++++++++++++++++++ tools/revision-clean-up.r | 9 ------ 3 files changed, 81 insertions(+), 35 deletions(-) create mode 100755 tools/revision-clean-up.py delete mode 100644 tools/revision-clean-up.r diff --git a/revisions.py b/revisions.py index b079d69b9..b4c6cdb9f 100644 --- a/revisions.py +++ b/revisions.py @@ -6,6 +6,7 @@ import datetime import hashlib +import json import os import random import time @@ -21,6 +22,7 @@ 'api_revisions_search_on_filename', 'api_revisions_list', 'rule_revision_batch', + 'rule_revisions_info', 'rule_revisions_clean_up'] @@ -521,15 +523,10 @@ def revision_create(ctx, resource, data_id, max_size, verbose): return revision_created -@rule.make(inputs=range(2), outputs=range(2, 3)) -def rule_revisions_clean_up(ctx, bucketcase, endOfCalendarDay): - """Step through entire revision store and apply the chosen bucket strategy. - - :param ctx: Combined type of a callback and rei struct - :param bucketcase: Multiple ways of cleaning up revisions can be chosen. - :param endOfCalendarDay: If zero, system will determine end of current day in seconds since epoch (1970-01-01 00:00 UTC) - - :returns: String with status of cleanup +@rule.make(inputs=[], outputs=[0]) +def rule_revisions_info(ctx): + """Obtain information about all revisions. + :returns: Json string with info about revisions """ zone = user.zone(ctx) revision_store = '/' + zone + constants.UUREVISIONCOLLECTION @@ -538,13 +535,6 @@ def rule_revisions_clean_up(ctx, bucketcase, endOfCalendarDay): msi.set_acl(ctx, "recursive", "admin:own", user.full_name(ctx), revision_store) msi.set_acl(ctx, "recursive", "inherit", user.full_name(ctx), revision_store) - end_of_calendar_day = int(endOfCalendarDay) - if end_of_calendar_day == 0: - end_of_calendar_day = calculate_end_of_calendar_day(ctx) - - # get definition of buckets - buckets = revision_bucket_list(ctx, bucketcase) - # first, get original_path and ids for every revision iter = genquery.row_iterator( "order(META_DATA_ATTR_VALUE), order_desc(DATA_ID)", @@ -573,18 +563,39 @@ def rule_revisions_clean_up(ctx, bucketcase, endOfCalendarDay): revision_id = row[0] path = row[1] + "/" + row[2] modify_time = row[3] - if revision_id in rev_dict: - rev_dict[revision_id].append([modify_time, path]) - else: - rev_dict[revision_id] = [[modify_time, path]] + rev_dict[revision_id] = [int(revision_id), int(modify_time), path] + + # collate revision info + revisions_info = [] + for revisions in path_dict.values(): + revision_list = [] + for revision_id in revisions: + revision_list.append(rev_dict[revision_id]) + revisions_info.append(revision_list) + return json.dumps(revisions_info) - for revlist in path_dict.values(): - # make list of [revision_id, modify_time] pairs - revisions = [] - for revision_id in revlist: - if revision_id in rev_dict: - revisions.append([revision_id, rev_dict[revision_id][0]]) +@rule.make(inputs=[0, 1, 2], outputs=[3]) +def rule_revisions_clean_up(ctx, revisions_info, bucketcase, endOfCalendarDay): + """Step through part of revision store and apply the chosen bucket strategy. + + :param ctx: Combined type of a callback and rei struct + :param revisions_info: Json-encoded revision info. + :param bucketcase: Multiple ways of cleaning up revisions can be chosen. 
+ :param endOfCalendarDay: If zero, system will determine end of current day in seconds since epoch (1970-01-01 00:00 UTC) + + :returns: String with status of cleanup + """ + revisions_list = json.loads(revisions_info) + + end_of_calendar_day = int(endOfCalendarDay) + if end_of_calendar_day == 0: + end_of_calendar_day = calculate_end_of_calendar_day(ctx) + + # get definition of buckets + buckets = revision_bucket_list(ctx, bucketcase) + + for revisions in revisions_list: # Process the original path conform the bucket settings candidates = get_deletion_candidates(ctx, buckets, revisions, end_of_calendar_day) diff --git a/tools/revision-clean-up.py b/tools/revision-clean-up.py new file mode 100755 index 000000000..4da19c1eb --- /dev/null +++ b/tools/revision-clean-up.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +import json +import subprocess +import sys + + +if len(sys.argv) != 3: + print('Usage: {} endOfCalendarDay bucketcase'.format(sys.argv[0])) + exit(1) + +endOfCalendarDay = sys.argv[1] +bucketcase = sys.argv[2] + + +def clean_up(revisions): + chunk = json.dumps(revisions) + chunk = "\\\\".join(chunk.split("\\")) + chunk = "\\'".join(chunk.split("'")) + return subprocess.check_output([ + 'irule', + '-r', + 'irods_rule_engine_plugin-irods_rule_language-instance', + "*out=''; rule_revisions_clean_up('{}', '{}', '{}', *out); writeString('stdout', *out);".format(chunk, bucketcase, endOfCalendarDay), + 'null', + 'ruleExecOut' + ]) + + +print('START cleaning up revision store') + +revisions_info = json.loads(subprocess.check_output([ + 'irule', + '-r', + 'irods_rule_engine_plugin-irods_rule_language-instance', + '*out=""; rule_revisions_info(*out); writeString("stdout", *out);', + 'null', + 'ruleExecOut' +])) + +while len(revisions_info) > 100: + clean_up(revisions_info[:100]) + revisions_info = revisions_info[100:] +print(clean_up(revisions_info)) diff --git a/tools/revision-clean-up.r b/tools/revision-clean-up.r deleted file mode 100644 index f0d2fa11c..000000000 --- a/tools/revision-clean-up.r +++ /dev/null @@ -1,9 +0,0 @@ -cleanup { - writeLine("stdout", 'START cleaning up revision store'); - *status = ""; - rule_revisions_clean_up(*bucketcase, str(*endOfCalendarDay), *status); - writeLine("stdout", *status); -} - -input *endOfCalendarDay=0, *bucketcase="B" -output ruleExecOut From 1a657e1723c937c0e15f4773430b0285d122f3fa Mon Sep 17 00:00:00 2001 From: "Felix A. Croes" Date: Fri, 1 Sep 2023 13:42:50 +0200 Subject: [PATCH 12/30] YDA-5375 Flake --- revisions.py | 4 +++- tools/revision-clean-up.py | 12 ++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/revisions.py b/revisions.py index b4c6cdb9f..730bd2168 100644 --- a/revisions.py +++ b/revisions.py @@ -526,7 +526,9 @@ def revision_create(ctx, resource, data_id, max_size, verbose): @rule.make(inputs=[], outputs=[0]) def rule_revisions_info(ctx): """Obtain information about all revisions. 
- :returns: Json string with info about revisions + + :param ctx: Combined type of a callback and rei struct + :returns: Json string with info about revisions """ zone = user.zone(ctx) revision_store = '/' + zone + constants.UUREVISIONCOLLECTION diff --git a/tools/revision-clean-up.py b/tools/revision-clean-up.py index 4da19c1eb..9c57ad52a 100755 --- a/tools/revision-clean-up.py +++ b/tools/revision-clean-up.py @@ -18,12 +18,12 @@ def clean_up(revisions): chunk = "\\\\".join(chunk.split("\\")) chunk = "\\'".join(chunk.split("'")) return subprocess.check_output([ - 'irule', - '-r', - 'irods_rule_engine_plugin-irods_rule_language-instance', - "*out=''; rule_revisions_clean_up('{}', '{}', '{}', *out); writeString('stdout', *out);".format(chunk, bucketcase, endOfCalendarDay), - 'null', - 'ruleExecOut' + 'irule', + '-r', + 'irods_rule_engine_plugin-irods_rule_language-instance', + "*out=''; rule_revisions_clean_up('{}', '{}', '{}', *out); writeString('stdout', *out);".format(chunk, bucketcase, endOfCalendarDay), + 'null', + 'ruleExecOut' ]) From 327275006b6928079aded4852055c805a25572a1 Mon Sep 17 00:00:00 2001 From: Harm de Raaff Date: Wed, 30 Aug 2023 10:16:01 +0200 Subject: [PATCH 13/30] initialize min_month/year properly so no errors occur when no storage data is present yet --- resources.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/resources.py b/resources.py index 5969c6395..e497e4b80 100644 --- a/resources.py +++ b/resources.py @@ -337,6 +337,10 @@ def api_resource_monthly_category_stats(ctx): current_month = datetime.now().month current_year = datetime.now().year + # Initialize to prevent errors in log when no data has been registered yet. + min_year = -1 + min_month = -1 + # find minimal registered date registered. iter = list(genquery.Query(ctx, ['ORDER(META_USER_ATTR_NAME)'], "META_USER_ATTR_NAME like '{}%%'".format(constants.UUMETADATAGROUPSTORAGETOTALS), @@ -346,6 +350,10 @@ def api_resource_monthly_category_stats(ctx): min_year = int(row[0][-10:-6]) min_month = int(row[0][-5:-3]) + if min_month == -1: + # if min_month == -1 no minimal date was found. Consequently, stop further processing + return {'storage': [], 'dates': []} + # Prepare storage data # Create dict with all groups that will contain list of storage values corresponding to complete range from minimal date till now. group_storage = {} From 1093cf88edbc9aa6a3dfbfa9bebc443089a81d4d Mon Sep 17 00:00:00 2001 From: Harm de Raaff Date: Thu, 31 Aug 2023 16:04:02 +0200 Subject: [PATCH 14/30] YDA-5365: removed hardcoded zone This caused the group creation date in the group manager to not be shown. --- groups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/groups.py b/groups.py index 3ff52cbc4..95eb235af 100644 --- a/groups.py +++ b/groups.py @@ -390,7 +390,7 @@ def api_group_data(ctx): creation_date = "" iter = genquery.row_iterator( "COLL_CREATE_TIME", - "COLL_NAME = '/tempZone/home/{}'".format(group['name']), + "COLL_NAME = '/{}/home/{}'".format(user.zone(ctx), group['name']), genquery.AS_LIST, ctx ) for row in iter: From b6b879e9997fb26dc5577e9da262e3286c349a93 Mon Sep 17 00:00:00 2001 From: Sietse Snel Date: Wed, 13 Sep 2023 14:23:45 +0200 Subject: [PATCH 15/30] YDA-5404: increase logging publication functions Increase logging publication-related functions for easier troubleshooting: 1. Also log errors if publication verbose mode is not enabled 2. Perform more logging in verbose mode for publication updates and republication 3. 
Log exceptions, rather than just the fact than an exception occurred. --- publication.py | 138 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 94 insertions(+), 44 deletions(-) diff --git a/publication.py b/publication.py index 07d3a7bb9..42becd62c 100644 --- a/publication.py +++ b/publication.py @@ -796,7 +796,8 @@ def set_access_restrictions(ctx, vault_package, publication_state): try: msi.set_acl(ctx, "recursive", access_level, "anonymous", vault_package) - except Exception: + except Exception as e: + log.write(ctx, "set_access_restrictions for {} failed: {}".format(vault_package, str(e))) publication_state["status"] = "Unrecoverable" return @@ -905,9 +906,8 @@ def process_publication(ctx, vault_package): # Set the link to previous publication state previous_publication_state["baseDOI"] = publication_state["baseDOI"] previous_publication_state["baseRandomId"] = publication_state["baseRandomId"] - except Exception: - if verbose: - log.write(ctx, "Error while checking version DOI availability.") + except Exception as e: + log.write(ctx, "Error while checking version DOI availability: " + str(e)) publication_state["status"] = "Retry" save_publication_state(ctx, previous_vault_package, previous_publication_state) @@ -955,9 +955,8 @@ def process_publication(ctx, vault_package): try: generate_combi_json(ctx, publication_config, publication_state) - except Exception: - if verbose: - log.write(ctx, "Exception while generating combi JSON.") + except Exception as e: + log.write(ctx, "Exception while generating combi JSON: " + str(e)) publication_state["status"] = "Unrecoverable" save_publication_state(ctx, vault_package, publication_state) @@ -978,16 +977,14 @@ def process_publication(ctx, vault_package): log.write(ctx, "Generating Datacite JSON.") try: generate_datacite_json(ctx, publication_state) - except Exception: - if verbose: - log.write(ctx, "Error while generating Datacite JSON.") + except Exception as e: + log.write(ctx, "Exception while generating Datacite JSON: " + str(e)) publication_state["status"] = "Unrecoverable" save_publication_state(ctx, vault_package, publication_state) if publication_state["status"] in ["Unrecoverable", "Retry"]: - if verbose: - log.write(ctx, "Error status after generating Datacite JSON.") + log.write(ctx, "Error status after generating Datacite JSON: " + publication_state["status"]) return publication_state["status"] # Check if DOI is in use @@ -997,16 +994,14 @@ def process_publication(ctx, vault_package): try: check_doi_availability(ctx, publication_state, 'version') - except Exception: - if verbose: - log.write(ctx, "Error while checking version DOI availability.") + except Exception as e: + log.write(ctx, "Error while checking DOI availability: " + str(e)) publication_state["status"] = "Retry" save_publication_state(ctx, vault_package, publication_state) if publication_state["status"] == "Retry": - if verbose: - log.write(ctx, "Error status after checking version DOI availability.") + log.write(ctx, "Error status after checking version DOI availability: " + publication_state["status"]) return publication_state["status"] # Determine whether an update ('put') or create ('post') message has to be sent to datacite @@ -1034,16 +1029,14 @@ def process_publication(ctx, vault_package): log.write(ctx, "Updating base DOI.") base_doi = publication_state['baseDOI'] post_metadata_to_datacite(ctx, publication_state, base_doi, datacite_action) - except Exception: - if verbose: - log.write(ctx, "Error while sending metadata to Datacite.") + except 
Exception as e: + log.write(ctx, "Exception while sending metadata to Datacite: " + str(e)) publication_state["status"] = "Retry" save_publication_state(ctx, vault_package, publication_state) if publication_state["status"] in ["Unrecoverable", "Retry"]: - if verbose: - log.write(ctx, "Error status after sending metadata to Datacite.") + log.write(ctx, "Error status after sending metadata to Datacite: " + publication_state["status"]) return publication_state["status"] # Create landing page @@ -1053,16 +1046,14 @@ def process_publication(ctx, vault_package): # Create landing page try: generate_landing_page(ctx, publication_state, "publish") - except Exception: - if verbose: - log.write(ctx, "Error while sending metadata to Datacite.") + except Exception as e: + log.write(ctx, "Error while sending metadata to Datacite: " + str(e)) publication_state["status"] = "Unrecoverable" save_publication_state(ctx, vault_package, publication_state) if publication_state["status"] == "Unrecoverable": - if verbose: - log.write(ctx, "Error status after creating landing page.") + log.write(ctx, "Error status after creating landing page: " + publication_state["status"]) return publication_state["status"] # Use secure copy to push landing page to the public host @@ -1081,8 +1072,7 @@ def process_publication(ctx, vault_package): save_publication_state(ctx, vault_package, publication_state) if publication_state["status"] == "Retry": - if verbose: - log.write(ctx, "Error status after uploading landing page.") + log.write(ctx, "Error status after uploading landing page:" + publication_state["status"]) return publication_state["status"] # Use secure copy to push combi JSON to MOAI server @@ -1101,8 +1091,7 @@ def process_publication(ctx, vault_package): save_publication_state(ctx, vault_package, publication_state) if publication_state["status"] == "Retry": - if verbose: - log.write(ctx, "Error status after uploading to MOAI.") + log.write(ctx, "Error status after uploading to MOAI: " + publication_state["status"]) return publication_state["status"] # Set access restriction for vault package. @@ -1113,8 +1102,7 @@ def process_publication(ctx, vault_package): save_publication_state(ctx, vault_package, publication_state) if publication_state["status"] == "Retry": - if verbose: - log.write(ctx, "Error status after setting vault access restrictions.") + log.write(ctx, "Error status after setting vault access restrictions." + publication_state["status"]) return publication_state["status"] # Mint DOI with landing page URL. 
@@ -1185,6 +1173,11 @@ def process_depublication(ctx, vault_package): publication_state = get_publication_state(ctx, vault_package) status = publication_state['status'] + # Check if verbose mode is enabled + verbose = True if "verboseMode" in publication_config else False + if verbose: + log.write(ctx, "Running process_depublication in verbose mode.") + if status == "OK": # reset on first call set_update_publication_state(ctx, vault_package) @@ -1207,9 +1200,12 @@ def process_depublication(ctx, vault_package): # Generate Combi Json consisting of user and system metadata if "combiJsonPath" not in publication_state: + if verbose: + log.write(ctx, "Generating combi JSON.") try: generate_system_json(ctx, publication_state) - except Exception: + except Exception as e: + log.write(ctx, "Exception while trying to generate system JSON during depublication: " + str(e)) publication_state["status"] = "Unrecoverable" save_publication_state(ctx, vault_package, publication_state) @@ -1219,11 +1215,14 @@ def process_depublication(ctx, vault_package): # Hide metadata from DataCite if "dataCiteMetadataPosted" not in publication_state: + if verbose: + log.write(ctx, "Uploading metadata to Datacite.") try: remove_metadata_from_datacite(ctx, publication_state, 'version') if update_base_doi: remove_metadata_from_datacite(ctx, publication_state, 'base') - except Exception: + except Exception as e: + log.write(ctx, "Exception while trying to remove metadata from Datacite during depublication: " + str(e)) publication_state["status"] = "Retry" save_publication_state(ctx, vault_package, publication_state) @@ -1233,10 +1232,13 @@ def process_depublication(ctx, vault_package): # Create landing page if "landingPagePath" not in publication_state: + if verbose: + log.write(ctx, "Creating landing page.") # Create landing page try: generate_landing_page(ctx, publication_state, "depublish") - except Exception: + except Exception as e: + log.write(ctx, "Exception while generating landing page during depublication: " + str(e)) publication_state["status"] = "Unrecoverable" save_publication_state(ctx, vault_package, publication_state) @@ -1246,6 +1248,8 @@ def process_depublication(ctx, vault_package): # Use secure copy to push landing page to the public host if "landingPageUploaded" not in publication_state: + if verbose: + log.write(ctx, "Uploading landing page.") random_id = publication_state["randomId"] copy_landingpage_to_public_host(ctx, random_id, publication_config, publication_state) @@ -1260,6 +1264,8 @@ def process_depublication(ctx, vault_package): # Use secure copy to push combi JSON to MOAI server if "oaiUploaded" not in publication_state: + if verbose: + log.write(ctx, "Uploading to MOAI.") random_id = publication_state["randomId"] copy_metadata_to_moai(ctx, random_id, publication_config, publication_state) @@ -1274,6 +1280,8 @@ def process_depublication(ctx, vault_package): # Set access restriction for vault package. 
if "anonymousAccess" not in publication_state: + if verbose: + log.write(ctx, "Setting vault access restrictions.") set_access_restrictions(ctx, vault_package, publication_state) save_publication_state(ctx, vault_package, publication_state) @@ -1312,6 +1320,11 @@ def process_republication(ctx, vault_package): publication_state = get_publication_state(ctx, vault_package) status = publication_state['status'] + # Check if verbose mode is enabled + verbose = True if "verboseMode" in publication_config else False + if verbose: + log.write(ctx, "Running process_republication in verbose mode.") + if status == "OK": # reset on first call set_update_publication_state(ctx, vault_package) @@ -1327,6 +1340,8 @@ def process_republication(ctx, vault_package): # Set flag to update base DOI when this data package is the latest version. update_base_doi = False if "previous_version" in publication_state and "next_version" not in publication_state: + if verbose: + log.write(ctx, "In branch for updating base DOI") update_base_doi = True # Publication date @@ -1338,9 +1353,12 @@ def process_republication(ctx, vault_package): # Generate Combi Json consisting of user and system metadata if "combiJsonPath" not in publication_state: + if verbose: + log.write(ctx, "Generating combi JSON.") try: generate_combi_json(ctx, publication_config, publication_state) - except Exception: + except Exception as e: + log.write(ctx, "Exception while generating combi JSON during republication: " + str(e)) publication_state["status"] = "Unrecoverable" save_publication_state(ctx, vault_package, publication_state) @@ -1350,9 +1368,12 @@ def process_republication(ctx, vault_package): # Generate DataCite JSON if "dataCiteJsonPath" not in publication_state: + if verbose: + log.write(ctx, "Generating Datacite JSON.") try: generate_datacite_json(ctx, publication_state) - except Exception: + except Exception as e: + log.write(ctx, "Exception while generating DataCite JSON for republication: " + str(e)) publication_state["status"] = "Unrecoverable" save_publication_state(ctx, vault_package, publication_state) @@ -1362,12 +1383,15 @@ def process_republication(ctx, vault_package): # Send DataCite JSON to metadata end point if "dataCiteMetadataPosted" not in publication_state: + if verbose: + log.write(ctx, "Uploading metadata to Datacite.") try: post_metadata_to_datacite(ctx, publication_state, publication_state['versionDOI'], 'put') if update_base_doi: post_metadata_to_datacite(ctx, publication_state, publication_state['baseDOI'], 'put') - except Exception: + except Exception as e: + log.write(ctx, "Exception while posting metadata to Datacite during republication: " + str(e)) publication_state["status"] = "Retry" save_publication_state(ctx, vault_package, publication_state) @@ -1377,10 +1401,13 @@ def process_republication(ctx, vault_package): # Create landing page if "landingPagePath" not in publication_state: + if verbose: + log.write(ctx, "Creating landing page.") # Create landing page try: generate_landing_page(ctx, publication_state, "publish") - except Exception: + except Exception as e: + log.write(ctx, "Exception while creating landing page during republication: " + str(e)) publication_state["status"] = "Unrecoverable" save_publication_state(ctx, vault_package, publication_state) @@ -1390,6 +1417,8 @@ def process_republication(ctx, vault_package): # Use secure copy to push landing page to the public host if "landingPageUploaded" not in publication_state: + if verbose: + log.write(ctx, "Uploading landing page.") random_id = 
publication_state["randomId"] copy_landingpage_to_public_host(ctx, random_id, publication_config, publication_state) @@ -1404,6 +1433,8 @@ def process_republication(ctx, vault_package): # Use secure copy to push combi JSON to MOAI server if "oaiUploaded" not in publication_state: + if verbose: + log.write(ctx, "Uploading to MOAI.") random_id = publication_state["randomId"] copy_metadata_to_moai(ctx, random_id, publication_config, publication_state) @@ -1418,6 +1449,8 @@ def process_republication(ctx, vault_package): # Set access restriction for vault package. if "anonymousAccess" not in publication_state: + if verbose: + log.write(ctx, "Setting vault access restrictions.") set_access_restrictions(ctx, vault_package, publication_state) save_publication_state(ctx, vault_package, publication_state) @@ -1480,12 +1513,19 @@ def update_publication(ctx, vault_package, update_datacite=False, update_landing publication_state = get_publication_state(ctx, vault_package) status = publication_state['status'] + # Check if verbose mode is enabled + verbose = True if "verboseMode" in publication_config else False + if verbose: + log.write(ctx, "Running update_publication in verbose mode.") + # Publication must be finished. if status != "OK": return status update_base_doi = False if "baseDOI" in publication_state: + if verbose: + log.write(ctx, "In branch for updating base DOI") if "previous_version" in publication_state and "next_version" not in publication_state: update_base_doi = True @@ -1497,9 +1537,12 @@ def update_publication(ctx, vault_package, update_datacite=False, update_landing publication_state["lastModifiedDateTime"] = get_last_modified_datetime(ctx, vault_package) # Generate Combi Json consisting of user and system metadata + if verbose: + log.write(ctx, "Generating combi JSON.") try: generate_combi_json(ctx, publication_config, publication_state) - except Exception: + except Exception as e: + log.write(ctx, "Exception while generating combi JSON after metadata update: " + str(e)) publication_state["status"] = "Unrecoverable" save_publication_state(ctx, vault_package, publication_state) @@ -1512,7 +1555,8 @@ def update_publication(ctx, vault_package, update_datacite=False, update_landing log.write(ctx, 'Update datacite for package {}'.format(vault_package)) try: generate_datacite_json(ctx, publication_state) - except Exception: + except Exception as e: + log.write(ctx, "Exception while generating DataCite JSON after metadata update: " + str(e)) publication_state["status"] = "Unrecoverable" save_publication_state(ctx, vault_package, publication_state) @@ -1521,11 +1565,14 @@ def update_publication(ctx, vault_package, update_datacite=False, update_landing return publication_state["status"] # Send DataCite JSON to metadata end point + if verbose: + log.write(ctx, "Uploading metadata to Datacite.") try: post_metadata_to_datacite(ctx, publication_state, publication_state["versionDOI"], 'put') if update_base_doi: post_metadata_to_datacite(ctx, publication_state, publication_state["baseDOI"], 'put') - except Exception: + except Exception as e: + log.write(ctx, "Exception while posting metadata to Datacite after metadata update: " + str(e)) publication_state["status"] = "Retry" save_publication_state(ctx, vault_package, publication_state) @@ -1538,7 +1585,8 @@ def update_publication(ctx, vault_package, update_datacite=False, update_landing log.write(ctx, 'Update landingpage for package {}'.format(vault_package)) try: generate_landing_page(ctx, publication_state, "publish") - except Exception: + 
except Exception as e: + log.write(ctx, "Exception while updating landing page after metadata update: " + str(e)) publication_state["status"] = "Unrecoverable" save_publication_state(ctx, vault_package, publication_state) @@ -1548,6 +1596,8 @@ def update_publication(ctx, vault_package, update_datacite=False, update_landing # Use secure copy to push landing page to the public host random_id = publication_state["randomId"] + if verbose: + log.write(ctx, "Uploading landing page.") copy_landingpage_to_public_host(ctx, random_id, publication_config, publication_state) if update_base_doi: base_random_id = publication_state["baseRandomId"] From 5657a9af64bc83d2781509dc4c650e4cb908c5d1 Mon Sep 17 00:00:00 2001 From: Sietse Snel Date: Wed, 13 Sep 2023 15:04:41 +0200 Subject: [PATCH 16/30] iiDatamanagerPolicies: fix typo in header --- iiDatamanagerPolicies.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iiDatamanagerPolicies.r b/iiDatamanagerPolicies.r index ef2a34169..4fd410c3f 100644 --- a/iiDatamanagerPolicies.r +++ b/iiDatamanagerPolicies.r @@ -4,7 +4,7 @@ # \author Paul Frederiks # \author Lazlo Westerhof # \copyright Copyright (c) 2017-2022, Utrecht University. All rights reserved. -# \licens GPLv3 see LICENSE. +# \license GPLv3 see LICENSE. # This policy override enables the datamanager to manage ACL's in the vault From 874024fd4f553e2d81da63892b49b1c149e7dac1 Mon Sep 17 00:00:00 2001 From: claravox Date: Fri, 15 Sep 2023 14:02:25 +0200 Subject: [PATCH 17/30] Send notification to datamanager if research group has been inactive for many months (#323) Add notification for if research group has not been modified recently --- notifications.py | 115 ++++++++++++++++++ rules_uu.cfg.template | 3 + .../notification-groups-inactivity.sh | 2 + util/config.py | 2 + util/group.py | 2 +- 5 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 tools/notification/notification-groups-inactivity.sh diff --git a/notifications.py b/notifications.py index 6b9236ccd..8d3015168 100644 --- a/notifications.py +++ b/notifications.py @@ -19,6 +19,7 @@ import data_access_token import folder import mail +import meta import settings from util import * @@ -28,6 +29,7 @@ 'rule_mail_notification_report', 'rule_process_ending_retention_packages', 'rule_process_groups_expiration_date', + 'rule_process_inactive_research_groups', 'rule_process_data_access_token_expiry'] NOTIFICATION_KEY = constants.UUORGMETADATAPREFIX + "notification" @@ -344,6 +346,119 @@ def rule_process_groups_expiration_date(ctx): log.write(ctx, 'group expiration date - Finished checking research groups for reaching group expiration date | notified: {}'.format(notify_count)) +@rule.make() +def rule_process_inactive_research_groups(ctx): + """Rule interface for checking for research groups that have not been modified after a certain amount of months. + + :param ctx: Combined type of a callback and rei struct + """ + # Only send notifications if inactivity notifications are enabled. 
+ if not config.enable_inactivity_notification: + return + + # check permissions - rodsadmin only + if user.user_type(ctx) != 'rodsadmin': + log.write(ctx, "inactive research group - Insufficient permissions - should only be called by rodsadmin") + return + + log.write(ctx, 'inactive research group - Checking Research packages for last modification dates') + + zone = user.zone(ctx) + notify_count = 0 + inactivity_cutoff = datetime.now() - timedelta(weeks=4.35 * config.inactivity_cutoff_months) + inactivity_cutoff_epoch = int((inactivity_cutoff - datetime(1970, 1, 1)).total_seconds()) + + # First query: obtain a list of groups with group attributes + iter = genquery.row_iterator( + "USER_GROUP_NAME", + "USER_TYPE = 'rodsgroup' AND USER_GROUP_NAME like 'research-%'", + genquery.AS_LIST, ctx + ) + + for row in iter: + group_name = row[0] + coll = '/{}/home/{}'.format(zone, group_name) + # Trigger this flag if there are any files that have been modified after the cut off + # If the flag is still false after going through all the files, then that is when we send the notification + recent_files_modified = False + data_objects_count = 0 + where_clause = { + 'self': "COLL_NAME = '{}' AND USER_GROUP_NAME = '{}'".format(coll, group_name), + 'subfolders': "COLL_NAME LIKE '{}/%' AND USER_GROUP_NAME = '{}'".format(coll, group_name) + } + + # Per group two statements are required to gather all data + # 1) data in folder itself + # 2) data in all subfolders of the folder + for folder_type in ['self', 'subfolders']: + iter_subcoll = genquery.row_iterator( + "COUNT(DATA_NAME)", + where_clause[folder_type], + genquery.AS_LIST, ctx + ) + # This loop should only run once + for sub_row in iter_subcoll: + data_objects_count += int(sub_row[0]) + + if data_objects_count > 0: + for folder_type in ['self', 'subfolders']: + if recent_files_modified: + break + + iter_subcoll = genquery.row_iterator( + "DATA_NAME, COLL_NAME", + where_clause[folder_type], + genquery.AS_LIST, ctx + ) + + for sub_row in iter_subcoll: + if recent_files_modified: + break + + sub_coll = sub_row[1] + + # Get count of any data objects that have been modified after the inactivity cut off + iter_recent_data = genquery.row_iterator( + "COUNT(DATA_NAME)", + "COLL_NAME = '{}' AND USER_GROUP_NAME = '{}' AND DATA_MODIFY_TIME n> '{}'".format(sub_coll, group_name, inactivity_cutoff_epoch), + genquery.AS_LIST, ctx + ) + + # This loop should only run once + for count_row in iter_recent_data: + if int(count_row[0]) > 0: + recent_files_modified = True + else: + # Empty research group, so check the modified date of the collection, then send a notification + iter_data = genquery.row_iterator( + "COLL_MODIFY_TIME", + "COLL_NAME = '{}'".format(coll), + genquery.AS_LIST, ctx + ) + # This loop should only run once + for sub_row in iter_data: + if int(sub_row[0]) > inactivity_cutoff_epoch: + recent_files_modified = True + + if not recent_files_modified: + # find corresponding datamanager + category = group.get_category(ctx, group_name) + datamanager_group_name = "datamanager-" + category + if group.exists(ctx, datamanager_group_name): + notify_count += 1 + # Send notifications to datamanager(s). 
+ datamanagers = folder.get_datamanagers(ctx, '/{}/home/'.format(zone) + datamanager_group_name) + message = "Group '{}' has been inactive for more than {} months".format(group_name, config.inactivity_cutoff_months) + + for datamanager in datamanagers: + datamanager = '{}#{}'.format(*datamanager) + actor = 'system' + set(ctx, actor, datamanager, coll, message) + log.write(ctx, 'inactive research group - Notifications set for group {} having been inactive since at least {}. <{}>'.format(group_name, inactivity_cutoff, coll)) + + log.write(ctx, 'inactive research group - Finished checking research groups for inactivity | notified: {}'.format(notify_count)) + + @rule.make() def rule_process_data_access_token_expiry(ctx): """Rule interface for checking for data access tokens that are expiring soon. diff --git a/rules_uu.cfg.template b/rules_uu.cfg.template index 2097ec998..674f03585 100644 --- a/rules_uu.cfg.template +++ b/rules_uu.cfg.template @@ -49,6 +49,9 @@ token_database = token_length = token_lifetime = +enable_inactivity_notification = +inactivity_cutoff_months = + async_replication_delay_time = async_revision_delay_time = diff --git a/tools/notification/notification-groups-inactivity.sh b/tools/notification/notification-groups-inactivity.sh new file mode 100644 index 000000000..1dbd9c122 --- /dev/null +++ b/tools/notification/notification-groups-inactivity.sh @@ -0,0 +1,2 @@ +#!/bin/bash +irule -r irods_rule_engine_plugin-python-instance rule_process_inactive_research_groups "null" "null" diff --git a/util/config.py b/util/config.py index 10ebf8915..28b4fb111 100644 --- a/util/config.py +++ b/util/config.py @@ -98,6 +98,7 @@ def __repr__(self): eus_api_tls_verify=True, enable_deposit=False, enable_open_search=False, + enable_inactivity_notification=False, enable_intake=False, enable_datarequest=False, enable_data_package_archive=False, @@ -109,6 +110,7 @@ def __repr__(self): enable_data_package_reference=False, enable_tokens=False, enable_tape_archive=False, + inactivity_cutoff_months=3, token_database=None, token_database_password=None, token_length=0, diff --git a/util/group.py b/util/group.py index ad7821ce3..801d0e81a 100644 --- a/util/group.py +++ b/util/group.py @@ -52,7 +52,7 @@ def get_category(ctx, grp): :param ctx: Combined type of a callback and rei struct :param grp: Group name - :returns: Categroy of given group + :returns: Category of given group """ ret = ctx.uuGroupGetCategory(grp, '', '') x = ret['arguments'][1] From 44db9c3d815a52e33e6ad04afa9a29f1cc249eff Mon Sep 17 00:00:00 2001 From: Sietse Snel Date: Wed, 20 Sep 2023 10:49:21 +0200 Subject: [PATCH 18/30] Publication: fix error message --- publication.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/publication.py b/publication.py index 42becd62c..b2dfe6e33 100644 --- a/publication.py +++ b/publication.py @@ -1047,7 +1047,7 @@ def process_publication(ctx, vault_package): try: generate_landing_page(ctx, publication_state, "publish") except Exception as e: - log.write(ctx, "Error while sending metadata to Datacite: " + str(e)) + log.write(ctx, "Error while creating landing page: " + str(e)) publication_state["status"] = "Unrecoverable" save_publication_state(ctx, vault_package, publication_state) From f02c761d83787c1776c583640d03f0ae2733aa0b Mon Sep 17 00:00:00 2001 From: Sietse Snel Date: Wed, 20 Sep 2023 10:59:22 +0200 Subject: [PATCH 19/30] Typo fixes --- uuGroup.r | 8 ++++---- uuSudoPolicies.r | 2 +- yc2Vault.r | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/uuGroup.r 
b/uuGroup.r index bb3a5be0e..5ca5ed184 100644 --- a/uuGroup.r +++ b/uuGroup.r @@ -441,10 +441,10 @@ uuGroupGetCategory(*groupName, *category, *subcategory) { } -# \brief Get a group's desription. +# \brief Get a group's description. # # \param[in] groupName -# \param[out] decsription +# \param[out] description # uuGroupGetDescription(*groupName, *description) { *description = ""; @@ -872,7 +872,7 @@ uuGroupRemove(*groupName, *status, *message) { # uuUserModify(*userName, *property, *value, *status, *message) { *status = 1; - *message = "An internal error occured."; + *message = "An internal error occurred."; *kv.'.' = "."; @@ -909,7 +909,7 @@ uuUserModify(*userName, *property, *value, *status, *message) { # uuUserMetaRemove(*userName, *property, *status, *message) { *status = 1; - *message = "An internal error occured."; + *message = "An internal error occurred."; *status = errorcode(msiSudoObjMetaRemove(*userName, "-u", "wildcards", *property, "", "", "")); if (*status == 0) { diff --git a/uuSudoPolicies.r b/uuSudoPolicies.r index 49bb558d5..05f90bedf 100644 --- a/uuSudoPolicies.r +++ b/uuSudoPolicies.r @@ -12,7 +12,7 @@ # Preproc rules should never issue Sudo actions of their own. # Instead, put any additional actions that may need to be taken with a certain # sudo action in a postproc rule, which is guaranteed to be executed on -# succesful completion of the sudo action. +# successful completion of the sudo action. # # There are currently three implementations of the preproc set of sudo policy # rules, all listed in this rule file. diff --git a/yc2Vault.r b/yc2Vault.r index 568d55510..ce9331e6a 100644 --- a/yc2Vault.r +++ b/yc2Vault.r @@ -116,7 +116,7 @@ uuYcVaultIngestObject(*objectPath, *isCollection, *vaultPath, *status) { } } } else { # its not a collection but a data object - # first chksum the orginal file then use it to verify the vault copy + # first chksum the original file, then use it to verify the vault copy msiDataObjChksum(*objectPath, "forceChksum=", *checksum); msiDataObjCopy(*objectPath, *vaultPath, "verifyChksum=", *status); if (*status == 0) { @@ -315,7 +315,7 @@ uuYc2Vault(*intakeRoot, *vaultRoot, *status) { # note that we have to allow for multiple types of datasets: # type A: a single toplevel collection with a tree underneath - # type B: one or more datafiles located within the same collection + # type B: one or more data files located within the same collection # processing varies slightly between them, so process each type in turn # # TYPE A: @@ -328,7 +328,7 @@ uuYc2Vault(*intakeRoot, *vaultRoot, *status) { if (*locked) { uuYcDatasetFreeze(*topLevelCollection, *datasetId, *status); if (*status == 0) { - # datset frozen, now move to fault and remove from intake area + # dataset frozen; now move to vault and remove from intake area uuYcDatasetCollectionMove2Vault( *intakeRoot, *topLevelCollection, From f32c1b38c4d49b20f66c45346017d1f4e06d0cdf Mon Sep 17 00:00:00 2001 From: claravox Date: Wed, 20 Sep 2023 11:11:58 +0200 Subject: [PATCH 20/30] YDA-5407 - Address unicode error --- json_landing_page.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/json_landing_page.py b/json_landing_page.py index 5b0a5d268..cf0bf87b5 100644 --- a/json_landing_page.py +++ b/json_landing_page.py @@ -133,9 +133,11 @@ def json_landing_page_create_json_landing_page(callback, rodsZone, template_name try: language = '' language_id = dictJsonData['Language'] - schema_lang_ids = json_schema['definitions']['optionsISO639-1']['enum'] - schema_lang_names = 
json_schema['definitions']['optionsISO639-1']['enumNames'] + # Convert just the language schemas to unicode to handle when a language has non-ascii characters (like Volapük) + schema_lang_ids = map(lambda x: x.decode("utf-8"), json_schema['definitions']['optionsISO639-1']['enum']) + schema_lang_names = map(lambda x: x.decode("utf-8"), json_schema['definitions']['optionsISO639-1']['enumNames']) index = schema_lang_ids.index(language_id) + # Language variable must be kept in unicode, otherwise landing page fails to build with a language with non-ascii characters language = schema_lang_names[index] except KeyError: language = '' From 18aa7eb2cc0db9859c5ab3e9c6c835e8647bb05c Mon Sep 17 00:00:00 2001 From: "Felix A. Croes" Date: Wed, 20 Sep 2023 13:56:44 +0200 Subject: [PATCH 21/30] YDA-5423 Guard against inconsistencies when collecting the data. --- revisions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/revisions.py b/revisions.py index 730bd2168..15d139c33 100644 --- a/revisions.py +++ b/revisions.py @@ -572,7 +572,8 @@ def rule_revisions_info(ctx): for revisions in path_dict.values(): revision_list = [] for revision_id in revisions: - revision_list.append(rev_dict[revision_id]) + if revision_id in rev_dict: + revision_list.append(rev_dict[revision_id]) revisions_info.append(revision_list) return json.dumps(revisions_info) From a79684bd306d8688251f8b641203fcd2f61d3113 Mon Sep 17 00:00:00 2001 From: "Felix A. Croes" Date: Wed, 20 Sep 2023 14:01:20 +0200 Subject: [PATCH 22/30] YDA-5423 Reduce the cleanup batch size. --- tools/revision-clean-up.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/revision-clean-up.py b/tools/revision-clean-up.py index 9c57ad52a..9cc7d211f 100755 --- a/tools/revision-clean-up.py +++ b/tools/revision-clean-up.py @@ -38,7 +38,7 @@ def clean_up(revisions): 'ruleExecOut' ])) -while len(revisions_info) > 100: - clean_up(revisions_info[:100]) - revisions_info = revisions_info[100:] +while len(revisions_info) > 30: + clean_up(revisions_info[:30]) + revisions_info = revisions_info[30:] print(clean_up(revisions_info)) From 17c88a750a482423c979bcf3a84eda8112d7826d Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Thu, 21 Sep 2023 12:15:00 +0200 Subject: [PATCH 23/30] Tools: add script to generate affiliation vocabulary from ROR data dump. --- .../vocabularies/generate_ror_affiliations.py | 24 + vocabularies/affiliations.json | 568 ++++++++++++++++-- 2 files changed, 528 insertions(+), 64 deletions(-) create mode 100755 tools/vocabularies/generate_ror_affiliations.py diff --git a/tools/vocabularies/generate_ror_affiliations.py b/tools/vocabularies/generate_ror_affiliations.py new file mode 100755 index 000000000..a43fc8f31 --- /dev/null +++ b/tools/vocabularies/generate_ror_affiliations.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python +# +# Usage: python3 generate_ror_affiliations.py > affiliations.json +# +# Retrieve ROR data json from ROR data dump from https://zenodo.org/record/7926988 +# +import json + +with open('v1.25-2023-05-11-ror-data.json') as f: + # Load ROR data dump file. + ror_data = json.load(f) + + affiliations = [] + for organization in ror_data: + # Get active organizations from NL with type Education. + if (organization["status"] == "active" + and organization["country"]["country_code"] == "NL" + and "Education" in organization["types"]): + affiliations.append({"value": organization['id'], "label": organization['name']}) + + # Sort organizations on their name. 
+ affiliations = sorted(affiliations, key=lambda d: d['label']) + + print(json.dumps(affiliations)) diff --git a/vocabularies/affiliations.json b/vocabularies/affiliations.json index 5f0260aed..9159fe198 100644 --- a/vocabularies/affiliations.json +++ b/vocabularies/affiliations.json @@ -1,66 +1,506 @@ [ - { - "value": "https://ror.org/04pp8hn57", - "label": "Utrecht University" - }, - { - "value": "https://ror.org/05grdyy37", - "label": "Amsterdam University Medical Centers" - }, - { - "value": "https://ror.org/02c2kyt77", - "label": "Eindhoven University of Technology" - }, - { - "value": "https://ror.org/057w15z03", - "label": "Erasmus University Rotterdam" - }, - { - "value": "https://ror.org/027bh9e22", - "label": "Leiden University" - }, - { - "value": "https://ror.org/05xvt9f17", - "label": "Leiden University Medical Center" - }, - { - "value": "https://ror.org/02jz4aj89", - "label": "Maastricht University" - }, - { - "value": "https://ror.org/02d9ce178", - "label": "Maastricht University Medical Centre" - }, - { - "value": "https://ror.org/016xsfp80", - "label": "Radboud University Nijmegen" - }, - { - "value": "https://ror.org/05wg1m734", - "label": "Radboud University Nijmegen Medical Centre" - }, - { - "value": "https://ror.org/0575yy874", - "label": "University Medical Center Utrecht" - }, - { - "value": "https://ror.org/04dkp9463", - "label": "University of Amsterdam" - }, - { - "value": "https://ror.org/012p63287", - "label": "University of Groningen" - }, - { - "value": "https://ror.org/006hf6230", - "label": "University of Twente" - }, - { - "value": "https://ror.org/008xxew50", - "label": "Vrije Universiteit Amsterdam" - }, - { - "value": "https://ror.org/04qw24q55", - "label": "Wageningen University & Research" - } + { + "value": "https://ror.org/04x5wnb75", + "label": "Academic Center for Dentistry Amsterdam" + }, + { + "value": "https://ror.org/02nt7ap43", + "label": "Academie Verloskunde Amsterdam Groningen" + }, + { + "value": "https://ror.org/030ms0k96", + "label": "Alfa College" + }, + { + "value": "https://ror.org/01g0n8690", + "label": "Amsterdam Institute for Advanced Metropolitan Solutions" + }, + { + "value": "https://ror.org/03dpjfc73", + "label": "Amsterdam University College" + }, + { + "value": "https://ror.org/00y2z2s03", + "label": "Amsterdam University of Applied Sciences" + }, + { + "value": "https://ror.org/04dde1554", + "label": "Amsterdam University of the Arts" + }, + { + "value": "https://ror.org/05az93f25", + "label": "ArtEZ Institute of the Arts" + }, + { + "value": "https://ror.org/015d5s513", + "label": "Avans University of Applied Sciences" + }, + { + "value": "https://ror.org/003meye08", + "label": "Avans+" + }, + { + "value": "https://ror.org/014dvgf17", + "label": "BASilar artery International Cooperation Study" + }, + { + "value": "https://ror.org/05rxmpb48", + "label": "BC Broekhin Roermond" + }, + { + "value": "https://ror.org/002g6sj22", + "label": "Bertrand Russell College" + }, + { + "value": "https://ror.org/03nk3sc73", + "label": "Bonaventuracollege" + }, + { + "value": "https://ror.org/020dp5a36", + "label": "C.T. 
de Wit Graduate School for Production Ecology and Resource Conservation" + }, + { + "value": "https://ror.org/041deym60", + "label": "Center for Technology & Innovation Management" + }, + { + "value": "https://ror.org/02gd5t807", + "label": "Christelijk Gymnasium Utrecht" + }, + { + "value": "https://ror.org/02rdwwh89", + "label": "Christelijke Scholengemeenschap Walcheren" + }, + { + "value": "https://ror.org/04tj5wz42", + "label": "Christian University of Applied Sciences" + }, + { + "value": "https://ror.org/04vtvrr13", + "label": "Codarts Rotterdam" + }, + { + "value": "https://ror.org/03kfjwh70", + "label": "College voor de Rechten van de Mens" + }, + { + "value": "https://ror.org/00e39jm60", + "label": "Curio" + }, + { + "value": "https://ror.org/02e2c7k09", + "label": "Delft University of Technology" + }, + { + "value": "https://ror.org/039vq9023", + "label": "Design Academy Eindhoven" + }, + { + "value": "https://ror.org/05jext738", + "label": "Driestar Christian University" + }, + { + "value": "https://ror.org/023p2b446", + "label": "Dutch Art Institute" + }, + { + "value": "https://ror.org/04jrdcy74", + "label": "Dutch Network of Systems and Control" + }, + { + "value": "https://ror.org/0265xav62", + "label": "Dutch postgraduate School for Art History" + }, + { + "value": "https://ror.org/02c2kyt77", + "label": "Eindhoven University of Technology" + }, + { + "value": "https://ror.org/057w15z03", + "label": "Erasmus University Rotterdam" + }, + { + "value": "https://ror.org/053qcv951", + "label": "European Association of Distance Teaching Universities" + }, + { + "value": "https://ror.org/00ftg6h50", + "label": "European Graduate School of Neuroscience" + }, + { + "value": "https://ror.org/05jrbdv26", + "label": "Expertise Center Vocational Education" + }, + { + "value": "https://ror.org/03tqe0950", + "label": "Expertisecentrum Nederlands" + }, + { + "value": "https://ror.org/01p232496", + "label": "Fons Vitae Lyceum" + }, + { + "value": "https://ror.org/01jwcme05", + "label": "Fontys University of Applied Sciences" + }, + { + "value": "https://ror.org/02fq0hm69", + "label": "Friesland College" + }, + { + "value": "https://ror.org/053jpjd80", + "label": "Gerrit Rietveld Academy" + }, + { + "value": "https://ror.org/0449h4696", + "label": "Graduate School Neurosciences Amsterdam Rotterdam" + }, + { + "value": "https://ror.org/0500gea42", + "label": "HAN University of Applied Sciences" + }, + { + "value": "https://ror.org/05p706d77", + "label": "HAS University of Applied Sciences" + }, + { + "value": "https://ror.org/047cqa323", + "label": "HZ University of Applied Sciences" + }, + { + "value": "https://ror.org/03skjap48", + "label": "Hague Academy of International Law" + }, + { + "value": "https://ror.org/00xqtxw43", + "label": "Hanze University of Applied Sciences" + }, + { + "value": "https://ror.org/056k6yz11", + "label": "Hogeschool de Kempel" + }, + { + "value": "https://ror.org/0230zs006", + "label": "Hogeschool iPabo" + }, + { + "value": "https://ror.org/018t8yw14", + "label": "Hotelschool The Hague" + }, + { + "value": "https://ror.org/03wyamy27", + "label": "Huizinga Instituut" + }, + { + "value": "https://ror.org/030deh410", + "label": "IHE Delft Institute for Water Education" + }, + { + "value": "https://ror.org/039ryse59", + "label": "Ichthus College" + }, + { + "value": "https://ror.org/03cfsyg37", + "label": "Inholland University of Applied Sciences" + }, + { + "value": "https://ror.org/04jxbkz90", + "label": "Institute for Programming research and Algorithmics" + }, 
+ { + "value": "https://ror.org/04m5bjk54", + "label": "Instituut voor Nederlandse Lexicologie" + }, + { + "value": "https://ror.org/00bscyw28", + "label": "International Bureau of Fiscal Documentation" + }, + { + "value": "https://ror.org/027zjnr49", + "label": "International New Town Institute" + }, + { + "value": "https://ror.org/04484bb06", + "label": "International Research Universities Network" + }, + { + "value": "https://ror.org/005rems48", + "label": "Interuniversity Center for Social Science Theory and Methodology" + }, + { + "value": "https://ror.org/047kqmy39", + "label": "Iselinge Hogeschool" + }, + { + "value": "https://ror.org/02552gk22", + "label": "Islamic University of Rotterdam" + }, + { + "value": "https://ror.org/05nrjb178", + "label": "Katholieke Pabo Zwolle" + }, + { + "value": "https://ror.org/01rps2j56", + "label": "Koning Willem I College" + }, + { + "value": "https://ror.org/027hreq63", + "label": "Koninklijke Scholengemeenschap" + }, + { + "value": "https://ror.org/027bh9e22", + "label": "Leiden University" + }, + { + "value": "https://ror.org/04zb8fk81", + "label": "Maastricht School of Management" + }, + { + "value": "https://ror.org/02jz4aj89", + "label": "Maastricht University" + }, + { + "value": "https://ror.org/03pdfag88", + "label": "Marecollege" + }, + { + "value": "https://ror.org/03xws5b35", + "label": "Marnix Academie" + }, + { + "value": "https://ror.org/05mv4rb84", + "label": "Medicines Evaluation Board" + }, + { + "value": "https://ror.org/004pfc251", + "label": "Middelbaar Beroeps Onderwijs" + }, + { + "value": "https://ror.org/02xgxme97", + "label": "NHL Stenden University of Applied Sciences" + }, + { + "value": "https://ror.org/04mfj5474", + "label": "NHTV Breda University of Applied Sciences" + }, + { + "value": "https://ror.org/025gshh98", + "label": "NTI University" + }, + { + "value": "https://ror.org/03y974j42", + "label": "Nefrovisie" + }, + { + "value": "https://ror.org/025fr4535", + "label": "Netherlands Graduate Research School of Science, Technology and Modern Culture" + }, + { + "value": "https://ror.org/05dbptw67", + "label": "Netherlands Graduate School of Linguistics" + }, + { + "value": "https://ror.org/0567f1w19", + "label": "Netherlands Institute for Catalysis Research" + }, + { + "value": "https://ror.org/04mx1wt49", + "label": "Netherlands Research School for Literary Studies" + }, + { + "value": "https://ror.org/033958n71", + "label": "Netherlands School of Public Administration" + }, + { + "value": "https://ror.org/018528593", + "label": "Nyenrode Business University" + }, + { + "value": "https://ror.org/01n92vv28", + "label": "Oncode Institute" + }, + { + "value": "https://ror.org/018dfmf50", + "label": "Open University in the Netherlands" + }, + { + "value": "https://ror.org/008za0b72", + "label": "Openbare Scholengemeenschap De Hogeberg" + }, + { + "value": "https://ror.org/03naar428", + "label": "Postmaster Psychologie Opleidingen" + }, + { + "value": "https://ror.org/016w23120", + "label": "Protestant Theological University" + }, + { + "value": "https://ror.org/016xsfp80", + "label": "Radboud University Nijmegen" + }, + { + "value": "https://ror.org/02ar29j81", + "label": "Reformed University of Applied Sciences" + }, + { + "value": "https://ror.org/02z0ywx65", + "label": "Regionale scholengemeenschap Goeree-Overflakkee" + }, + { + "value": "https://ror.org/01jcjpa26", + "label": "Reinwardt Academie" + }, + { + "value": "https://ror.org/03bkq6a07", + "label": "Research School for Medieval Studies" + }, + { + "value": 
"https://ror.org/0481e1q24", + "label": "Rotterdam University of Applied Sciences" + }, + { + "value": "https://ror.org/00490vc18", + "label": "Royal Academy of Art" + }, + { + "value": "https://ror.org/01mwwwn80", + "label": "Royal Conservatory of The Hague" + }, + { + "value": "https://ror.org/0175ya539", + "label": "Royal Dutch Kentalis" + }, + { + "value": "https://ror.org/005t9n460", + "label": "Saxion University of Applied Sciences" + }, + { + "value": "https://ror.org/01dcz6f16", + "label": "Scalda" + }, + { + "value": "https://ror.org/04x99xv31", + "label": "Stoas University of Applied Sciences" + }, + { + "value": "https://ror.org/03q7ay915", + "label": "TIAS School for Business and Society" + }, + { + "value": "https://ror.org/01dn09e98", + "label": "Terra" + }, + { + "value": "https://ror.org/021zvq422", + "label": "The Hague University of Applied Sciences" + }, + { + "value": "https://ror.org/02x435584", + "label": "Theological University" + }, + { + "value": "https://ror.org/03s2fjy85", + "label": "Theological University of Apeldoorn" + }, + { + "value": "https://ror.org/0183vre95", + "label": "TiFN" + }, + { + "value": "https://ror.org/04b8v1s79", + "label": "Tilburg University" + }, + { + "value": "https://ror.org/054xxtt73", + "label": "Tinbergen Institute" + }, + { + "value": "https://ror.org/00560h931", + "label": "Tio University" + }, + { + "value": "https://ror.org/05f0rzm15", + "label": "Transnational University Limburg" + }, + { + "value": "https://ror.org/04ryr8437", + "label": "Udens College" + }, + { + "value": "https://ror.org/002fzqp75", + "label": "University for Peace" + }, + { + "value": "https://ror.org/04dkp9463", + "label": "University of Amsterdam" + }, + { + "value": "https://ror.org/0093src13", + "label": "University of Applied Sciences Leiden" + }, + { + "value": "https://ror.org/028z9kw20", + "label": "University of Applied Sciences Utrecht" + }, + { + "value": "https://ror.org/012p63287", + "label": "University of Groningen" + }, + { + "value": "https://ror.org/04w5ec154", + "label": "University of Humanistic Studies" + }, + { + "value": "https://ror.org/006hf6230", + "label": "University of Twente" + }, + { + "value": "https://ror.org/018bzp792", + "label": "University of the Arts Utrecht" + }, + { + "value": "https://ror.org/04pp8hn57", + "label": "Utrecht University" + }, + { + "value": "https://ror.org/02mdbnd10", + "label": "Van Hall Larenstein University of Applied Sciences" + }, + { + "value": "https://ror.org/02dnd4c36", + "label": "Van Lodenstein College" + }, + { + "value": "https://ror.org/026rmnx29", + "label": "Via Vinci Academy" + }, + { + "value": "https://ror.org/01xpgs822", + "label": "Vilentum University of applied sciences" + }, + { + "value": "https://ror.org/008xxew50", + "label": "Vrije Universiteit Amsterdam" + }, + { + "value": "https://ror.org/04qw24q55", + "label": "Wageningen University & Research" + }, + { + "value": "https://ror.org/036k3ee35", + "label": "Windesheim Flevoland" + }, + { + "value": "https://ror.org/04zmc0e16", + "label": "Windesheim University of Applied Sciences" + }, + { + "value": "https://ror.org/020z55s31", + "label": "Wittenborg University" + }, + { + "value": "https://ror.org/02m6k0m40", + "label": "Zuyd University of Applied Sciences" + } ] From 99632552aba4983f208b2cb93b8cb29508b8a8e3 Mon Sep 17 00:00:00 2001 From: Sirjan Kaur Date: Wed, 6 Sep 2023 09:03:44 -0400 Subject: [PATCH 24/30] YDA-5275 - Added UI tests for versioned publication --- tests/features/ui/ui_publication.feature | 99 
++++++++++++++++++++++- tests/step_defs/ui/test_ui_publication.py | 6 ++ 2 files changed, 104 insertions(+), 1 deletion(-) diff --git a/tests/features/ui/ui_publication.feature b/tests/features/ui/ui_publication.feature index f65e0176e..4727cdfae 100644 --- a/tests/features/ui/ui_publication.feature +++ b/tests/features/ui/ui_publication.feature @@ -1,7 +1,7 @@ @ui Feature: Publication UI - Scenario Outline: Publication of teclab datapackage and test landing page output + Scenario Outline: Publication of teclab datapackages and test landing page output Given user datamanager is logged in When all notifications are reset Given user is not logged in @@ -86,6 +86,103 @@ Feature: Publication UI | vault-teclab-0 | + Scenario Outline: Researcher checks published landingpage + Given user researcher is logged in + And module "vault" is shown + When user browses to data package in + And user downloads file yoda-metadata.json + And user opens landingpage through system metadata + Then landingpage content matches yoda-metadata.json + + Examples: + | vault | + | vault-teclab-0 | + + Scenario Outline: Publication of teclab datapackages and test landing page output + Given user datamanager is logged in + When all notifications are reset + Given user is not logged in + + + Scenario Outline: Researcher submits folder + Given user researcher is logged in + And all notifications are reset + And module "research" is shown + When user browses to folder + And user submits the folder + Then the folder status is "Submitted" + + Examples: + | folder | + | research-teclab-0 | + + + Scenario Outline: Datamanager accepts folder + Given user datamanager is logged in + When user checks and clears notifications for status "Submitted" + And module "research" is shown + When user browses to folder + And user accepts the folder + Then the folder status is "Accepted" + + Examples: + | folder | + | research-teclab-0 | + + + Scenario Outline: Researcher submits data package for publication + Given user researcher is logged in + When user checks and clears notifications for status "Accepted" + And module "vault" is shown + When user browses to data package in + And user submits the data package for publication + And user chooses new version of existing publication + And user agrees with terms and conditions + Then the data package status is "Submitted for publication" + + Examples: + | vault | + | vault-teclab-0 | + + + Scenario Outline: Datamanager approves data package for publication + Given user datamanager is logged in + When user checks and clears notifications for status "Submitted for publication" + And module "vault" is shown + When user browses to data package in + And user approves the data package for publication + Then the data package status is "Approved for publication" + And the data package status is "Published" + + Examples: + | vault | + | vault-teclab-0 | + + + Scenario Outline: Researcher checks research provenance + Given user researcher is logged in + When user checks and clears notifications for status "Approved for publication" + And module "research" is shown + When user browses to folder + And user checks provenance info research + + + Examples: + | folder | + | research-teclab-0 | + + + Scenario Outline: Researcher checks vault provenance + Given user researcher is logged in + And module "vault" is shown + When user browses to data package in + And user checks provenance info vault + + Examples: + | vault | + | vault-teclab-0 | + + Scenario Outline: Researcher checks published landingpage Given 
user researcher is logged in And module "vault" is shown diff --git a/tests/step_defs/ui/test_ui_publication.py b/tests/step_defs/ui/test_ui_publication.py index df6373892..fc9f158fe 100644 --- a/tests/step_defs/ui/test_ui_publication.py +++ b/tests/step_defs/ui/test_ui_publication.py @@ -197,6 +197,12 @@ def ui_data_package_choose(browser): browser.find_by_css('.action-confirm-data-package-select').click() +@when('user chooses new version of existing publication') +def ui_data_package_choose_version(browser): + browser.find_by_id('dataPackage1').click() + browser.find_by_css('.action-confirm-data-package-select').click() + + @when('user agrees with terms and conditions') def ui_data_package_agree(browser): browser.find_by_id('checkbox-confirm-conditions').check() From 0917ca383c7a648bd3d210f860b083c83365f66d Mon Sep 17 00:00:00 2001 From: Sirjan Kaur Date: Thu, 21 Sep 2023 10:07:41 -0400 Subject: [PATCH 25/30] YDA-5275 - Fixed Relation Type to be backward compatible. --- vault.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vault.py b/vault.py index 357490257..d738cfb0a 100644 --- a/vault.py +++ b/vault.py @@ -1307,7 +1307,7 @@ def meta_add_new_version(ctx, new_version, previous_version): "Identifier_Scheme": "DOI", "Identifier": "https://doi.org/{}".format(get_doi(ctx, previous_version)) }, - "Relation_Type": "IsNewVersionOf: Current datapackage is new version of", + "Relation_Type": "IsNewVersionOf", "Title": "{}".format(get_title(ctx, previous_version)) } From d06fa3c83174c298faffe23e79c60ba23449909c Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Thu, 21 Sep 2023 16:53:46 +0200 Subject: [PATCH 26/30] YDA-5275: fix name collisions in publication UI tests --- tests/features/ui/ui_publication.feature | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/features/ui/ui_publication.feature b/tests/features/ui/ui_publication.feature index 4727cdfae..25baf668b 100644 --- a/tests/features/ui/ui_publication.feature +++ b/tests/features/ui/ui_publication.feature @@ -40,7 +40,7 @@ Feature: Publication UI When user browses to data package in And user submits the data package for publication And user chooses new publication - And user agrees with terms and conditions + And user agrees with terms and conditions Then the data package status is "Submitted for publication" Examples: @@ -98,13 +98,14 @@ Feature: Publication UI | vault | | vault-teclab-0 | - Scenario Outline: Publication of teclab datapackages and test landing page output + + Scenario Outline: Publication of teclab datapackages and test landing page output (version publication) Given user datamanager is logged in When all notifications are reset Given user is not logged in - Scenario Outline: Researcher submits folder + Scenario Outline: Researcher submits folder (version publication) Given user researcher is logged in And all notifications are reset And module "research" is shown @@ -117,7 +118,7 @@ Feature: Publication UI | research-teclab-0 | - Scenario Outline: Datamanager accepts folder + Scenario Outline: Datamanager accepts folder (version publication) Given user datamanager is logged in When user checks and clears notifications for status "Submitted" And module "research" is shown @@ -130,14 +131,14 @@ Feature: Publication UI | research-teclab-0 | - Scenario Outline: Researcher submits data package for publication + Scenario Outline: Researcher submits data package for publication (version publication) Given user researcher is logged in When user checks and 
clears notifications for status "Accepted" And module "vault" is shown When user browses to data package in And user submits the data package for publication And user chooses new version of existing publication - And user agrees with terms and conditions + And user agrees with terms and conditions Then the data package status is "Submitted for publication" Examples: @@ -145,7 +146,7 @@ Feature: Publication UI | vault-teclab-0 | - Scenario Outline: Datamanager approves data package for publication + Scenario Outline: Datamanager approves data package for publication (version publication) Given user datamanager is logged in When user checks and clears notifications for status "Submitted for publication" And module "vault" is shown @@ -159,7 +160,7 @@ Feature: Publication UI | vault-teclab-0 | - Scenario Outline: Researcher checks research provenance + Scenario Outline: Researcher checks research provenance (version publication) Given user researcher is logged in When user checks and clears notifications for status "Approved for publication" And module "research" is shown @@ -172,7 +173,7 @@ Feature: Publication UI | research-teclab-0 | - Scenario Outline: Researcher checks vault provenance + Scenario Outline: Researcher checks vault provenance (version publication) Given user researcher is logged in And module "vault" is shown When user browses to data package in @@ -183,7 +184,7 @@ Feature: Publication UI | vault-teclab-0 | - Scenario Outline: Researcher checks published landingpage + Scenario Outline: Researcher checks published landingpage (version publication) Given user researcher is logged in And module "vault" is shown When user browses to data package in From 69622b083254b087919ab24356cbe861aa2348e1 Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Fri, 22 Sep 2023 09:25:23 +0200 Subject: [PATCH 27/30] YDA-5282: cleanup embargo handling --- publication.py | 311 +++++++++++++++++++++++++------------------------ 1 file changed, 156 insertions(+), 155 deletions(-) diff --git a/publication.py b/publication.py index b2dfe6e33..35e916b92 100644 --- a/publication.py +++ b/publication.py @@ -4,8 +4,6 @@ __copyright__ = 'Copyright (c) 2019-2023, Utrecht University' __license__ = 'GPLv3, see LICENSE' -# from datetime import datetime - from datetime import datetime import genquery @@ -24,159 +22,8 @@ 'rule_process_depublication', 'rule_process_republication', 'rule_update_publication', - 'rule_lift_embargos_on_data_access', - 'rule_add_lift_embargo_indications'] - - -@rule.make() -def rule_add_lift_embargo_indications(ctx): - """ - # PURPOSE: give all published datapackages that are under embargo a lift_embargo_date indication (for further cronjob processing) - - Each indication will hold the actual embargo date. - This will be picked up by a cronjob that will lift the embargo if applicable. 
- Then the indication will be removed so the datapackage will no longer be picked up - - """ - # check permissions - rodsadmin only - if user.user_type(ctx) != 'rodsadmin': - log.write(ctx, "User is no rodsadmin") - return 'Insufficient permissions - should only be called by rodsadmin' - - # select all vault packages with with embargo date - no matter when - zone = user.zone(ctx) - - # Find all packages that have embargo date - iter = genquery.row_iterator( - "COLL_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME like '" + "/{}/home/vault-%".format(zone) + "' AND META_COLL_ATTR_NAME = 'Embargo_End_Date'", - genquery.AS_LIST, ctx - ) - for row in iter: - vault_package = row[0] - embargo_date = row[1] - - # Only look at the PUBLISHED packages so check first - iter2 = genquery.row_iterator( - "COLL_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME = '" + vault_package + "' AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + "vault_status'", - genquery.AS_LIST, ctx - ) - - for row2 in iter2: - # Check whether lift_embargo_date is present already - iter3 = genquery.row_iterator( - "COLL_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME = '" + vault_package + "' AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + "lift_embargo_date'", - genquery.AS_LIST, ctx - ) - if not len(list(iter3)) > 0: - # Add lift_embargo indication - avu.set_on_coll(ctx, vault_package, constants.UUORGMETADATAPREFIX + 'lift_embargo_date', embargo_date) - - return 'OK' - - -@rule.make() -def rule_lift_embargos_on_data_access(ctx): - """ - PURPOSE: Find vault packages that have a data access embargo that can be lifted as the embargo expires. - - If lift_embargo_date < now: - - new landing page (add access link) - - secure copy new landing page to server - - set acls - - If all went well => remove lift_embargo_date indication - If something went wrong leave lift_embargo_date so it will be dealt with again the next time around - - :param ctx: Combined type of a callback and rei struct - - :returns: - """ - publication_state = {} - - # check permissions - rodsadmin only - if user.user_type(ctx) != 'rodsadmin': - log.write(ctx, "User is no rodsadmin") - return 'Insufficient permissions - should only be called by rodsadmin' - - # get publication configuration - publication_config = get_publication_config(ctx) - - zone = user.zone(ctx) - - # Find all packages that have embargo date for data access that must be lifted - iter = genquery.row_iterator( - "COLL_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME like '" + "/{}/home/vault-%".format(zone) + "'" - " AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + 'lift_embargo_date' + "'" - " AND META_COLL_ATTR_VALUE > '{}'".format(datetime.now().strftime('%Y-%m-%d')), - genquery.AS_LIST, ctx - ) - for row in iter: - vault_package = row[0] - - log.write(ctx, "Lift embargo for package: " + vault_package) - - # Per package (re)initialize publication state - publication_state = {} - publication_state["status"] = 'OK' - - # For this vault package, that has an embargo date that needs to be lifted, find whether data access resrictions apply - iter2 = genquery.row_iterator( - "COLL_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME = '{}' AND META_COLL_ATTR_NAME = 'Data_Access_Restriction'".format(vault_package), - genquery.AS_LIST, ctx - ) - for row2 in iter2: - # Check data access restriction - must be open - if row2[1].startswith('open'): - # Setup publication_state in such a way that landing page can be newly created and sent to the public host - - # First find DOI as all is hung up on that - 
org_publication_randomId - random_id = '' - iter3 = genquery.row_iterator( - "COLL_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME = '{}' AND META_COLL_ATTR_NAME = 'org_publication_randomId'".format(vault_package), - genquery.AS_LIST, ctx - ) - for row3 in iter3: - random_id = row3[1] - - publication_state["combiJsonPath"] = '/{}/yoda/publication/{}-combi.json'.format(zone, random_id) - publication_state["randomId"] = random_id - publication_state["vaultPackage"] = vault_package - - # Adjust landing page add data access link - try: - generate_landing_page(ctx, publication_state, "publish") - # will hold publication_state["landingPagePath"] as required for secure copy - log.write(ctx, publication_state["landingPagePath"]) - except Exception: - log.write(ctx, "Error while creating landing page with data access.") - publication_state["status"] = "Unrecoverable" - - if publication_state["status"] == "OK": - # Secure copy to public host - copy_landingpage_to_public_host(ctx, random_id, publication_config, publication_state) - - if publication_state.get("landingPageUploaded", None) != "yes": - log.write(ctx, 'Could not securely upload adjusted landing page to public host') - publication_state["status"] = "Unrecoverable" - else: - # Adjust ACLs so data can actually be reached - try: - msi.set_acl(ctx, "recursive", "read", "anonymous", vault_package) - except Exception: - log.write(ctx, "Could not set acls to read for: " + vault_package) - publication_state["status"] = "Unrecoverable" - - # if all went well remove the lift embargo attribute so it will not be selected again the next time around - if publication_state["status"] == 'OK': - # Only remove when embargo was lifted successfully. - # Not removing will ensure the entire process is repeated again next time around - avu.rmw_from_coll(ctx, vault_package, constants.UUORGMETADATAPREFIX + 'lift_embargo_date', '%') + 'rule_add_lift_embargo_indications', + 'rule_lift_embargos_on_data_access'] def get_publication_config(ctx): @@ -1692,3 +1539,157 @@ def get_all_versions(ctx, path, doi): """Rule interface for processing republication of a vault package.""" rule_process_republication = rule.make(inputs=range(1), outputs=range(1, 3))(process_republication) + + +@rule.make() +def rule_add_lift_embargo_indications(ctx): + """ + # PURPOSE: give all published datapackages that are under embargo a lift_embargo_date indication (for further cronjob processing) + + Each indication will hold the actual embargo date. + This will be picked up by a cronjob that will lift the embargo if applicable. 
+ Then the indication will be removed so the datapackage will no longer be picked up + + :param ctx: Combined type of a callback and rei struct + + :returns: Status of adding the lift embargo indications + """ + # check permissions - rodsadmin only + if user.user_type(ctx) != 'rodsadmin': + log.write(ctx, "User is no rodsadmin") + return 'Insufficient permissions - should only be called by rodsadmin' + + # select all vault packages with with embargo date - no matter when + zone = user.zone(ctx) + + # Find all packages that have embargo date + iter = genquery.row_iterator( + "COLL_NAME, META_COLL_ATTR_VALUE", + "COLL_NAME like '" + "/{}/home/vault-%".format(zone) + "' AND META_COLL_ATTR_NAME = 'Embargo_End_Date'", + genquery.AS_LIST, ctx + ) + for row in iter: + vault_package = row[0] + embargo_date = row[1] + + # Only look at the PUBLISHED packages so check first + iter2 = genquery.row_iterator( + "COLL_NAME, META_COLL_ATTR_VALUE", + "COLL_NAME = '" + vault_package + "' AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + "vault_status'", + genquery.AS_LIST, ctx + ) + + for row2 in iter2: + # Check whether lift_embargo_date is present already + iter3 = genquery.row_iterator( + "COLL_NAME, META_COLL_ATTR_VALUE", + "COLL_NAME = '" + vault_package + "' AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + "lift_embargo_date'", + genquery.AS_LIST, ctx + ) + if not len(list(iter3)) > 0: + # Add lift_embargo indication + avu.set_on_coll(ctx, vault_package, constants.UUORGMETADATAPREFIX + 'lift_embargo_date', embargo_date) + + return 'OK' + + +@rule.make() +def rule_lift_embargos_on_data_access(ctx): + """ + PURPOSE: Find vault packages that have a data access embargo that can be lifted as the embargo expires. + + If lift_embargo_date < now: + - new landing page (add access link) + - secure copy new landing page to server + - set acls + + If all went well => remove lift_embargo_date indication + If something went wrong leave lift_embargo_date so it will be dealt with again the next time around + + :param ctx: Combined type of a callback and rei struct + + :returns: Status of lifting the embargo indications + """ + publication_state = {} + + # check permissions - rodsadmin only + if user.user_type(ctx) != 'rodsadmin': + log.write(ctx, "User is no rodsadmin") + return 'Insufficient permissions - should only be called by rodsadmin' + + # get publication configuration + publication_config = get_publication_config(ctx) + + zone = user.zone(ctx) + + # Find all packages that have embargo date for data access that must be lifted + iter = genquery.row_iterator( + "COLL_NAME, META_COLL_ATTR_VALUE", + "COLL_NAME like '" + "/{}/home/vault-%".format(zone) + "'" + " AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + 'lift_embargo_date' + "'" + " AND META_COLL_ATTR_VALUE > '{}'".format(datetime.now().strftime('%Y-%m-%d')), + genquery.AS_LIST, ctx + ) + for row in iter: + vault_package = row[0] + + log.write(ctx, "Lift embargo for package: " + vault_package) + + # Per package (re)initialize publication state + publication_state = {} + publication_state["status"] = 'OK' + + # For this vault package, that has an embargo date that needs to be lifted, find whether data access resrictions apply + iter2 = genquery.row_iterator( + "COLL_NAME, META_COLL_ATTR_VALUE", + "COLL_NAME = '{}' AND META_COLL_ATTR_NAME = 'Data_Access_Restriction'".format(vault_package), + genquery.AS_LIST, ctx + ) + for row2 in iter2: + # Check data access restriction - must be open + if row2[1].startswith('open'): 
+ # Setup publication_state in such a way that landing page can be newly created and sent to the public host + + # First find DOI as all is hung up on that - org_publication_randomId + random_id = '' + iter3 = genquery.row_iterator( + "COLL_NAME, META_COLL_ATTR_VALUE", + "COLL_NAME = '{}' AND META_COLL_ATTR_NAME = 'org_publication_randomId'".format(vault_package), + genquery.AS_LIST, ctx + ) + for row3 in iter3: + random_id = row3[1] + + publication_state["combiJsonPath"] = '/{}/yoda/publication/{}-combi.json'.format(zone, random_id) + publication_state["randomId"] = random_id + publication_state["vaultPackage"] = vault_package + + # Adjust landing page add data access link + try: + generate_landing_page(ctx, publication_state, "publish") + # will hold publication_state["landingPagePath"] as required for secure copy + log.write(ctx, publication_state["landingPagePath"]) + except Exception: + log.write(ctx, "Error while creating landing page with data access.") + publication_state["status"] = "Unrecoverable" + + if publication_state["status"] == "OK": + # Secure copy to public host + copy_landingpage_to_public_host(ctx, random_id, publication_config, publication_state) + + if publication_state.get("landingPageUploaded", None) != "yes": + log.write(ctx, 'Could not securely upload adjusted landing page to public host') + publication_state["status"] = "Unrecoverable" + else: + # Adjust ACLs so data can actually be reached + try: + msi.set_acl(ctx, "recursive", "read", "anonymous", vault_package) + except Exception: + log.write(ctx, "Could not set acls to read for: " + vault_package) + publication_state["status"] = "Unrecoverable" + + # if all went well remove the lift embargo attribute so it will not be selected again the next time around + if publication_state["status"] == 'OK': + # Only remove when embargo was lifted successfully. + # Not removing will ensure the entire process is repeated again next time around + avu.rmw_from_coll(ctx, vault_package, constants.UUORGMETADATAPREFIX + 'lift_embargo_date', '%') From 56641f490a722c2946a4ddf812193dbc261aa03d Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Fri, 22 Sep 2023 13:45:25 +0200 Subject: [PATCH 28/30] YDA-5282: simplify updating publication after embargo end date --- publication.py | 117 ++------------------------------ tools/add-embargo-indications.r | 20 ------ 2 files changed, 4 insertions(+), 133 deletions(-) delete mode 100644 tools/add-embargo-indications.r diff --git a/publication.py b/publication.py index 35e916b92..7cd18aaed 100644 --- a/publication.py +++ b/publication.py @@ -22,7 +22,6 @@ 'rule_process_depublication', 'rule_process_republication', 'rule_update_publication', - 'rule_add_lift_embargo_indications', 'rule_lift_embargos_on_data_access'] @@ -1541,58 +1540,6 @@ def get_all_versions(ctx, path, doi): rule_process_republication = rule.make(inputs=range(1), outputs=range(1, 3))(process_republication) -@rule.make() -def rule_add_lift_embargo_indications(ctx): - """ - # PURPOSE: give all published datapackages that are under embargo a lift_embargo_date indication (for further cronjob processing) - - Each indication will hold the actual embargo date. - This will be picked up by a cronjob that will lift the embargo if applicable. 
- Then the indication will be removed so the datapackage will no longer be picked up - - :param ctx: Combined type of a callback and rei struct - - :returns: Status of adding the lift embargo indications - """ - # check permissions - rodsadmin only - if user.user_type(ctx) != 'rodsadmin': - log.write(ctx, "User is no rodsadmin") - return 'Insufficient permissions - should only be called by rodsadmin' - - # select all vault packages with with embargo date - no matter when - zone = user.zone(ctx) - - # Find all packages that have embargo date - iter = genquery.row_iterator( - "COLL_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME like '" + "/{}/home/vault-%".format(zone) + "' AND META_COLL_ATTR_NAME = 'Embargo_End_Date'", - genquery.AS_LIST, ctx - ) - for row in iter: - vault_package = row[0] - embargo_date = row[1] - - # Only look at the PUBLISHED packages so check first - iter2 = genquery.row_iterator( - "COLL_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME = '" + vault_package + "' AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + "vault_status'", - genquery.AS_LIST, ctx - ) - - for row2 in iter2: - # Check whether lift_embargo_date is present already - iter3 = genquery.row_iterator( - "COLL_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME = '" + vault_package + "' AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + "lift_embargo_date'", - genquery.AS_LIST, ctx - ) - if not len(list(iter3)) > 0: - # Add lift_embargo indication - avu.set_on_coll(ctx, vault_package, constants.UUORGMETADATAPREFIX + 'lift_embargo_date', embargo_date) - - return 'OK' - - @rule.make() def rule_lift_embargos_on_data_access(ctx): """ @@ -1610,16 +1557,11 @@ def rule_lift_embargos_on_data_access(ctx): :returns: Status of lifting the embargo indications """ - publication_state = {} - # check permissions - rodsadmin only if user.user_type(ctx) != 'rodsadmin': log.write(ctx, "User is no rodsadmin") return 'Insufficient permissions - should only be called by rodsadmin' - # get publication configuration - publication_config = get_publication_config(ctx) - zone = user.zone(ctx) # Find all packages that have embargo date for data access that must be lifted @@ -1634,62 +1576,11 @@ def rule_lift_embargos_on_data_access(ctx): vault_package = row[0] log.write(ctx, "Lift embargo for package: " + vault_package) + set_update_publication_state(ctx, vault_package) + publication_status = process_publication(ctx, vault_package) - # Per package (re)initialize publication state - publication_state = {} - publication_state["status"] = 'OK' - - # For this vault package, that has an embargo date that needs to be lifted, find whether data access resrictions apply - iter2 = genquery.row_iterator( - "COLL_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME = '{}' AND META_COLL_ATTR_NAME = 'Data_Access_Restriction'".format(vault_package), - genquery.AS_LIST, ctx - ) - for row2 in iter2: - # Check data access restriction - must be open - if row2[1].startswith('open'): - # Setup publication_state in such a way that landing page can be newly created and sent to the public host - - # First find DOI as all is hung up on that - org_publication_randomId - random_id = '' - iter3 = genquery.row_iterator( - "COLL_NAME, META_COLL_ATTR_VALUE", - "COLL_NAME = '{}' AND META_COLL_ATTR_NAME = 'org_publication_randomId'".format(vault_package), - genquery.AS_LIST, ctx - ) - for row3 in iter3: - random_id = row3[1] - - publication_state["combiJsonPath"] = '/{}/yoda/publication/{}-combi.json'.format(zone, random_id) - publication_state["randomId"] = random_id - 
publication_state["vaultPackage"] = vault_package - - # Adjust landing page add data access link - try: - generate_landing_page(ctx, publication_state, "publish") - # will hold publication_state["landingPagePath"] as required for secure copy - log.write(ctx, publication_state["landingPagePath"]) - except Exception: - log.write(ctx, "Error while creating landing page with data access.") - publication_state["status"] = "Unrecoverable" - - if publication_state["status"] == "OK": - # Secure copy to public host - copy_landingpage_to_public_host(ctx, random_id, publication_config, publication_state) - - if publication_state.get("landingPageUploaded", None) != "yes": - log.write(ctx, 'Could not securely upload adjusted landing page to public host') - publication_state["status"] = "Unrecoverable" - else: - # Adjust ACLs so data can actually be reached - try: - msi.set_acl(ctx, "recursive", "read", "anonymous", vault_package) - except Exception: - log.write(ctx, "Could not set acls to read for: " + vault_package) - publication_state["status"] = "Unrecoverable" - - # if all went well remove the lift embargo attribute so it will not be selected again the next time around - if publication_state["status"] == 'OK': + # If all went well remove the lift embargo attribute so it will not be selected again the next time around. + if publication_status == 'OK': # Only remove when embargo was lifted successfully. # Not removing will ensure the entire process is repeated again next time around avu.rmw_from_coll(ctx, vault_package, constants.UUORGMETADATAPREFIX + 'lift_embargo_date', '%') diff --git a/tools/add-embargo-indications.r b/tools/add-embargo-indications.r deleted file mode 100644 index 516f52220..000000000 --- a/tools/add-embargo-indications.r +++ /dev/null @@ -1,20 +0,0 @@ -# Add Lift embargo inidications so the cron job to lift them can pick up these indicated vault packages -run { - uuGetUserType("$userNameClient#$rodsZoneClient", *usertype); - - if (*usertype != "rodsadmin") { - failmsg(-1, "This script needs to be run by a rodsadmin"); - } - - # Retrieve current timestamp. - msiGetIcatTime(*timestamp, "human"); - writeLine('stdout', '[' ++ *timestamp ++ '] Start adding lift embargo indications to vault packages'); - - *result = rule_add_lift_embargo_indications(); - - writeLine('stdout', 'Status: Finished adding lift embargo indications to vault packages'); - writeLine('stdout', *result); - -} -input null -output ruleExecOut From 9de77efd413ede613a5fd526787a41333c8bd91c Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Mon, 25 Sep 2023 08:57:55 +0200 Subject: [PATCH 29/30] YDA-5282: lift embargo metadata already removed during publication update --- publication.py | 23 ++++++----------------- tools/lift-embargos.r | 24 ++++++++++-------------- 2 files changed, 16 insertions(+), 31 deletions(-) diff --git a/publication.py b/publication.py index 7cd18aaed..ff5e2201c 100644 --- a/publication.py +++ b/publication.py @@ -1542,16 +1542,9 @@ def get_all_versions(ctx, path, doi): @rule.make() def rule_lift_embargos_on_data_access(ctx): - """ - PURPOSE: Find vault packages that have a data access embargo that can be lifted as the embargo expires. - - If lift_embargo_date < now: - - new landing page (add access link) - - secure copy new landing page to server - - set acls + """Find vault packages that have a data access embargo that can be lifted as the embargo expires. 
- If all went well => remove lift_embargo_date indication - If something went wrong leave lift_embargo_date so it will be dealt with again the next time around + If lift_embargo_date <= now, update publication. :param ctx: Combined type of a callback and rei struct @@ -1569,18 +1562,14 @@ def rule_lift_embargos_on_data_access(ctx): "COLL_NAME, META_COLL_ATTR_VALUE", "COLL_NAME like '" + "/{}/home/vault-%".format(zone) + "'" " AND META_COLL_ATTR_NAME = '" + constants.UUORGMETADATAPREFIX + 'lift_embargo_date' + "'" - " AND META_COLL_ATTR_VALUE > '{}'".format(datetime.now().strftime('%Y-%m-%d')), + " AND META_COLL_ATTR_VALUE <= '{}'".format(datetime.now().strftime('%Y-%m-%d')), genquery.AS_LIST, ctx ) for row in iter: vault_package = row[0] - log.write(ctx, "Lift embargo for package: " + vault_package) + log.write(ctx, "Lift embargo for vault package: " + vault_package) set_update_publication_state(ctx, vault_package) - publication_status = process_publication(ctx, vault_package) + process_publication(ctx, vault_package) - # If all went well remove the lift embargo attribute so it will not be selected again the next time around. - if publication_status == 'OK': - # Only remove when embargo was lifted successfully. - # Not removing will ensure the entire process is repeated again next time around - avu.rmw_from_coll(ctx, vault_package, constants.UUORGMETADATAPREFIX + 'lift_embargo_date', '%') + return 'OK' diff --git a/tools/lift-embargos.r b/tools/lift-embargos.r index 94f6d15c8..7f121d893 100644 --- a/tools/lift-embargos.r +++ b/tools/lift-embargos.r @@ -1,20 +1,16 @@ -# Lift embargo on data access when embargo date is passed +#!/usr/bin/irule -r irods_rule_engine_plugin-irods_rule_language-instance -F +# +# Lift embargo on data access when embargo date is passed. run { - uuGetUserType("$userNameClient#$rodsZoneClient", *usertype); + uuGetUserType("$userNameClient#$rodsZoneClient", *usertype); - if (*usertype != "rodsadmin") { - failmsg(-1, "This script needs to be run by a rodsadmin"); - } - - # Retrieve current timestamp. - msiGetIcatTime(*timestamp, "human"); - writeLine('stdout', '[' ++ *timestamp ++ '] Start finding data access under embargo that must be lifted'); - - *result = rule_lift_embargos_on_data_access(); - - writeLine('stdout', 'Status: Finished finding of data under embargo that must be lifted'); - writeLine('stdout', *result); + if (*usertype != "rodsadmin") { + failmsg(-1, "This script needs to be run by a rodsadmin"); + } + writeLine('stdout', 'Start finding vault packages under embargo that can be lifted'); + *result = rule_lift_embargos_on_data_access(); + writeLine('stdout', 'Finished finding vault packages under embargo that can be lifted with status *result'); } input null output ruleExecOut From 8dd57b5e8f487791e159ff63cbdd0077dff18bea Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Mon, 25 Sep 2023 09:08:48 +0200 Subject: [PATCH 30/30] YDA-5282: improve documentation --- publication.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/publication.py b/publication.py index ff5e2201c..3cac0f6b6 100644 --- a/publication.py +++ b/publication.py @@ -590,7 +590,8 @@ def set_access_restrictions(ctx, vault_package, publication_state): This function is called when (re)publishing a vault package. The embargo date of a package is essential determining access. 
-    ALS EMBARGO => zet embargo end date in lift_embargo_date zodat later wordt opgepakt door cronjob
+    If current date < embargo end date, then set end date in `org_lift_embargo_date`
+    to be picked up by lift embargo cronjob.
 
     :param ctx:          Combined type of a callback and rei struct
     :param vault_package: Path to the package in the vault
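
A minimal sketch of the embargo check described in the docstring above, mirroring the 'YYYY-MM-DD' string comparison used in the lift-embargo query earlier in this series; the helper name embargo_is_active and the commented usage are assumptions for illustration, not code taken from publication.py.

from datetime import datetime

def embargo_is_active(embargo_end_date):
    """Return True while the embargo end date lies in the future.

    Assumes the date is stored as a 'YYYY-MM-DD' string, so lexicographic
    comparison against today's date is equivalent to a date comparison.
    """
    if not embargo_end_date:
        return False
    return datetime.now().strftime('%Y-%m-%d') < embargo_end_date

# Usage sketch: while the embargo is active, store the end date under the
# org_lift_embargo_date attribute so the cronjob (tools/lift-embargos.r ->
# rule_lift_embargos_on_data_access) can pick the package up and lift it later.
# if embargo_is_active(embargo_end_date):
#     avu.set_on_coll(ctx, vault_package, constants.UUORGMETADATAPREFIX + 'lift_embargo_date', embargo_end_date)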