From 9ecf5c8bc442147bdc1575241a4ce7f4dd1e1f8c Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Tue, 10 Dec 2024 12:57:34 +0100 Subject: [PATCH] YDA-5992: add type annotations --- .../workflows/api-and-integration-tests.yml | 2 +- .github/workflows/python.yml | 6 +- admin.py | 2 +- browse.py | 58 +-- data_access_token.py | 23 +- datacite.py | 9 +- datarequest.py | 403 +++++++----------- deposit.py | 25 +- epic.py | 7 +- folder.py | 121 +++--- groups.py | 111 ++--- groups_import.py | 24 +- json_datacite.py | 99 ++--- json_landing_page.py | 19 +- mail.py | 15 +- meta.py | 115 +++-- meta_form.py | 13 +- notifications.py | 27 +- policies.py | 132 ++++-- policies_datamanager.py | 7 +- policies_datapackage_status.py | 21 +- policies_datarequest_status.py | 8 +- policies_folder_status.py | 22 +- policies_intake.py | 16 +- policies_utils.py | 9 +- provenance.py | 15 +- publication.py | 101 +++-- publication_troubleshoot.py | 2 +- replication.py | 28 +- research.py | 60 +-- resources.py | 45 +- revision_strategies.py | 48 ++- revision_utils.py | 28 +- revisions.py | 108 +++-- schema.py | 29 +- schema_transformation.py | 26 +- schema_transformations.py | 23 +- schema_transformations_utils.py | 15 +- settings.py | 8 +- setup.cfg | 14 + sram.py | 21 +- vault.py | 292 +++++-------- vault_archive.py | 46 +- vault_download.py | 16 +- 44 files changed, 1082 insertions(+), 1137 deletions(-) diff --git a/.github/workflows/api-and-integration-tests.yml b/.github/workflows/api-and-integration-tests.yml index e69d7b878..530256846 100644 --- a/.github/workflows/api-and-integration-tests.yml +++ b/.github/workflows/api-and-integration-tests.yml @@ -114,7 +114,7 @@ jobs: cd tests nohup bash -c 'while true ; do sleep 5 ; ../yoda/docker/run-cronjob.sh copytovault >> ../copytovault.log 2>&1 ; ../yoda/docker/run-cronjob.sh publication >> ../publication.log 2>&1 ; done' & test -d mycache || mkdir -p mycache - python3 -m pytest --skip-ui --datarequest --deposit -o cache_dir=mycache 
--environment environments/docker.json + python3 -m pytest --skip-ui --deposit -o cache_dir=mycache --environment environments/docker.json cat ../copytovault.log cat ../publication.log diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 89a7b1704..3a5b5afa7 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -19,12 +19,16 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install flake8==6.0.0 flake8-import-order==0.18.2 darglint==1.8.1 codespell types-requests + python -m pip install flake8==6.0.0 flake8-import-order==0.18.2 darglint==1.8.1 codespell mypy types-requests types-python-dateutil - name: Lint with flake8 run: | flake8 --statistics + - name: Check static typing + run: | + mypy . --explicit-package-bases + - name: Check code for common misspellings run: | codespell -q 3 --skip="*.r,*.xsd,*.json" || true diff --git a/admin.py b/admin.py index b869cf880..988a43096 100644 --- a/admin.py +++ b/admin.py @@ -11,7 +11,7 @@ @api.make() -def api_admin_has_access(ctx): +def api_admin_has_access(ctx: rule.Context) -> api.Result: """ Checks if the user has admin access based on user rights or membership in admin-priv group. diff --git a/browse.py b/browse.py index e9a80bf86..aff9e9e1c 100644 --- a/browse.py +++ b/browse.py @@ -5,6 +5,7 @@ import re from collections import OrderedDict +from typing import Dict import magic from genquery import AS_DICT, Query @@ -18,13 +19,13 @@ @api.make() -def api_browse_folder(ctx, - coll='/', - sort_on='name', - sort_order='asc', - offset=0, - limit=10, - space=pathutil.Space.OTHER.value): +def api_browse_folder(ctx: rule.Context, + coll: str = '/', + sort_on: str = 'name', + sort_order: str = 'asc', + offset: int = 0, + limit: int = 10, + space: str = pathutil.Space.OTHER.value) -> api.Result: """Get paginated collection contents, including size/modify date information. 
:param ctx: Combined type of a callback and rei struct @@ -37,7 +38,7 @@ def api_browse_folder(ctx, :returns: Dict with paginated collection contents """ - def transform(row): + def transform(row: Dict) -> Dict: # Remove ORDER_BY etc. wrappers from column names. x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} if 'DATA_NAME' in x and 'META_DATA_ATTR_VALUE' in x: @@ -104,13 +105,13 @@ def transform(row): @api.make() -def api_browse_collections(ctx, - coll='/', - sort_on='name', - sort_order='asc', - offset=0, - limit=10, - space=pathutil.Space.OTHER.value): +def api_browse_collections(ctx: rule.Context, + coll: str = '/', + sort_on: str = 'name', + sort_order: str = 'asc', + offset: int = 0, + limit: int = 10, + space: str = pathutil.Space.OTHER.value) -> api.Result: """Get paginated collection contents, including size/modify date information. This function browses a folder and only looks at the collections in it. No dataobjects. @@ -126,7 +127,7 @@ def api_browse_collections(ctx, :returns: Dict with paginated collection contents """ - def transform(row): + def transform(row: Dict) -> Dict: # Remove ORDER_BY etc. wrappers from column names. x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} @@ -184,13 +185,13 @@ def transform(row): @api.make() -def api_search(ctx, - search_string, - search_type='filename', - sort_on='name', - sort_order='asc', - offset=0, - limit=10): +def api_search(ctx: rule.Context, + search_string: str, + search_type: str = 'filename', + sort_on: str = 'name', + sort_order: str = 'asc', + offset: int = 0, + limit: int = 10) -> api.Result: """Get paginated search results, including size/modify date/location information. :param ctx: Combined type of a callback and rei struct @@ -203,7 +204,7 @@ def api_search(ctx, :returns: Dict with paginated search results """ - def transform(row): + def transform(row: Dict) -> Dict: # Remove ORDER_BY etc. wrappers from column names. 
x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} @@ -216,8 +217,7 @@ def transform(row): 'type': 'data', 'size': int(x['DATA_SIZE']), 'modify_time': int(x['DATA_MODIFY_TIME'])} - - if 'COLL_NAME' in x: + elif 'COLL_NAME' in x: _, _, path, subpath = pathutil.info(x['COLL_NAME']) if subpath != '': path = path + "/" + subpath @@ -225,6 +225,8 @@ def transform(row): return {'name': "/{}".format(path), 'type': 'coll', 'modify_time': int(x['COLL_MODIFY_TIME'])} + else: + return {} # Replace, %, _ and \ since iRODS does not handle those correctly. # HdR this can only be done in a situation where search_type is NOT status! @@ -285,7 +287,7 @@ def transform(row): ('items', datas)]) -def _filter_vault_deposit_index(row): +def _filter_vault_deposit_index(row: Dict) -> bool: """This internal function filters out index collections in deposit vault collections. These collections are used internally by Yoda for indexing data package metadata, and should not be displayed. @@ -302,7 +304,7 @@ def _filter_vault_deposit_index(row): @api.make() -def api_load_text_obj(ctx, file_path='/'): +def api_load_text_obj(ctx: rule.Context, file_path: str = '/') -> api.Result: """Retrieve a text file (as a string) in either the research, deposit, or vault space. :param ctx: Combined type of a callback and rei struct diff --git a/data_access_token.py b/data_access_token.py index 29afd5537..1425e6384 100644 --- a/data_access_token.py +++ b/data_access_token.py @@ -7,6 +7,7 @@ import secrets from datetime import datetime, timedelta from traceback import print_exc +from typing import List from pysqlcipher3 import dbapi2 as sqlite3 @@ -19,7 +20,7 @@ @api.make() -def api_token_generate(ctx, label=None): +def api_token_generate(ctx: rule.Context, label: str = "") -> api.Result: """Generates a token for user authentication. 
:param ctx: Combined type of a callback and rei struct @@ -27,7 +28,7 @@ def api_token_generate(ctx, label=None): :returns: Generated token or API error """ - def generate_token(): + def generate_token() -> str: length = int(config.token_length) token = secrets.token_urlsafe(length) return token[:length] @@ -63,14 +64,13 @@ def generate_token(): @api.make() -def api_token_load(ctx): +def api_token_load(ctx: rule.Context) -> api.Result: """Loads valid tokens of user. :param ctx: Combined type of a callback and rei struct :returns: Valid tokens """ - if not token_database_initialized(): return api.Error('DatabaseError', 'Internal error: token database unavailable') @@ -83,8 +83,8 @@ def api_token_load(ctx): conn.execute("PRAGMA key='%s'" % (config.token_database_password)) for row in conn.execute('''SELECT label, exp_time FROM tokens WHERE user=:user_id AND exp_time > :now''', {"user_id": user_id, "now": datetime.now()}): - exp_time = datetime.strptime(row[1], '%Y-%m-%d %H:%M:%S.%f') - exp_time = exp_time.strftime('%Y-%m-%d %H:%M:%S') + date_time = datetime.strptime(row[1], '%Y-%m-%d %H:%M:%S.%f') + exp_time = date_time.strftime('%Y-%m-%d %H:%M:%S') result.append({"label": row[0], "exp_time": exp_time}) except Exception: print_exc() @@ -98,7 +98,7 @@ def api_token_load(ctx): @api.make() -def api_token_delete(ctx, label): +def api_token_delete(ctx: rule.Context, label: str) -> api.Result: """Deletes a token of the user. 
:param ctx: Combined type of a callback and rei struct @@ -130,10 +130,10 @@ def api_token_delete(ctx, label): @api.make() -def api_token_delete_expired(ctx): +def api_token_delete_expired(ctx: rule.Context) -> api.Result: """Deletes expired tokens of current user - :param ctx: Combined type of a callback and rei struct + :param ctx: Combined type of a callback and rei struct :returns: Status of token deletion """ @@ -160,8 +160,9 @@ def api_token_delete_expired(ctx): return result -def get_all_tokens(ctx): +def get_all_tokens(ctx: rule.Context) -> List: """Retrieve all valid tokens. + :param ctx: Combined type of a callback and rei struct :returns: Valid tokens @@ -192,7 +193,7 @@ def get_all_tokens(ctx): return result -def token_database_initialized(): +def token_database_initialized() -> bool: """Checks whether token database has been initialized :returns: Boolean value diff --git a/datacite.py b/datacite.py index 3f5146f56..4c2605ef3 100644 --- a/datacite.py +++ b/datacite.py @@ -5,13 +5,14 @@ import random import string +from typing import Dict import requests from util import * -def metadata_post(ctx, payload): +def metadata_post(payload: Dict) -> int: """Register DOI metadata with DataCite.""" url = "{}/dois".format(config.datacite_rest_api_url) auth = (config.datacite_username, config.datacite_password) @@ -27,7 +28,7 @@ def metadata_post(ctx, payload): return response.status_code -def metadata_put(ctx, doi, payload): +def metadata_put(doi: str, payload: str) -> int: """Update metadata with DataCite.""" url = "{}/dois/{}".format(config.datacite_rest_api_url, doi) auth = (config.datacite_username, config.datacite_password) @@ -43,7 +44,7 @@ def metadata_put(ctx, doi, payload): return response.status_code -def metadata_get(ctx, doi): +def metadata_get(doi: str) -> int: """Check with DataCite if DOI is available.""" url = "{}/dois/{}".format(config.datacite_rest_api_url, doi) auth = (config.datacite_username, config.datacite_password) @@ -58,7 +59,7 @@ def 
metadata_get(ctx, doi): return response.status_code -def generate_random_id(ctx, length): +def generate_random_id(length: int) -> str: """Generate random ID for DOI.""" characters = string.ascii_uppercase + string.digits return ''.join(random.choice(characters) for x in range(int(length))) diff --git a/datarequest.py b/datarequest.py index 71b98fd14..ec5b73896 100644 --- a/datarequest.py +++ b/datarequest.py @@ -10,6 +10,7 @@ from collections import OrderedDict from datetime import datetime from enum import Enum +from typing import Dict, List, Optional import jsonschema from genquery import AS_DICT, AS_LIST, Query, row_iterator @@ -198,14 +199,14 @@ class status(Enum): ('DTA_SIGNED', 'DATA_READY')]] -def status_transition_allowed(ctx, current_status, new_status): +def status_transition_allowed(ctx: rule.Context, current_status: status, new_status: status) -> bool: transition = (current_status, new_status) return transition in status_transitions -def status_set(ctx, request_id, status): - """Set the status of a data request +def status_set(ctx: rule.Context, request_id: str, status: status) -> None: + """Set the status of a data request. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request @@ -214,8 +215,8 @@ def status_set(ctx, request_id, status): metadata_set(ctx, request_id, "status", status.value) -def status_get_from_path(ctx, path): - """Get the status of a datarequest from a path +def status_get_from_path(ctx: rule.Context, path: str) -> status: + """Get the status of a datarequest from a path. :param ctx: Combined type of a callback and rei struct :param path: Path of the datarequest collection @@ -228,8 +229,8 @@ def status_get_from_path(ctx, path): return status_get(ctx, request_id) -def status_get(ctx, request_id): - """Get the status of a data request +def status_get(ctx: rule.Context, request_id: str) -> status: + """Get the status of a data request. 
:param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request @@ -256,8 +257,8 @@ def status_get(ctx, request_id): raise error.UUError("Could not unambiguously determine the current status for datarequest <{}>".format(request_id)) -def type_get(ctx, request_id): - """Get the type of a data request +def type_get(ctx: rule.Context, request_id: str) -> type: + """Get the type of a data request. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request @@ -281,8 +282,7 @@ def type_get(ctx, request_id): return datarequest_type -def available_documents_get(ctx, request_id, datarequest_type, datarequest_status): - +def available_documents_get(ctx: rule.Context, request_id: str, datarequest_type: str, datarequest_status: str) -> List: # Construct list of existing documents available_documents = [] if datarequest_type == type.REGULAR.value: @@ -327,15 +327,14 @@ def available_documents_get(ctx, request_id, datarequest_type, datarequest_statu # Helper functions # ################################################### -def metadata_set(ctx, request_id, key, value): - """Set an arbitrary metadata field on a data request +def metadata_set(ctx: rule.Context, request_id: str, key: str, value: str) -> None: + """Set an arbitrary metadata field on a data request. 
:param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param key: Key of the metadata field :param value: Value of the metadata field """ - # Construct path to the collection of the data request coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) @@ -349,7 +348,7 @@ def metadata_set(ctx, request_id, key, value): ctx.adminDatarequestActions() -def generate_request_id(ctx): +def generate_request_id(ctx: rule.Context) -> int: coll = "/{}/{}".format(user.zone(ctx), DRCOLLECTION) max_request_id = 0 @@ -362,38 +361,35 @@ def generate_request_id(ctx): @api.make() -def api_datarequest_action_permitted(ctx, request_id, roles, statuses): - """Wrapper around datarequest_action_permitted +def api_datarequest_action_permitted(ctx: rule.Context, request_id: str, roles: List, statuses: List) -> api.Result: + """Wrapper around datarequest_action_permitted. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request + :param roles: List of permitted roles (possible values: PM, ED, DM, DAC, OWN, REV) + :param statuses: List of permitted current data request statuses or None (check skipped) - :param roles: Array of permitted roles (possible values: PM, ED, DM, DAC, OWN, REV) - :param statuses: Array of permitted current data request statuses or None (check skipped) - - :returns: True if permitted, False if not - :rtype: Boolean + :returns: True if permitted, False if not """ # Convert statuses to list of status enumeration elements if statuses is not None: - def get_status(stat): + def get_status(stat: str) -> status: return status[stat] statuses = list(map(get_status, statuses)) return datarequest_action_permitted(ctx, request_id, roles, statuses) -def datarequest_action_permitted(ctx, request_id, roles, statuses): - """Check if current user and data request status meet specified restrictions +def datarequest_action_permitted(ctx: rule.Context, 
request_id: str, roles: List, statuses: Optional[List]) -> bool: + """Check if current user and data request status meet specified restrictions. - :param ctx: Combined type of a callback and rei struct - :param request_id: Unique identifier of the data request - :param roles: Array of permitted roles (possible values: PM, ED, DM, DAC, OWN, REV) - :param statuses: Array of permitted current data request statuses or None (check skipped) + :param ctx: Combined type of a callback and rei struct + :param request_id: Unique identifier of the data request + :param roles: List of permitted roles (possible values: PM, ED, DM, DAC, OWN, REV) + :param statuses: List of permitted current data request statuses or None (check skipped) - :returns: True if permitted, False if not - :rtype: Boolean + :returns: True if permitted, False if not """ try: # Force conversion of request_id to string @@ -428,28 +424,26 @@ def datarequest_action_permitted(ctx, request_id, roles, statuses): @api.make() -def api_datarequest_roles_get(ctx, request_id=None): - """Get roles of invoking user +def api_datarequest_roles_get(ctx: rule.Context, request_id: Optional[str] = None) -> api.Result: + """Get roles of invoking user. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request (OWN and REV roles will not be checked if this parameter is missing) - :returns: Array of user roles - :rtype: Array + :returns: List of user roles """ return datarequest_roles_get(ctx, request_id) -def datarequest_roles_get(ctx, request_id): - """Get roles of invoking user +def datarequest_roles_get(ctx: rule.Context, request_id: Optional[str] = None) -> List: + """Get roles of invoking user. 
:param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request (OWN and REV roles will not be checked if this parameter is missing) - :returns: Array of user roles - :rtype: Array + :returns: List of user roles """ roles = [] if user.is_member_of(ctx, GROUP_PM): @@ -467,27 +461,24 @@ def datarequest_roles_get(ctx, request_id): return roles -def datarequest_is_owner(ctx, request_id): - """Check if the invoking user is also the owner of a given data request +def datarequest_is_owner(ctx: rule.Context, request_id: str) -> bool: + """Check if the invoking user is also the owner of a given data request. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request - :return: True if user_name is owner of specified data request else False - :rtype: bool + :return: True if user_name is owner of specified data request else False """ return datarequest_owner_get(ctx, request_id) == user.name(ctx) -def datarequest_owner_get(ctx, request_id): - """Get the account name (i.e. email address) of the owner of a data request +def datarequest_owner_get(ctx: rule.Context, request_id: str) -> Optional[str]: + """Get the account name (i.e. email address) of the owner of a data request. 
:param ctx: Combined type of a callback and a rei struct :param request_id: Unique identifier of the data request - :type request_id: str - :return: Account name of data request owner - :rtype: string + :return: Account name of data request owner """ # Construct path to the data request file_path = "/{}/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id, DATAREQUEST @@ -500,8 +491,8 @@ def datarequest_owner_get(ctx, request_id): return None -def datarequest_is_reviewer(ctx, request_id, pending=False): - """Check if a user is assigned as reviewer to a data request +def datarequest_is_reviewer(ctx: rule.Context, request_id: str, pending: bool = False) -> bool: + """Check if a user is assigned as reviewer to a data request. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request @@ -525,8 +516,8 @@ def datarequest_is_reviewer(ctx, request_id, pending=False): return is_reviewer -def datarequest_reviewers_get(ctx, request_id, pending=False): - """Return a list of users assigned as reviewers to a data request +def datarequest_reviewers_get(ctx: rule.Context, request_id: str, pending: bool = False) -> List[str]: + """Return a list of users assigned as reviewers to a data request. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request @@ -557,12 +548,12 @@ def datarequest_reviewers_get(ctx, request_id, pending=False): @api.make() -def api_datarequest_schema_get(ctx, schema_name, version=SCHEMA_VERSION): +def api_datarequest_schema_get(ctx: rule.Context, schema_name: str, version: str = SCHEMA_VERSION) -> api.Result: return datarequest_schema_get(ctx, schema_name, version) -def datarequest_schema_get(ctx, schema_name, version=SCHEMA_VERSION): - """Get schema and UI schema of a datarequest form +def datarequest_schema_get(ctx: rule.Context, schema_name: str, version: str = SCHEMA_VERSION) -> api.Result: + """Get schema and UI schema of a datarequest form. 
:param ctx: Combined type of a callback and rei struct :param schema_name: Name of schema @@ -587,13 +578,13 @@ def datarequest_schema_get(ctx, schema_name, version=SCHEMA_VERSION): @api.make() -def api_datarequest_resubmission_id_get(ctx, request_id): - """Given a request ID, get the request ID of the associated resubmitted data request +def api_datarequest_resubmission_id_get(ctx: rule.Context, request_id: str) -> api.Result: + """Given a request ID, get the request ID of the associated resubmitted data request. - :param ctx: Combined type of a callback and rei struct - :param request_id: Unique identifier of the data request + :param ctx: Combined type of a callback and rei struct + :param request_id: Unique identifier of the data request - :returns: String containing the request ID of the resubmitted data request + :returns: String containing the request ID of the resubmitted data request """ coll = "/{}/{}".format(user.zone(ctx), DRCOLLECTION) coll_path = list(Query(ctx, ['COLL_NAME'], "COLL_PARENT_NAME = '{}' AND DATA_NAME = '{}' AND META_DATA_ATTR_NAME = 'previous_request_id' AND META_DATA_ATTR_VALUE in '{}'".format(coll, DATAREQUEST + JSON_EXT, request_id), output=AS_DICT)) @@ -605,14 +596,14 @@ def api_datarequest_resubmission_id_get(ctx, request_id): return api.Error("metadata_read_error", "Not exactly 1 match for when searching for data requests with previous_request_id = {}".format(request_id)) -def datarequest_provenance_write(ctx, request_id, request_status): - """Write the timestamp of a status transition to a provenance log +def datarequest_provenance_write(ctx: rule.Context, request_id: str, request_status: status) -> api.Result: + """Write the timestamp of a status transition to a provenance log. 
:param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param request_status: Status of which to write a timestamp - :returns: Nothing + :returns: Nothing or API error """ # Check if request ID is valid if re.search(r"^\d+$", request_id) is None: @@ -644,7 +635,7 @@ def datarequest_provenance_write(ctx, request_id, request_status): return api.Error("write_error", "Could not write timestamp to provenance log: {}.".format(e)) -def datarequest_data_valid(ctx, data, schema_name=False, schema=False): +def datarequest_data_valid(ctx: rule.Context, data: Dict, schema_name: Optional[str] = None, schema: Optional[str] = None) -> bool: """Check if form data contains no errors Default mode of operation is to provide schema data and the schema name of the schema against @@ -665,7 +656,7 @@ def datarequest_data_valid(ctx, data, schema_name=False, schema=False): :param schema: JSON schema against which to validate the form data (in case a default schema doesn't suffice) - :returns: Boolean indicating if datarequest is valid or API error + :returns: Boolean indicating if datarequest is valid """ # Check if a schema is specified if not (schema_name or schema): @@ -682,11 +673,10 @@ def datarequest_data_valid(ctx, data, schema_name=False, schema=False): return len(errors) == 0 except error.UUJsonValidationError: # File may be missing or not valid JSON - return api.Error("validation_error", - "{} form data could not be validated against its schema.".format(schema_name)) + return False -def cc_email_addresses_get(contact_object): +def cc_email_addresses_get(contact_object: Dict) -> Optional[str]: try: cc = contact_object['cc_email_addresses'] return cc.replace(' ', '') @@ -695,7 +685,7 @@ def cc_email_addresses_get(contact_object): @rule.make(inputs=[], outputs=[0, 1]) -def rule_datarequest_review_period_expiration_check(ctx): +def rule_datarequest_review_period_expiration_check(ctx: rule.Context) -> None: coll = 
"/{}/{}".format(user.zone(ctx), DRCOLLECTION) criteria = "COLL_PARENT_NAME = '{}' AND DATA_NAME = '{}' AND META_DATA_ATTR_NAME = 'endOfReviewPeriod' AND META_DATA_ATTR_VALUE < '{}' AND META_DATA_ATTR_NAME = 'status' AND META_DATA_ATTR_VALUE = 'UNDER_REVIEW'".format(coll, DATAREQUEST + JSON_EXT, int(time.time())) ccols = ['COLL_NAME'] @@ -704,7 +694,7 @@ def rule_datarequest_review_period_expiration_check(ctx): datarequest_process_expired_review_periods(ctx, [result['COLL_NAME'].split('/')[-1] for result in list(qcoll)]) -def datarequest_sync_avus(ctx, request_id): +def datarequest_sync_avus(ctx: rule.Context, request_id: str) -> None: """Sometimes data requests are manually edited in place (e.g. for small textual changes). This in-place editing is done on the datarequest.json file. @@ -741,8 +731,13 @@ def datarequest_sync_avus(ctx, request_id): ################################################### @api.make() -def api_datarequest_browse(ctx, sort_on='name', sort_order='asc', offset=0, limit=10, - archived=False, dacrequests=True): +def api_datarequest_browse(ctx: rule.Context, + sort_on: str = 'name', + sort_order: str = 'asc', + offset: int = 0, + limit: int = 10, + archived: bool = False, + dacrequests: bool = True) -> api.Result: """Get paginated datarequests, including size/modify date information. 
:param ctx: Combined type of a callback and rei struct @@ -755,7 +750,7 @@ def api_datarequest_browse(ctx, sort_on='name', sort_order='asc', offset=0, limi :param dacrequests: If true, show a DAC member's own data requests (instead of data requests to be reviewed - :returns: Dict with paginated datarequests + :returns: Dict with paginated datarequests """ # Convert parameters that couldn't be passed as actual boolean values to booleans archived = archived == "True" @@ -764,7 +759,7 @@ def api_datarequest_browse(ctx, sort_on='name', sort_order='asc', offset=0, limi dac_member = user.is_member_of(ctx, GROUP_DAC) coll = "/{}/{}".format(user.zone(ctx), DRCOLLECTION) - def transform(row): + def transform(row: Dict) -> Dict: # Remove ORDER_BY etc. wrappers from column names. x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} @@ -773,14 +768,14 @@ def transform(row): 'create_time': int(x['COLL_CREATE_TIME']), 'status': x['META_DATA_ATTR_VALUE']} - def transform_title(row): + def transform_title(row: Dict) -> Dict: # Remove ORDER_BY etc. wrappers from column names. x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} return {'id': x['COLL_NAME'].split('/')[-1], 'title': x['META_DATA_ATTR_VALUE']} - def transform_status(row): + def transform_status(row: Dict) -> Dict: # Remove ORDER_BY etc. wrappers from column names. x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} @@ -852,26 +847,25 @@ def transform_status(row): return OrderedDict([('total', qcoll.total_rows()), ('items', colls)]) -def datarequest_process_expired_review_periods(ctx, request_ids): +def datarequest_process_expired_review_periods(ctx: rule.Context, request_ids: List) -> None: """Process expired review periods by setting their status to REVIEWED. 
:param ctx: Combined type of a callback and rei struct - :param request_ids: Array of unique data request identifiers + :param request_ids: List of unique data request identifiers """ for request_id in request_ids: status_set(ctx, request_id, status.REVIEWED) -def file_write_and_lock(ctx, coll_path, filename, data, readers): +def file_write_and_lock(ctx: rule.Context, coll_path: str, filename: str, data: Dict, readers: List[str]) -> None: """Grant temporary write permission and write file to disk. :param ctx: Combined type of a callback and rei struct :param coll_path: Path to collection of file :param filename: Name of file :param data: The data to be written to disk - :param readers: Array of user names that should be given read access to the file + :param readers: List of user names that should be given read access to the file """ - file_path = "{}/{}".format(coll_path, filename) # Grant temporary write permission @@ -896,7 +890,7 @@ def file_write_and_lock(ctx, coll_path, filename, data, readers): @api.make() -def api_datarequest_submit(ctx, data, draft, draft_request_id=None): +def api_datarequest_submit(ctx: rule.Context, data: Dict, draft: bool, draft_request_id: Optional[str] = None) -> api.Result: """Persist a data request to disk. :param ctx: Combined type of a callback and rei struct @@ -935,7 +929,7 @@ def api_datarequest_submit(ctx, data, draft, draft_request_id=None): request_id = draft_request_id else: # Generate request ID and construct data request collection path. - request_id = generate_request_id(ctx) + request_id = str(generate_request_id(ctx)) # Construct data request collection and file path. coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) @@ -1030,7 +1024,7 @@ def api_datarequest_submit(ctx, data, draft, draft_request_id=None): @api.make() -def api_datarequest_get(ctx, request_id): +def api_datarequest_get(ctx: rule.Context, request_id: int) -> api.Result: """Retrieve a data request. 
:param ctx: Combined type of a callback and rei struct @@ -1039,25 +1033,25 @@ def api_datarequest_get(ctx, request_id): :returns: Dict with request JSON and status or API error on failure """ # Force conversion of request_id to string - request_id = str(request_id) + request_id_str = str(request_id) # Permission check - datarequest_action_permitted(ctx, request_id, ["PM", "DM", "DAC", "OWN"], None) + datarequest_action_permitted(ctx, request_id_str, ["PM", "DM", "DAC", "OWN"], None) # Get request type try: - datarequest_type = type_get(ctx, request_id).value + datarequest_type = type_get(ctx, request_id_str).value except Exception as e: return api.Error("datarequest_type_fail", "Error: {}".format(e)) # Get request status - datarequest_status = status_get(ctx, request_id).value + datarequest_status = status_get(ctx, request_id_str).value # Get list of available documents - datarequest_available_documents = available_documents_get(ctx, request_id, datarequest_type, datarequest_status) + datarequest_available_documents = available_documents_get(ctx, request_id_str, datarequest_type, datarequest_status) # Get request - datarequest_json = datarequest_get(ctx, request_id) + datarequest_json = datarequest_get(ctx, request_id_str) datarequest = json.loads(datarequest_json) # Get request schema version @@ -1080,7 +1074,7 @@ def api_datarequest_get(ctx, request_id): 'requestStatus': datarequest_status, 'requestAvailableDocuments': datarequest_available_documents} -def datarequest_get(ctx, request_id): +def datarequest_get(ctx: rule.Context, request_id: str) -> str | api.Error: """Retrieve a data request. 
:param ctx: Combined type of a callback and rei struct @@ -1088,9 +1082,6 @@ def datarequest_get(ctx, request_id): :returns: Datarequest JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Construct filename and filepath coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) file_name = DATAREQUEST + JSON_EXT @@ -1104,18 +1095,15 @@ def datarequest_get(ctx, request_id): @api.make() -def api_datarequest_attachment_upload_permission(ctx, request_id, action): +def api_datarequest_attachment_upload_permission(ctx: rule.Context, request_id: str, action: str) -> api.Result: """ :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param action: String specifying whether write permission must be granted ("grant") or revoked ("grantread" or "revoke") - :returns: Nothing + :returns: Nothing """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["OWN"], [status.PENDING_ATTACHMENTS]) @@ -1131,16 +1119,13 @@ def api_datarequest_attachment_upload_permission(ctx, request_id, action): @api.make() -def api_datarequest_attachment_post_upload_actions(ctx, request_id, filename): +def api_datarequest_attachment_post_upload_actions(ctx: rule.Context, request_id: str, filename: str) -> api.Result: """Grant read permissions on the attachment to the owner of the associated data request. 
:param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param filename: Filename of attachment """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["OWN"], [status.PENDING_ATTACHMENTS]) @@ -1152,31 +1137,28 @@ def api_datarequest_attachment_post_upload_actions(ctx, request_id, filename): @api.make() -def api_datarequest_attachments_get(ctx, request_id): - """Get all attachments of a given data request +def api_datarequest_attachments_get(ctx: rule.Context, request_id: str) -> api.Result: + """Get all attachments of a given data request. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request - :returns: List of attachment filenames + :returns: List of attachment filenames """ return datarequest_attachments_get(ctx, request_id) -def datarequest_attachments_get(ctx, request_id): - """Get all attachments of a given data request +def datarequest_attachments_get(ctx: rule.Context, request_id: str) -> List: + """Get all attachments of a given data request. 
:param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request - :returns: List of attachment filenames + :returns: List of attachment filenames """ - def get_filename(file_path): + def get_filename(file_path: str) -> str: return file_path.split('/')[-1] - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM", "DM", "DAC", "OWN"], None) @@ -1187,7 +1169,7 @@ def get_filename(file_path): @api.make() -def api_datarequest_attachments_submit(ctx, request_id): +def api_datarequest_attachments_submit(ctx: rule.Context, request_id: str) -> api.Result: """Finalize the submission of uploaded attachments :param ctx: Combined type of a callback and rei struct @@ -1209,7 +1191,7 @@ def api_datarequest_attachments_submit(ctx, request_id): @api.make() -def api_datarequest_preliminary_review_submit(ctx, data, request_id): +def api_datarequest_preliminary_review_submit(ctx: rule.Context, data: Dict, request_id: str) -> api.Result: """Persist a preliminary review to disk. :param ctx: Combined type of a callback and rei struct @@ -1218,9 +1200,6 @@ def api_datarequest_preliminary_review_submit(ctx, data, request_id): :returns: API status """ - # Force conversion of request_id to string - request_id = str(request_id) - # Validate data against schema if not datarequest_data_valid(ctx, data, PR_REVIEW): return api.Error("validation_fail", @@ -1255,7 +1234,7 @@ def api_datarequest_preliminary_review_submit(ctx, data, request_id): @api.make() -def api_datarequest_preliminary_review_get(ctx, request_id): +def api_datarequest_preliminary_review_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve a preliminary review. 
:param ctx: Combined type of a callback and rei struct @@ -1263,16 +1242,13 @@ def api_datarequest_preliminary_review_get(ctx, request_id): :returns: Preliminary review JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM", "DM", "REV"], None) return datarequest_preliminary_review_get(ctx, request_id) -def datarequest_preliminary_review_get(ctx, request_id): +def datarequest_preliminary_review_get(ctx: rule.Context, request_id: str) -> str | api.Result: """Retrieve a preliminary review. :param ctx: Combined type of a callback and rei struct @@ -1280,9 +1256,6 @@ def datarequest_preliminary_review_get(ctx, request_id): :returns: Preliminary review JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Construct filename coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) file_name = PR_REVIEW + JSON_EXT @@ -1296,7 +1269,7 @@ def datarequest_preliminary_review_get(ctx, request_id): @api.make() -def api_datarequest_datamanager_review_submit(ctx, data, request_id): +def api_datarequest_datamanager_review_submit(ctx: rule.Context, data: Dict, request_id: str) -> api.Result: """Persist a datamanager review to disk. :param ctx: Combined type of a callback and rei struct @@ -1305,9 +1278,6 @@ def api_datarequest_datamanager_review_submit(ctx, data, request_id): :returns: API status """ - # Force conversion of request_id to string - request_id = str(request_id) - # Validate data against schema if not datarequest_data_valid(ctx, data, DM_REVIEW): return api.Error("validation_fail", @@ -1343,7 +1313,7 @@ def api_datarequest_datamanager_review_submit(ctx, data, request_id): @api.make() -def api_datarequest_datamanager_review_get(ctx, request_id): +def api_datarequest_datamanager_review_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve a data manager review. 
:param ctx: Combined type of a callback and rei struct @@ -1351,9 +1321,6 @@ def api_datarequest_datamanager_review_get(ctx, request_id): :returns: Datamanager review JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM", "DM", "REV"], None) @@ -1361,7 +1328,7 @@ def api_datarequest_datamanager_review_get(ctx, request_id): return datarequest_datamanager_review_get(ctx, request_id) -def datarequest_datamanager_review_get(ctx, request_id): +def datarequest_datamanager_review_get(ctx: rule.Context, request_id: str) -> str | api.Result: """Retrieve a data manager review. :param ctx: Combined type of a callback and rei struct @@ -1369,9 +1336,6 @@ def datarequest_datamanager_review_get(ctx, request_id): :returns: Datamanager review JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Construct filename coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) file_name = DM_REVIEW + JSON_EXT @@ -1385,11 +1349,11 @@ def datarequest_datamanager_review_get(ctx, request_id): @api.make() -def api_datarequest_dac_members_get(ctx, request_id): +def api_datarequest_dac_members_get(ctx: rule.Context, request_id: str) -> api.Result: return datarequest_dac_members_get(ctx, request_id) -def datarequest_dac_members_get(ctx, request_id): +def datarequest_dac_members_get(ctx: rule.Context, request_id: str) -> List: """Get list of DAC members :param ctx: Combined type of a callback and rei struct @@ -1408,7 +1372,7 @@ def datarequest_dac_members_get(ctx, request_id): @api.make() -def api_datarequest_assignment_submit(ctx, data, request_id): +def api_datarequest_assignment_submit(ctx: rule.Context, data: Dict, request_id: str) -> api.Result: """Persist an assignment to disk. 
:param ctx: Combined type of a callback and rei struct @@ -1417,9 +1381,6 @@ def api_datarequest_assignment_submit(ctx, data, request_id): :returns: API status """ - # Force conversion of request_id to string - request_id = str(request_id) - # Validate data against schema dac_members = datarequest_dac_members_get(ctx, request_id) schema = datarequest_schema_get(ctx, ASSIGNMENT) @@ -1475,7 +1436,7 @@ def api_datarequest_assignment_submit(ctx, data, request_id): return api.Error("InvalidData", "Invalid value for 'decision' key in datamanager review review JSON data.") -def assign_request(ctx, assignees, request_id): +def assign_request(ctx: rule.Context, assignees: str, request_id: str) -> None: """Assign a data request to one or more DAC members for review. :param ctx: Combined type of a callback and rei struct @@ -1509,7 +1470,7 @@ def assign_request(ctx, assignees, request_id): @api.make() -def api_datarequest_assignment_get(ctx, request_id): +def api_datarequest_assignment_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve assignment. :param ctx: Combined type of a callback and rei struct @@ -1517,16 +1478,13 @@ def api_datarequest_assignment_get(ctx, request_id): :returns: Datarequest assignment JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM"], None) return datarequest_assignment_get(ctx, request_id) -def datarequest_assignment_get(ctx, request_id): +def datarequest_assignment_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve an assignment :param ctx: Combined type of a callback and rei struct @@ -1550,7 +1508,7 @@ def datarequest_assignment_get(ctx, request_id): @api.make() -def api_datarequest_review_submit(ctx, data, request_id): +def api_datarequest_review_submit(ctx: rule.Context, data: Dict, request_id: str) -> api.Result: """Persist a data request review to disk. 
:param ctx: Combined type of a callback and rei struct @@ -1559,9 +1517,6 @@ def api_datarequest_review_submit(ctx, data, request_id): :returns: A JSON dict with status info for the front office """ - # Force conversion of request_id to string - request_id = str(request_id) - # Validate data against schema if not datarequest_data_valid(ctx, data, REVIEW): return api.Error("validation_fail", @@ -1616,7 +1571,7 @@ def api_datarequest_review_submit(ctx, data, request_id): @api.make() -def api_datarequest_reviews_get(ctx, request_id): +def api_datarequest_reviews_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve a data request review. :param ctx: Combined type of a callback and rei struct @@ -1624,9 +1579,6 @@ def api_datarequest_reviews_get(ctx, request_id): :returns: Datarequest review JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM", "REV"], None) @@ -1650,7 +1602,7 @@ def api_datarequest_reviews_get(ctx, request_id): @api.make() -def api_datarequest_evaluation_submit(ctx, data, request_id): +def api_datarequest_evaluation_submit(ctx: rule.Context, data: Dict, request_id: str) -> api.Result: """Persist an evaluation to disk. 
:param ctx: Combined type of a callback and rei struct @@ -1658,10 +1610,9 @@ def api_datarequest_evaluation_submit(ctx, data, request_id): :param request_id: Unique identifier of the data request :returns: API status - """ - # Force conversion of request_id to string - request_id = str(request_id) + :raises UUError: If datarequest owner could not be determined + """ # Validate data against schema if not datarequest_data_valid(ctx, data, EVALUATION): return api.Error("validation_fail", @@ -1676,8 +1627,11 @@ def api_datarequest_evaluation_submit(ctx, data, request_id): # Write approval conditions to disk if applicable if 'approval_conditions' in data: try: + datarequest_owner = datarequest_owner_get(ctx, request_id) + if datarequest_owner is None: + raise error.UUError file_write_and_lock(ctx, coll_path, APPROVAL_CONDITIONS + JSON_EXT, - data['approval_conditions'], [datarequest_owner_get(ctx, request_id)]) + data['approval_conditions'], [datarequest_owner]) except error.UUError: return api.Error('write_error', 'Could not write approval conditions to disk') @@ -1709,7 +1663,7 @@ def api_datarequest_evaluation_submit(ctx, data, request_id): @api.make() -def api_datarequest_approval_conditions_get(ctx, request_id): +def api_datarequest_approval_conditions_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve approval conditions :param ctx: Combined type of a callback and rei struct @@ -1717,9 +1671,6 @@ def api_datarequest_approval_conditions_get(ctx, request_id): :returns: Approval conditions JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["OWN"], None) @@ -1741,7 +1692,7 @@ def api_datarequest_approval_conditions_get(ctx, request_id): @api.make() -def api_datarequest_evaluation_get(ctx, request_id): +def api_datarequest_evaluation_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve an evaluation. 
:param ctx: Combined type of a callback and rei struct @@ -1749,16 +1700,13 @@ def api_datarequest_evaluation_get(ctx, request_id): :returns: Evaluation JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM", "DAC"], None) return datarequest_evaluation_get(ctx, request_id) -def datarequest_evaluation_get(ctx, request_id): +def datarequest_evaluation_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve an evaluation :param ctx: Combined type of a callback and rei struct @@ -1766,9 +1714,6 @@ def datarequest_evaluation_get(ctx, request_id): :returns: Evaluation JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Construct filename coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) file_name = EVALUATION + JSON_EXT @@ -1781,7 +1726,7 @@ def datarequest_evaluation_get(ctx, request_id): return api.Error("ReadError", "Could not get evaluation data.") -def datarequest_feedback_write(ctx, request_id, feedback): +def datarequest_feedback_write(ctx: rule.Context, request_id: str, feedback: str) -> api.Result: """ Write feedback to researcher to a separate file and grant the researcher read access :param ctx: Combined type of a callback and rei struct @@ -1790,9 +1735,6 @@ def datarequest_feedback_write(ctx, request_id, feedback): :returns: API status """ - # Force conversion of request_id to string - request_id = str(request_id) - # Construct path to feedback file coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) @@ -1811,7 +1753,7 @@ def datarequest_feedback_write(ctx, request_id, feedback): @api.make() -def api_datarequest_feedback_get(ctx, request_id): +def api_datarequest_feedback_get(ctx: rule.Context, request_id: str) -> api.Result: """Get feedback for researcher :param ctx: Combined type of a callback and rei struct @@ 
-1819,9 +1761,6 @@ def api_datarequest_feedback_get(ctx, request_id): :returns: JSON-formatted string containing feedback for researcher """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["OWN"], [status.PRELIMINARY_REJECT, status.PRELIMINARY_RESUBMIT, @@ -1841,7 +1780,7 @@ def api_datarequest_feedback_get(ctx, request_id): @api.make() -def api_datarequest_preregistration_submit(ctx, data, request_id): +def api_datarequest_preregistration_submit(ctx: rule.Context, data: Dict, request_id: str) -> api.Result: """Persist a preregistration to disk. :param ctx: Combined type of a callback and rei struct @@ -1850,9 +1789,6 @@ def api_datarequest_preregistration_submit(ctx, data, request_id): :returns: API status """ - # Force conversion of request_id to string - request_id = str(request_id) - # Validate data against schema if not datarequest_data_valid(ctx, data, PREREGISTRATION): return api.Error("validation_fail", @@ -1875,7 +1811,7 @@ def api_datarequest_preregistration_submit(ctx, data, request_id): @api.make() -def api_datarequest_preregistration_get(ctx, request_id): +def api_datarequest_preregistration_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve a preregistration. :param ctx: Combined type of a callback and rei struct @@ -1883,16 +1819,13 @@ def api_datarequest_preregistration_get(ctx, request_id): :returns: Preregistration JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM"], None) return datarequest_preregistration_get(ctx, request_id) -def datarequest_preregistration_get(ctx, request_id): +def datarequest_preregistration_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve a preregistration. 
:param ctx: Combined type of a callback and rei struct @@ -1900,9 +1833,6 @@ def datarequest_preregistration_get(ctx, request_id): :returns: Preregistration JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Construct filename coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) file_name = PREREGISTRATION + JSON_EXT @@ -1916,15 +1846,12 @@ def datarequest_preregistration_get(ctx, request_id): @api.make() -def api_datarequest_preregistration_confirm(ctx, request_id): +def api_datarequest_preregistration_confirm(ctx: rule.Context, request_id: str) -> api.Result: """Set the status of a submitted datarequest to PREREGISTRATION_CONFIRMED. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM"], [status.PREREGISTRATION_SUBMITTED]) @@ -1932,18 +1859,15 @@ def api_datarequest_preregistration_confirm(ctx, request_id): @api.make() -def api_datarequest_dta_upload_permission(ctx, request_id, action): +def api_datarequest_dta_upload_permission(ctx: rule.Context, request_id: str, action: str) -> api.Result: """ :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param action: String specifying whether write permission must be granted ("grant") or revoked ("revoke") - :returns: Nothing + :returns: API result """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["DM"], [status.APPROVED, status.DAO_APPROVED]) @@ -1958,16 +1882,15 @@ def api_datarequest_dta_upload_permission(ctx, request_id, action): @api.make() -def api_datarequest_dta_post_upload_actions(ctx, request_id, filename): +def api_datarequest_dta_post_upload_actions(ctx: 
rule.Context, request_id: str, filename: str) -> api.Result: """Grant read permissions on the DTA to the owner of the associated data request. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param filename: Filename of DTA - """ - # Force conversion of request_id to string - request_id = str(request_id) + :returns: API result + """ # Permission check datarequest_action_permitted(ctx, request_id, ["DM"], [status.APPROVED, status.DAO_APPROVED]) @@ -1981,25 +1904,22 @@ def api_datarequest_dta_post_upload_actions(ctx, request_id, filename): # Set status to dta_ready status_set(ctx, request_id, status.DTA_READY) + return api.OK() @api.make() -def api_datarequest_dta_path_get(ctx, request_id): +def api_datarequest_dta_path_get(ctx: rule.Context, request_id: str) -> api.Result: return datarequest_dta_path_get(ctx, request_id) -def datarequest_dta_path_get(ctx, request_id): - - """Get path to DTA +def datarequest_dta_path_get(ctx: rule.Context, request_id: str) -> api.Result: + """Get path to DTA. 
:param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request - :returns: Path to DTA + :returns: Path to DTA """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM", "DM", "OWN"], None) @@ -2008,18 +1928,15 @@ def datarequest_dta_path_get(ctx, request_id): @api.make() -def api_datarequest_signed_dta_upload_permission(ctx, request_id, action): +def api_datarequest_signed_dta_upload_permission(ctx: rule.Context, request_id: str, action: str) -> api.Result: """ :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param action: String specifying whether write permission must be granted ("grant") or revoked ("revoke") - :returns: Nothing + :returns: API result """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["OWN"], [status.DTA_READY]) @@ -2033,16 +1950,15 @@ def api_datarequest_signed_dta_upload_permission(ctx, request_id, action): @api.make() -def api_datarequest_signed_dta_post_upload_actions(ctx, request_id, filename): +def api_datarequest_signed_dta_post_upload_actions(ctx: rule.Context, request_id: str, filename: str) -> api.Result: """Grant read permissions on the signed DTA to the datamanagers group. 
:param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param filename: Filename of signed DTA - """ - # Force conversion of request_id to string - request_id = str(request_id) + :return: API result + """ # Permission check datarequest_action_permitted(ctx, request_id, ["OWN"], [status.DTA_READY]) @@ -2055,10 +1971,11 @@ def api_datarequest_signed_dta_post_upload_actions(ctx, request_id, filename): # Set status to dta_signed status_set(ctx, request_id, status.DTA_SIGNED) + return api.OK() @api.make() -def api_datarequest_signed_dta_path_get(ctx, request_id): +def api_datarequest_signed_dta_path_get(ctx: rule.Context, request_id: str) -> api.Result: """Get path to signed DTA :param ctx: Combined type of a callback and rei struct @@ -2066,9 +1983,6 @@ def api_datarequest_signed_dta_path_get(ctx, request_id): :returns: Path to signed DTA """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM", "DM", "OWN"], None) @@ -2077,15 +1991,12 @@ def api_datarequest_signed_dta_path_get(ctx, request_id): @api.make() -def api_datarequest_data_ready(ctx, request_id): +def api_datarequest_data_ready(ctx: rule.Context, request_id: str) -> api.Result: """Set the status of a submitted datarequest to DATA_READY. 
:param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["DM"], [status.DTA_SIGNED]) @@ -2096,14 +2007,14 @@ def api_datarequest_data_ready(ctx, request_id): # Email logic # ################################################### -def truncated_title_get(ctx, request_id): +def truncated_title_get(ctx: rule.Context, request_id: str) -> str: datarequest = json.loads(datarequest_get(ctx, request_id)) study_title = datarequest['datarequest']['study_information']['title'] return study_title if len(study_title) < 16 else study_title[0:15] + "..." -def send_emails(ctx, obj_name, status_to): +def send_emails(ctx: rule.Context, obj_name: str, status_to: str) -> None: # Get request ID temp, _ = pathutil.chop(obj_name) _, request_id = pathutil.chop(temp) @@ -2160,7 +2071,7 @@ def send_emails(ctx, obj_name, status_to): data_ready_emails(ctx, request_id) -def datarequest_submit_emails(ctx, request_id, dao=False): +def datarequest_submit_emails(ctx: rule.Context, request_id: str, dao: bool = False) -> None: # Get (source data for) email input parameters datarequest = json.loads(datarequest_get(ctx, request_id)) researcher = datarequest['contact']['principal_investigator'] @@ -2190,7 +2101,7 @@ def datarequest_submit_emails(ctx, request_id, dao=False): researcher['department'], timestamp, study_title) -def preliminary_review_emails(ctx, request_id, datarequest_status): +def preliminary_review_emails(ctx: rule.Context, request_id: str, datarequest_status: status) -> None: # Get (source data for) email input parameters datamanager_members = group.members(ctx, GROUP_DM) truncated_title = truncated_title_get(ctx, request_id) @@ -2222,7 +2133,7 @@ def preliminary_review_emails(ctx, request_id, datarequest_status): feedback_for_researcher, pm_email, request_id, cc) -def 
datamanager_review_emails(ctx, request_id, datarequest_status): +def datamanager_review_emails(ctx: rule.Context, request_id: str, datarequest_status: status) -> None: # Get (source data for) email input parameters pm_members = group.members(ctx, GROUP_PM) datamanager_review = json.loads(datarequest_datamanager_review_get(ctx, request_id)) @@ -2242,7 +2153,7 @@ def datamanager_review_emails(ctx, request_id, datarequest_status): request_id) -def assignment_emails(ctx, request_id, datarequest_status): +def assignment_emails(ctx: rule.Context, request_id: str, datarequest_status: status) -> None: # Get (source data for) email input parameters datarequest = json.loads(datarequest_get(ctx, request_id)) researcher = datarequest['contact']['principal_investigator'] @@ -2275,7 +2186,7 @@ def assignment_emails(ctx, request_id, datarequest_status): feedback_for_researcher, pm_email, request_id, cc) -def review_emails(ctx, request_id): +def review_emails(ctx: rule.Context, request_id: str) -> None: # Get (source data for) email input parameters datarequest = json.loads(datarequest_get(ctx, request_id)) researcher = datarequest['contact']['principal_investigator'] @@ -2292,7 +2203,7 @@ def review_emails(ctx, request_id): mail_review_pm(ctx, truncated_title, pm_email, request_id) -def evaluation_emails(ctx, request_id, datarequest_status): +def evaluation_emails(ctx: rule.Context, request_id: str, datarequest_status: status) -> None: # Get (source data for) email input parameters datarequest = json.loads(datarequest_get(ctx, request_id)) researcher = datarequest['contact']['principal_investigator'] @@ -2315,7 +2226,7 @@ def evaluation_emails(ctx, request_id, datarequest_status): feedback_for_researcher, pm_email, request_id, cc) -def preregistration_submit_emails(ctx, request_id): +def preregistration_submit_emails(ctx: rule.Context, request_id: str) -> None: # Get parameters truncated_title = truncated_title_get(ctx, request_id) @@ -2324,7 +2235,7 @@ def 
preregistration_submit_emails(ctx, request_id): mail_preregistration_submit(ctx, truncated_title, pm_email, request_id) -def datarequest_approved_emails(ctx, request_id, dao=False): +def datarequest_approved_emails(ctx: rule.Context, request_id: str, dao: bool = False) -> None: # Get parameters datarequest = json.loads(datarequest_get(ctx, request_id)) researcher = datarequest['contact']['principal_investigator'] @@ -2347,7 +2258,7 @@ def datarequest_approved_emails(ctx, request_id, dao=False): request_id) -def dta_post_upload_actions_emails(ctx, request_id): +def dta_post_upload_actions_emails(ctx: rule.Context, request_id: str) -> None: # Get (source data for) email input parameters datarequest = json.loads(datarequest_get(ctx, request_id)) researcher = datarequest['contact']['principal_investigator'] @@ -2362,7 +2273,7 @@ def dta_post_upload_actions_emails(ctx, request_id): mail_dta(ctx, truncated_title, researcher_email, researcher['name'], request_id, cc) -def signed_dta_post_upload_actions_emails(ctx, request_id): +def signed_dta_post_upload_actions_emails(ctx: rule.Context, request_id: str) -> None: # Get (source data for) email input parameters datamanager_members = group.members(ctx, GROUP_DM) authoring_dm = data_object.owner(ctx, datarequest_dta_path_get(ctx, request_id))[0] @@ -2375,7 +2286,7 @@ def signed_dta_post_upload_actions_emails(ctx, request_id): mail_signed_dta(ctx, truncated_title, authoring_dm, datamanager_email, request_id, cc) -def data_ready_emails(ctx, request_id): +def data_ready_emails(ctx: rule.Context, request_id: str) -> None: # Get (source data for) email input parameters datarequest = json.loads(datarequest_get(ctx, request_id)) researcher = datarequest['contact']['principal_investigator'] diff --git a/deposit.py b/deposit.py index 69161a9f2..0b7f5095f 100644 --- a/deposit.py +++ b/deposit.py @@ -5,6 +5,7 @@ import re from collections import OrderedDict +from typing import Dict, Optional import genquery from genquery import AS_DICT, 
Query @@ -24,7 +25,7 @@ @api.make() -def api_deposit_copy_data_package(ctx, reference, deposit_group): +def api_deposit_copy_data_package(ctx: rule.Context, reference: str, deposit_group: str) -> api.Result: """Create deposit collection and copies selected datapackage into the newly created deposit :param ctx: Combined type of a callback and rei struct @@ -82,7 +83,7 @@ def api_deposit_copy_data_package(ctx, reference, deposit_group): @api.make() -def api_deposit_create(ctx, deposit_group): +def api_deposit_create(ctx: rule.Context, deposit_group: str) -> api.Result: """Create deposit collection through API :param ctx: Combined type of a callback and rei struct @@ -98,7 +99,7 @@ def api_deposit_create(ctx, deposit_group): return {"deposit_path": result["deposit_path"]} -def deposit_create(ctx, deposit_group): +def deposit_create(ctx: rule.Context, deposit_group: Optional[str]) -> Dict: """Create deposit collection. :param ctx: Combined type of a callback and rei struct @@ -139,7 +140,7 @@ def deposit_create(ctx, deposit_group): @api.make() -def api_deposit_status(ctx, path): +def api_deposit_status(ctx: rule.Context, path: str) -> api.Result: """Retrieve status of deposit. :param ctx: Combined type of a callback and rei struct @@ -174,7 +175,7 @@ def api_deposit_status(ctx, path): @api.make() -def api_deposit_submit(ctx, path): +def api_deposit_submit(ctx: rule.Context, path: str) -> api.Result: """Submit deposit collection. :param ctx: Combined type of a callback and rei struct @@ -195,12 +196,12 @@ def api_deposit_submit(ctx, path): @api.make() -def api_deposit_overview(ctx, - sort_on='name', - sort_order='asc', - offset=0, - limit=10, - space=pathutil.Space.OTHER.value): +def api_deposit_overview(ctx: rule.Context, + sort_on: str = 'name', + sort_order: str = 'asc', + offset: int = 0, + limit: int = 10, + space: str = pathutil.Space.OTHER.value) -> api.Result: """Get paginated collection contents, including size/modify date information. 
This function browses a folder and only looks at the collections in it. No dataobjects. @@ -215,7 +216,7 @@ def api_deposit_overview(ctx, :returns: Dict with paginated collection contents """ - def transform(row): + def transform(row: Dict) -> Dict: # Remove ORDER_BY etc. wrappers from column names. x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} diff --git a/epic.py b/epic.py index 335c9288b..209fb799a 100644 --- a/epic.py +++ b/epic.py @@ -4,6 +4,7 @@ __license__ = 'GPLv3, see LICENSE' import uuid +from typing import Dict import publication from util import * @@ -11,7 +12,7 @@ __all__ = ['rule_generate_uuid'] -def generate_uuid(ctx): +def generate_uuid(ctx: rule.Context) -> str: """Generate random ID for DOI.""" randomuuid = str(uuid.uuid4()) return randomuuid.upper() @@ -20,7 +21,7 @@ def generate_uuid(ctx): rule_generate_uuid = rule.make(inputs=[], outputs=[0])(generate_uuid) -def register_epic_pid(ctx, target): +def register_epic_pid(ctx: rule.Context, target: str) -> Dict: """Create and try to register an EPIC PID. :param ctx: Combined type of a callback and rei struct @@ -43,7 +44,7 @@ def register_epic_pid(ctx, target): 'httpCode': ret['arguments'][2]} -def save_epic_pid(ctx, target, url, pid): +def save_epic_pid(ctx: rule.Context, target: str, url: str, pid: str) -> None: """Save persistent EPIC ID. :param ctx: Combined type of a callback and rei struct diff --git a/folder.py b/folder.py index bec8e82d5..506baa3d5 100644 --- a/folder.py +++ b/folder.py @@ -5,6 +5,7 @@ import time import uuid +from typing import List, Optional, Tuple import genquery import irods_types @@ -28,7 +29,7 @@ 'rule_folder_secure'] -def set_status(ctx, coll, status): +def set_status(ctx: rule.Context, coll: str, status: constants.research_package_state) -> api.Result: """Change a folder's status. Status changes are validated by policy (AVU modify preproc). 
@@ -68,7 +69,7 @@ def set_status(ctx, coll, status): return api.Result.ok() -def set_status_as_datamanager(ctx, coll, status): +def set_status_as_datamanager(ctx: rule.Context, coll: str, status: constants.research_package_state) -> api.Result: """Change a folder's status as a datamanager. :param ctx: Combined type of a callback and rei struct @@ -87,7 +88,7 @@ def set_status_as_datamanager(ctx, coll, status): @api.make() -def api_folder_lock(ctx, coll): +def api_folder_lock(ctx: rule.Context, coll: str) -> api.Result: """Lock a folder. :param ctx: Combined type of a callback and rei struct @@ -99,7 +100,7 @@ def api_folder_lock(ctx, coll): @api.make() -def api_folder_unlock(ctx, coll): +def api_folder_unlock(ctx: rule.Context, coll: str) -> api.Result: """Unlock a folder. Unlocking is implemented by clearing the folder status. Since this action @@ -119,7 +120,7 @@ def api_folder_unlock(ctx, coll): @api.make() -def api_folder_submit(ctx, coll): +def api_folder_submit(ctx: rule.Context, coll: str) -> api.Result: """Submit a folder. :param ctx: Combined type of a callback and rei struct @@ -131,7 +132,7 @@ def api_folder_submit(ctx, coll): @api.make() -def api_folder_unsubmit(ctx, coll): +def api_folder_unsubmit(ctx: rule.Context, coll: str) -> api.Result: """Unsubmit a folder. :param ctx: Combined type of a callback and rei struct @@ -147,7 +148,7 @@ def api_folder_unsubmit(ctx, coll): @api.make() -def api_folder_accept(ctx, coll): +def api_folder_accept(ctx: rule.Context, coll: str) -> api.Result: """Accept a folder. :param ctx: Combined type of a callback and rei struct @@ -159,7 +160,7 @@ def api_folder_accept(ctx, coll): @api.make() -def api_folder_reject(ctx, coll): +def api_folder_reject(ctx: rule.Context, coll: str) -> api.Result: """Reject a folder. 
:param ctx: Combined type of a callback and rei struct @@ -171,7 +172,7 @@ def api_folder_reject(ctx, coll): @rule.make(inputs=[0], outputs=[1]) -def rule_folder_secure(ctx, coll): +def rule_folder_secure(ctx: rule.Context, coll: str) -> str: """Rule interface for processing vault status transition request. :param ctx: Combined type of a callback and rei struct :param coll: Collection to be copied to vault @@ -188,7 +189,7 @@ def rule_folder_secure(ctx, coll): return '1' -def precheck_folder_secure(ctx, coll): +def precheck_folder_secure(ctx: rule.Context, coll: str) -> bool: """Whether to continue with securing. Should not touch the retry attempts, these are prechecks and don't count toward the retry attempts limit @@ -210,7 +211,7 @@ def precheck_folder_secure(ctx, coll): return True -def folder_secure(ctx, coll): +def folder_secure(ctx: rule.Context, coll: str) -> bool: """Secure a folder to the vault. If the previous copy did not finish, retry This function should only be called by a rodsadmin @@ -292,7 +293,7 @@ def folder_secure(ctx, coll): return True -def check_folder_secure(ctx, coll): +def check_folder_secure(ctx: rule.Context, coll: str) -> bool: """Some initial set up that determines whether folder secure can continue. These WILL affect the retry attempts. @@ -309,7 +310,7 @@ def check_folder_secure(ctx, coll): return True -def correct_copytovault_start_status(ctx, coll): +def correct_copytovault_start_status(ctx: rule.Context, coll: str) -> bool: """Confirm that the copytovault cronjob avu status is correct state to start securing""" cronjob_status = get_cronjob_status(ctx, coll) if cronjob_status in (constants.CRONJOB_STATE['PENDING'], constants.CRONJOB_STATE['RETRY']): @@ -318,7 +319,7 @@ def correct_copytovault_start_status(ctx, coll): return False -def correct_copytovault_start_location(coll): +def correct_copytovault_start_location(coll: str) -> bool: """Confirm that the folder to be copied is in the correct location. 
For example: in a research or deposit folder and not in the trash. @@ -330,7 +331,7 @@ def correct_copytovault_start_location(coll): return space in (pathutil.Space.RESEARCH, pathutil.Space.DEPOSIT) -def get_last_run_time(ctx, coll): +def get_last_run_time(ctx: rule.Context, coll: str) -> Tuple[bool, int]: """Get the last run time, if found""" found = False last_run = 1 @@ -346,13 +347,13 @@ def get_last_run_time(ctx, coll): return found, last_run -def set_last_run_time(ctx, coll): +def set_last_run_time(ctx: rule.Context, coll: str) -> bool: """Set last run time, return True for successful set""" now = int(time.time()) return avu.set_on_coll(ctx, coll, constants.IICOPYLASTRUN, str(now), True) -def set_can_modify(ctx, coll): +def set_can_modify(ctx: rule.Context, coll: str) -> bool: """Check if have permission to modify, set if necessary""" check_access_result = msi.check_access(ctx, coll, 'modify_object', irods_types.BytesBuf()) modify_access = check_access_result['arguments'][2] @@ -367,7 +368,7 @@ def set_can_modify(ctx, coll): return True -def get_retry_count(ctx, coll): +def get_retry_count(ctx: rule.Context, coll: str) -> int: """ Get the retry count, if not such AVU, return 0 """ retry_count = 0 iter = genquery.row_iterator( @@ -381,7 +382,7 @@ def get_retry_count(ctx, coll): return retry_count -def retry_attempts(ctx, coll): +def retry_attempts(ctx: rule.Context, coll: str) -> bool: """ Check if there have been too many retries. 
""" retry_count = get_retry_count(ctx, coll) @@ -391,7 +392,7 @@ def retry_attempts(ctx, coll): return True -def folder_secure_succeed_avus(ctx, coll, group_name): +def folder_secure_succeed_avus(ctx: rule.Context, coll: str, group_name: str) -> bool: """Set/rm AVUs on source folder when successfully secured folder""" attributes = [x[0] for x in get_org_metadata(ctx, coll)] @@ -425,7 +426,7 @@ def folder_secure_succeed_avus(ctx, coll, group_name): return True -def folder_secure_set_retry(ctx, coll): +def folder_secure_set_retry(ctx: rule.Context, coll: str) -> None: # When a folder secure fails, try to set the retry AVU and other applicable AVUs on source folder. # If too many attempts, fail. new_retry_count = get_retry_count(ctx, coll) + 1 @@ -436,12 +437,12 @@ def folder_secure_set_retry(ctx, coll): send_folder_secure_notification(ctx, coll, "Failed to set retry state on data package") -def folder_secure_set_retry_avus(ctx, coll, retry_count): +def folder_secure_set_retry_avus(ctx: rule.Context, coll: str, retry_count: int) -> bool: avu.set_on_coll(ctx, coll, constants.IICOPYRETRYCOUNT, str(retry_count), True) return set_cronjob_status(ctx, constants.CRONJOB_STATE['RETRY'], coll) -def folder_secure_fail(ctx, coll): +def folder_secure_fail(ctx: rule.Context, coll: str) -> None: """When there are too many retries, give up, set the AVUs and send notifications""" # Errors are caught here in hopes that will still be able to set UNRECOVERABLE status at least avu.rmw_from_coll(ctx, coll, constants.IICOPYRETRYCOUNT, "%", True) @@ -450,7 +451,7 @@ def folder_secure_fail(ctx, coll): set_cronjob_status(ctx, constants.CRONJOB_STATE['UNRECOVERABLE'], coll) -def send_folder_secure_notification(ctx, coll, message): +def send_folder_secure_notification(ctx: rule.Context, coll: str, message: str) -> None: """Send notification about folder secure to relevant datamanagers""" if datamanager_exists(ctx, coll): datamanagers = get_datamanagers(ctx, coll) @@ -459,7 +460,7 @@ def 
send_folder_secure_notification(ctx, coll, message): notifications.set(ctx, "system", datamanager, coll, message) -def set_epic_pid(ctx, target): +def set_epic_pid(ctx: rule.Context, target: str) -> bool: """Try to set epic pid, if fails return False""" if config.epic_pid_enabled: ret = epic.register_epic_pid(ctx, target) @@ -479,7 +480,7 @@ def set_epic_pid(ctx, target): return True -def get_cronjob_status(ctx, coll): +def get_cronjob_status(ctx: rule.Context, coll: str) -> Optional[str]: """Get the cronjob status of given collection""" iter = genquery.row_iterator( "META_COLL_ATTR_VALUE", @@ -489,8 +490,10 @@ def get_cronjob_status(ctx, coll): for row in iter: return row[0] + return None -def rm_cronjob_status(ctx, coll): + +def rm_cronjob_status(ctx: rule.Context, coll: str) -> bool: """Remove cronjob_copy_to_vault attribute on source collection :param ctx: Combined type of a callback and rei struct @@ -501,7 +504,7 @@ def rm_cronjob_status(ctx, coll): return avu.rmw_from_coll(ctx, coll, constants.UUORGMETADATAPREFIX + "cronjob_copy_to_vault", "%", True) -def set_cronjob_status(ctx, status, coll): +def set_cronjob_status(ctx: rule.Context, status: str, coll: str) -> bool: """Set cronjob_copy_to_vault attribute on source collection :param ctx: Combined type of a callback and rei struct @@ -513,7 +516,7 @@ def set_cronjob_status(ctx, status, coll): return avu.set_on_coll(ctx, coll, constants.UUORGMETADATAPREFIX + "cronjob_copy_to_vault", status, True) -def set_acl_parents(ctx, acl_recurse, acl_type, coll): +def set_acl_parents(ctx: rule.Context, acl_recurse: str, acl_type: str, coll: str) -> None: """Set ACL for parent collections""" parent, _ = pathutil.chop(coll) while parent != "/" + user.zone(ctx) + "/home": @@ -521,7 +524,7 @@ def set_acl_parents(ctx, acl_recurse, acl_type, coll): parent, _ = pathutil.chop(parent) -def set_acl_check(ctx, acl_recurse, acl_type, coll, error_msg=''): +def set_acl_check(ctx: rule.Context, acl_recurse: str, acl_type: str, coll: 
str, error_msg: str = '') -> bool: """Set the ACL if possible, log error_msg if it goes wrong""" # TODO turn acl_recurse into a boolean try: @@ -534,7 +537,7 @@ def set_acl_check(ctx, acl_recurse, acl_type, coll, error_msg=''): return True -def get_existing_vault_target(ctx, coll): +def get_existing_vault_target(ctx: rule.Context, coll: str) -> Tuple[bool, str]: """Determine vault target on coll, if it was already determined before """ found = False target = "" @@ -550,7 +553,7 @@ def get_existing_vault_target(ctx, coll): return found, target -def set_vault_target(ctx, coll, target): +def set_vault_target(ctx: rule.Context, coll: str, target: str) -> bool: """Create vault target and AVUs""" msi.coll_create(ctx, target, '', irods_types.BytesBuf()) if not avu.set_on_coll(ctx, target, constants.IIVAULTSTATUSATTRNAME, constants.vault_package_state.INCOMPLETE, True): @@ -563,7 +566,7 @@ def set_vault_target(ctx, coll, target): return True -def determine_and_set_vault_target(ctx, coll): +def determine_and_set_vault_target(ctx: rule.Context, coll: str) -> str: """Determine and set target on coll""" found, target = get_existing_vault_target(ctx, coll) @@ -581,7 +584,7 @@ def determine_and_set_vault_target(ctx, coll): return target -def determine_new_vault_target(ctx, folder): +def determine_new_vault_target(ctx: rule.Context, folder: str) -> str: """Determine vault target path for a folder.""" group = collection_group_name(ctx, folder) @@ -614,7 +617,7 @@ def determine_new_vault_target(ctx, folder): return target -def collection_group_name(callback, coll): +def collection_group_name(ctx: rule.Context, coll: str) -> str: """Return the name of the group a collection belongs to.""" if pathutil.info(coll).space is pathutil.Space.DEPOSIT: @@ -624,7 +627,7 @@ def collection_group_name(callback, coll): iter = genquery.row_iterator( "COLL_ACCESS_USER_ID", "COLL_NAME = '{}'".format(coll), - genquery.AS_LIST, callback + genquery.AS_LIST, ctx ) for row in iter: @@ -634,7 +637,7 @@ 
def collection_group_name(callback, coll): iter2 = genquery.row_iterator( "USER_GROUP_NAME", "USER_GROUP_ID = '{}'".format(id), - genquery.AS_LIST, callback + genquery.AS_LIST, ctx ) for row2 in iter2: @@ -654,14 +657,14 @@ def collection_group_name(callback, coll): return group_name # No results found. Not a group folder - log.write(callback, "{} does not belong to a research or intake group or is not available to current user.".format(coll)) + log.write(ctx, "{} does not belong to a research or intake group or is not available to current user.".format(coll)) return "" rule_collection_group_name = rule.make(inputs=[0], outputs=[1])(collection_group_name) -def get_org_metadata(ctx, path, object_type=pathutil.ObjectType.COLL): +def get_org_metadata(ctx: rule.Context, path: str, object_type: pathutil.ObjectType = pathutil.ObjectType.COLL) -> List[Tuple[str, str]]: """Obtain a (k,v) list of all organisation metadata on a given collection or data object.""" typ = 'DATA' if object_type is pathutil.ObjectType.DATA else 'COLL' @@ -673,7 +676,7 @@ def get_org_metadata(ctx, path, object_type=pathutil.ObjectType.COLL): else " AND COLL_NAME = '{}'".format(path)))] -def get_locks(ctx, path, org_metadata=None, object_type=pathutil.ObjectType.COLL): +def get_locks(ctx: rule.Context, path: str, org_metadata: Optional[List[Tuple[str, str]]] = None, object_type: pathutil.ObjectType = pathutil.ObjectType.COLL) -> List[str]: """Return all locks on a collection or data object (includes locks on parents and children).""" if org_metadata is None: org_metadata = get_org_metadata(ctx, path, object_type=object_type) @@ -684,7 +687,7 @@ def get_locks(ctx, path, org_metadata=None, object_type=pathutil.ObjectType.COLL @api.make() -def api_folder_get_locks(ctx, coll): +def api_folder_get_locks(ctx: rule.Context, coll: str) -> api.Result: """Return a list of locks on a collection.""" locks = [] @@ -697,12 +700,12 @@ def api_folder_get_locks(ctx, coll): return locks -def has_locks(ctx, coll, 
org_metadata=None): +def has_locks(ctx: rule.Context, coll: str, org_metadata: Optional[List[Tuple[str, str]]] = None) -> bool: """Check whether a lock exists on the given collection, its parents or children.""" return len(get_locks(ctx, coll, org_metadata=org_metadata)) > 0 -def is_locked(ctx, coll, org_metadata=None): +def is_locked(ctx: rule.Context, coll: str, org_metadata: Optional[List[Tuple[str, str]]] = None) -> bool: """Check whether a lock exists on the given collection itself or a parent collection. Locks on subcollections are not counted. @@ -719,22 +722,22 @@ def is_locked(ctx, coll, org_metadata=None): return len([x for x in locks if coll.startswith(x)]) > 0 -def is_data_locked(ctx, path, org_metadata=None): +def is_data_locked(ctx: rule.Context, path: str, org_metadata: Optional[List[Tuple[str, str]]] = None) -> bool: """Check whether a lock exists on the given data object.""" locks = get_locks(ctx, path, org_metadata=org_metadata, object_type=pathutil.ObjectType.DATA) return len(locks) > 0 -def get_status(ctx, path, org_metadata=None): +def get_status(ctx: rule.Context, path: str, org_metadata: Optional[List[Tuple[str, str]]] = None) -> constants.research_package_state: """Get the status of a research folder.""" if org_metadata is None: org_metadata = get_org_metadata(ctx, path) # Don't care about duplicate attr names here. 
- org_metadata = dict(org_metadata) - if constants.IISTATUSATTRNAME in org_metadata: - x = org_metadata[constants.IISTATUSATTRNAME] + org_metadata_dict = dict(org_metadata) + if constants.IISTATUSATTRNAME in org_metadata_dict: + x = org_metadata_dict[constants.IISTATUSATTRNAME] try: x = "" if x == "FOLDER" else x return constants.research_package_state(x) @@ -744,7 +747,7 @@ def get_status(ctx, path, org_metadata=None): return constants.research_package_state.FOLDER -def datamanager_exists(ctx, coll): +def datamanager_exists(ctx: rule.Context, coll: str) -> bool: """Check if a datamanager exists for a given collection.""" group_name = collection_group_name(ctx, coll) category = group.get_category(ctx, group_name) @@ -752,7 +755,7 @@ def datamanager_exists(ctx, coll): return group.exists(ctx, "datamanager-" + category) -def get_datamanagers(ctx, coll): +def get_datamanagers(ctx: rule.Context, coll: str) -> List[str]: """Retrieve datamanagers for a given collection.""" group_name = collection_group_name(ctx, coll) category = group.get_category(ctx, group_name) @@ -760,13 +763,13 @@ def get_datamanagers(ctx, coll): return group.members(ctx, "datamanager-" + category) -def set_submitter(ctx, path, actor): +def set_submitter(ctx: rule.Context, path: str, actor: str) -> None: """Set submitter of folder for the vault.""" attribute = constants.UUORGMETADATAPREFIX + "submitted_actor" avu.set_on_coll(ctx, path, attribute, actor) -def get_submitter(ctx, path): +def get_submitter(ctx: rule.Context, path: str) -> str: """Get submitter of folder for the vault.""" attribute = constants.UUORGMETADATAPREFIX + "submitted_actor" org_metadata = dict(get_org_metadata(ctx, path)) @@ -774,16 +777,16 @@ def get_submitter(ctx, path): if attribute in org_metadata: return org_metadata[attribute] else: - return None + return "" -def set_accepter(ctx, path, actor): +def set_accepter(ctx: rule.Context, path: str, actor: str) -> None: """Set accepter of folder for the vault.""" attribute = 
constants.UUORGMETADATAPREFIX + "accepted_actor" avu.set_on_coll(ctx, path, attribute, actor) -def get_accepter(ctx, path): +def get_accepter(ctx: rule.Context, path: str) -> str: """Get accepter of folder for the vault.""" attribute = constants.UUORGMETADATAPREFIX + "accepted_actor" org_metadata = dict(get_org_metadata(ctx, path)) @@ -791,16 +794,16 @@ def get_accepter(ctx, path): if attribute in org_metadata: return org_metadata[attribute] else: - return None + return "" -def set_vault_data_package(ctx, path, vault): +def set_vault_data_package(ctx: rule.Context, path: str, vault: str) -> None: """Set vault data package for deposit.""" attribute = constants.UUORGMETADATAPREFIX + "vault_data_package" avu.set_on_coll(ctx, path, attribute, vault) -def get_vault_data_package(ctx, path): +def get_vault_data_package(ctx: rule.Context, path: str) -> str: """Get vault data package for deposit.""" attribute = constants.UUORGMETADATAPREFIX + "vault_data_package" org_metadata = dict(get_org_metadata(ctx, path)) @@ -808,4 +811,4 @@ def get_vault_data_package(ctx, path): if attribute in org_metadata: return org_metadata[attribute] else: - return None + return "" diff --git a/groups.py b/groups.py index ac995813a..094fe7131 100644 --- a/groups.py +++ b/groups.py @@ -6,6 +6,7 @@ import time from collections import OrderedDict from datetime import datetime +from typing import Any, Dict, Iterable, List, Optional, Tuple import genquery import requests @@ -40,7 +41,7 @@ 'rule_group_sram_sync'] -def getGroupsData(ctx): +def getGroupsData(ctx: rule.Context) -> Iterable[Any]: """Return groups and related data.""" groups = {} @@ -117,7 +118,7 @@ def getGroupsData(ctx): return groups.values() -def getGroupData(ctx, name): +def getGroupData(ctx: rule.Context, name: str) -> Optional[Dict]: """Get data for one group.""" group = None @@ -192,7 +193,7 @@ def getGroupData(ctx, name): return group -def getCategories(ctx): +def getCategories(ctx: rule.Context) -> List[str]: """Get a list of all 
group categories.""" categories = [] @@ -208,7 +209,7 @@ def getCategories(ctx): return categories -def getDatamanagerCategories(ctx): +def getDatamanagerCategories(ctx: rule.Context) -> List: """Get a list of all datamanager group categories.""" categories = [] @@ -229,7 +230,7 @@ def getDatamanagerCategories(ctx): return categories -def getSubcategories(ctx, category): +def getSubcategories(ctx: rule.Context, category: str) -> List: """Get a list of all subcategories within a given group category. :param ctx: Combined type of a ctx and rei struct @@ -272,7 +273,7 @@ def getSubcategories(ctx, category): return list(categories) -def user_role(ctx, username, group_name): +def user_role(ctx: rule.Context, username: str, group_name: str) -> str: """Get role of user in group. :param ctx: Combined type of a ctx and rei struct @@ -300,7 +301,7 @@ def user_role(ctx, username, group_name): api_group_get_user_role = api.make()(user_role) -def user_is_datamanager(ctx, category, user): +def user_is_datamanager(ctx: rule.Context, category: str, user: str) -> bool: """Return if user is datamanager of category. :param ctx: Combined type of a ctx and rei struct @@ -313,7 +314,7 @@ def user_is_datamanager(ctx, category, user): in ('normal', 'manager') -def group_category(ctx, group): +def group_category(ctx: rule.Context, group: str) -> str: """Return category of group. :param ctx: Combined type of a ctx and rei struct @@ -327,7 +328,7 @@ def group_category(ctx, group): @api.make() -def api_group_data(ctx): +def api_group_data(ctx: rule.Context) -> Dict: """Retrieve group data as hierarchy for user. 
The structure of the group hierarchy parameter is as follows: @@ -355,7 +356,7 @@ def api_group_data(ctx): return (internal_api_group_data(ctx)) -def internal_api_group_data(ctx): +def internal_api_group_data(ctx: rule.Context) -> Dict: # This is the entry point for integration tests against api_group_data if user.is_admin(ctx): groups = getGroupsData(ctx) @@ -460,7 +461,7 @@ def internal_api_group_data(ctx): return {'group_hierarchy': subcat_ordered_group_hierarchy, 'user_type': user.user_type(ctx), 'user_zone': user.zone(ctx)} -def user_is_a_datamanager(ctx): +def user_is_a_datamanager(ctx: rule.Context) -> bool: """Return groups whether current user is datamanager of a group, not specifically of a specific group. :param ctx: Combined type of a ctx and rei struct @@ -485,7 +486,7 @@ def user_is_a_datamanager(ctx): @api.make() -def api_group_process_csv(ctx, csv_header_and_data, allow_update, delete_users): +def api_group_process_csv(ctx: rule.Context, csv_header_and_data: str, allow_update: bool, delete_users: bool) -> api.Result: """Process contents of CSV file containing group definitions. Parsing is stopped immediately when an error is found and the rownumber is returned to the user. @@ -520,7 +521,7 @@ def api_group_process_csv(ctx, csv_header_and_data, allow_update, delete_users): return api.Result.ok(info=[status_msg['message']]) -def validate_data(ctx, data, allow_update): +def validate_data(ctx: rule.Context, data: Dict, allow_update: bool) -> List: """Validation of extracted data. 
:param ctx: Combined type of a ctx and rei struct @@ -551,7 +552,7 @@ def validate_data(ctx, data, allow_update): return errors -def apply_data(ctx, data, allow_update, delete_users): +def apply_data(ctx: rule.Context, data: Dict, allow_update: bool, delete_users: bool) -> Dict: """ Update groups with the validated data :param ctx: Combined type of a ctx and rei struct @@ -581,7 +582,7 @@ def apply_data(ctx, data, allow_update, delete_users): log.write(ctx, 'CSV import - WARNING: group "{}" not created, it already exists'.format(group_name)) message += "Group '{}' already exists.".format(group_name) else: - return {status: 'error', message: "Error while attempting to create group {}. Status/message: {} / {}".format(group_name, response.status, response.status_info)} + return {"status": "error", "message": "Error while attempting to create group {}. Status/message: {} / {}".format(group_name, response.status, response.status_info)} # Now add the users and set their role if other than member allusers = managers + members + viewers @@ -675,7 +676,7 @@ def apply_data(ctx, data, allow_update, delete_users): return {"status": "ok", "message": message} -def _are_roles_equivalent(a, b): +def _are_roles_equivalent(a: str, b: str) -> bool: """Checks whether two roles are equivalent, Yoda and Yoda-clienttools use slightly different names.""" r_role_names = ["viewer", "reader"] m_role_names = ["member", "normal"] @@ -690,7 +691,7 @@ def _are_roles_equivalent(a, b): return False -def group_user_exists(ctx, group_name, username, include_readonly): +def group_user_exists(ctx: rule.Context, group_name: str, username: str, include_readonly: bool) -> bool: group = getGroupData(ctx, group_name) if '#' not in username: username = username + "#" + session_vars.get_map(ctx.rei)["client_user"]["irods_zone"] @@ -705,7 +706,7 @@ def group_user_exists(ctx, group_name, username, include_readonly): @rule.make(inputs=[0], outputs=[1]) -def rule_user_exists(ctx, username): +def 
rule_user_exists(ctx: rule.Context, username: str) -> str: """Rule wrapper to check if a user exists. :param ctx: Combined type of a ctx and rei struct @@ -716,7 +717,8 @@ def rule_user_exists(ctx, username): return "true" if user.exists(ctx, username) else "false" -def rule_group_user_exists(rule_args, callback, rei): +@rule.make(inputs=[0, 1, 2], outputs=[3]) +def rule_group_user_exists(ctx: rule.Context, group_name: str, user_name: str, include_readonly: bool) -> str: """Check if a user is a member of the given group. rule_group_user_exists(group, user, includeRo, membership) @@ -724,25 +726,25 @@ def rule_group_user_exists(rule_args, callback, rei): considered as well. Otherwise, the user must be a normal member or manager of the given group. - :param rule_args: [0] Group to check for user membership - [1] User to check for membership - [2] Include read-only shadow group users - :param callback: Callback to rule Language - :param rei: The rei struct + :param ctx: Combined type of a ctx and rei struct + :param group_name: Group to check for user membership + :param user_name: User to check for membership + :param include_readonly: Include read-only shadow group users + + :returns: Indicator if user is a member of the given group. """ - ctx = rule.Context(callback, rei) - exists = group_user_exists(ctx, rule_args[0], rule_args[1], rule_args[2]) - rule_args[3] = "true" if exists else "false" + exists = group_user_exists(ctx, group_name, user_name, include_readonly) + return "true" if exists else "false" @api.make() -def api_group_categories(ctx): +def api_group_categories(ctx: rule.Context) -> api.Result: """Retrieve category list.""" return getCategories(ctx) @api.make() -def api_group_subcategories(ctx, category): +def api_group_subcategories(ctx: rule.Context, category: str) -> api.Result: """Retrieve subcategory list. 
:param ctx: Combined type of a ctx and rei struct @@ -753,7 +755,7 @@ def api_group_subcategories(ctx, category): return getSubcategories(ctx, category) -def provisionExternalUser(ctx, username, creatorUser, creatorZone): +def provisionExternalUser(ctx: rule.Context, username: str, creatorUser: str, creatorZone: str) -> int: """Call External User Service API to add new user. :param ctx: Combined type of a ctx and rei struct @@ -824,7 +826,7 @@ def rule_group_provision_external_user(rule_args, ctx, rei): rule_args[4] = message -def removeExternalUser(ctx, username, userzone): +def removeExternalUser(ctx: rule.Context, username: str, userzone: str) -> str: """Call External User Service API to remove user. :param ctx: Combined type of a ctx and rei struct @@ -854,7 +856,7 @@ def removeExternalUser(ctx, username, userzone): @rule.make(inputs=[0, 1], outputs=[]) -def rule_group_remove_external_user(ctx, username, userzone): +def rule_group_remove_external_user(ctx: rule.Context, username: str, userzone: str) -> str: """Remove external user from EUS :param ctx: Combined type of a ctx and rei struct @@ -878,7 +880,7 @@ def rule_group_remove_external_user(ctx, username, userzone): @rule.make(inputs=[0], outputs=[1]) -def rule_group_check_external_user(ctx, username): +def rule_group_check_external_user(ctx: rule.Context, username: str) -> str: """Check that a user is external. :param ctx: Combined type of a ctx and rei struct @@ -896,7 +898,7 @@ def rule_group_check_external_user(ctx, username): @rule.make(inputs=[0], outputs=[1]) -def rule_group_expiration_date_validate(ctx, expiration_date): +def rule_group_expiration_date_validate(ctx: rule.Context, expiration_date: str) -> str: """Validation of expiration date. 
:param ctx: Combined type of a callback and rei struct @@ -920,7 +922,7 @@ def rule_group_expiration_date_validate(ctx, expiration_date): @api.make() -def api_group_search_users(ctx, pattern): +def api_group_search_users(ctx: rule.Context, pattern: str) -> api.Result: (username, zone_name) = user.from_str(ctx, pattern) userList = list() @@ -942,7 +944,7 @@ def api_group_search_users(ctx, pattern): @api.make() -def api_group_exists(ctx, group_name): +def api_group_exists(ctx: rule.Context, group_name: str) -> api.Result: """Check if group exists. :param ctx: Combined type of a ctx and rei struct @@ -953,7 +955,14 @@ def api_group_exists(ctx, group_name): return group.exists(ctx, group_name) -def group_create(ctx, group_name, category, subcategory, schema_id, expiration_date, description, data_classification): +def group_create(ctx: rule.Context, + group_name: str, + category: str, + subcategory: str, + schema_id: str, + expiration_date: str, + description: str, + data_classification: str) -> api.Result: """Create a new group. :param ctx: Combined type of a ctx and rei struct @@ -965,7 +974,7 @@ def group_create(ctx, group_name, category, subcategory, schema_id, expiration_d :param description: Description of the group to create :param data_classification: Data classification of the group to create - :returns: Dict with API status result + :returns: API status result """ try: co_identifier = '' @@ -1005,7 +1014,7 @@ def group_create(ctx, group_name, category, subcategory, schema_id, expiration_d @api.make() -def api_group_update(ctx, group_name, property_name, property_value): +def api_group_update(ctx: rule.Context, group_name: str, property_name: str, property_value: str) -> api.Result: """Update group property. 
:param ctx: Combined type of a ctx and rei struct @@ -1013,7 +1022,7 @@ def api_group_update(ctx, group_name, property_name, property_value): :param property_name: Name of the property to update :param property_value: Value of the property to update - :returns: Dict with API status result + :returns: API status result """ try: response = ctx.uuGroupModify(group_name, property_name, property_value, '', '')['arguments'] @@ -1028,13 +1037,13 @@ def api_group_update(ctx, group_name, property_name, property_value): @api.make() -def api_group_delete(ctx, group_name): +def api_group_delete(ctx: rule.Context, group_name: str) -> api.Result: """Delete a group. :param ctx: Combined type of a ctx and rei struct :param group_name: Name of the group to delete - :returns: Dict with API status result + :returns: API status result """ try: # Delete SRAM collaboration if group is a SRAM group. @@ -1057,7 +1066,7 @@ def api_group_delete(ctx, group_name): @api.make() -def api_group_get_description(ctx, group_name): +def api_group_get_description(ctx: rule.Context, group_name: str) -> api.Result: """Retrieve description of a group. :param ctx: Combined type of a ctx and rei struct @@ -1072,7 +1081,7 @@ def api_group_get_description(ctx, group_name): @api.make() -def api_group_user_is_member(ctx, username, group_name): +def api_group_user_is_member(ctx: rule.Context, username: str, group_name: str) -> api.Result: """Check if user is member of a group. :param ctx: Combined type of a ctx and rei struct @@ -1084,7 +1093,7 @@ def api_group_user_is_member(ctx, username, group_name): return group_user_exists(ctx, group_name, username, True) -def group_user_add(ctx, username, group_name): +def group_user_add(ctx: rule.Context, username: str, group_name: str) -> api.Result: """Add a user to a group. 
:param ctx: Combined type of a ctx and rei struct @@ -1127,7 +1136,7 @@ def group_user_add(ctx, username, group_name): api_group_user_add = api.make()(group_user_add) -def group_user_update_role(ctx, username, group_name, new_role): +def group_user_update_role(ctx: rule.Context, username: str, group_name: str, new_role: str) -> api.Result: """Update role of a user in a group. :param ctx: Combined type of a ctx and rei struct @@ -1135,7 +1144,7 @@ def group_user_update_role(ctx, username, group_name, new_role): :param group_name: Name of the group :param new_role: New role of the user - :returns: Dict with API status result + :returns: API status result """ try: if config.enable_sram: @@ -1164,14 +1173,14 @@ def group_user_update_role(ctx, username, group_name, new_role): api_group_user_update_role = api.make()(group_user_update_role) -def group_remove_user_from_group(ctx, username, group_name): +def group_remove_user_from_group(ctx: rule.Context, username: str, group_name: str) -> api.Result: """Remove a user from a group. :param ctx: Combined type of a ctx and rei struct :param username: Name of the user :param group_name: Name of the group - :returns: Dict with API status result + :returns: API status result """ try: if config.enable_sram: @@ -1200,7 +1209,7 @@ def group_remove_user_from_group(ctx, username, group_name): api_group_remove_user_from_group = api.make()(group_remove_user_from_group) -def sram_enabled(ctx, group_name): +def sram_enabled(ctx: rule.Context, group_name: str) -> Tuple[bool, str]: """Checks if the group is SRAM enabled :param ctx: Combined type of a ctx and rei struct @@ -1226,7 +1235,7 @@ def sram_enabled(ctx, group_name): @rule.make() -def rule_group_sram_sync(ctx): +def rule_group_sram_sync(ctx: rule.Context) -> None: """Synchronize groups with SRAM. 
:param ctx: Combined type of a ctx and rei struct diff --git a/groups_import.py b/groups_import.py index 1933c95fd..a95cf846a 100644 --- a/groups_import.py +++ b/groups_import.py @@ -3,18 +3,20 @@ __copyright__ = 'Copyright (c) 2018-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import Dict, List, Set, Tuple + from iteration_utilities import duplicates, unique_everseen from util import * -def process_csv_line(ctx, line): +def process_csv_line(ctx: 'rule.Context', line: Dict) -> Tuple: """Process a line as found in the csv consisting of category, subcategory, groupname, managers, members and viewers, and optionally schema id and expiration date. - :param ctx: Combined type of a ctx and rei struct - :param line: Dictionary of labels and corresponding lists of values + :param ctx: Combined type of a ctx and rei struct + :param line: Dictionary of labels and corresponding lists of values :returns: Tuple of processed row data (None if error), and error message """ @@ -78,29 +80,29 @@ def process_csv_line(ctx, line): return row_data, None -def column_name_is_role_label(column_name): +def column_name_is_role_label(column_name: str) -> bool: return (column_name.lower() in get_role_labels() or column_name.lower().startswith(tuple(map(lambda s: s + ":", get_role_labels())))) -def get_role_labels(): +def get_role_labels() -> List[str]: return ['viewer', 'member', 'manager'] -def get_csv_possible_labels(): +def get_csv_possible_labels() -> List[str]: return ['category', 'subcategory', 'groupname', 'viewer', 'member', 'manager', 'schema_id', 'expiration_date'] -def get_csv_required_labels(): +def get_csv_required_labels() -> List[str]: return ['category', 'subcategory', 'groupname'] -def get_csv_predefined_labels(): +def get_csv_predefined_labels() -> List[str]: """These labels should not repeat""" return ['category', 'subcategory', 'groupname', 'schema_id', 'expiration_date'] -def get_duplicate_columns(fields_list): +def 
get_duplicate_columns(fields_list: List) -> Set: fields_seen = set() duplicate_fields = set() @@ -152,12 +154,12 @@ def parse_csv_file(ctx): return extracted_data -def get_duplicate_groups(row_data): +def get_duplicate_groups(row_data: List) -> List: group_names = list(map(lambda r: r[2], row_data)) return list(unique_everseen(duplicates(group_names))) -def parse_data(ctx, csv_header_and_data): +def parse_data(ctx: 'rule.Context', csv_header_and_data: str) -> Tuple: """Process contents of csv data consisting of header and rows of data. :param ctx: Combined type of a ctx and rei struct diff --git a/json_datacite.py b/json_datacite.py index 2ee72f0dc..5f5efc6ca 100644 --- a/json_datacite.py +++ b/json_datacite.py @@ -3,53 +3,21 @@ __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import Dict, List + from dateutil import parser from util import * -__all__ = ['rule_json_datacite_create_combi_metadata_json', - 'rule_json_datacite_create_datacite_json'] - - -@rule.make() -def rule_json_datacite_create_combi_metadata_json(ctx, - metadataJsonPath, - combiJsonPath, - lastModifiedDateTime, - yodaDOI, - publicationDate, - openAccessLink, - licenseUri): - """Frontend function to add system info to yoda-metadata in json format. 
- :param ctx: Combined type of a callback and rei struct - :param metadataJsonPath: Path to the most recent vault yoda-metadata.json in the corresponding vault - :param combiJsonPath: Path to where the combined info will be placed so it can be used for DataciteXml & landingpage generation - other are system info parameters - :param lastModifiedDateTime: Last modification time of publication - :param yodaDOI: DOI of publication - :param publicationDate: Date of publication - :param openAccessLink: Open access link to data of publication - :param licenseUri: URI to license of publication - """ - json_datacite_create_combi_metadata_json(ctx, - metadataJsonPath, - combiJsonPath, - lastModifiedDateTime, - yodaDOI, - publicationDate, - openAccessLink, - licenseUri) - - -def json_datacite_create_combi_metadata_json(ctx, - metadataJsonPath, - combiJsonPath, - lastModifiedDateTime, - yodaDOI, - publicationDate, - openAccessLink, - licenseUri): +def json_datacite_create_combi_metadata_json(ctx: rule.Context, + metadataJsonPath: str, + combiJsonPath: str, + lastModifiedDateTime: str, + yodaDOI: str, + publicationDate: str, + openAccessLink: str, + licenseUri: str) -> None: """Frontend function to add system info to yoda-metadata in json format. :param ctx: Combined type of a callback and rei struct @@ -81,12 +49,7 @@ def json_datacite_create_combi_metadata_json(ctx, jsonutil.write(ctx, combiJsonPath, metaDict) -@rule.make(inputs=[0], outputs=[1]) -def rule_json_datacite_create_datacite_json(ctx, landing_page_url, combi_path): - return json_datacite_create_datacite_json(ctx, landing_page_url, combi_path) - - -def json_datacite_create_datacite_json(ctx, landing_page_url, combi_path): +def json_datacite_create_datacite_json(ctx: rule.Context, landing_page_url: str, combi_path: str) -> Dict: """Based on content of combi json, get Datacite metadata as a dict. 
:param ctx: Combined type of a callback and rei struct @@ -134,32 +97,32 @@ def json_datacite_create_datacite_json(ctx, landing_page_url, combi_path): return metadata -def get_DOI(combi): +def get_DOI(combi: Dict) -> str: return combi['System']['Persistent_Identifier_Datapackage']['Identifier'] -def get_identifiers(combi): +def get_identifiers(combi: Dict) -> List: return [{'identifier': combi['System']['Persistent_Identifier_Datapackage']['Identifier'], 'identifierType': 'DOI'}] -def get_titles(combi): +def get_titles(combi: Dict) -> List: return [{'title': combi['Title'], 'language': 'en-us'}] -def get_descriptions(combi): +def get_descriptions(combi: Dict) -> List: return [{'description': combi['Description'], 'descriptionType': 'Abstract'}] -def get_publisher(combi): +def get_publisher(combi: Dict) -> str: return config.datacite_publisher -def get_publication_year(combi): +def get_publication_year(combi: Dict) -> str: return combi['System']['Publication_Date'][0:4] -def get_subjects(combi): +def get_subjects(combi: Dict) -> List: """Get list in DataCite format containing: 1) standard objects like tags/disciplne @@ -204,7 +167,7 @@ def get_subjects(combi): return subjects -def get_funders(combi): +def get_funders(combi: Dict) -> List: funders = [] try: for funder in combi.get('Funding_Reference', []): @@ -216,7 +179,7 @@ def get_funders(combi): return funders -def get_creators(combi): +def get_creators(combi: Dict) -> List: """Return creator information in DataCite format. :param combi: Combined JSON file that holds both user and system metadata @@ -253,7 +216,7 @@ def get_creators(combi): return all_creators -def get_contributors(combi): +def get_contributors(combi: Dict) -> List: """Get string in DataCite format containing contributors, including contact persons if these were added explicitly (GEO). 
@@ -327,7 +290,7 @@ def get_contributors(combi): return all -def get_dates(combi): +def get_dates(combi: Dict) -> List: """Return list of dates in DataCite format.""" # Format last modified date for DataCite: https://support.datacite.org/docs/schema-optional-properties-v41#8-date @@ -339,11 +302,11 @@ def get_dates(combi): dates = [{'date': last_modified_date, 'dateType': 'Updated'}] - embargo_end_date = combi.get('Embargo_End_Date', None) + embargo_end_date = combi.get('Embargo_End_Date') if embargo_end_date is not None: dates.append({'date': embargo_end_date, 'dateType': 'Available'}) - collected = combi.get('Collected', None) + collected = combi.get('Collected') if collected is not None: try: x = collected.get('Start_Date') @@ -356,12 +319,12 @@ def get_dates(combi): return dates -def get_version(combi): +def get_version(combi: Dict) -> str: """Get string in DataCite format containing version info.""" return combi.get('Version', '') -def get_rights_list(combi): +def get_rights_list(combi: Dict) -> List: """Get list in DataCite format containing rights related information.""" options = {'Open': 'info:eu-repo/semantics/openAccess', 'Restricted': 'info:eu-repo/semantics/restrictedAccess', @@ -374,12 +337,12 @@ def get_rights_list(combi): return rights_list -def get_language(combi): +def get_language(combi: Dict) -> str: """Get string in DataCite format containing language.""" return 'en-us' -def get_resource_type(combi): +def get_resource_type(combi: Dict) -> Dict: """Get dict in DataCite format containing Resource type and default handling.""" """ "types": { @@ -409,7 +372,7 @@ def get_resource_type(combi): return {"resourceTypeGeneral": type, "resourceType": descr} -def get_related_resources(combi): +def get_related_resources(combi: Dict) -> List: """Get list in DataCite format containing related datapackages.""" """ "relatedIdentifiers": [ @@ -444,7 +407,7 @@ def get_related_resources(combi): return related_dps -def get_geo_locations(combi): +def 
get_geo_locations(combi: Dict) -> List: """Get list of geoLocation elements in datacite format containing the information of geo locations. There are two versions of this: @@ -491,6 +454,6 @@ def get_geo_locations(combi): if location: geoLocations.append({'geoLocationPlace': location}) except KeyError: - return + return [] return geoLocations diff --git a/json_landing_page.py b/json_landing_page.py index 38ff78fee..d12cedfca 100644 --- a/json_landing_page.py +++ b/json_landing_page.py @@ -4,6 +4,7 @@ __license__ = 'GPLv3, see LICENSE' from datetime import datetime +from typing import Dict import jinja2 from dateutil import parser @@ -11,7 +12,7 @@ from util import * -def persistent_identifier_to_uri(identifier_scheme, identifier): +def persistent_identifier_to_uri(identifier_scheme: str, identifier: str) -> str: """Transform a persistent identifier to URI. Supported identifier schemes are Handle, DOI, ORCID and URL. @@ -41,7 +42,13 @@ def persistent_identifier_to_uri(identifier_scheme, identifier): return uri -def json_landing_page_create_json_landing_page(ctx, zone, template_name, combi_json_path, json_schema, base_doi, versions): +def json_landing_page_create_json_landing_page(ctx: rule.Context, + zone: str, + template_name: str, + combi_json_path: str, + json_schema: Dict, + base_doi: str, + versions: Dict) -> str: """Get the landing page of published YoDa metadata as a string. :param ctx: Combined type of a ctx and rei struct @@ -187,12 +194,12 @@ def json_landing_page_create_json_landing_page(ctx, zone, template_name, combi_j # Format last modified and publication date. 
# Python 3: https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat # last_modified_date = date.fromisoformat(json_data['System']['Last_Modified_Date']) - last_modified_date = parser.parse(json_data["System"]["Last_Modified_Date"]) - last_modified_date = last_modified_date.strftime("%Y-%m-%d %H:%M:%S%z") + last_modified_date_time = parser.parse(json_data["System"]["Last_Modified_Date"]) + last_modified_date = last_modified_date_time.strftime("%Y-%m-%d %H:%M:%S%z") # Python 3: https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat # publication_date = date.fromisoformat(json_data['System']['Publication_Date']) - publication_date = parser.parse(json_data["System"]["Publication_Date"]) - publication_date = publication_date.strftime("%Y-%m-%d %H:%M:%S%z") + publication_date_time = parser.parse(json_data["System"]["Publication_Date"]) + publication_date = publication_date_time.strftime("%Y-%m-%d %H:%M:%S%z") tm = Template(template) # Add custom function to transform a persistent identifier to URI. diff --git a/mail.py b/mail.py index 98e9025f9..0493906ad 100644 --- a/mail.py +++ b/mail.py @@ -7,13 +7,14 @@ import re import smtplib from email.mime.text import MIMEText +from typing import Optional, Tuple from util import * __all__ = ['rule_mail_test'] -def send(ctx, to, actor, subject, body, cc=None): +def send(ctx: rule.Context, to: str, actor: str, subject: str, body: str, cc: Optional[str] = None) -> api.Result: """Send an e-mail with specified recipient, subject and body. The originating address and mail server credentials are taken from the @@ -26,6 +27,8 @@ def send(ctx, to, actor, subject, body, cc=None): :param body: Body of mail :param cc: Comma-separated list of CC recipient(s) of email (optional) + :raises: When smtp is not configured correctly + :returns: API status """ if not config.notifications_enabled: @@ -51,7 +54,11 @@ def send(ctx, to, actor, subject, body, cc=None): try: # e.g.
'smtps://smtp.gmail.com:465' for SMTP over TLS, or # 'smtp://smtp.gmail.com:587' for STARTTLS on the mail submission port. - proto, host, port = re.search(r'^(smtps?)://([^:]+)(?::(\d+))?$', cfg['server']).groups() + smtp_config = re.search(r'^(smtps?)://([^:]+)(?::(\d+))?$', cfg['server'])\ + + if smtp_config is None: + raise Exception + proto, host, port = smtp_config.groups() # Default to port 465 for SMTP over TLS, and 587 for standard mail # submission with STARTTLS. @@ -107,7 +114,7 @@ def send(ctx, to, actor, subject, body, cc=None): pass -def wrapper(ctx, to, actor, subject, body): +def wrapper(ctx: rule.Context, to: str, actor: str, subject: str, body: str) -> Tuple[str, str]: """Send mail, returns status/statusinfo in rule-language style.""" x = send(ctx, to, actor, subject, body) @@ -117,7 +124,7 @@ def wrapper(ctx, to, actor, subject, body): @rule.make(inputs=[0], outputs=[1, 2]) -def rule_mail_test(ctx, to): +def rule_mail_test(ctx: rule.Context, to: str) -> Tuple[str, str]: if not user.is_admin(ctx): return api.Error('not_allowed', 'Only rodsadmin can send test mail') diff --git a/meta.py b/meta.py index 52b51ff93..333114481 100644 --- a/meta.py +++ b/meta.py @@ -7,6 +7,7 @@ import re from collections import OrderedDict from datetime import datetime +from typing import Dict, List, Optional import genquery import irods_types @@ -28,7 +29,7 @@ 'rule_get_latest_vault_metadata_path'] -def metadata_get_links(metadata): +def metadata_get_links(metadata: Dict) -> List: if 'links' not in metadata or type(metadata['links']) is not list: return [] return list(filter(lambda x: type(x) in (dict, OrderedDict) @@ -39,13 +40,14 @@ def metadata_get_links(metadata): metadata['links'])) -def metadata_get_schema_id(metadata): +def metadata_get_schema_id(metadata: Dict) -> Optional[str]: desc = list(filter(lambda x: x['rel'] == 'describedby', metadata_get_links(metadata))) if len(desc) > 0: return desc[0]['href'] + return None -def metadata_set_schema_id(metadata, 
schema_id): +def metadata_set_schema_id(metadata: Dict, schema_id: str) -> None: other_links = list(filter(lambda x: x['rel'] != 'describedby', metadata_get_links(metadata))) metadata['links'] = [OrderedDict([ @@ -54,11 +56,11 @@ def metadata_set_schema_id(metadata, schema_id): ])] + other_links -def get_json_metadata_errors(callback, - metadata_path, - metadata=None, - schema=None, - ignore_required=False): +def get_json_metadata_errors(ctx: rule.Context, + metadata_path: str, + metadata: Optional[Dict] = None, + schema: Optional[Dict] = None, + ignore_required: bool = False) -> List: """ Validate JSON metadata, and return a list of errors, if any. @@ -72,7 +74,7 @@ def get_json_metadata_errors(callback, This will throw exceptions on missing metadata / schema files and invalid JSON formats. - :param callback: Combined type of a callback and rei struct + :param ctx: Combined type of a callback and rei struct :param metadata_path: Path to the JSON object :param metadata: Pre-parsed JSON object :param schema: Schema to check against @@ -81,10 +83,10 @@ def get_json_metadata_errors(callback, :returns: List of errors in JSON object """ if schema is None: - schema = schema_.get_active_schema(callback, metadata_path) + schema = schema_.get_active_schema(ctx, metadata_path) if metadata is None: - metadata = jsonutil.read(callback, metadata_path) + metadata = jsonutil.read(ctx, metadata_path) # Perform validation and filter errors. # Validation is handed to a Python 3 interpreter to validate with the Draft201909 validator. @@ -144,22 +146,21 @@ def transform_error(e): # Log metadata errors. for error in errors: - log.write(callback, error) + log.write(ctx, error) return errors -def is_json_metadata_valid(callback, - metadata_path, - metadata=None, - ignore_required=False): - """ - Check if json metadata contains no errors. 
+def is_json_metadata_valid(ctx: rule.Context, + metadata_path: str, + metadata: Optional[Dict] = None, + ignore_required: bool = False) -> bool: + """Check if json metadata contains no errors. Argument 'metadata' may contain a preparsed JSON document, otherwise it is loaded from the provided path. - :param callback: Combined type of a callback and rei struct + :param ctx: Combined type of a callback and rei struct :param metadata_path: Path to the JSON object :param metadata: Pre-parsed JSON object :param ignore_required: Ignore required fields @@ -167,7 +168,7 @@ def is_json_metadata_valid(callback, :returns: Boolean indicating if JSON metadata is valid """ try: - return len(get_json_metadata_errors(callback, + return len(get_json_metadata_errors(ctx, metadata_path, metadata=metadata, ignore_required=ignore_required)) == 0 @@ -176,14 +177,13 @@ def is_json_metadata_valid(callback, return False -def get_collection_metadata_path(ctx, coll): - """ - Check if a collection has a JSON metadata file and provide its path, if any. +def get_collection_metadata_path(ctx: rule.Context, coll: str) -> Optional[str]: + """Check if a collection has a JSON metadata file and provide its path, if any. :param ctx: Combined type of a callback and rei struct :param coll: Path of collection to check for metadata - :returns: String with path to metadata file + :returns: Path to metadata file """ path = '{}/{}'.format(coll, constants.IIJSONMETADATA) if data_object.exists(ctx, path): @@ -192,14 +192,13 @@ def get_collection_metadata_path(ctx, coll): return None -def get_latest_vault_metadata_path(ctx, vault_pkg_coll): - """ - Get the latest vault metadata JSON file. +def get_latest_vault_metadata_path(ctx: rule.Context, vault_pkg_coll: str) -> Optional[str]: + """Get the latest vault metadata JSON file. 
:param ctx: Combined type of a callback and rei struct :param vault_pkg_coll: Vault package collection - :returns: string -- Metadata JSON path + :returns: Metadata JSON path """ name = None @@ -239,28 +238,27 @@ def rule_meta_validate(rule_args, callback, rei): rule_args[2] = 'metadata validated' -def collection_has_cloneable_metadata(callback, coll): - """ - Check if a collection has metadata, and validate it. +def collection_has_cloneable_metadata(ctx: rule.Context, coll: str) -> Optional[str]: + """Check if a collection has metadata, and validate it. This always ignores 'required' schema attributes, since metadata can only be cloned in the research area. - :param callback: Combined type of a callback and rei struct - :param coll: Path of collection to check for cloneable metadata + :param ctx: Combined type of a callback and rei struct + :param coll: Path of collection to check for cloneable metadata - :returns: String with the parent metadata_path on success, or False otherwise. + :returns: String with the parent metadata_path on success or None otherwise. 
""" - path = get_collection_metadata_path(callback, coll) + path = get_collection_metadata_path(ctx, coll) if path is None: - return False + return None if path.endswith('.json'): - if is_json_metadata_valid(callback, path, ignore_required=True): + if is_json_metadata_valid(ctx, path, ignore_required=True): return path - return False + return None rule_meta_collection_has_cloneable_metadata = ( @@ -270,7 +268,7 @@ def collection_has_cloneable_metadata(callback, coll): @api.make() -def api_meta_remove(ctx, coll): +def api_meta_remove(ctx: rule.Context, coll: str) -> None: """Remove a collection's metadata JSON, if it exists.""" log.write(ctx, 'Remove metadata of coll {}'.format(coll)) @@ -283,34 +281,31 @@ def api_meta_remove(ctx, coll): @api.make() -def api_meta_clone_file(ctx, target_coll): +def api_meta_clone_file(ctx: rule.Context, target_coll: str) -> api.Result: """Clone a metadata file from a parent collection to a subcollection. :param ctx: Combined type of a callback and rei struct :param target_coll: Target collection (where the metadata is copied to) - :returns: None - - :raises Error: The metadata file could not be copied + :returns: API result """ source_coll = pathutil.chop(target_coll)[0] # = parent collection source_data = get_collection_metadata_path(ctx, source_coll) - if source_data.endswith('.json'): + if source_data and source_data.endswith('.json'): target_data = '{}/{}'.format(target_coll, constants.IIJSONMETADATA) else: - # No metadata to clone? Abort. 
- return + return api.Error('no_metadata', 'No metadata file exists to clone') try: data_object.copy(ctx, source_data, target_data) except msi.Error as e: - raise api.Error('copy_failed', 'The metadata file could not be copied', str(e)) + return api.Error('copy_failed', 'The metadata file could not be copied', str(e)) # Functions that deal with ingesting metadata into AVUs {{{ -def ingest_metadata_research(ctx, path): +def ingest_metadata_research(ctx: rule.Context, path: str) -> None: """Validate JSON metadata (without requiredness) and ingests as AVUs in the research space.""" coll, data = pathutil.chop(path) @@ -334,7 +329,7 @@ def ingest_metadata_research(ctx, path): jsonutil.dump(metadata)) -def ingest_metadata_deposit(ctx, path): +def ingest_metadata_deposit(ctx: rule.Context, path: str) -> None: """Validate JSON metadata (without requiredness) and ingests as AVUs in the deposit space.""" coll, data = pathutil.chop(path) @@ -355,7 +350,7 @@ def ingest_metadata_deposit(ctx, path): avu.associate_to_coll(ctx, coll, 'Data_Access_Restriction', metadata['Data_Access_Restriction']) -def ingest_metadata_staging(ctx, path): +def ingest_metadata_staging(ctx: rule.Context, path: str) -> None: """Set cronjob metadata flag and triggers vault ingest.""" ret = msi.string_2_key_val_pair(ctx, '{}{}{}'.format(constants.UUORGMETADATAPREFIX, @@ -373,7 +368,7 @@ def ingest_metadata_staging(ctx, path): ctx.iiAdminVaultIngest() -def update_index_metadata(ctx, path, metadata, creation_time, data_package): +def update_index_metadata(ctx: rule.Context, path: str, metadata: Dict, creation_time: str, data_package: str) -> None: """Update the index attributes for JSON metadata.""" msi.coll_create(ctx, path, "", irods_types.BytesBuf()) ctx.msi_rmw_avu('-C', path, '%', '%', constants.UUFLATINDEX) @@ -511,7 +506,7 @@ def update_index_metadata(ctx, path, metadata, creation_time, data_package): log.write(ctx, 'update_index_metadata: Metadata index update unsuccessful on path {}'.format(path)) 
-def ingest_metadata_vault(ctx, path): +def ingest_metadata_vault(ctx: rule.Context, path: str) -> None: """Ingest (pre-validated) JSON metadata in the vault.""" # The JSON metadata file has just landed in the vault, required validation / # logging / provenance has already taken place. @@ -560,7 +555,7 @@ def ingest_metadata_vault(ctx, path): @rule.make() -def rule_meta_modified_post(ctx, path, user, zone): +def rule_meta_modified_post(ctx: rule.Context, path: str, user: str, zone: str) -> None: if re.match('^/{}/home/datamanager-[^/]+/vault-[^/]+/.*'.format(zone), path): ingest_metadata_staging(ctx, path) elif re.match('^/{}/home/vault-[^/]+/.*'.format(zone), path): @@ -744,9 +739,8 @@ def set_result(msg_short, msg_long): set_result('Success', '') -def copy_user_metadata(ctx, source, target): - """ - Copy the user metadata (AVUs) of a collection to another collection. +def copy_user_metadata(ctx: rule.Context, source: str, target: str) -> None: + """Copy the user metadata (AVUs) of a collection to another collection. This only copies user metadata, so it ignores system metadata. @@ -759,7 +753,7 @@ def copy_user_metadata(ctx, source, target): user_metadata = list(avu.inside_coll(ctx, source, recursive=True)) # Group AVUs by entity and filter system metadata. 
- grouped_user_metadata = {} + grouped_user_metadata: Dict = {} for path, type, attribute, value, unit in user_metadata: if not attribute.startswith(constants.UUORGMETADATAPREFIX) and unit != constants.UUFLATINDEX and not unit.startswith(constants.UUUSERMETADATAROOT + '_'): grouped_user_metadata.setdefault(path, {"type": type, "avus": []}) @@ -792,7 +786,7 @@ def copy_user_metadata(ctx, source, target): log.write(ctx, "copy_user_metadata: failed to copy user metadata from <{}> to <{}/original>".format(source, target)) -def vault_metadata_matches_schema(ctx, coll_name, schema_cache, report_name, write_stdout): +def vault_metadata_matches_schema(ctx: rule.Context, coll_name: str, schema_cache: Dict, report_name: str, write_stdout: bool) -> Optional[Dict]: """Process a single data package to retrieve and validate that its metadata conforms to the schema. :param ctx: Combined type of a callback and rei struct @@ -801,7 +795,7 @@ def vault_metadata_matches_schema(ctx, coll_name, schema_cache, report_name, wri :param report_name: Name of report script (for logging) :param write_stdout: A boolean representing whether to write to stdout or rodsLog - :returns: A dictionary result containing if schema matches and the schema short name. + :returns: A dictionary result containing if schema matches and the schema short name. 
""" metadata_path = get_latest_vault_metadata_path(ctx, coll_name) @@ -818,6 +812,9 @@ def vault_metadata_matches_schema(ctx, coll_name, schema_cache, report_name, wri # Determine schema schema_id = schema_.get_schema_id(ctx, metadata_path) + if schema_id is None: + return None + schema_shortname = schema_id.split("/")[-2] # Retrieve schema and cache it for future use diff --git a/meta_form.py b/meta_form.py index add877018..94f8fa4f0 100644 --- a/meta_form.py +++ b/meta_form.py @@ -4,6 +4,7 @@ __license__ = 'GPLv3, see LICENSE' import re +from typing import Dict, List, Optional, Tuple import irods_types @@ -19,7 +20,7 @@ 'api_meta_form_save'] -def get_coll_lock(ctx, path, org_metadata=None): +def get_coll_lock(ctx: rule.Context, path: str, org_metadata: Optional[List] = None) -> Tuple[str, str]: """Check for existence of locks on a collection. path -> ((no|here|outoftree|ancestor|descendant), rootcoll) @@ -33,7 +34,7 @@ def get_coll_lock(ctx, path, org_metadata=None): if org_metadata is None: org_metadata = folder.get_org_metadata(ctx, path) - ret = ('no', None) + ret = ('no', '') for root in [v for k, v in org_metadata if k == constants.IILOCKATTRNAME]: if root == path: @@ -49,7 +50,7 @@ def get_coll_lock(ctx, path, org_metadata=None): return ret -def get_coll_lock_count(ctx, path, org_metadata=None): +def get_coll_lock_count(ctx: rule.Context, path: str, org_metadata: Optional[List] = None) -> int: """Count locks on a collection. :param ctx: Combined type of a callback and rei struct @@ -69,7 +70,7 @@ def get_coll_lock_count(ctx, path, org_metadata=None): return count -def humanize_validation_error(e): +def humanize_validation_error(e: str) -> str: """Transform a jsonschema validation error such that it is readable by humans. 
:param e: a jsonschema.exceptions.ValidationError @@ -98,7 +99,7 @@ def humanize_validation_error(e): return 'This field contains an error: ' + ' -> '.join(path_out) -def load(ctx, coll): +def load(ctx: rule.Context, coll: str) -> api.Result: """Retrieve all information required to load a metadata form in either the research or vault space. This produces a JSON struct on stdout. If no transformation is required @@ -278,7 +279,7 @@ def load(ctx, coll): 'is_locked': is_locked} -def save(ctx, coll, metadata): +def save(ctx: rule.Context, coll: str, metadata: Dict) -> api.Result: """Validate and store JSON metadata for a given collection. :param ctx: Combined type of a callback and rei struct diff --git a/notifications.py b/notifications.py index c875e9956..5d0b27db0 100644 --- a/notifications.py +++ b/notifications.py @@ -10,6 +10,7 @@ import time import urllib.parse from datetime import datetime, timedelta +from typing import List, Tuple import genquery from dateutil import relativedelta @@ -34,13 +35,13 @@ NOTIFICATION_KEY = constants.UUORGMETADATAPREFIX + "notification" -def generate_random_id(ctx): +def generate_random_id(ctx: rule.Context) -> str: """Generate random ID for notification.""" characters = string.ascii_lowercase + string.digits return ''.join(random.choice(characters) for x in range(10)) -def set(ctx, actor, receiver, target, message): +def set(ctx: rule.Context, actor: str, receiver: str, target: str, message: str) -> None: """Set user notification and send mail notification when configured. :param ctx: Combined type of a callback and rei struct @@ -63,13 +64,13 @@ def set(ctx, actor, receiver, target, message): @api.make() -def api_notifications_load(ctx, sort_order="desc"): +def api_notifications_load(ctx: rule.Context, sort_order: str = "desc") -> List: """Load user notifications. 
:param ctx: Combined type of a callback and rei struct :param sort_order: Sort order of notifications on timestamp ("asc" or "desc", default "desc") - :returns: Dict with all notifications + :returns: List with all notifications """ results = [v for v in Query(ctx, "META_USER_ATTR_VALUE", @@ -144,7 +145,7 @@ def api_notifications_load(ctx, sort_order="desc"): @api.make() -def api_notifications_dismiss(ctx, identifier): +def api_notifications_dismiss(ctx: rule.Context, identifier: str) -> api.Result: """Dismiss user notification. :param ctx: Combined type of a callback and rei struct @@ -156,7 +157,7 @@ def api_notifications_dismiss(ctx, identifier): @api.make() -def api_notifications_dismiss_all(ctx): +def api_notifications_dismiss_all(ctx: rule.Context) -> api.Result: """Dismiss all user notifications. :param ctx: Combined type of a callback and rei struct @@ -166,7 +167,7 @@ def api_notifications_dismiss_all(ctx): ctx.uuUserMetaRemove(user_name, key, '', '') -def send_notification(ctx, to, actor, message): +def send_notification(ctx: rule.Context, to: str, actor: str, message: str) -> api.Result: return mail.send(ctx, to=to, actor=actor, @@ -183,9 +184,9 @@ def send_notification(ctx, to, actor, message): @rule.make(inputs=[0, 1], outputs=[2, 3]) -def rule_mail_notification_report(ctx, to, notifications): +def rule_mail_notification_report(ctx: rule.Context, to: str, notifications: str) -> Tuple[str, str]: if not user.is_admin(ctx): - return api.Error('not_allowed', 'Only rodsadmin can send test mail') + return '0', 'Only rodsadmin can send test mail' return mail.wrapper(ctx, to=to, @@ -203,7 +204,7 @@ def rule_mail_notification_report(ctx, to, notifications): @rule.make() -def rule_process_ending_retention_packages(ctx): +def rule_process_ending_retention_packages(ctx: rule.Context) -> None: """Rule interface for checking vault packages for ending retention. 
:param ctx: Combined type of a callback and rei struct @@ -296,7 +297,7 @@ def rule_process_ending_retention_packages(ctx): @rule.make() -def rule_process_groups_expiration_date(ctx): +def rule_process_groups_expiration_date(ctx: rule.Context) -> None: """Rule interface for checking research groups for reaching group expiration date. :param ctx: Combined type of a callback and rei struct @@ -346,7 +347,7 @@ def rule_process_groups_expiration_date(ctx): @rule.make() -def rule_process_inactive_research_groups(ctx): +def rule_process_inactive_research_groups(ctx: rule.Context) -> None: """Rule interface for checking for research groups that have not been modified after a certain amount of months. :param ctx: Combined type of a callback and rei struct @@ -459,7 +460,7 @@ def rule_process_inactive_research_groups(ctx): @rule.make() -def rule_process_data_access_token_expiry(ctx): +def rule_process_data_access_token_expiry(ctx: rule.Context) -> None: """Rule interface for checking for data access tokens that are expiring soon. :param ctx: Combined type of a callback and rei struct diff --git a/policies.py b/policies.py index 0935f8abd..335ba5a6c 100644 --- a/policies.py +++ b/policies.py @@ -4,6 +4,7 @@ __license__ = 'GPLv3, see LICENSE' import re +from typing import Optional import session_vars @@ -33,7 +34,7 @@ # Separate from ACLs, we deny certain operations on collections and data in # research or deposit folders when paths are locked. 
-def can_coll_create(ctx, actor, coll): +def can_coll_create(ctx: rule.Context, actor: str, coll: str) -> policy.Succeed | policy.Fail: """Disallow creating collections in locked folders.""" log.debug(ctx, 'check coll create <{}>'.format(coll)) @@ -48,7 +49,7 @@ def can_coll_create(ctx, actor, coll): return policy.succeed() -def can_coll_delete(ctx, actor, coll): +def can_coll_delete(ctx: rule.Context, actor: str, coll: str) -> policy.Succeed | policy.Fail: """Disallow deleting collections in locked folders and collections containing locked folders.""" log.debug(ctx, 'check coll delete <{}>'.format(coll)) @@ -66,14 +67,14 @@ def can_coll_delete(ctx, actor, coll): return policy.succeed() -def can_coll_move(ctx, actor, src, dst): +def can_coll_move(ctx: rule.Context, actor: str, src: str, dst: str) -> policy.Succeed | policy.Fail: log.debug(ctx, 'check coll move <{}> -> <{}>'.format(src, dst)) return policy.all(can_coll_delete(ctx, actor, src), can_coll_create(ctx, actor, dst)) -def can_data_create(ctx, actor, path): +def can_data_create(ctx: rule.Context, actor: str, path: str) -> policy.Succeed | policy.Fail: log.debug(ctx, 'check data create <{}>'.format(path)) if pathutil.info(path).space in [pathutil.Space.RESEARCH, pathutil.Space.DEPOSIT]: @@ -95,7 +96,7 @@ def can_data_create(ctx, actor, path): return policy.succeed() -def can_data_write(ctx, actor, path): +def can_data_write(ctx: rule.Context, actor: str, path: str) -> policy.Succeed | policy.Fail: log.debug(ctx, 'check data write <{}>'.format(path)) # Disallow writing to locked objects in research and deposit folders. 
@@ -111,7 +112,7 @@ def can_data_write(ctx, actor, path): return policy.succeed() -def can_data_delete(ctx, actor, path): +def can_data_delete(ctx: rule.Context, actor: str, path: str) -> policy.Succeed | policy.Fail: if re.match(r'^/[^/]+/home/[^/]+$', path) and not user.is_admin(ctx, actor): return policy.fail('Cannot delete or move data directly under /home') @@ -126,12 +127,12 @@ def can_data_delete(ctx, actor, path): return policy.succeed() -def can_data_copy(ctx, actor, src, dst): +def can_data_copy(ctx: rule.Context, actor: str, src: str, dst: str) -> policy.Succeed | policy.Fail: log.debug(ctx, 'check data copy <{}> -> <{}>'.format(src, dst)) return can_data_create(ctx, actor, dst) -def can_data_move(ctx, actor, src, dst): +def can_data_move(ctx: rule.Context, actor: str, src: str, dst: str) -> policy.Succeed | policy.Fail: log.debug(ctx, 'check data move <{}> -> <{}>'.format(src, dst)) return policy.all(can_data_delete(ctx, actor, src), can_data_create(ctx, actor, dst)) @@ -151,7 +152,7 @@ def can_data_move(ctx, actor, src, dst): # Most of them 'cut' and call identically named Python functions in this file. 
@policy.require() -def py_acPreprocForCollCreate(ctx): +def py_acPreprocForCollCreate(ctx: rule.Context) -> policy.Succeed | policy.Fail: log.debug(ctx, 'py_acPreprocForCollCreate') # print(jsonutil.dump(session_vars.get_map(ctx.rei))) return can_coll_create(ctx, user.user_and_zone(ctx), @@ -159,7 +160,7 @@ def py_acPreprocForCollCreate(ctx): @policy.require() -def py_acPreprocForRmColl(ctx): +def py_acPreprocForRmColl(ctx: rule.Context) -> policy.Succeed | policy.Fail: log.debug(ctx, 'py_acPreprocForRmColl') # print(jsonutil.dump(session_vars.get_map(ctx.rei))) return can_coll_delete(ctx, user.user_and_zone(ctx), @@ -167,7 +168,7 @@ def py_acPreprocForRmColl(ctx): @policy.require() -def py_acPreprocForDataObjOpen(ctx): +def py_acPreprocForDataObjOpen(ctx: rule.Context) -> policy.Succeed | policy.Fail: log.debug(ctx, 'py_acPreprocForDataObjOpen') # data object reads are always allowed. # writes are blocked e.g. when the object is locked (unless actor is a rodsadmin). @@ -179,7 +180,7 @@ def py_acPreprocForDataObjOpen(ctx): @policy.require() -def py_acDataDeletePolicy(ctx): +def py_acDataDeletePolicy(ctx: rule.Context) -> policy.Succeed | policy.Fail: log.debug(ctx, 'py_acDataDeletePolicy') return (policy.succeed() if can_data_delete(ctx, user.user_and_zone(ctx), @@ -188,7 +189,7 @@ def py_acDataDeletePolicy(ctx): @policy.require() -def py_acPreProcForObjRename(ctx, src, dst): +def py_acPreProcForObjRename(ctx: rule.Context, src: str, dst: str) -> policy.Succeed | policy.Fail: log.debug(ctx, 'py_acPreProcForObjRename') # irods/lib/api/include/dataObjInpOut.h @@ -200,11 +201,11 @@ def py_acPreProcForObjRename(ctx, src, dst): elif session_vars.get_map(ctx.rei)['operation_type'] == RENAME_COLL: return can_coll_move(ctx, user.user_and_zone(ctx), src, dst) - # if ($objPath like regex "/[^/]+/home/" ++ IIGROUPPREFIX ++ ".[^/]*/.*") { + return policy.succeed() @policy.require() -def py_acPostProcForPut(ctx): +def py_acPostProcForPut(ctx: rule.Context) -> policy.Succeed | 
policy.Fail: log.debug(ctx, 'py_acPostProcForPut') # Data object creation cannot be prevented by API dynpeps and static PEPs, # due to how MSIs work. Thus, this ugly workaround specifically for MSIs. @@ -218,7 +219,7 @@ def py_acPostProcForPut(ctx): @policy.require() -def py_acPostProcForCopy(ctx): +def py_acPostProcForCopy(ctx: rule.Context) -> policy.Succeed | policy.Fail: # See py_acPostProcForPut. log.debug(ctx, 'py_acPostProcForCopy') @@ -255,7 +256,10 @@ def py_acPostProcForCopy(ctx): @policy.require() -def pep_api_data_obj_create_pre(ctx, instance_name, rs_comm, data_obj_inp): +def pep_api_data_obj_create_pre(ctx: rule.Context, + instance_name: str, + rs_comm: object, + data_obj_inp: object) -> policy.Succeed | policy.Fail: log.debug(ctx, 'pep_api_data_obj_create_pre') # Catch object creation/overwrite via Davrods and PRC. @@ -268,7 +272,11 @@ def pep_api_data_obj_create_pre(ctx, instance_name, rs_comm, data_obj_inp): @policy.require() -def pep_api_data_obj_create_and_stat_pre(ctx, instance_name, rs_comm, data_obj_inp, open_stat): +def pep_api_data_obj_create_and_stat_pre(ctx: rule.Context, + instance_name: str, + rs_comm: object, + data_obj_inp: object, + open_stat: object) -> policy.Succeed | policy.Fail: log.debug(ctx, 'pep_api_data_obj_create_and_stat_pre') # Not triggered by any of our clients currently, but needed for completeness. 
@@ -306,14 +314,20 @@ def pep_api_data_obj_create_and_stat_pre(ctx, instance_name, rs_comm, data_obj_i @policy.require() -def pep_api_data_obj_trim_pre(ctx, instance_name, rs_comm, data_obj_inp): +def pep_api_data_obj_trim_pre(ctx: rule.Context, + instance_name: str, + rs_comm: object, + data_obj_inp: object) -> policy.Succeed | policy.Fail: log.debug(ctx, 'pep_api_data_obj_trim_pre') return can_data_write(ctx, user.user_and_zone(ctx), str(data_obj_inp.objPath)) @policy.require() -def pep_api_data_obj_truncate_pre(ctx, instance_name, rs_comm, data_obj_truncate_inp): +def pep_api_data_obj_truncate_pre(ctx: rule.Context, + instance_name: str, + rs_comm: object, + data_obj_truncate_inp: object) -> policy.Succeed | policy.Fail: log.debug(ctx, 'pep_api_data_obj_truncate_pre') return can_data_write(ctx, user.user_and_zone(ctx), str(data_obj_truncate_inp.objPath)) @@ -337,8 +351,13 @@ def pep_api_data_obj_truncate_pre(ctx, instance_name, rs_comm, data_obj_truncate # Policy for most AVU changes @policy.require() -def py_acPreProcForModifyAVUMetadata(ctx, option, obj_type, obj_name, attr, value, unit): - +def py_acPreProcForModifyAVUMetadata(ctx: rule.Context, + option: str, + obj_type: str, + obj_name: str, + attr: str, + value: str, + unit: str) -> policy.Succeed | policy.Fail: actor = user.user_and_zone(ctx) if obj_type not in ['-d', '-C']: @@ -412,21 +431,32 @@ def py_acPreProcForModifyAVUMetadata(ctx, option, obj_type, obj_name, attr, valu # imeta mod @policy.require() -def py_acPreProcForModifyAVUMetadata_mod(ctx, *args): +def py_acPreProcForModifyAVUMetadata_mod(ctx: rule.Context, + option: str, + obj_type: str, + obj_name: str, + a_attr: str, + a_value: str, + a_unit: str, + b_name: str, + b_value: str, + b_unit: str) -> policy.Succeed | policy.Fail: actor = user.user_and_zone(ctx) if user.is_admin(ctx, actor): return policy.succeed() - if t_dst not in ['-d', '-C']: + if obj_type not in ['-d', '-C']: return policy.succeed() - if pathutil.info(dst).space in 
[pathutil.Space.RESEARCH, pathutil.Space.DEPOSIT, pathutil.Space.VAULT]: + if pathutil.info(obj_name).space in [pathutil.Space.RESEARCH, pathutil.Space.DEPOSIT, pathutil.Space.VAULT]: return policy.fail('Metadata mod not allowed') + return policy.succeed() + # imeta cp @policy.require() -def py_acPreProcForModifyAVUMetadata_cp(ctx, _, t_src, t_dst, src, dst): +def py_acPreProcForModifyAVUMetadata_cp(ctx: rule.Context, option: str, t_src: str, t_dst: str, src: str, dst: str) -> policy.Succeed | policy.Fail: actor = user.user_and_zone(ctx) if user.is_admin(ctx, actor): return policy.succeed() @@ -449,7 +479,13 @@ def py_acPreProcForModifyAVUMetadata_cp(ctx, _, t_src, t_dst, src, dst): # conditions defined in folder.py and iiVaultTransitions.r # are called here. @rule.make() -def py_acPostProcForModifyAVUMetadata(ctx, option, obj_type, obj_name, attr, value, unit): +def py_acPostProcForModifyAVUMetadata(ctx: rule.Context, + option: str, + obj_type: str, + obj_name: str, + attr: str, + value: str, + unit: str) -> None: info = pathutil.info(obj_name) if attr == constants.IISTATUSATTRNAME and info.space in [pathutil.Space.RESEARCH, pathutil.Space.DEPOSIT]: @@ -472,7 +508,10 @@ def py_acPostProcForModifyAVUMetadata(ctx, option, obj_type, obj_name, attr, val # ichmod @policy.require() -def pep_api_mod_access_control_pre(ctx, instance_name, rs_comm, mod_access_control_inp): +def pep_api_mod_access_control_pre(ctx: rule.Context, + instance_name: str, + rs_comm: object, + mod_access_control_inp: object) -> policy.Succeed | policy.Fail: log.debug(ctx, 'pep_api_mod_access_control_pre') actor = user.user_and_zone(ctx) if user.is_admin(ctx, actor): @@ -490,7 +529,11 @@ def pep_api_mod_access_control_pre(ctx, instance_name, rs_comm, mod_access_contr # ExecCmd {{{ @policy.require() -def py_acPreProcForExecCmd(ctx, cmd, args, addr, hint): +def py_acPreProcForExecCmd(ctx: rule.Context, + cmd: str, + args: str, + addr: str, + hint: str) -> policy.Succeed | policy.Fail: actor = 
user.user_and_zone(ctx) # No restrictions for rodsadmin and priv group. @@ -519,7 +562,7 @@ def py_acPreProcForExecCmd(ctx, cmd, args, addr, hint): # Internal function to determine whether changes to data objects on a particular # resource need to trigger policies (e.g. asynchronous replication) by default. -def resource_should_trigger_policies(resource): +def resource_should_trigger_policies(resource: str) -> bool: if resource in config.resource_primary: return True @@ -534,7 +577,10 @@ def resource_should_trigger_policies(resource): @rule.make() -def pep_resource_modified_post(ctx, instance_name, _ctx, out): +def pep_resource_modified_post(ctx: rule.Context, + instance_name: str, + _ctx: rule.Context, + out: str) -> None: if not resource_should_trigger_policies(instance_name): return @@ -578,7 +624,7 @@ def pep_resource_modified_post(ctx, instance_name, _ctx, out): @rule.make() -def py_acPostProcForObjRename(ctx, src, dst): +def py_acPostProcForObjRename(ctx: rule.Context, src: str, dst: str) -> None: # Update ACLs to give correct group ownership when an object is moved into # a different research- or grp- collection. 
info = pathutil.info(dst) @@ -588,9 +634,16 @@ def py_acPostProcForObjRename(ctx, src, dst): @rule.make(inputs=[0, 1, 2, 3, 4, 5, 6], outputs=[2]) -def pep_resource_resolve_hierarchy_pre(ctx, resource, _ctx, out, operation, host, parser, vote): +def pep_resource_resolve_hierarchy_pre(ctx: rule.Context, + resource: str, + _ctx: rule.Context, + out: str, + operation: str, + host: str, + parser: str, + vote: str) -> Optional[str]: if not config.arb_enabled or operation != "CREATE": - return + return None arb_data = arb_data_manager.ARBDataManager() arb_status = arb_data.get(ctx, resource) @@ -602,7 +655,7 @@ def pep_resource_resolve_hierarchy_pre(ctx, resource, _ctx, out, operation, host @rule.make(inputs=[0], outputs=[1]) -def rule_check_anonymous_access_allowed(ctx, address): +def rule_check_anonymous_access_allowed(ctx: rule.Context, address: str) -> str: """Check if access to the anonymous account is allowed from a particular network address. Non-local access to the anonymous account should only be allowed from DavRODS servers, for security reasons. @@ -617,7 +670,7 @@ def rule_check_anonymous_access_allowed(ctx, address): @rule.make(inputs=[], outputs=[0]) -def rule_check_max_connections_exceeded(ctx): +def rule_check_max_connections_exceeded(ctx: rule.Context) -> str: """Check if user exceeds the maximum number of connections. :param ctx: Combined type of a callback and rei struct @@ -636,7 +689,12 @@ def rule_check_max_connections_exceeded(ctx): @rule.make(inputs=[0, 1, 2, 3, 4], outputs=[]) -def pep_database_gen_query_pre(ctx, dbtype, _ctx, results, genquery_inp, genquery_out): +def pep_database_gen_query_pre(ctx: rule.Context, + dbtype: str, + _ctx: rule.Context, + results: str, + genquery_inp: object, + genquery_out: object) -> None: if not is_safe_genquery_inp(genquery_inp): # We can't use log here, because the REI is not (always) available. 
print("Refused unsafe query: " + str(genquery_inp)) diff --git a/policies_datamanager.py b/policies_datamanager.py index cfa51f54f..3800ba6ff 100644 --- a/policies_datamanager.py +++ b/policies_datamanager.py @@ -6,7 +6,12 @@ from util import * -def can_datamanager_acl_set(ctx, obj, actor, other_name, recursive, access): +def can_datamanager_acl_set(ctx: rule.Context, + obj: str, + actor: str, + other_name: str, + recursive: str, + access: str) -> policy.Succeed | policy.Fail: x = ctx.iiCanDatamanagerAclSet(obj, actor, other_name, recursive, access, '', '') if x['arguments'][5] == '\x01': return policy.succeed() diff --git a/policies_datapackage_status.py b/policies_datapackage_status.py index e4d5bbd34..16666251b 100644 --- a/policies_datapackage_status.py +++ b/policies_datapackage_status.py @@ -11,7 +11,10 @@ from util import * -def pre_status_transition(ctx, coll, current, new): +def pre_status_transition(ctx: rule.Context, + coll: str, + current: constants.research_package_state, + new: constants.research_package_state) -> policy.Succeed | policy.Fail: """Action taken before status transition.""" if current is constants.vault_package_state.SUBMITTED_FOR_PUBLICATION \ and new is constants.vault_package_state.UNPUBLISHED: @@ -21,7 +24,11 @@ def pre_status_transition(ctx, coll, current, new): return policy.succeed() -def can_transition_datapackage_status(ctx, actor, coll, status_from, status_to): +def can_transition_datapackage_status(ctx: rule.Context, + actor: str, + coll: str, + status_from: str, + status_to: str) -> policy.Succeed | policy.Fail: transition = (constants.vault_package_state(status_from), constants.vault_package_state(status_to)) if transition not in constants.datapackage_transitions: @@ -38,7 +45,10 @@ def can_transition_datapackage_status(ctx, actor, coll, status_from, status_to): return policy.succeed() -def can_set_datapackage_status_attr(ctx, actor, coll, status): +def can_set_datapackage_status_attr(ctx: rule.Context, + actor: str, + coll: 
str, + status: str) -> policy.Succeed | policy.Fail: try: new = constants.vault_package_state(status) except ValueError: @@ -53,7 +63,10 @@ def can_set_datapackage_status_attr(ctx, actor, coll, status): return (current, new) -def post_status_transition(ctx, path, actor, status): +def post_status_transition(ctx: rule.Context, + path: str, + actor: str, + status: str) -> None: """Post data package status transition actions.""" status = constants.vault_package_state(status) actor = ctx.iiVaultGetActionActor(path, actor, '')['arguments'][2] diff --git a/policies_datarequest_status.py b/policies_datarequest_status.py index 16e3bc482..c7381b760 100644 --- a/policies_datarequest_status.py +++ b/policies_datarequest_status.py @@ -9,8 +9,9 @@ from util import * -def can_set_datarequest_status(ctx, obj_name, status_to): - +def can_set_datarequest_status(ctx: rule.Context, + obj_name: str, + status_to: str) -> policy.Succeed | policy.Fail: # Get current status. try: status_from = datarequest.status_get_from_path(ctx, obj_name) @@ -26,8 +27,7 @@ def can_set_datarequest_status(ctx, obj_name, status_to): return policy.succeed() -def post_status_transition(ctx, obj_name, value): - +def post_status_transition(ctx: rule.Context, obj_name: str, value: str) -> None: # Write timestamp to provenance log request_id = re.sub(r"^[^0-9]*/(\d+).*", r"\1", obj_name) status = datarequest.status[value] diff --git a/policies_folder_status.py b/policies_folder_status.py index f9483928f..2afae97e3 100644 --- a/policies_folder_status.py +++ b/policies_folder_status.py @@ -12,7 +12,10 @@ from util import * -def pre_status_transition(ctx, coll, current, new): +def pre_status_transition(ctx: rule.Context, + coll: str, + current: constants.research_package_state, + new: constants.research_package_state) -> policy.Succeed | policy.Fail: """Action taken before status transition.""" if current != constants.research_package_state.LOCKED \ and new in [constants.research_package_state.LOCKED, @@ -50,7 
+53,11 @@ def pre_status_transition(ctx, coll, current, new): return policy.succeed() -def can_transition_folder_status(ctx, actor, coll, status_from, status_to): +def can_transition_folder_status(ctx: rule.Context, + actor: str, + coll: str, + status_from: str, + status_to: str) -> policy.Succeed | policy.Fail: transition = (constants.research_package_state(status_from), constants.research_package_state(status_to)) if transition not in constants.folder_transitions: @@ -84,7 +91,10 @@ def can_transition_folder_status(ctx, actor, coll, status_from, status_to): return policy.succeed() -def can_set_folder_status_attr(ctx, actor, coll, status): +def can_set_folder_status_attr(ctx: rule.Context, + actor: str, + coll: str, + status: str) -> policy.Succeed | policy.Fail: try: status = "" if status == "FOLDER" else status new = constants.research_package_state(status) @@ -100,9 +110,11 @@ def can_set_folder_status_attr(ctx, actor, coll, status): return (current, new) -def post_status_transition(ctx, path, actor, status): +def post_status_transition(ctx: rule.Context, + path: str, + actor: str, + status: str) -> None: """Post folder status transition actions.""" - status = "" if status == "FOLDER" else status status = constants.research_package_state(status) diff --git a/policies_intake.py b/policies_intake.py index 3981bbfeb..aaca62c57 100644 --- a/policies_intake.py +++ b/policies_intake.py @@ -3,12 +3,14 @@ __copyright__ = 'Copyright (c) 2021-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import Dict + import genquery from util import * -def object_is_locked(ctx, path, is_collection): +def object_is_locked(ctx: rule.Context, path: str, is_collection: bool) -> Dict: """Returns whether given object in path (collection or dataobject) is locked or frozen :param ctx: Combined type of a callback and rei struct @@ -48,7 +50,7 @@ def object_is_locked(ctx, path, is_collection): return locked_state -def is_data_in_locked_dataset(ctx, actor, path): +def 
is_data_in_locked_dataset(ctx: rule.Context, actor: str, path: str) -> bool: """ Check whether given data object is within a locked dataset """ dataset_id = '' coll = pathutil.chop(path)[0] @@ -114,7 +116,7 @@ def is_data_in_locked_dataset(ctx, actor, path): return False -def is_coll_in_locked_dataset(ctx, actor, coll): +def is_coll_in_locked_dataset(ctx: rule.Context, actor: str, coll: str) -> bool: """ Check whether given collection is within a locked dataset """ dataset_id = '' intake_group_prefix = _get_intake_group_prefix(coll) @@ -160,14 +162,14 @@ def is_coll_in_locked_dataset(ctx, actor, coll): return (locked_state['locked'] or locked_state['frozen']) and not user.is_admin(ctx, actor) else: # Lock status could not be determined. Assume collection is not locked. - log.debug(ctx, "Could not determine lock state of data object " + path) + log.debug(ctx, "Could not determine lock state of data object " + coll) return False log.debug(ctx, 'After check for datasetid - no dataset found') return False -def coll_in_path_of_locked_dataset(ctx, actor, coll): +def coll_in_path_of_locked_dataset(ctx: rule.Context, actor: str, coll: str) -> bool: """ If collection is part of a locked dataset, or holds one on a deeper level, then deletion is not allowed """ dataset_id = '' intake_group_prefix = _get_intake_group_prefix(coll) @@ -211,7 +213,7 @@ def coll_in_path_of_locked_dataset(ctx, actor, coll): log.debug(ctx, locked_state) return (locked_state['locked'] or locked_state['frozen']) and not user.is_admin(ctx, actor) else: - log.debug(ctx, "Could not determine lock state of data object " + path) + log.debug(ctx, "Could not determine lock state of data object " + coll) # Pretend presence of a lock so no unwanted data gets deleted return True else: @@ -242,7 +244,7 @@ def coll_in_path_of_locked_dataset(ctx, actor, coll): return False -def _get_intake_group_prefix(coll): +def _get_intake_group_prefix(coll: str) -> str: """ Get the group prefix of a intake collection name: 
'grp-intake' or 'intake' """ parts = coll.split('/')[3].split('-') del parts[-1] diff --git a/policies_utils.py b/policies_utils.py index c8ebe0eda..a4b4dec19 100644 --- a/policies_utils.py +++ b/policies_utils.py @@ -4,11 +4,12 @@ __license__ = 'GPLv3, see LICENSE' import ast +from typing import Set from util.genquery_col_constants import * -def is_safe_genquery_inp(genquery_inp): +def is_safe_genquery_inp(genquery_inp: object) -> bool: """Checks if a GenQuery input matches Yoda policies :param genquery_inp: GenQueryInp object containing query information @@ -19,7 +20,7 @@ def is_safe_genquery_inp(genquery_inp): return _is_safe_genquery_inp(genquery_inp.selectInp, genquery_inp.sqlCondInp.inx) -def _column_in_select_inp(selectInp, columns): +def _column_in_select_inp(selectInp: Set[int], columns: Set[int]) -> bool: selectedInpHash = ast.literal_eval(str(selectInp)) selected_columns = selectedInpHash.keys() for column in columns: @@ -28,7 +29,7 @@ def _column_in_select_inp(selectInp, columns): return False -def _column_in_cond_inp(sqlCondInp, columns): +def _column_in_cond_inp(sqlCondInp: Set[int], columns: Set[int]) -> bool: condition_columns = ast.literal_eval(str(sqlCondInp)) for column in columns: if column in condition_columns: @@ -36,7 +37,7 @@ def _column_in_cond_inp(sqlCondInp, columns): return False -def _is_safe_genquery_inp(selectInp, sqlCondInp): +def _is_safe_genquery_inp(selectInp: Set[int], sqlCondInp: Set[int]) -> bool: # Defines groups of GenQuery columns dataobject_columns = {COL_D_DATA_ID, COL_D_COLL_ID, COL_DATA_NAME, COL_DATA_REPL_NUM, COL_DATA_VERSION, COL_DATA_TYPE_NAME, COL_DATA_SIZE, diff --git a/provenance.py b/provenance.py index 171cb8493..0ba487cee 100644 --- a/provenance.py +++ b/provenance.py @@ -5,6 +5,7 @@ import json import time +from typing import List import genquery @@ -17,7 +18,7 @@ @rule.make() -def rule_provenance_log_action(ctx, actor, coll, action): +def rule_provenance_log_action(ctx: rule.Context, actor: str, coll: str, 
action: str) -> None: """Function to add action log record to provenance of specific folder. :param ctx: Combined type of a callback and rei struct @@ -34,7 +35,7 @@ def rule_provenance_log_action(ctx, actor, coll, action): log.write(ctx, "rule_provenance_log_action: failed to log action <{}> to provenance".format(action)) -def log_action(ctx, actor, coll, action, update=True): +def log_action(ctx: rule.Context, actor: str, coll: str, action: str, update: bool = True) -> None: """Function to add action log record to provenance of specific folder. :param ctx: Combined type of a callback and rei struct @@ -54,7 +55,7 @@ def log_action(ctx, actor, coll, action, update=True): @rule.make() -def rule_copy_provenance_log(ctx, source, target): +def rule_copy_provenance_log(ctx: rule.Context, source: str, target: str) -> None: """Copy the provenance log of a collection to another collection. :param ctx: Combined type of a callback and rei struct @@ -64,7 +65,7 @@ def rule_copy_provenance_log(ctx, source, target): provenance_copy_log(ctx, source, target) -def provenance_copy_log(ctx, source, target): +def provenance_copy_log(ctx: rule.Context, source: str, target: str) -> None: """Copy the provenance log of a collection to another collection. :param ctx: Combined type of a callback and rei struct @@ -88,7 +89,7 @@ def provenance_copy_log(ctx, source, target): log.write(ctx, "rule_copy_provenance_log: failed to copy provenance log from <{}> to <{}>".format(source, target)) -def get_provenance_log(ctx, coll): +def get_provenance_log(ctx: rule.Context, coll: str) -> List: """Return provenance log of a collection. :param ctx: Combined type of a callback and rei struct @@ -113,7 +114,7 @@ def get_provenance_log(ctx, coll): @api.make() -def api_provenance_log(ctx, coll): +def api_provenance_log(ctx: rule.Context, coll: str) -> api.Result: """Return formatted provenance log of a collection. 
:param ctx: Combined type of a callback and rei struct @@ -134,7 +135,7 @@ def api_provenance_log(ctx, coll): return output -def latest_action_actor(ctx, path): +def latest_action_actor(ctx: rule.Context, path: str) -> str: """Return the actor of the latest provenance action. :param ctx: Combined type of a callback and rei struct diff --git a/publication.py b/publication.py index c30aca478..b3b5c9fef 100644 --- a/publication.py +++ b/publication.py @@ -3,8 +3,10 @@ __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +import json import re from datetime import datetime +from typing import Dict, List, Tuple import genquery from requests.exceptions import ReadTimeout @@ -25,7 +27,7 @@ 'rule_lift_embargos_on_data_access'] -def get_publication_config(ctx): +def get_publication_config(ctx: rule.Context) -> Dict: """Get all publication config keys and their values and report any missing keys.""" zone = user.zone(ctx) system_coll = "/" + zone + constants.UUSYSTEMCOLLECTION @@ -70,12 +72,14 @@ def get_publication_config(ctx): return config_keys -def generate_combi_json(ctx, publication_config, publication_state): +def generate_combi_json(ctx: rule.Context, publication_config: Dict, publication_state: Dict) -> None: """Join system metadata with the user metadata in yoda-metadata.json. 
:param ctx: Combined type of a callback and rei struct :param publication_config: Dict with publication configuration :param publication_state: Dict with state of the publication process + + :raises Exception: When latest metadata is not found """ temp_coll = "/" + user.zone(ctx) + constants.IIPUBLICATIONCOLLECTION davrodsAnonymousVHost = publication_config["davrodsAnonymousVHost"] @@ -100,6 +104,8 @@ def generate_combi_json(ctx, publication_config, publication_state): # metadataJsonPath contains latest json metadataJsonPath = meta.get_latest_vault_metadata_path(ctx, vaultPackage) + if metadataJsonPath is None: + raise Exception # Combine content of current *metadataJsonPath with system info and creates a new file in *combiJsonPath: json_datacite.json_datacite_create_combi_metadata_json(ctx, metadataJsonPath, combiJsonPath, lastModifiedDateTime, versionDOI, publicationDate, openAccessLink, licenseUri) @@ -107,7 +113,7 @@ def generate_combi_json(ctx, publication_config, publication_state): publication_state["combiJsonPath"] = combiJsonPath -def generate_system_json(ctx, publication_state): +def generate_system_json(ctx: rule.Context, publication_state: Dict) -> None: """Overwrite combi metadata json with system-only metadata. :param ctx: Combined type of a callback and rei struct @@ -135,7 +141,7 @@ def generate_system_json(ctx, publication_state): publication_state["combiJsonPath"] = system_json_path -def get_publication_state(ctx, vault_package): +def get_publication_state(ctx: rule.Context, vault_package: str) -> Dict: """The publication state is kept as metadata on the vault package. 
:param ctx: Combined type of a callback and rei struct @@ -191,7 +197,7 @@ def get_publication_state(ctx, vault_package): return publication_state -def save_publication_state(ctx, vault_package, publication_state): +def save_publication_state(ctx: rule.Context, vault_package: str, publication_state: Dict) -> None: """Save the publication state key-value-pairs to AVU's on the vault package. :param ctx: Combined type of a callback and rei struct @@ -204,7 +210,7 @@ def save_publication_state(ctx, vault_package, publication_state): avu.set_on_coll(ctx, vault_package, constants.UUORGMETADATAPREFIX + 'publication_' + key, publication_state[key]) -def set_update_publication_state(ctx, vault_package): +def set_update_publication_state(ctx: rule.Context, vault_package: str) -> str: """Routine to set publication state of vault package pending to update. :param ctx: Combined type of a callback and rei struct @@ -251,7 +257,7 @@ def set_update_publication_state(ctx, vault_package): return "" -def get_publication_date(ctx, vault_package): +def get_publication_date(ctx: rule.Context, vault_package: str) -> str: """Determine the time of publication as a datetime with UTC offset. First try action_log. Then icat-time. @@ -279,7 +285,7 @@ def get_publication_date(ctx, vault_package): return my_date.strftime('%Y-%m-%dT%H:%M:%S.%f%z') -def get_last_modified_datetime(ctx, vault_package): +def get_last_modified_datetime(ctx: rule.Context, vault_package: str) -> str: """Determine the time of last modification as a datetime with UTC offset. 
:param ctx: Combined type of a callback and rei struct @@ -294,13 +300,14 @@ def get_last_modified_datetime(ctx, vault_package): ) for row in iter: log_item_list = jsonutil.parse(row[1]) - my_date = datetime.fromtimestamp(int(log_item_list[0])) - return my_date.strftime('%Y-%m-%dT%H:%M:%S.%f%z') + my_date = datetime.now() + return my_date.strftime('%Y-%m-%dT%H:%M:%S.%f%z') + -def generate_preliminary_doi(ctx, publication_config, publication_state): +def generate_preliminary_doi(ctx: rule.Context, publication_config: Dict, publication_state: Dict) -> None: """Generate a Preliminary DOI. Preliminary, because we check for collision later. :param ctx: Combined type of a callback and rei struct @@ -310,13 +317,13 @@ def generate_preliminary_doi(ctx, publication_config, publication_state): dataCitePrefix = publication_config["dataCitePrefix"] yodaPrefix = publication_config["yodaPrefix"] - randomId = datacite.generate_random_id(ctx, publication_config["randomIdLength"]) + randomId = datacite.generate_random_id(publication_config["randomIdLength"]) publication_state["randomId"] = randomId publication_state["versionDOI"] = dataCitePrefix + "/" + yodaPrefix + "-" + randomId -def generate_base_doi(ctx, publication_config, publication_state): +def generate_base_doi(ctx: rule.Context, publication_config: Dict, publication_state: Dict) -> None: """Generate a base DOI. 
:param ctx: Combined type of a callback and rei struct @@ -326,13 +333,13 @@ def generate_base_doi(ctx, publication_config, publication_state): dataCitePrefix = publication_config["dataCitePrefix"] yodaPrefix = publication_config["yodaPrefix"] - randomId = datacite.generate_random_id(ctx, publication_config["randomIdLength"]) + randomId = datacite.generate_random_id(publication_config["randomIdLength"]) publication_state["baseRandomId"] = randomId publication_state["baseDOI"] = dataCitePrefix + "/" + yodaPrefix + "-" + randomId -def generate_datacite_json(ctx, publication_state): +def generate_datacite_json(ctx: rule.Context, publication_state: Dict) -> None: """Generate a DataCite compliant JSON based on yoda-metadata.json. :param ctx: Combined type of a callback and rei struct @@ -353,7 +360,7 @@ def generate_datacite_json(ctx, publication_state): publication_state["dataCiteJsonPath"] = datacite_json_path -def post_metadata_to_datacite(ctx, publication_state, doi, send_method, base_doi=False): +def post_metadata_to_datacite(ctx: rule.Context, publication_state: Dict, doi: str, send_method: str, base_doi: bool = False) -> None: """Upload DataCite JSON to DataCite. This will register the DOI, without minting it. 
:param ctx: Combined type of a callback and rei struct @@ -370,9 +377,9 @@ def post_metadata_to_datacite(ctx, publication_state, doi, send_method, base_doi try: if send_method == 'post': - httpCode = datacite.metadata_post(ctx, datacite_json) + httpCode = datacite.metadata_post(datacite_json) else: - httpCode = datacite.metadata_put(ctx, doi, datacite_json) + httpCode = datacite.metadata_put(doi, datacite_json) if (send_method == 'post' and httpCode == 201) or (send_method == 'put' and httpCode == 200): publication_state["dataCiteMetadataPosted"] = "yes" @@ -389,7 +396,7 @@ def post_metadata_to_datacite(ctx, publication_state, doi, send_method, base_doi publication_state["status"] = "Retry" -def post_draft_doi_to_datacite(ctx, publication_state): +def post_draft_doi_to_datacite(ctx: rule.Context, publication_state: Dict) -> None: """Upload DOI to DataCite. This will register the DOI as a draft. This function is also a draft, and will have to be reworked! @@ -401,7 +408,7 @@ def post_draft_doi_to_datacite(ctx, publication_state): try: # post the DOI only - httpCode = datacite.metadata_post(ctx, { + httpCode = datacite.metadata_post({ 'data': { 'type': 'dois', 'attributes': { @@ -425,18 +432,17 @@ def post_draft_doi_to_datacite(ctx, publication_state): publication_state["status"] = "Retry" -def remove_metadata_from_datacite(ctx, publication_state, type_flag): +def remove_metadata_from_datacite(ctx: rule.Context, publication_state: Dict, type_flag: str) -> None: """Remove metadata XML from DataCite. 
:param ctx: Combined type of a callback and rei struct :param publication_state: Dict with state of the publication process :param type_flag: Determine whether it is base DOI or version DOI """ - import json payload = json.dumps({"data": {"attributes": {"event": "hide"}}}) try: - httpCode = datacite.metadata_put(ctx, publication_state[type_flag + "DOI"], payload) + httpCode = datacite.metadata_put(publication_state[type_flag + "DOI"], payload) if httpCode == 200: publication_state["dataCiteMetadataPosted"] = "yes" @@ -457,18 +463,17 @@ def remove_metadata_from_datacite(ctx, publication_state, type_flag): publication_state["status"] = "Retry" -def mint_doi(ctx, publication_state, type_flag): +def mint_doi(ctx: rule.Context, publication_state: Dict, type_flag: str) -> None: """Announce the landing page URL for a DOI to dataCite. This will mint the DOI. :param ctx: Combined type of a callback and rei struct :param publication_state: Dict with state of the publication process :param type_flag: Flag indicating DOI type ('version' or 'base') """ - import json payload = json.dumps({"data": {"attributes": {"url": publication_state["landingPageUrl"]}}}) try: - httpCode = datacite.metadata_put(ctx, publication_state[type_flag + "DOI"], payload) + httpCode = datacite.metadata_put(publication_state[type_flag + "DOI"], payload) if httpCode == 200: # 201: publication_state[type_flag + "DOIMinted"] = "yes" @@ -488,7 +493,7 @@ def mint_doi(ctx, publication_state, type_flag): publication_state["status"] = "Retry" -def generate_landing_page_url(ctx, publication_config, publication_state): +def generate_landing_page_url(ctx: rule.Context, publication_config: Dict, publication_state: Dict) -> None: """Generate a URL for the landing page. 
:param ctx: Combined type of a callback and rei struct @@ -505,7 +510,7 @@ def generate_landing_page_url(ctx, publication_config, publication_state): publication_state["landingPageUrl"] = landingPageUrl -def generate_landing_page(ctx, publication_state, publish): +def generate_landing_page(ctx: rule.Context, publication_state: Dict, publish: str) -> None: """Generate a dataCite compliant XML based up yoda-metadata.json. :param ctx: Combined type of a callback and rei struct @@ -540,7 +545,7 @@ def generate_landing_page(ctx, publication_state, publish): publication_state["landingPagePath"] = landing_page_path -def copy_landingpage_to_public_host(ctx, random_id, publication_config, publication_state): +def copy_landingpage_to_public_host(ctx: rule.Context, random_id: str, publication_config: Dict, publication_state: Dict) -> None: """Copy the resulting landing page to configured public host. :param ctx: Combined type of a callback and rei struct @@ -565,7 +570,7 @@ def copy_landingpage_to_public_host(ctx, random_id, publication_config, publicat log.write(ctx, "copy_landingpage_to_public: " + error) -def copy_metadata_to_moai(ctx, random_id, publication_config, publication_state): +def copy_metadata_to_moai(ctx: rule.Context, random_id: str, publication_config: Dict, publication_state: Dict) -> None: """Copy the metadata json file to configured MOAI. :param ctx: Combined type of a callback and rei struct @@ -589,7 +594,7 @@ def copy_metadata_to_moai(ctx, random_id, publication_config, publication_state) log.write(ctx, "copy_metadata_to_public: " + error) -def set_access_restrictions(ctx, vault_package, publication_state): +def set_access_restrictions(ctx: rule.Context, vault_package: str, publication_state: Dict) -> None: """Set access restriction for vault package. This function is called when (re)publishing a vault package. 
@@ -600,8 +605,6 @@ def set_access_restrictions(ctx, vault_package, publication_state): :param ctx: Combined type of a callback and rei struct :param vault_package: Path to the package in the vault :param publication_state: Dict with state of the publication process - - :returns: None """ # Embargo handling combiJsonPath = publication_state["combiJsonPath"] @@ -658,7 +661,7 @@ def set_access_restrictions(ctx, vault_package, publication_state): publication_state["anonymousAccess"] = "yes" -def check_doi_availability(ctx, publication_state, type_flag): +def check_doi_availability(ctx: rule.Context, publication_state: Dict, type_flag: str) -> None: """Request DOI to check on availability. We want a 404 as return code. :param ctx: Combined type of a callback and rei struct @@ -668,7 +671,7 @@ def check_doi_availability(ctx, publication_state, type_flag): doi = publication_state[type_flag + "DOI"] try: - http_code = datacite.metadata_get(ctx, doi) + http_code = datacite.metadata_get(doi) if http_code == 404: publication_state[type_flag + "DOIAvailable"] = "yes" @@ -685,7 +688,7 @@ def check_doi_availability(ctx, publication_state, type_flag): publication_state["status"] = "Retry" -def process_publication(ctx, vault_package): +def process_publication(ctx: rule.Context, vault_package: str) -> str: """Handling of publication of vault_package. 
:param ctx: Combined type of a callback and rei struct @@ -999,7 +1002,7 @@ def process_publication(ctx, vault_package): return publication_state["status"] -def process_depublication(ctx, vault_package): +def process_depublication(ctx: rule.Context, vault_package: str) -> str: status = "Unknown" log.write(ctx, "Process depublication of vault package <{}>".format(vault_package)) @@ -1145,7 +1148,7 @@ def process_depublication(ctx, vault_package): return publication_state["status"] -def process_republication(ctx, vault_package): +def process_republication(ctx: rule.Context, vault_package: str) -> str: """Routine to process a republication with sanity checks at every step.""" publication_state = {} @@ -1315,7 +1318,11 @@ def process_republication(ctx, vault_package): @rule.make(inputs=[0, 1, 2, 3]) -def rule_update_publication(ctx, vault_package, update_datacite, update_landingpage, update_moai): +def rule_update_publication(ctx: rule.Context, + vault_package: str, + update_datacite: str, + update_landingpage: str, + update_moai: str) -> None: """Rule interface for updating the publication of a vault package. :param ctx: Combined type of a callback and rei struct @@ -1352,7 +1359,11 @@ def rule_update_publication(ctx, vault_package, update_datacite, update_landingp log.write(ctx, "[UPDATE PUBLICATIONS] Finished for {}".format(vault_package), True) -def update_publication(ctx, vault_package, update_datacite=False, update_landingpage=False, update_moai=False): +def update_publication(ctx: rule.Context, + vault_package: str, + update_datacite: bool = False, + update_landingpage: bool = False, + update_moai: bool = False) -> str: """Routine to update a publication with sanity checks at every step. 
:param ctx: Combined type of a callback and rei struct @@ -1365,7 +1376,7 @@ def update_publication(ctx, vault_package, update_datacite=False, update_landing """ publication_state = {} - def _check_return_if_publication_status(return_statuses, location): + def _check_return_if_publication_status(return_statuses: List[str], location: str) -> bool: # Used to check whether we need to return early because of an # unexpected publication status, and log a message for troubleshooting # purposes. @@ -1509,7 +1520,7 @@ def _check_return_if_publication_status(return_statuses, location): return publication_state["status"] -def get_collection_metadata(ctx, coll, prefix): +def get_collection_metadata(ctx: rule.Context, coll: str, prefix: str) -> Dict: """Retrieve all collection metadata. :param ctx: Combined type of a callback and rei struct @@ -1531,14 +1542,14 @@ def get_collection_metadata(ctx, coll, prefix): return coll_metadata -def get_all_versions(ctx, path, doi): +def get_all_versions(ctx: rule.Context, path: str, doi: str) -> Tuple[List, List]: """Get all the version DOI of published data package in a vault. :param ctx: Combined type of a callback and rei struct :param path: Path of the published data package :param doi: Base DOI of the selected publication - :return: Dict of related version DOIs + :return: Tuple with version DOIS and previous version DOIs """ coll_parent_name = path.rsplit('/', 1)[0] @@ -1577,7 +1588,7 @@ def get_all_versions(ctx, path, doi): @rule.make() -def rule_lift_embargos_on_data_access(ctx): +def rule_lift_embargos_on_data_access(ctx: rule.Context) -> str: """Find vault packages that have a data access embargo that can be lifted as the embargo expires. If lift_embargo_date <= now, update publication. 
diff --git a/publication_troubleshoot.py b/publication_troubleshoot.py index 8d2c7e027..04e6bb69d 100644 --- a/publication_troubleshoot.py +++ b/publication_troubleshoot.py @@ -108,7 +108,7 @@ def check_one_datacite_doi_reg(ctx, data_package, doi_name, write_stdout): log.write(ctx, "check_datacite_doi_registration: Error while trying to get {} - {}".format(doi_name, e), write_stdout) return False - status_code = datacite.metadata_get(ctx, doi) + status_code = datacite.metadata_get(doi) return status_code == 200 diff --git a/replication.py b/replication.py index 0dc7e07ac..062870770 100644 --- a/replication.py +++ b/replication.py @@ -16,7 +16,7 @@ __all__ = ['rule_replicate_batch'] -def replicate_asynchronously(ctx, path, source_resource, target_resource): +def replicate_asynchronously(ctx: rule.Context, path: str, source_resource: str, target_resource: str) -> None: """Schedule replication of a data object. :param ctx: Combined type of a callback and rei struct @@ -71,7 +71,7 @@ def replicate_asynchronously(ctx, path, source_resource, target_resource): @rule.make() -def rule_replicate_batch(ctx, verbose, balance_id_min, balance_id_max, batch_size_limit, dry_run): +def rule_replicate_batch(ctx: rule.Context, verbose: str, balance_id_min: int, balance_id_max: int, batch_size_limit: int, dry_run: str) -> None: """Scheduled replication batch job. Performs replication for all data objects marked with 'org_replication_scheduled' metadata. 
@@ -87,7 +87,6 @@ def rule_replicate_batch(ctx, verbose, balance_id_min, balance_id_max, batch_siz :param balance_id_max: Maximum balance id for batch jobs (value 1-64) :param batch_size_limit: Maximum number of items to be processed within one batch :param dry_run: When '1' do not actually replicate, only log what would have replicated - """ count = 0 count_ok = 0 @@ -191,7 +190,7 @@ def rule_replicate_batch(ctx, verbose, balance_id_min, balance_id_max, batch_siz # Mark as correctly replicated count_ok += 1 except msi.Error as e: - log.write(ctx, 'ERROR - The file {} could not be replicated from {} to {}: {}'.format(file, from_path, to_path, str(e))) + log.write(ctx, 'ERROR - The file {} could not be replicated from {} to {}: {}'.format(path, from_path, to_path, str(e))) if print_verbose: log.write(ctx, "Batch replication retry: copying {} from {} to {}".format(path, data_resc_name, to_path)) @@ -250,7 +249,7 @@ def rule_replicate_batch(ctx, verbose, balance_id_min, balance_id_max, batch_siz log.write(ctx, "Batch replication job finished. {}/{} objects replicated successfully.".format(count_ok, count)) -def is_replication_blocked_by_admin(ctx): +def is_replication_blocked_by_admin(ctx: rule.Context) -> bool: """Admin can put the replication process on hold by adding a file called 'stop_replication' in collection /yoda/flags. :param ctx: Combined type of a callback and rei struct @@ -262,24 +261,19 @@ def is_replication_blocked_by_admin(ctx): return collection.exists(ctx, path) -def memory_rss_usage(): - """ - The RSS (resident) memory size in bytes for the current process. - """ +def memory_rss_usage() -> int: + """The RSS (resident) memory size in bytes for the current process.""" p = psutil.Process() return p.memory_info().rss -def show_memory_usage(ctx): - """ - For debug purposes show the current RSS usage. 
- """ +def show_memory_usage(ctx: rule.Context) -> None: + """For debug purposes show the current RSS usage.""" log.write(ctx, "current RSS usage: {} bytes".format(memory_rss_usage())) -def memory_limit_exceeded(rss_limit): - """ - True when a limit other than 0 was specified and memory usage is currently +def memory_limit_exceeded(rss_limit: int) -> bool: + """True when a limit other than 0 was specified and memory usage is currently above this limit. Otherwise False. :param rss_limit: Max memory usage in bytes @@ -287,4 +281,4 @@ def memory_limit_exceeded(rss_limit): :returns: Boolean indicating if memory limited exceeded """ rss_limit = int(rss_limit) - return rss_limit and memory_rss_usage() > rss_limit + return rss_limit > 0 and memory_rss_usage() > rss_limit diff --git a/research.py b/research.py index a906abba3..0f35c426f 100644 --- a/research.py +++ b/research.py @@ -3,6 +3,8 @@ __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import Tuple + import genquery from pathvalidate import validate_filename, validate_filepath, ValidationError @@ -26,7 +28,7 @@ 'api_research_manifest'] -def folder_new_name_check(folder_name): +def folder_new_name_check(folder_name: str) -> Tuple[bool, str]: if len(folder_name) == 0: return False, api.Error('missing_foldername', 'Missing folder name. Please add a folder name') @@ -47,14 +49,14 @@ def folder_new_name_check(folder_name): @api.make() -def api_research_folder_add(ctx, coll, new_folder_name): +def api_research_folder_add(ctx: rule.Context, coll: str, new_folder_name: str) -> api.Result: """Add a new folder to a research folder. 
:param ctx: Combined type of a callback and rei struct :param coll: Collection to create new folder in :param new_folder_name: Name of the new folder - :returns: Dict with API status result + :returns: API status result """ coll_target = coll + '/' + new_folder_name @@ -102,7 +104,7 @@ def api_research_folder_add(ctx, coll, new_folder_name): return api.Result.ok() -def folder_copy_check(ctx, folder_path, new_folder_path, overwrite, copy=True): +def folder_copy_check(ctx: rule.Context, folder_path: str, new_folder_path: str, overwrite: bool, copy: bool = True) -> Tuple[bool, str]: """Check whether can copy (or move) folder to new folder location. :param ctx: Combined type of a callback and rei struct @@ -166,7 +168,7 @@ def folder_copy_check(ctx, folder_path, new_folder_path, overwrite, copy=True): @api.make() -def api_research_folder_copy(ctx, folder_path, new_folder_path, overwrite=False): +def api_research_folder_copy(ctx: rule.Context, folder_path: str, new_folder_path: str, overwrite: bool = False) -> api.Result: """Copy a folder in a research folder. :param ctx: Combined type of a callback and rei struct @@ -174,7 +176,7 @@ def api_research_folder_copy(ctx, folder_path, new_folder_path, overwrite=False) :param new_folder_path: Path to the new copy of the folder :param overwrite: Overwrite folder if it already exists - :returns: Dict with API status result + :returns: API status result """ valid, errorResponse = folder_copy_check(ctx, folder_path, new_folder_path, overwrite, True) if not valid: @@ -190,7 +192,7 @@ def api_research_folder_copy(ctx, folder_path, new_folder_path, overwrite=False) @api.make() -def api_research_folder_move(ctx, folder_path, new_folder_path, overwrite=False): +def api_research_folder_move(ctx: rule.Context, folder_path: str, new_folder_path: str, overwrite: bool = False) -> api.Result: """Move a folder in a research folder. 
:param ctx: Combined type of a callback and rei struct @@ -198,7 +200,7 @@ def api_research_folder_move(ctx, folder_path, new_folder_path, overwrite=False) :param new_folder_path: Path to the new folder :param overwrite: Overwrite folder if it already exists - :returns: Dict with API status result + :returns: API status result """ valid, errorResponse = folder_copy_check(ctx, folder_path, new_folder_path, overwrite, False) if not valid: @@ -214,7 +216,7 @@ def api_research_folder_move(ctx, folder_path, new_folder_path, overwrite=False) @api.make() -def api_research_folder_rename(ctx, new_folder_name, coll, org_folder_name): +def api_research_folder_rename(ctx: rule.Context, new_folder_name: str, coll: str, org_folder_name: str) -> api.Result: """Rename an existing research folder. :param ctx: Combined type of a callback and rei struct @@ -222,7 +224,7 @@ def api_research_folder_rename(ctx, new_folder_name, coll, org_folder_name): :param coll: Parent collection of folder :param org_folder_name: Current name of the folder - :returns: Dict with API status result + :returns: API status result """ coll_target = coll + '/' + new_folder_name @@ -275,14 +277,14 @@ def api_research_folder_rename(ctx, new_folder_name, coll, org_folder_name): @api.make() -def api_research_folder_delete(ctx, coll, folder_name): +def api_research_folder_delete(ctx: rule.Context, coll: str, folder_name: str) -> api.Result: """Delete a research folder. :param ctx: Combined type of a callback and rei struct :param coll: Parent collection of folder to delete :param folder_name: Name of folder to delete - :returns: Dict with API status result + :returns: API status result """ coll_target = coll + '/' + folder_name @@ -322,7 +324,7 @@ def api_research_folder_delete(ctx, coll, folder_name): @api.make() -def api_research_list_temporary_files(ctx, coll): +def api_research_list_temporary_files(ctx: rule.Context, coll: str) -> api.Result: """Get list of temporary files to be cleaned up. 
:param ctx: Combined type of a callback and rei struct @@ -354,7 +356,7 @@ def api_research_list_temporary_files(ctx, coll): @api.make() -def api_research_file_copy(ctx, filepath, new_filepath, overwrite=False): +def api_research_file_copy(ctx: rule.Context, filepath: str, new_filepath: str, overwrite: bool = False) -> api.Result: """Copy a file in a research folder. :param ctx: Combined type of a callback and rei struct @@ -362,7 +364,7 @@ def api_research_file_copy(ctx, filepath, new_filepath, overwrite=False): :param new_filepath: Path to the new copy of the file :param overwrite: Overwrite file if it already exists - :returns: Dict with API status result + :returns: API status result """ if len(new_filepath) == 0: return api.Error('missing_filepath', 'Missing file path. Please add a file path') @@ -424,7 +426,7 @@ def api_research_file_copy(ctx, filepath, new_filepath, overwrite=False): @api.make() -def api_research_file_rename(ctx, new_file_name, coll, org_file_name): +def api_research_file_rename(ctx: rule.Context, new_file_name: str, coll: str, org_file_name: str) -> api.Result: """Rename a file in a research folder. :param ctx: Combined type of a callback and rei struct @@ -432,7 +434,7 @@ def api_research_file_rename(ctx, new_file_name, coll, org_file_name): :param coll: Parent collection of file :param org_file_name: Current name of the file - :returns: Dict with API status result + :returns: API status result """ if len(new_file_name) == 0: return api.Error('missing_filename', 'Missing filename. Please add a file name') @@ -494,7 +496,7 @@ def api_research_file_rename(ctx, new_file_name, coll, org_file_name): @api.make() -def api_research_file_move(ctx, filepath, new_filepath, overwrite=False): +def api_research_file_move(ctx: rule.Context, filepath: str, new_filepath: str, overwrite: bool = False) -> api.Result: """Move a file in a research folder. 
:param ctx: Combined type of a callback and rei struct @@ -502,7 +504,7 @@ def api_research_file_move(ctx, filepath, new_filepath, overwrite=False): :param new_filepath: Path to the new location of the file :param overwrite: Overwrite file if it already exists - :returns: Dict with API status result + :returns: API status result """ if len(new_filepath) == 0: return api.Error('missing_filepath', 'Missing file path. Please add a file path') @@ -567,14 +569,14 @@ def api_research_file_move(ctx, filepath, new_filepath, overwrite=False): @api.make() -def api_research_file_delete(ctx, coll, file_name): +def api_research_file_delete(ctx: rule.Context, coll: str, file_name: str) -> api.Result: """Delete a file in a research folder. :param ctx: Combined type of a callback and rei struct :param coll: Parent collection of file to delete :param file_name: Name of file to delete - :returns: Dict with API status result + :returns: API status result """ path_target = coll + '/' + file_name @@ -610,13 +612,13 @@ def api_research_file_delete(ctx, coll, file_name): @api.make() -def api_research_system_metadata(ctx, coll): +def api_research_system_metadata(ctx: rule.Context, coll: str) -> api.Result: """Return collection statistics as JSON. :param ctx: Combined type of a callback and rei struct :param coll: Research collection - :returns: Dict with research system metadata + :returns: API status result """ data_count = collection.data_count(ctx, coll) collection_count = collection.collection_count(ctx, coll) @@ -629,8 +631,14 @@ def api_research_system_metadata(ctx, coll): @api.make() -def api_research_collection_details(ctx, path): - """Return details of a research collection.""" +def api_research_collection_details(ctx: rule.Context, path: str) -> api.Result: + """Return details of a research collection. 
+ + :param ctx: Combined type of a callback and rei struct + :param path: Path to research collection + + :returns: API status result + """ if not collection.exists(ctx, path): return api.Error('nonexistent', 'The given path does not exist') @@ -664,7 +672,7 @@ def api_research_collection_details(ctx, path): @api.make() -def api_research_manifest(ctx, coll): +def api_research_manifest(ctx: rule.Context, coll: str) -> api.Result: """Produce a manifest of data objects in a collection :param ctx: Combined type of a callback and rei struct diff --git a/resources.py b/resources.py index f93ea0e07..c51b3e348 100644 --- a/resources.py +++ b/resources.py @@ -4,6 +4,7 @@ __license__ = 'GPLv3, see LICENSE' from datetime import datetime +from typing import Dict, List, Optional import genquery @@ -22,12 +23,12 @@ @api.make() -def api_resource_browse_group_data(ctx, - sort_on='name', - sort_order='asc', - offset=0, - limit=10, - search_groups=""): +def api_resource_browse_group_data(ctx: rule.Context, + sort_on: str = 'name', + sort_order: str = 'asc', + offset: int = 0, + limit: int = 10, + search_groups: str = "") -> api.Result: """Get paginated group data groupname / size :param ctx: Combined type of a callback and rei struct @@ -88,10 +89,8 @@ def api_resource_browse_group_data(ctx, @api.make() -def api_resource_full_year_differentiated_group_storage(ctx, group_name): - # def api_resource_full_range ... - - """Return the full range of registered storage data differentiated into vault/research/revision/total +def api_resource_full_year_differentiated_group_storage(ctx: rule.Context, group_name: str) -> api.Result: + """Return the full range of registered storage data differentiated into vault/research/revision/total. 
:param ctx: Combined type of a callback and rei struct :param group_name: Group that is searched for storage data @@ -135,7 +134,7 @@ def api_resource_full_year_differentiated_group_storage(ctx, group_name): @api.make() -def api_resource_category_stats(ctx): +def api_resource_category_stats(ctx: rule.Context) -> api.Result: """Collect storage stats of last month for categories. Storage is summed up for each category. @@ -235,7 +234,7 @@ def api_resource_category_stats(ctx): @api.make() -def api_resource_monthly_category_stats(ctx): +def api_resource_monthly_category_stats(ctx: rule.Context) -> api.Result: """Collect storage stats for all twelve months based upon categories a user is datamanager of. Statistics gathered: @@ -329,7 +328,7 @@ def api_resource_monthly_category_stats(ctx): return {'storage': all_storage, 'dates': storage_dates} -def get_group_category_info(ctx, groupName): +def get_group_category_info(ctx: rule.Context, groupName: str) -> Dict: """Get category and subcategory for a group. :param ctx: Combined type of a callback and rei struct @@ -358,7 +357,7 @@ def get_group_category_info(ctx, groupName): return {'category': category, 'subcategory': subcategory} -def get_groups_on_categories(ctx, categories, search_groups=""): +def get_groups_on_categories(ctx: rule.Context, categories: List, search_groups: str = "") -> List: """Get all groups belonging to all given categories. :param ctx: Combined type of a callback and rei struct @@ -414,7 +413,7 @@ def get_groups_on_categories(ctx, categories, search_groups=""): @rule.make() -def rule_resource_store_storage_statistics(ctx): +def rule_resource_store_storage_statistics(ctx: rule.Context) -> str: """ For all categories present, store all found storage data for each group belonging to these categories. 
@@ -558,13 +557,13 @@ def rule_resource_store_storage_statistics(ctx): @rule.make(inputs=[0, 1, 2], outputs=[]) -def rule_resource_update_resc_arb_data(ctx, resc_name, bytes_free, bytes_total): +def rule_resource_update_resc_arb_data(ctx: rule.Context, resc_name: str, bytes_free: int, bytes_total: int) -> None: """ Update ARB data for a specific resource - :param ctx: Combined type of a callback and rei struct - :param resc_name: Name of a particular unixfilesystem resource - :param bytes_free: Free size on this resource, in bytes + :param ctx: Combined type of a callback and rei struct + :param resc_name: Name of a particular unixfilesystem resource + :param bytes_free: Free size on this resource, in bytes :param bytes_total: Total size of this resource, in bytes """ if user.user_type(ctx) != 'rodsadmin': @@ -595,7 +594,7 @@ def rule_resource_update_resc_arb_data(ctx, resc_name, bytes_free, bytes_total): @rule.make() -def rule_resource_update_misc_arb_data(ctx): +def rule_resource_update_misc_arb_data(ctx: rule.Context) -> None: """Update ARB data for resources that are not covered by the regular process. That is, all resources that are neither unixfilesystem nor passthrough resources, as well as passthrough resources that do not have a unixfilesystem child resource. @@ -629,7 +628,7 @@ def rule_resource_update_misc_arb_data(ctx): manager.put(ctx, resc, constants.arb_status.IGNORE) -def get_categories(ctx): +def get_categories(ctx: rule.Context) -> List: """Get all categories for current user. 
:param ctx: Combined type of a callback and rei struct @@ -665,7 +664,7 @@ def get_categories(ctx): return categories -def get_groups_on_category(ctx, category): +def get_groups_on_category(ctx: rule.Context, category: str) -> List: """Get all groups for category.""" groups = [] iter = genquery.row_iterator( @@ -681,7 +680,7 @@ def get_groups_on_category(ctx, category): return groups -def get_group_data_sizes(ctx, group_name, ref_period=None): +def get_group_data_sizes(ctx: rule.Context, group_name: str, ref_period: Optional[str] = None) -> List: """Get group data sizes and return as a list of values. If no reference period is specified return closest to today. diff --git a/revision_strategies.py b/revision_strategies.py index 567cc16b0..3102829b3 100644 --- a/revision_strategies.py +++ b/revision_strategies.py @@ -4,8 +4,32 @@ __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import List -def get_revision_strategy(strategy_name): + +class RevisionStrategy: + HOURS = 3600 + DAYS = 86400 + WEEKS = 604800 + + def __init__(self, strategy_name: str, buckets_configuration: List) -> None: + self._name = strategy_name + self._buckets = buckets_configuration + + def get_name(self) -> str: + return self._name + + def get_buckets(self) -> List: + return self._buckets + + def get_minimum_bucket_size(self) -> int: + return min(map(lambda bucket_timespan_bucket_size_offset: bucket_timespan_bucket_size_offset[1], self.get_buckets())) + + def get_total_bucket_timespan(self) -> int: + return sum(map(lambda bucket_timespan_bucket_size_offset1: bucket_timespan_bucket_size_offset1[0], self.get_buckets())) + + +def get_revision_strategy(strategy_name: str) -> RevisionStrategy: """Returns a revision strategy object for a particular revision strategy name. This object can be used to obtain information about the revision strategy. 
@@ -61,25 +85,3 @@ def get_revision_strategy(strategy_name): return RevisionStrategy(strategy_name, buckets_configuration[strategy_name]) else: raise ValueError('Strategy "{}" is not supported'.format(strategy_name)) - - -class RevisionStrategy: - HOURS = 3600 - DAYS = 86400 - WEEKS = 604800 - - def __init__(self, strategy_name, buckets_configuration): - self._name = strategy_name - self._buckets = buckets_configuration - - def get_name(self): - return self._name - - def get_buckets(self): - return self._buckets - - def get_minimum_bucket_size(self): - return min(map(lambda bucket_timespan_bucket_size_offset: bucket_timespan_bucket_size_offset[1], self.get_buckets())) - - def get_total_bucket_timespan(self): - return sum(map(lambda bucket_timespan_bucket_size_offset1: bucket_timespan_bucket_size_offset1[0], self.get_buckets())) diff --git a/revision_utils.py b/revision_utils.py index 3aee91890..0f879e27c 100644 --- a/revision_utils.py +++ b/revision_utils.py @@ -7,12 +7,13 @@ import datetime import hashlib import os +from typing import List, Tuple -from revision_strategies import get_revision_strategy -from util import constants, log, pathutil +from revision_strategies import get_revision_strategy, RevisionStrategy +from util import constants, log, pathutil, rule -def revision_eligible(max_size, data_obj_exists, size, path, groups, revision_store_exists): +def revision_eligible(max_size: int, data_obj_exists: bool, size: int, path: str, groups: List, revision_store_exists: bool) -> Tuple[bool, str]: """Determine whether can create a revision of given data object. :param max_size: Max size that file can be to create a revision (in bytes) @@ -55,7 +56,7 @@ def revision_eligible(max_size, data_obj_exists, size, path, groups, revision_st return True, "" -def calculate_end_of_calendar_day(): +def calculate_end_of_calendar_day() -> int: """Calculate the unix timestamp for the end of the current day (Same as start of next day). 
:returns: End of calendar day - Timestamp of the end of the current day @@ -67,7 +68,7 @@ def calculate_end_of_calendar_day(): return int(tomorrow.strftime("%s")) -def get_revision_store_path(zone, trailing_slash=False): +def get_revision_store_path(zone: str, trailing_slash: bool = False) -> str: """Produces the logical path of the revision store :param zone: zone name @@ -81,7 +82,12 @@ def get_revision_store_path(zone, trailing_slash=False): return os.path.join("/" + zone, constants.UUREVISIONCOLLECTION.lstrip(os.path.sep)) -def get_deletion_candidates(ctx, revision_strategy, revisions, initial_upper_time_bound, original_exists, verbose): +def get_deletion_candidates(ctx: 'rule.Context', + revision_strategy: RevisionStrategy, + revisions: List, + initial_upper_time_bound: bool, + original_exists: bool, + verbose: bool) -> List: """Get revision data objects for a particular versioned data object that should be deleted, as per a given revision strategy. @@ -181,7 +187,11 @@ def get_deletion_candidates(ctx, revision_strategy, revisions, initial_upper_tim return deletion_candidates -def revision_cleanup_prefilter(ctx, revisions_list, revision_strategy_name, original_exists_dict, verbose): +def revision_cleanup_prefilter(ctx: 'rule.Context', + revisions_list: List, + revision_strategy_name: str, + original_exists_dict: bool, + verbose: bool) -> List: """Filters out revisioned data objects from a list if we can easily determine that they don't meet criteria for being removed, for example if the number of revisions of an existing versioned data object is at most one. @@ -231,7 +241,7 @@ def revision_cleanup_prefilter(ctx, revisions_list, revision_strategy_name, orig return results -def get_resc(row): +def get_resc(row: List) -> str: """Get the resc id for a data object given the metadata provided (for revision job). 
:param row: metadata for the data object @@ -246,7 +256,7 @@ def get_resc(row): return row[3] -def get_balance_id(row, path): +def get_balance_id(row: List, path: str) -> int: """Get the balance id for a data object given the metadata provided (for revision job). :param row: metadata for the data object diff --git a/revisions.py b/revisions.py index 0a874da58..db05a6b9b 100644 --- a/revisions.py +++ b/revisions.py @@ -8,6 +8,7 @@ import random import re import time +from typing import Dict, Iterator, List, Optional, Tuple import genquery import irods_types @@ -30,7 +31,7 @@ @api.make() -def api_revisions_search_on_filename(ctx, searchString, offset=0, limit=10): +def api_revisions_search_on_filename(ctx: rule.Context, searchString: str, offset: int = 0, limit: int = 10) -> api.Result: """Search revisions of a file in a research folder and return list of corresponding revisions. :param ctx: Combined type of a callback and rei struct @@ -114,7 +115,7 @@ def api_revisions_search_on_filename(ctx, searchString, offset=0, limit=10): @api.make() -def api_revisions_list(ctx, path): +def api_revisions_list(ctx: rule.Context, path: str) -> api.Result: """Get list revisions of a file in a research folder. :param ctx: Combined type of a callback and rei struct @@ -161,7 +162,7 @@ def api_revisions_list(ctx, path): @api.make() -def api_revisions_restore(ctx, revision_id, overwrite, coll_target, new_filename): +def api_revisions_restore(ctx: rule.Context, revision_id: str, overwrite: str, coll_target: str, new_filename: str) -> api.Result: """Copy selected revision to target collection with given name. 
:param ctx: Combined type of a callback and rei struct @@ -244,7 +245,7 @@ def api_revisions_restore(ctx, revision_id, overwrite, coll_target, new_filename return api.Result.ok() -def resource_modified_post_revision(ctx, resource, zone, path): +def resource_modified_post_revision(ctx: rule.Context, resource: str, zone: str, path: str) -> None: """Create revisions on file modifications. This policy should trigger whenever a new file is added or modified @@ -309,12 +310,20 @@ def resource_modified_post_revision(ctx, resource, zone, path): # CAT_SQL_ERROR: this AVU is already present. No need to set it anymore. pass else: + error_msg = "" error_status = re.search(r"status \[(.*?)\]", str(e)) - log.write(ctx, "Schedule revision of data object {} failed with error {}".format(path, error_status.group(1))) + if error_status is not None: + error_msg = error_status.group(1) + log.write(ctx, "Schedule revision of data object {} failed with error {}".format(path, error_msg)) @rule.make() -def rule_revision_batch(ctx, verbose, balance_id_min, balance_id_max, batch_size_limit, dry_run='0'): +def rule_revision_batch(ctx: rule.Context, + verbose: str, + balance_id_min: str, + balance_id_max: str, + batch_size_limit: str, + dry_run: str = '0') -> None: """Scheduled revision creation batch job. Creates revisions for all data objects (in research space) marked with 'org_revision_scheduled' metadata. 
@@ -432,7 +441,7 @@ def rule_revision_batch(ctx, verbose, balance_id_min, balance_id_max, batch_size log.write(ctx, "Batch revision job ignored {} data objects in research area, excluding data objects postponed because of delay time.".format(count_ignored)) -def check_eligible_and_create_revision(ctx, print_verbose, attr, errorattr, data_id, resc, path): +def check_eligible_and_create_revision(ctx: rule.Context, print_verbose: bool, attr: str, errorattr: str, data_id: str, resc: str, path: str) -> bool: """ Check that a data object is eligible for a revision, and if so, create a revision. Then remove or add revision flags as appropriate. @@ -475,15 +484,15 @@ def check_eligible_and_create_revision(ctx, print_verbose, attr, errorattr, data return revision_created -def remove_revision_error_flag(ctx, data_id, path, errorattr): +def remove_revision_error_flag(ctx: rule.Context, data_id: str, path: str, errorattr: str) -> None: """Remove revision_error flag""" # Revision creation OK. Remove any existing error indication attribute. - iter2 = genquery.row_iterator( + iter = genquery.row_iterator( "DATA_NAME", "DATA_ID = '{}' AND META_DATA_ATTR_NAME = '{}' AND META_DATA_ATTR_VALUE = 'true'".format(data_id, errorattr), genquery.AS_LIST, ctx ) - for _row in iter2: + for _row in iter: # Only try to remove it if we know for sure it exists, # otherwise we get useless errors in the log. avu.rmw_from_data(ctx, path, errorattr, "%") @@ -491,7 +500,7 @@ def remove_revision_error_flag(ctx, data_id, path, errorattr): break -def remove_revision_scheduled_flag(ctx, print_verbose, path, attr): +def remove_revision_scheduled_flag(ctx: rule.Context, print_verbose: bool, path: str, attr: str) -> None: """Remove revision_scheduled flag (no matter if it succeeded or not).""" # rods should have been given own access via policy to allow AVU # changes. 
@@ -517,7 +526,7 @@ def remove_revision_scheduled_flag(ctx, print_verbose, path, attr): log.write(ctx, "ERROR - Scheduled revision creation of <{}>: could not remove schedule flag".format(path)) -def is_revision_blocked_by_admin(ctx): +def is_revision_blocked_by_admin(ctx: rule.Context) -> bool: """Admin can put the revision process on a hold by adding a file called 'stop_revisions' in collection /yoda/flags. :param ctx: Combined type of a callback and rei struct @@ -529,7 +538,7 @@ def is_revision_blocked_by_admin(ctx): return collection.exists(ctx, path) -def get_revision_store(ctx, group_name): +def get_revision_store(ctx: rule.Context, group_name: str) -> Optional[str]: """Get path to revision store for group if the path exists. :param ctx: Combined type of a callback and rei struct @@ -545,7 +554,7 @@ def get_revision_store(ctx, group_name): return revision_store if revision_store_exists else None -def revision_create(ctx, print_verbose, data_id, resource, group_name, revision_store): +def revision_create(ctx: rule.Context, print_verbose: bool, data_id: str, resource: str, group_name: str, revision_store: str) -> bool: """Create a revision of a data object in a revision folder. :param ctx: Combined type of a callback and rei struct @@ -633,10 +642,10 @@ def revision_create(ctx, print_verbose, data_id, resource, group_name, revision_ return revision_created -def revision_cleanup_scan_revision_objects(ctx, revision_list): +def revision_cleanup_scan_revision_objects(ctx: rule.Context, revision_list: List) -> List: """Obtain information about all revisions. 
- :param ctx: Combined type of a callback and rei struct + :param ctx: Combined type of a callback and rei struct :param revision_list: List of revision data object IDs :returns: Nested list, where the outer list represents revisioned data objects, @@ -699,7 +708,7 @@ def revision_cleanup_scan_revision_objects(ctx, revision_list): return revisions_info -def get_all_revision_data_ids(ctx): +def get_all_revision_data_ids(ctx: rule.Context) -> Iterator[Tuple[str, str]]: """"Returns all data IDs of revision data objects :param ctx: Combined type of a callback and rei struct @@ -718,7 +727,7 @@ def get_all_revision_data_ids(ctx): yield (row[0], row[1]) -def _update_revision_store_acls(ctx): +def _update_revision_store_acls(ctx: rule.Context) -> None: """Sets the revision store ACL to grant present rodsadmin user access :param ctx: Combined type of a callback and rei struct @@ -741,7 +750,7 @@ def _update_revision_store_acls(ctx): @rule.make(inputs=[0], outputs=[1]) -def rule_revisions_cleanup_collect(ctx, target_batch_size): +def rule_revisions_cleanup_collect(ctx: rule.Context, target_batch_size: str) -> str: """Collect a list of revision data object IDs and puts them in the spool system for processing by the revision cleanup scan job. @@ -762,7 +771,6 @@ def rule_revisions_cleanup_collect(ctx, target_batch_size): log.write(ctx, "Starting revision cleanup collect process.") - target_batch_size = int(target_batch_size) ingest_state = { "batch": [], "current_coll": None, @@ -770,7 +778,7 @@ def rule_revisions_cleanup_collect(ctx, target_batch_size): } number_revisions = 0 - def ingest_new_data_id(ctx, coll_id, data_id, ingest_state, target_batch_size): + def ingest_new_data_id(ctx: rule.Context, coll_id: str, data_id: str, ingest_state: Dict, target_batch_size: int) -> None: """Read data object. Store it in ingest state as long as its collection ID is the same as the previous one, so that all data objects in the same collection are part of the same batch. 
@@ -804,10 +812,10 @@ def ingest_new_data_id(ctx, coll_id, data_id, ingest_state, target_batch_size): for (coll_id, data_id) in get_all_revision_data_ids(ctx): number_revisions += 1 - ingest_new_data_id(ctx, coll_id, data_id, ingest_state, target_batch_size) + ingest_new_data_id(ctx, coll_id, data_id, ingest_state, int(target_batch_size)) if (len(ingest_state["batch"]) > 0 - and len(ingest_state["batch"]) + len(ingest_state["objects_for_current_coll"]) >= target_batch_size): + and len(ingest_state["batch"]) + len(ingest_state["objects_for_current_coll"]) >= int(target_batch_size)): put_spool_data(constants.PROC_REVISION_CLEANUP_SCAN, [ingest_state["batch"]]) ingest_state["batch"] = [] @@ -820,7 +828,7 @@ def ingest_new_data_id(ctx, coll_id, data_id, ingest_state, target_batch_size): @rule.make(inputs=[0, 1], outputs=[2]) -def rule_revisions_cleanup_scan(ctx, revision_strategy_name, verbose_flag): +def rule_revisions_cleanup_scan(ctx: rule.Context, revision_strategy_name: str, verbose_flag: str) -> str: """Collect revision data and put it in the spool system for processing by the revision cleanup scan jobs @@ -865,11 +873,11 @@ def rule_revisions_cleanup_scan(ctx, revision_strategy_name, verbose_flag): return 'Revision store cleanup scan job completed' -def get_original_exists_dict(ctx, revision_data): +def get_original_exists_dict(ctx: rule.Context, revision_data: List) -> Dict: """Returns a dictionary that indicates which original data objects of revision data still exist - :param ctx: Combined type of a callback and rei struct - :param revision_data: List of lists of revision tuples in (data_id, timestamp, revision_path) format + :param ctx: Combined type of a callback and rei struct + :param revision_data: List of lists of revision tuples in (data_id, timestamp, revision_path) format :returns: dictionary, in which the keys are revision path. The values are booleans, and indicate whether the versioned data object of the revision still exists. 
If the revision data object does not @@ -891,7 +899,7 @@ def get_original_exists_dict(ctx, revision_data): return result -def versioned_data_object_exists(ctx, revision_path): +def versioned_data_object_exists(ctx: rule.Context, revision_path: str) -> bool: """Checks whether the version data object of a revision still exists :param ctx: Combined type of a callback and rei struct @@ -902,19 +910,7 @@ def versioned_data_object_exists(ctx, revision_path): :raises KeyError: If revision data object does not have revision AVUs that point to versioned data object. - - :raises UnicodeEncodeError: If the revision path cannot be converted to a utf-8 byte string. """ - - if isinstance(revision_path, unicode): - try: - # Switch back to bytes for now - # TODO change logic in Python 3 - revision_path = revision_path.encode('utf-8') - except UnicodeEncodeError: - log.write(ctx, "File path {} is not UTF-8 encoded or is not compatible with UTF-8 encoding".format(revision_path)) - raise - revision_avus = avu.of_data(ctx, revision_path) avu_dict = {a: v for (a, v, u) in revision_avus} @@ -931,7 +927,7 @@ def versioned_data_object_exists(ctx, revision_path): @rule.make(inputs=[0, 1, 2], outputs=[3]) -def rule_revisions_cleanup_process(ctx, revision_strategy_name, endOfCalendarDay, verbose_flag): +def rule_revisions_cleanup_process(ctx: rule.Context, revision_strategy_name: str, endOfCalendarDay: str, verbose_flag: str) -> str: """Applies the selected revision strategy to a batch of spooled revision data :param ctx: Combined type of a callback and rei struct @@ -999,7 +995,7 @@ def rule_revisions_cleanup_process(ctx, revision_strategy_name, endOfCalendarDay return 'Revision store cleanup processing job completed' -def revision_remove(ctx, revision_id, revision_path): +def revision_remove(ctx: rule.Context, revision_id: str, revision_path: str) -> bool: """Remove a revision from the revision store. Called by revision-cleanup.r cronjob. 
@@ -1031,24 +1027,19 @@ def revision_remove(ctx, revision_id, revision_path): return False -def memory_rss_usage(): - """ - The RSS (resident) memory size in bytes for the current process. - """ +def memory_rss_usage() -> int: + """The RSS (resident) memory size in bytes for the current process.""" p = psutil.Process() return p.memory_info().rss -def show_memory_usage(ctx): - """ - For debug purposes show the current RSS usage. - """ +def show_memory_usage(ctx: rule.Context) -> None: + """For debug purposes show the current RSS usage.""" log.write(ctx, "current RSS usage: {} bytes".format(memory_rss_usage())) -def memory_limit_exceeded(rss_limit): - """ - True when a limit other than 0 was specified and memory usage is currently +def memory_limit_exceeded(rss_limit: int) -> bool: + """True when a limit other than 0 was specified and memory usage is currently above this limit. Otherwise False. :param rss_limit: Max memory usage in bytes @@ -1056,15 +1047,14 @@ def memory_limit_exceeded(rss_limit): :returns: Boolean indicating if memory limited exceeded """ rss_limit = int(rss_limit) - return rss_limit and memory_rss_usage() > rss_limit + return rss_limit > 0 and memory_rss_usage() > rss_limit -def remove_revision_creation_avu_from_deleted_data_objects(ctx, print_verbose): - """ - Removes revision creation AVUs from deleted data objects [marked with 'org_revision_scheduled' metadata]. +def remove_revision_creation_avu_from_deleted_data_objects(ctx: rule.Context, print_verbose: bool) -> None: + """Removes revision creation AVUs from deleted data objects [marked with 'org_revision_scheduled' metadata]. 
- :param ctx: Combined type of a callback and rei struct - :param print_verbose: Whether to log verbose messages for troubleshooting (Boolean) + :param ctx: Combined type of a callback and rei struct + :param print_verbose: Whether to log verbose messages for troubleshooting """ revision_avu_name = constants.UUORGMETADATAPREFIX + "revision_scheduled" diff --git a/schema.py b/schema.py index 6ce9d4707..043b7f867 100644 --- a/schema.py +++ b/schema.py @@ -4,6 +4,7 @@ __license__ = 'GPLv3, see LICENSE' import re +from typing import Dict, Optional, Tuple import genquery @@ -14,12 +15,12 @@ @api.make() -def api_schema_get_schemas(ctx): +def api_schema_get_schemas(ctx: rule.Context) -> api.Result: """Retrieve selectable schemas and default schema. :param ctx: Combined type of a callback and rei struct - :returns: Dit with schemas and default schema. + :returns: Dict with schemas and default schema. """ schemas = [] @@ -42,7 +43,7 @@ def api_schema_get_schemas(ctx): 'schema_default': schema_default} -def get_schema_collection(ctx, rods_zone, group_name): +def get_schema_collection(ctx: rule.Context, rods_zone: str, group_name: str) -> str: """Determine schema collection based upon rods zone and name of the group. If there is no schema id set on group level and @@ -87,7 +88,7 @@ def get_schema_collection(ctx, rods_zone, group_name): return config.default_yoda_schema -def get_schema_id_from_group(ctx, group_name): +def get_schema_id_from_group(ctx: rule.Context, group_name: str) -> Optional[str]: """Returns the schema_id value that has been set on an iRODS group :param ctx: Combined type of a callback and rei struct @@ -107,7 +108,7 @@ def get_schema_id_from_group(ctx, group_name): return None -def get_active_schema_path(ctx, path): +def get_active_schema_path(ctx: rule.Context, path: str) -> str: """Get the iRODS path to a schema file from a deposit, research or vault path. The schema collection is determined from group name of the path. 
@@ -143,7 +144,7 @@ def get_active_schema_path(ctx, path): return '/{}/yoda/schemas/{}/metadata.json'.format(rods_zone, schema_coll) -def get_active_schema(ctx, path): +def get_active_schema(ctx: rule.Context, path: str) -> Dict: """Get a schema object from a research or vault path. :param ctx: Combined type of a callback and rei struct @@ -155,7 +156,7 @@ def get_active_schema(ctx, path): return jsonutil.read(ctx, get_active_schema_path(ctx, path)) -def get_active_schema_uischema(ctx, path): +def get_active_schema_uischema(ctx: rule.Context, path: str) -> Tuple[Dict, Dict]: """Get a schema and uischema object from a research or vault path. :param ctx: Combined type of a callback and rei struct @@ -171,7 +172,7 @@ def get_active_schema_uischema(ctx, path): jsonutil.read(ctx, uischema_path) -def get_active_schema_id(ctx, path): +def get_active_schema_id(ctx: rule.Context, path: str) -> str: """Get the active schema id from a research or vault path. :param ctx: Combined type of a callback and rei struct @@ -183,14 +184,14 @@ def get_active_schema_id(ctx, path): return get_active_schema(ctx, path)['$id'] -def get_schema_id(ctx, metadata_path, metadata=None): +def get_schema_id(ctx: rule.Context, metadata_path: str, metadata: Optional[Dict] = None) -> Optional[str]: """Get the current schema id from a path to a metadata json.""" if metadata is None: metadata = jsonutil.read(ctx, metadata_path) return meta.metadata_get_schema_id(metadata) -def get_schema_path_by_id(ctx, path, schema_id): +def get_schema_path_by_id(ctx: rule.Context, path: str, schema_id: str) -> Optional[str]: """Get a schema path from a schema id.""" _, zone, _2, _3 = pathutil.info(path) @@ -203,7 +204,7 @@ def get_schema_path_by_id(ctx, path, schema_id): return None -def get_schema_by_id(ctx, path, schema_id): +def get_schema_by_id(ctx: rule.Context, path: str, schema_id: str) -> Optional[Dict]: """ Get a schema from a schema id. 
@@ -216,7 +217,7 @@ def get_schema_by_id(ctx, path, schema_id): :returns: Schema object (parsed from JSON) """ - path = get_schema_path_by_id(ctx, path, schema_id) - if path is None: + schema_path = get_schema_path_by_id(ctx, path, schema_id) + if schema_path is None: return None - return jsonutil.read(ctx, path) + return jsonutil.read(ctx, schema_path) diff --git a/schema_transformation.py b/schema_transformation.py index 6cd3470ab..fb255cd39 100644 --- a/schema_transformation.py +++ b/schema_transformation.py @@ -13,6 +13,7 @@ import os import re import time +from typing import Callable, Dict, Optional import genquery import session_vars @@ -23,7 +24,7 @@ from util import * -def execute_transformation(ctx, metadata_path, transform, keep_metadata_backup=True): +def execute_transformation(ctx: rule.Context, metadata_path: str, transform: Callable, keep_metadata_backup: bool = True) -> None: """Transform a metadata file with the given transformation function.""" coll, data = os.path.split(metadata_path) @@ -50,10 +51,10 @@ def execute_transformation(ctx, metadata_path, transform, keep_metadata_backup=T @api.make() -def api_transform_metadata(ctx, coll, keep_metadata_backup=True): +def api_transform_metadata(ctx: rule.Context, coll: str, keep_metadata_backup: bool = True) -> api.Result: """Transform a yoda-metadata file in the given collection to the active schema.""" metadata_path = meta.get_collection_metadata_path(ctx, coll) - if metadata_path.endswith('.json'): + if metadata_path and metadata_path.endswith('.json'): # JSON metadata. 
log.write(ctx, 'Transforming JSON metadata in the research space: <{}>'.format(metadata_path)) transform = get(ctx, metadata_path) @@ -64,10 +65,9 @@ def api_transform_metadata(ctx, coll, keep_metadata_backup=True): execute_transformation(ctx, metadata_path, transform, keep_metadata_backup) else: return api.Error('no_metadata', 'No metadata file found') - return None -def get(ctx, metadata_path, metadata=None): +def get(ctx: rule.Context, metadata_path: str, metadata: Optional[Dict] = None) -> Optional[Callable]: """Find a transformation that can be executed on the given metadata JSON. :param ctx: Combined type of a ctx and rei struct @@ -82,8 +82,9 @@ def get(ctx, metadata_path, metadata=None): # Ideally, we would check that the metadata is valid in its current # schema before claiming that we can transform it... - # print('{} -> {}'.format(src,dst)) + if src is None: + return None return schema_transformations.get(src, dst) except KeyError: @@ -114,7 +115,7 @@ def rule_get_transformation_info(rule_args, callback, rei): rule_args[1:3] = 'true', transformation_html(transform) -def copy_acls_from_parent(ctx, path, recursive_flag): +def copy_acls_from_parent(ctx: rule.Context, path: str, recursive_flag: str) -> None: """ When inheritance is missing we need to copy ACLs when introducing new data in vault package. @@ -310,7 +311,7 @@ def rule_batch_vault_metadata_correct_orcid_format(rule_args, callback, rei): "") -def transform_orcid(ctx, m): +def transform_orcid(ctx: rule.Context, m: Dict) -> Dict: """ Transform all present orcid's into the correct format. If possible! @@ -342,7 +343,7 @@ def transform_orcid(ctx, m): return {'metadata': m, 'data_changed': data_changed} -def correctify_orcid(org_orcid): +def correctify_orcid(org_orcid: str) -> Optional[str]: """Function to correct illformatted ORCIDs. Returns None if value cannot be fixed.""" # Get rid of all spaces. 
orcid = org_orcid.replace(' ', '') @@ -359,7 +360,7 @@ def correctify_orcid(org_orcid): return "https://orcid.org/{}".format(orcs[-1]) -def html(f): +def html(f: Callable) -> str: """Get a human-readable HTML description of a transformation function. The text is derived from the function's docstring. @@ -368,18 +369,19 @@ def html(f): :returns: Human-readable HTML description of a transformation function """ + docstring = "" if f.__doc__ is None else f.__doc__ description = '\n'.join(map(lambda paragraph: '<p>{}</p>'.format( # Trim whitespace. re.sub(r'\s+', ' ', paragraph).strip()), # Docstring paragraphs are separated by blank lines. - re.split('\n{2,}', f.__doc__))) + re.split('\n{2,}', docstring))) # Remove docstring. return re.sub('((:param).*)|((:returns:).*)', ' ', description) @rule.make(inputs=[], outputs=[0]) -def rule_batch_vault_metadata_schema_report(ctx): +def rule_batch_vault_metadata_schema_report(ctx: rule.Context) -> str: """Show vault metadata schema about each data package in vault :param ctx: Combined type of a callback and rei struct diff --git a/schema_transformations.py b/schema_transformations.py index d61f324eb..079911c1a 100644 --- a/schema_transformations.py +++ b/schema_transformations.py @@ -4,6 +4,7 @@ __license__ = 'GPLv3, see LICENSE' import re +from typing import Callable, Dict, Optional from schema_transformations_utils import correctify_isni, correctify_orcid, correctify_researcher_id, correctify_scopus @@ -25,7 +26,7 @@ # The docstring of a transformation function should describe the transformation # in a human-readable manner: it is provided to the user executing the transformation. -def _default0_default1(ctx, m): +def _default0_default1(ctx: rule.Context, m: Dict) -> Dict: """ A Data type field is added to be used for publication purposes to DataCite. @@ -45,7 +46,7 @@ def _default0_default1(ctx, m): :returns: Transformed (default-1) JSON object """ - def fixup_name(n): + def fixup_name(n: str) -> Dict: """Split a name into a first and last name, error-prone, but acceptable.""" n.strip() # Trim whitespace, if any. @@ -76,7 +77,7 @@ def fixup_name(n): return m -def _default1_default2(ctx, m): +def _default1_default2(ctx: rule.Context, m: Dict) -> Dict: """ Metadata fields Discipline, Language and Tags have become required fields.
@@ -108,7 +109,7 @@ def _default1_default2(ctx, m): return m -def _default2_default3(ctx, m): +def _default2_default3(ctx: rule.Context, m: Dict) -> Dict: """ Add affiliation identifiers to creators and contributors. @@ -265,7 +266,7 @@ def _default2_default3(ctx, m): return m -def _core1_core2(ctx, m): +def _core1_core2(ctx: rule.Context, m: Dict) -> Dict: """ Add affiliation identifiers to creators. @@ -297,7 +298,7 @@ def _core1_core2(ctx, m): return m -def _dag0_default2(ctx, m): +def _dag0_default2(ctx: rule.Context, m: Dict) -> Dict: """ Transform dag-0 data to the default-2 schema definition @@ -361,7 +362,7 @@ def _dag0_default2(ctx, m): return m -def _default1_teclab0(ctx, m): +def _default1_teclab0(ctx: rule.Context, m: Dict) -> Dict: """ Transform Default-1 data to the teclab-0 schema definition @@ -482,7 +483,7 @@ def _default1_teclab0(ctx, m): return m -def _default1_hptlab0(ctx, m): +def _default1_hptlab0(ctx: rule.Context, m: Dict) -> Dict: """ Transform Default-1 data to the hptlab-0 schema definition @@ -601,7 +602,7 @@ def _default1_hptlab0(ctx, m): return m -def _hptlab0_hptlab1(ctx, m): +def _hptlab0_hptlab1(ctx: rule.Context, m: Dict) -> Dict: """ Transform hptlab-0 data to the hptlab-1 schema definition which holds better qualified lists. @@ -657,7 +658,7 @@ def _hptlab0_hptlab1(ctx, m): return m -def _teclab0_teclab1(ctx, m): +def _teclab0_teclab1(ctx: rule.Context, m: Dict) -> Dict: """ Transform teclab-0 data to the teclab-1 schema definition which holds better qualified lists. @@ -716,7 +717,7 @@ def _teclab0_teclab1(ctx, m): # }}} -def get(src_id, dst_id): +def get(src_id: str, dst_id: str) -> Optional[Callable]: """ Get a transformation function that maps metadata from the given src schema id to the dst schema id. 
diff --git a/schema_transformations_utils.py b/schema_transformations_utils.py index 2e5e499b5..3f14335dd 100644 --- a/schema_transformations_utils.py +++ b/schema_transformations_utils.py @@ -4,9 +4,10 @@ __license__ = 'GPLv3, see LICENSE' import re +from typing import Optional -def correctify_orcid(org_orcid): +def correctify_orcid(org_orcid: str) -> Optional[str]: """Correct illformatted ORCID.""" # Get rid of all spaces. orcid = org_orcid.replace(' ', '') @@ -23,7 +24,7 @@ def correctify_orcid(org_orcid): return "https://orcid.org/{}".format(orcs[-1]) -def correctify_scopus(org_scopus): +def correctify_scopus(org_scopus: str) -> Optional[str]: """Correct illformatted Scopus.""" # Get rid of all spaces. new_scopus = org_scopus.replace(' ', '') @@ -34,7 +35,7 @@ def correctify_scopus(org_scopus): return new_scopus -def correctify_isni(org_isni): +def correctify_isni(org_isni: str) -> Optional[str]: """Correct ill-formatted ISNI.""" # Remove all spaces. new_isni = org_isni.replace(' ', '') @@ -44,14 +45,14 @@ def correctify_isni(org_isni): # The last part should hold a valid id like eg: 123412341234123X. # If not, it is impossible to correct it to the valid isni format - new_isni = new_isni.split('/') - if not re.search("^[0-9]{15}[0-9X]$", new_isni[-1]): + new_isni_split = new_isni.split('/') + if not re.search("^[0-9]{15}[0-9X]$", new_isni_split[-1]): return None - return "https://isni.org/isni/{}".format(new_isni[-1]) + return "https://isni.org/isni/{}".format(new_isni_split[-1]) -def correctify_researcher_id(org_researcher_id): +def correctify_researcher_id(org_researcher_id: str) -> str: """Correct illformatted ResearcherID.""" # Get rid of all spaces. 
researcher_id = org_researcher_id.replace(' ', '') diff --git a/settings.py b/settings.py index 24bc673ab..827b85951 100644 --- a/settings.py +++ b/settings.py @@ -3,6 +3,8 @@ __copyright__ = 'Copyright (c) 2021-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import Dict, Optional, Sequence + from genquery import Query from util import * @@ -21,7 +23,7 @@ SETTINGS_KEY = constants.UUORGMETADATAPREFIX + "settings_" -def load(ctx, setting, username=None): +def load(ctx: rule.Context, setting: str, username: Optional[str] = None) -> Sequence[str]: """Load user setting. :param ctx: Combined type of a callback and rei struct @@ -44,7 +46,7 @@ def load(ctx, setting, username=None): @api.make() -def api_settings_load(ctx): +def api_settings_load(ctx: rule.Context) -> api.Result: """Load user settings. :param ctx: Combined type of a callback and rei struct @@ -64,7 +66,7 @@ def api_settings_load(ctx): @api.make() -def api_settings_save(ctx, settings): +def api_settings_save(ctx: rule.Context, settings: Dict) -> api.Result: """Save user settings. 
:param ctx: Combined type of a callback and rei struct diff --git a/setup.cfg b/setup.cfg index a9ef75804..08887bf8a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,3 +6,17 @@ docstring_style=sphinx max-line-length=127 exclude=__init__.py,tools,tests/env/ application-import-names=avu,conftest,util,api,config,constants,data_access_token,datacite,datarequest,data_object,epic,error,folder,groups,groups_import,json_datacite,json_landing_page,jsonutil,log,mail,meta,meta_form,msi,notifications,schema,schema_transformation,schema_transformations,settings,pathutil,provenance,policies_intake,policies_datamanager,policies_datapackage_status,policies_folder_status,policies_datarequest_status,publication,query,replication,revisions,revision_strategies,revision_utils,rule,user,vault,sram,arb_data_manager,cached_data_manager,resource,yoda_names,policies_utils + +[mypy] +exclude = tools|unit-tests|util +disable_error_code = arg-type, attr-defined, index, method-assign, misc, no-redef, operator, union-attr, unreachable, var-annotated +ignore_missing_imports = True +warn_unreachable = True +no_implicit_optional = True +check_untyped_defs = False +disallow_any_generics = False +disallow_incomplete_defs = True +disallow_untyped_calls = False +disallow_untyped_defs = False +show_error_codes = True +show_error_context = True diff --git a/sram.py b/sram.py index ad06db8dc..33caa17a2 100644 --- a/sram.py +++ b/sram.py @@ -5,6 +5,7 @@ import datetime import time +from typing import Dict, List import requests import session_vars @@ -13,7 +14,7 @@ from util import * -def sram_post_collaboration(ctx, group_name, description): +def sram_post_collaboration(ctx: rule.Context, group_name: str, description: str) -> Dict: """Create SRAM Collaborative Organisation Identifier. 
:param ctx: Combined type of a callback and rei struct @@ -57,7 +58,7 @@ def sram_post_collaboration(ctx, group_name, description): return data -def sram_get_uid(ctx, co_identifier, user_name): +def sram_get_uid(ctx: rule.Context, co_identifier: str, user_name: str) -> str: """Get SRAM Collaboration member uid. :param ctx: Combined type of a callback and rei struct @@ -91,7 +92,7 @@ def sram_get_uid(ctx, co_identifier, user_name): return uid -def sram_delete_collaboration(ctx, co_identifier): +def sram_delete_collaboration(ctx: rule.Context, co_identifier: str) -> bool: """Delete SRAM Collaborative Organisation. :param ctx: Combined type of a callback and rei struct @@ -113,7 +114,7 @@ def sram_delete_collaboration(ctx, co_identifier): return response.status_code == 204 -def sram_delete_collaboration_membership(ctx, co_identifier, uuid): +def sram_delete_collaboration_membership(ctx: rule.Context, co_identifier: str, uuid: str) -> bool: """Delete SRAM Collaborative Organisation membership. :param ctx: Combined type of a callback and rei struct @@ -136,7 +137,7 @@ def sram_delete_collaboration_membership(ctx, co_identifier, uuid): return response.status_code == 204 -def sram_put_collaboration_invitation(ctx, group_name, username, co_identifier): +def sram_put_collaboration_invitation(ctx: rule.Context, group_name: str, username: str, co_identifier: str) -> bool: """Create SRAM Collaborative Organisation Identifier. :param ctx: Combined type of a ctx and rei struct @@ -180,7 +181,7 @@ def sram_put_collaboration_invitation(ctx, group_name, username, co_identifier): return response.status_code == 201 -def sram_connect_service_collaboration(ctx, short_name): +def sram_connect_service_collaboration(ctx: rule.Context, short_name: str) -> bool: """Connect a service to an existing SRAM collaboration. 
:param ctx: Combined type of a ctx and rei struct @@ -208,7 +209,7 @@ def sram_connect_service_collaboration(ctx, short_name): return response.status_code == 201 -def invitation_mail_group_add_user(ctx, group_name, username, co_identifier): +def invitation_mail_group_add_user(ctx: rule.Context, group_name: str, username: str, co_identifier: str) -> str: """Send invitation email to newly added user to the group. :param ctx: Combined type of a ctx and rei struct @@ -235,7 +236,7 @@ def invitation_mail_group_add_user(ctx, group_name, username, co_identifier): """.format(username.split('@')[0], session_vars.get_map(ctx.rei)["client_user"]["user_name"], config.sram_rest_api_url, co_identifier)) -def sram_update_collaboration_membership(ctx, co_identifier, uuid, new_role): +def sram_update_collaboration_membership(ctx: rule.Context, co_identifier: str, uuid: str, new_role: str) -> bool: """Update SRAM Collaborative Organisation membership. :param ctx: Combined type of a callback and rei struct @@ -269,13 +270,13 @@ def sram_update_collaboration_membership(ctx, co_identifier, uuid, new_role): return response.status_code == 201 -def sram_get_co_members(ctx, co_identifier): +def sram_get_co_members(ctx: rule.Context, co_identifier: str) -> List[str]: """Get SRAM Collaboration members. 
:param ctx: Combined type of a callback and rei struct :param co_identifier: SRAM CO identifier - :returns: Email of the user + :returns: List of emails of the SRAM Collaboration members """ url = "{}/api/collaborations/v1/{}".format(config.sram_rest_api_url, co_identifier) headers = {'Content-Type': 'application/json', 'charset': 'UTF-8', 'Authorization': 'bearer ' + config.sram_api_key} diff --git a/vault.py b/vault.py index 5a02a994d..90e7d50ff 100644 --- a/vault.py +++ b/vault.py @@ -8,9 +8,9 @@ import subprocess import time from datetime import datetime +from typing import Dict, List, Optional, Tuple import genquery -import irods_types from dateutil import parser import folder @@ -48,7 +48,7 @@ @api.make() -def api_vault_submit(ctx, coll, previous_version=None): +def api_vault_submit(ctx: rule.Context, coll: str, previous_version: Optional[str] = None) -> api.Result: """Submit data package for publication. :param ctx: Combined type of a callback and rei struct @@ -72,7 +72,7 @@ def api_vault_submit(ctx, coll, previous_version=None): @api.make() -def api_vault_approve(ctx, coll): +def api_vault_approve(ctx: rule.Context, coll: str) -> api.Result: """Approve data package for publication. :param ctx: Combined type of a callback and rei struct @@ -102,7 +102,7 @@ def api_vault_approve(ctx, coll): @api.make() -def api_vault_cancel(ctx, coll): +def api_vault_cancel(ctx: rule.Context, coll: str) -> api.Result: """Cancel submit of data package. :param ctx: Combined type of a callback and rei struct @@ -125,7 +125,7 @@ def api_vault_cancel(ctx, coll): @api.make() -def api_vault_depublish(ctx, coll): +def api_vault_depublish(ctx: rule.Context, coll: str) -> api.Result: """Depublish data package. :param ctx: Combined type of a callback and rei struct @@ -148,7 +148,7 @@ def api_vault_depublish(ctx, coll): @api.make() -def api_vault_republish(ctx, coll): +def api_vault_republish(ctx: rule.Context, coll: str) -> api.Result: """Republish data package. 
:param ctx: Combined type of a callback and rei struct @@ -171,7 +171,7 @@ def api_vault_republish(ctx, coll): @api.make() -def api_vault_copy_to_research(ctx, coll_origin, coll_target): +def api_vault_copy_to_research(ctx: rule.Context, coll_origin: str, coll_target: str) -> api.Result: """Copy data package from vault to research space. :param ctx: Combined type of a callback and rei struct @@ -242,7 +242,7 @@ def api_vault_copy_to_research(ctx, coll_origin, coll_target): @api.make() -def api_vault_preservable_formats_lists(ctx): +def api_vault_preservable_formats_lists(ctx: rule.Context) -> api.Result: """Retrieve lists of preservable file formats on the system. :param ctx: Combined type of a callback and rei struct @@ -262,7 +262,7 @@ def api_vault_preservable_formats_lists(ctx): @api.make() -def api_vault_unpreservable_files(ctx, coll, list_name): +def api_vault_unpreservable_files(ctx: rule.Context, coll: str, list_name: str) -> api.Result: """Retrieve list of unpreservable file formats in a collection. :param ctx: Combined type of a callback and rei struct @@ -284,36 +284,35 @@ def api_vault_unpreservable_files(ctx, coll, list_name): collection.data_objects(ctx, coll, recursive=True)) # Exclude Yoda metadata files - data_names = filter(lambda x: not re.match(r"yoda\-metadata(\[\d+\])?\.(xml|json)", x), data_names) + data_names_filtered = filter(lambda x: not re.match(r"yoda\-metadata(\[\d+\])?\.(xml|json)", x), data_names) # Data names -> lowercase extensions, without the dot. - exts = set(list(map(lambda x: os.path.splitext(x)[1][1:].lower(), data_names))) + exts = set(list(map(lambda x: os.path.splitext(x)[1][1:].lower(), data_names_filtered))) exts -= {''} # Return any ext that is not in the preservable list. 
return list(exts - preservable_formats) -def rule_vault_copy_original_metadata_to_vault(rule_args, callback, rei): +@rule.make(inputs=[0], outputs=[]) +def rule_vault_copy_original_metadata_to_vault(ctx: rule.Context, vault_package: str) -> None: """Copy the original metadata JSON into the root of the package. - :param rule_args: [0] Path of a new package in the vault - :param callback: Callback to rule Language - :param rei: The rei struct + :param ctx: Combined type of a callback and rei struct + :param vault_package: Path of a package in the vault """ - vault_package = rule_args[0] - vault_copy_original_metadata_to_vault(callback, vault_package) + vault_copy_original_metadata_to_vault(ctx, vault_package) -def get_vault_copy_numthreads(ctx): +def get_vault_copy_numthreads(ctx: rule.Context) -> int: # numThreads should be 0 if want multithreading with no specified amount of threads return 0 if config.vault_copy_multithread_enabled else 1 -def vault_copy_original_metadata_to_vault(ctx, vault_package_path): +def vault_copy_original_metadata_to_vault(ctx: rule.Context, vault_package_path: str) -> None: """Copy original metadata to the vault package root. - :param ctx: Combined type of a callback and rei struct + :param ctx: Combined type of a callback and rei struct :param vault_package_path: Path of a package in the vault """ original_metadata = vault_package_path + "/original/" + constants.IIJSONMETADATA @@ -325,22 +324,20 @@ def vault_copy_original_metadata_to_vault(ctx, vault_package_path): # msi.data_obj_copy(ctx, original_metadata, copied_metadata, 'verifyChksum=', irods_types.BytesBuf()) -def rule_vault_write_license(rule_args, callback, rei): +@rule.make(inputs=[0], outputs=[]) +def rule_vault_write_license(ctx: rule.Context, vault_pkg_coll: str) -> None: """Write the license as a text file into the root of the vault package. 
- :param rule_args: [0] Path of a package in the vault - :param callback: Callback to rule Language - :param rei: The rei struct + :param ctx: Combined type of a callback and rei struct + :param vault_pkg_coll: Path of a package in the vault """ + vault_write_license(ctx, vault_pkg_coll) - vault_pkg_coll = rule_args[0] - vault_write_license(callback, vault_pkg_coll) - -def vault_write_license(ctx, vault_pkg_coll): +def vault_write_license(ctx: rule.Context, vault_pkg_coll: str) -> None: """Write the license as a text file into the root of the vault package. - :param ctx: Combined type of a callback and rei struct + :param ctx: Combined type of a callback and rei struct :param vault_pkg_coll: Path of a package in the vault """ zone = user.zone(ctx) @@ -398,30 +395,31 @@ def vault_write_license(ctx, vault_pkg_coll): @rule.make(inputs=[0], outputs=[1]) -def rule_vault_enable_indexing(ctx, coll): +def rule_vault_enable_indexing(ctx: rule.Context, coll: str) -> str: vault_enable_indexing(ctx, coll) return "Success" -def vault_enable_indexing(ctx, coll): +def vault_enable_indexing(ctx: rule.Context, coll: str) -> None: if config.enable_open_search: if not collection.exists(ctx, coll + "/index"): # index collection does not exist yet path = meta.get_latest_vault_metadata_path(ctx, coll) - ctx.msi_rmw_avu('-d', path, '%', '%', constants.UUFLATINDEX) - meta.ingest_metadata_vault(ctx, path) + if path: + ctx.msi_rmw_avu('-d', path, '%', '%', constants.UUFLATINDEX) + meta.ingest_metadata_vault(ctx, path) # add indexing attribute and update opensearch subprocess.call(["imeta", "add", "-C", coll + "/index", "irods::indexing::index", "yoda::metadata", "elasticsearch"]) @rule.make(inputs=[0], outputs=[1]) -def rule_vault_disable_indexing(ctx, coll): +def rule_vault_disable_indexing(ctx: rule.Context, coll: str) -> str: vault_disable_indexing(ctx, coll) return "Success" -def vault_disable_indexing(ctx, coll): +def vault_disable_indexing(ctx: rule.Context, coll: str) -> None: if 
config.enable_open_search: if collection.exists(ctx, coll + "/index"): coll = coll + "/index" @@ -434,7 +432,7 @@ def vault_disable_indexing(ctx, coll): @api.make() -def api_vault_system_metadata(ctx, coll): +def api_vault_system_metadata(ctx: rule.Context, coll: str) -> api.Result: """Return system metadata of a vault collection. :param ctx: Combined type of a callback and rei struct @@ -466,8 +464,8 @@ def api_vault_system_metadata(ctx, coll): # Python 3: https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat # modified_date = date.fromisoformat(row[0]) modified_date = parser.parse(row[0]) - modified_date = modified_date.strftime('%Y-%m-%d %H:%M:%S%z') - system_metadata["Modified date"] = "{}".format(modified_date) + modified_date_time = modified_date.strftime('%Y-%m-%d %H:%M:%S%z') + system_metadata["Modified date"] = "{}".format(modified_date_time) # Landingpage URL. landinpage_url = "" @@ -524,15 +522,15 @@ def api_vault_system_metadata(ctx, coll): return system_metadata -def get_coll_vault_status(ctx, path, org_metadata=None): +def get_coll_vault_status(ctx: rule.Context, path: str, org_metadata: Optional[List] = None) -> constants.vault_package_state: """Get the status of a vault folder.""" if org_metadata is None: org_metadata = folder.get_org_metadata(ctx, path) # Don't care about duplicate attr names here. 
- org_metadata = dict(org_metadata) - if constants.IIVAULTSTATUSATTRNAME in org_metadata: - x = org_metadata[constants.IIVAULTSTATUSATTRNAME] + org_metadata_dict = dict(org_metadata) + if constants.IIVAULTSTATUSATTRNAME in org_metadata_dict: + x = org_metadata_dict[constants.IIVAULTSTATUSATTRNAME] try: return constants.vault_package_state(x) except Exception: @@ -541,7 +539,7 @@ def get_coll_vault_status(ctx, path, org_metadata=None): return constants.vault_package_state.EMPTY -def get_all_published_versions(ctx, path): +def get_all_published_versions(ctx: rule.Context, path: str) -> Tuple[Optional[str], Optional[str], List]: """Get all published versions of a data package.""" base_doi = get_doi(ctx, path, 'base') package_doi = get_doi(ctx, path) @@ -582,7 +580,7 @@ def get_all_published_versions(ctx, path): @api.make() -def api_vault_collection_details(ctx, path): +def api_vault_collection_details(ctx: rule.Context, path: str) -> api.Result: """Return details of a vault collection. :param ctx: Combined type of a callback and rei struct @@ -694,7 +692,7 @@ def api_vault_collection_details(ctx, path): @api.make() -def api_vault_get_package_by_reference(ctx, reference): +def api_vault_get_package_by_reference(ctx: rule.Context, reference: str) -> api.Result: """Return path to data package with provided reference (UUID4). :param ctx: Combined type of a callback and rei struct @@ -719,7 +717,7 @@ def api_vault_get_package_by_reference(ctx, reference): @api.make() -def api_vault_get_landingpage_data(ctx, coll): +def api_vault_get_landingpage_data(ctx: rule.Context, coll: str) -> api.Result: """Retrieve landingpage data of data package. Landinpage data consists of metadata and system metadata. 
@@ -767,7 +765,7 @@ def api_vault_get_landingpage_data(ctx, coll): @api.make() -def api_vault_get_publication_terms(ctx): +def api_vault_get_publication_terms(ctx: rule.Context) -> api.Result: """Retrieve the publication terms.""" zone = user.zone(ctx) terms_collection = "/{}{}".format(zone, constants.IITERMSCOLLECTION) @@ -791,7 +789,7 @@ def api_vault_get_publication_terms(ctx): return api.Error('TermsReadFailed', 'Could not open Terms and Agreements.') -def change_read_access_group(ctx, coll, actor, group, grant=True): +def change_read_access_group(ctx: rule.Context, coll: str, actor: str, group: str, grant: bool = True) -> Tuple[bool, api.Result]: """Grant/revoke research group read access to vault package. :param ctx: Combined type of a callback and rei struct @@ -818,7 +816,7 @@ def change_read_access_group(ctx, coll, actor, group, grant=True): return True, '' -def check_change_read_access_research_group(ctx, coll, grant=True): +def check_change_read_access_research_group(ctx: rule.Context, coll: str, grant: bool = True) -> Tuple[bool, api.Result]: """Initial checks when changing read rights of research group for datapackage in vault. :param ctx: Combined type of a callback and rei struct @@ -843,7 +841,7 @@ def check_change_read_access_research_group(ctx, coll, grant=True): return True, '' -def change_read_access_research_group(ctx, coll, grant=True): +def change_read_access_research_group(ctx: rule.Context, coll: str, grant: bool = True) -> api.Result: """Grant/revoke read rights of members of research group to a datapackage in vault. This operation also includes read only members. @@ -884,7 +882,7 @@ def change_read_access_research_group(ctx, coll, grant=True): @api.make() -def api_grant_read_access_research_group(ctx, coll): +def api_grant_read_access_research_group(ctx: rule.Context, coll: str) -> api.Result: """Grant read rights of research group for datapackage in vault. 
:param ctx: Combined type of a callback and rei struct @@ -896,7 +894,7 @@ def api_grant_read_access_research_group(ctx, coll): @api.make() -def api_revoke_read_access_research_group(ctx, coll): +def api_revoke_read_access_research_group(ctx: rule.Context, coll: str) -> api.Result: """Revoke read rights of research group for datapackage in vault. :param ctx: Combined type of a callback and rei struct @@ -908,17 +906,17 @@ def api_revoke_read_access_research_group(ctx, coll): @rule.make() -def rule_vault_retry_copy_to_vault(ctx): +def rule_vault_retry_copy_to_vault(ctx: rule.Context) -> None: copy_to_vault(ctx, constants.CRONJOB_STATE["PENDING"]) copy_to_vault(ctx, constants.CRONJOB_STATE["RETRY"]) -def copy_to_vault(ctx, state): +def copy_to_vault(ctx: rule.Context, state: str) -> None: """ Collect all folders with a given cronjob state and try to copy them to the vault. - :param ctx: Combined type of a callback and rei struct - :param state: one of constants.CRONJOB_STATE + :param ctx: Combined type of a callback and rei struct + :param state: One of constants.CRONJOB_STATE """ iter = get_copy_to_vault_colls(ctx, state) for row in iter: @@ -933,7 +931,7 @@ def copy_to_vault(ctx, state): folder.folder_secure_set_retry(ctx, coll) -def get_copy_to_vault_colls(ctx, cronjob_state): +def get_copy_to_vault_colls(ctx: rule.Context, cronjob_state: str) -> List: iter = list(genquery.Query(ctx, ['COLL_NAME'], "META_COLL_ATTR_NAME = '{}' AND META_COLL_ATTR_VALUE = '{}'".format( @@ -943,7 +941,7 @@ def get_copy_to_vault_colls(ctx, cronjob_state): return iter -def copy_folder_to_vault(ctx, coll, target): +def copy_folder_to_vault(ctx: rule.Context, coll: str, target: str) -> bool: """Copy folder and all its contents to target in vault using irsync. The data will reside under folder '/original' within the vault. 
@@ -958,7 +956,7 @@ def copy_folder_to_vault(ctx, coll, target): try: returncode = subprocess.call(["irsync", "-rK", "i:{}/".format(coll), "i:{}/original".format(target)]) except Exception as e: - log.write(ctx, "irsync failure: " + e) + log.write(ctx, "irsync failure: " + str(e)) log.write(ctx, "irsync failure for coll <{}> and target <{}>".format(coll, target)) return False @@ -969,103 +967,7 @@ def copy_folder_to_vault(ctx, coll, target): return True -def treewalk_and_ingest(ctx, folder, target, origin, error): - """Treewalk folder and ingest. - - :param ctx: Combined type of a callback and rei struct - :param folder: Will change every time as it represents every folder that has to be copied to vault - :param target: Target of ingest - :param origin: Origin of treewalk - :param error: 0/1 indicating if treewalk or ingest failed - - :returns: Error status (which should remain 0 for further processing in iterative manner) - """ - parent_coll, coll = pathutil.chop(folder) - - # 1. Process this collection itself as a collection. - # INGEST - if error == 0: - # INGEST COLLECTION - error = ingest_object(ctx, parent_coll, coll, True, target, origin) - - # 2. Process dataobjects located directly within the collection - if error == 0: - iter = genquery.row_iterator( - "DATA_NAME", - "COLL_NAME = '" + folder + "'", - genquery.AS_LIST, ctx - ) - for row in iter: - # INGEST OBJECT - error = ingest_object(ctx, folder, row[0], False, target, origin) - if error: - break - - if error == 0: - # 3. 
Process the subfolders - # Loop through subfolders which have folder as parent folder - iter = genquery.row_iterator( - "COLL_NAME", - "COLL_PARENT_NAME = '" + folder + "'", - genquery.AS_LIST, ctx - ) - for row in iter: - error = treewalk_and_ingest(ctx, row[0], target, origin, error) - if error: - break - - return error - - -def ingest_object(ctx, parent, item, item_is_collection, destination, origin): - source_path = parent + "/" + item - read_access = msi.check_access(ctx, source_path, 'read_object', irods_types.BytesBuf())['arguments'][2] - - # TODO use set_acl_check? - if read_access != b'\x01': - try: - msi.set_acl(ctx, "default", "admin:read", user.full_name(ctx), source_path) - except msi.Error: - return 1 - - dest_path = destination - - if source_path != origin: - markIncomplete = False - # rewrite path to copy objects that are located underneath the toplevel collection - source_length = len(source_path) - relative_path = source_path[len(origin) + 1: source_length] - dest_path = destination + '/' + relative_path - else: - markIncomplete = True - - if item_is_collection: - # CREATE COLLECTION - try: - msi.coll_create(ctx, dest_path, '', irods_types.BytesBuf()) - except msi.Error: - return 1 - - if markIncomplete: - avu.set_on_coll(ctx, dest_path, constants.IIVAULTSTATUSATTRNAME, constants.vault_package_state.INCOMPLETE) - else: - # CREATE COPY OF DATA OBJECT - try: - # msi.data_obj_copy(ctx, source_path, dest_path, '', irods_types.BytesBuf()) - ctx.msiDataObjCopy(source_path, dest_path, 'numThreads={}++++verifyChksum='.format(get_vault_copy_numthreads(ctx)), 0) - except msi.Error: - return 1 - - if read_access != b'\x01': - try: - msi.set_acl(ctx, "default", "admin:null", user.full_name(ctx), source_path) - except msi.Error: - return 1 - - return 0 - - -def set_vault_permissions(ctx, coll, target): +def set_vault_permissions(ctx: rule.Context, coll: str, target: str) -> bool: """Set permissions in the vault as such that data can be copied to the vault.""" 
group_name = folder.collection_group_name(ctx, coll) if group_name == '': @@ -1161,7 +1063,7 @@ def set_vault_permissions(ctx, coll, target): return True -def reader_needs_access(ctx, group_name, coll): +def reader_needs_access(ctx: rule.Context, group_name: str, coll: str) -> bool: """Return if research group has access to this group but readers do not""" iter = genquery.row_iterator( "COLL_ACCESS_USER_ID", @@ -1183,7 +1085,7 @@ def reader_needs_access(ctx, group_name, coll): return not reader_found and research_found -def set_reader_vault_permissions(ctx, group_name, zone, dry_run): +def set_reader_vault_permissions(ctx: rule.Context, group_name: str, zone: str, dry_run: bool) -> bool: """Given a research group name, give reader group access to vault packages if they don't have that access already. @@ -1240,7 +1142,7 @@ def set_reader_vault_permissions(ctx, group_name, zone, dry_run): @rule.make(inputs=[0, 1], outputs=[2]) -def rule_vault_grant_readers_vault_access(ctx, dry_run, verbose): +def rule_vault_grant_readers_vault_access(ctx: rule.Context, dry_run: str, verbose: str) -> str: """Rule for granting reader members of research groups access to vault packages in their group if they don't have access already @@ -1250,8 +1152,8 @@ def rule_vault_grant_readers_vault_access(ctx, dry_run, verbose): :return: String status of completed successfully ('0') or there were errors ('1') """ - dry_run = (dry_run == '1') - verbose = (verbose == '1') + dry_run_mode = (dry_run == '1') + verbose_mode = (verbose == '1') no_errors = True log.write(ctx, "grant_readers_vault_access started.") @@ -1260,11 +1162,11 @@ def rule_vault_grant_readers_vault_access(ctx, dry_run, verbose): log.write(ctx, "User is not rodsadmin") return '1' - if dry_run or verbose: + if dry_run_mode or verbose_mode: modes = [] - if dry_run: + if dry_run_mode: modes.append("dry run") - if verbose: + if verbose_mode: modes.append("verbose") log.write(ctx, "Running grant_readers_vault_access in {} 
mode.".format((" and ").join(modes))) @@ -1281,7 +1183,7 @@ def rule_vault_grant_readers_vault_access(ctx, dry_run, verbose): name = row[0] - if verbose: + if verbose_mode: log.write(ctx, "{}: checking permissions".format(name)) - if not set_reader_vault_permissions(ctx, name, zone, dry_run): + if not set_reader_vault_permissions(ctx, name, zone, dry_run_mode): no_errors = False message = "" @@ -1295,13 +1197,13 @@ def rule_vault_grant_readers_vault_access(ctx, dry_run, verbose): @rule.make(inputs=[0, 1, 2, 3], outputs=[4, 5]) -def rule_vault_process_status_transitions(ctx, coll, new_coll_status, actor, previous_version): +def rule_vault_process_status_transitions(ctx: rule.Context, coll: str, new_coll_status: str, actor: str, previous_version: str) -> str: """Rule interface for processing vault status transition request. - :param ctx: Combined type of a callback and rei struct - :param coll: Vault collection to change status for - :param new_coll_status: New vault package status - :param actor: Actor of the status change + :param ctx: Combined type of a callback and rei struct + :param coll: Vault collection to change status for + :param new_coll_status: New vault package status + :param actor: Actor of the status change :param previous_version: Path to previous version of data package in the vault :return: Dict with status and statusinfo. @@ -1311,7 +1213,7 @@ def rule_vault_process_status_transitions(ctx, coll, new_coll_status, actor, pre return 'Success' -def vault_process_status_transitions(ctx, coll, new_coll_status, actor, previous_version): +def vault_process_status_transitions(ctx: rule.Context, coll: str, new_coll_status: str, actor: str, previous_version: str) -> List: """Processing vault status transition request.
:param ctx: Combined type of a callback and rei struct @@ -1320,7 +1222,7 @@ def vault_process_status_transitions(ctx, coll, new_coll_status, actor, previous :param actor: Actor of the status change :param previous_version: Path to previous version of data package in the vault - :return: Dict with status and statusinfo + :return: List with status and statusinfo """ # check permissions - rodsadmin only if user.user_type(ctx) != 'rodsadmin': @@ -1374,7 +1276,7 @@ def vault_process_status_transitions(ctx, coll, new_coll_status, actor, previous return ['Success', ''] -def vault_request_status_transitions(ctx, coll, new_vault_status, previous_version=None): +def vault_request_status_transitions(ctx: rule.Context, coll: str, new_vault_status: str, previous_version: Optional[str] = None) -> List: """Request vault status transition action. :param ctx: Combined type of a callback and rei struct @@ -1382,7 +1284,7 @@ def vault_request_status_transitions(ctx, coll, new_vault_status, previous_versi :param new_vault_status: New vault status :param previous_version: Path to previous version of data package in the vault - :return: Dict with status and statusinfo + :return: List with status and statusinfo """ # check permissions - rodsadmin only if user.user_type(ctx) != 'rodsadmin': @@ -1444,9 +1346,10 @@ def vault_request_status_transitions(ctx, coll, new_vault_status, previous_versi # Data package is new version of existing data package with a DOI. previous_version_path = "" - doi = get_doi(ctx, previous_version) - if previous_version and doi: - previous_version_path = previous_version + if previous_version: + doi = get_doi(ctx, previous_version) + if doi: + previous_version_path = previous_version # Add vault action request to actor group. 
avu.set_on_coll(ctx, actor_group_path, constants.UUORGMETADATAPREFIX + 'vault_action_' + coll_id, jsonutil.dump([coll, str(new_vault_status), actor, previous_version_path])) @@ -1458,13 +1361,13 @@ def vault_request_status_transitions(ctx, coll, new_vault_status, previous_versi return ['', ''] -def set_submitter(ctx, path, actor): +def set_submitter(ctx: rule.Context, path: str, actor: str) -> None: """Set submitter of data package for publication.""" attribute = constants.UUORGMETADATAPREFIX + "publication_submission_actor" avu.set_on_coll(ctx, path, attribute, actor) -def get_submitter(ctx, path): +def get_submitter(ctx: rule.Context, path: str) -> str: - """Set submitter of data package for publication.""" + """Get submitter of data package for publication.""" attribute = constants.UUORGMETADATAPREFIX + "publication_submission_actor" org_metadata = dict(folder.get_org_metadata(ctx, path)) @@ -1472,16 +1375,16 @@ def get_submitter(ctx, path): if attribute in org_metadata: return org_metadata[attribute] else: - return None + return "" -def set_approver(ctx, path, actor): +def set_approver(ctx: rule.Context, path: str, actor: str) -> None: """Set approver of data package for publication.""" attribute = constants.UUORGMETADATAPREFIX + "publication_approval_actor" avu.set_on_coll(ctx, path, attribute, actor) -def get_approver(ctx, path): +def get_approver(ctx: rule.Context, path: str) -> str: - """Set approver of data package for publication.""" + """Get approver of data package for publication.""" attribute = constants.UUORGMETADATAPREFIX + "publication_approval_actor" org_metadata = dict(folder.get_org_metadata(ctx, path)) @@ -1489,10 +1392,10 @@ def get_approver(ctx, path): if attribute in org_metadata: return org_metadata[attribute] else: - return None + return "" -def get_doi(ctx, path, doi='version'): +def get_doi(ctx: rule.Context, path: str, doi: str = 'version') -> Optional[str]: """Get the DOI of a data package in the vault.
:param ctx: Combined type of a callback and rei struct @@ -1516,7 +1419,7 @@ def get_doi(ctx, path, doi='version'): return None -def get_previous_version(ctx, path): +def get_previous_version(ctx: rule.Context, path: str) -> Optional[str]: """Get the previous version of a data package in the vault. :param ctx: Combined type of a callback and rei struct @@ -1536,7 +1439,7 @@ def get_previous_version(ctx, path): return None -def get_title(ctx, path): +def get_title(ctx: rule.Context, path: str) -> str: """Get the title of a data package in the vault. :param ctx: Combined type of a callback and rei struct @@ -1556,7 +1459,7 @@ def get_title(ctx, path): return "(no title)" -def meta_add_new_version(ctx, new_version, previous_version): +def meta_add_new_version(ctx: rule.Context, new_version: str, previous_version: str) -> None: """Add new version as related resource metadata to data package in a vault. :param ctx: Combined type of a callback and rei struct @@ -1604,13 +1507,13 @@ def meta_add_new_version(ctx, new_version, previous_version): meta_form.save(ctx, new_version, metadata) -def get_all_doi_versions(ctx, path): +def get_all_doi_versions(ctx: rule.Context, path: str) -> Tuple[List, List, List]: """Get the path and DOI of latest versions of published data package in a vault. - :param ctx: Combined type of a callback and rei struct - :param path: Path of vault with data packages + :param ctx: Combined type of a callback and rei struct + :param path: Path of vault with data packages - :return: Dict of data packages with DOI + :return: Lists of data packages with DOI """ iter = genquery.row_iterator( @@ -1646,7 +1549,7 @@ def get_all_doi_versions(ctx, path): @api.make() -def api_vault_get_published_packages(ctx, path): +def api_vault_get_published_packages(ctx: rule.Context, path: str) -> Dict: """Get the path and DOI of latest versions of published data package in a vault. 
:param ctx: Combined type of a callback and rei struct @@ -1676,7 +1579,7 @@ def api_vault_get_published_packages(ctx, path): return published_packages -def update_archive(ctx, coll, attr=None): +def update_archive(ctx: rule.Context, coll: str, attr: Optional[str] = None) -> None: """Potentially update archive after metadata changed. :param ctx: Combined type of a callback and rei struct @@ -1685,10 +1588,9 @@ def update_archive(ctx, coll, attr=None): """ if config.enable_data_package_archive: import vault_archive - vault_archive.update(ctx, coll, attr) @rule.make(inputs=[], outputs=[0]) -def rule_vault_copy_numthreads(ctx): +def rule_vault_copy_numthreads(ctx: rule.Context) -> int: return get_vault_copy_numthreads(ctx) diff --git a/vault_archive.py b/vault_archive.py index 96be89a0f..903a45749 100644 --- a/vault_archive.py +++ b/vault_archive.py @@ -5,6 +5,7 @@ import json import time +from typing import Dict, List, Optional import genquery import irods_types @@ -25,7 +26,7 @@ 'rule_vault_update_archive'] -def package_system_metadata(ctx, coll): +def package_system_metadata(ctx: rule.Context, coll: str) -> List: """Retrieve system metadata of collection. :param ctx: Combined type of a callback and rei struct @@ -46,7 +47,7 @@ def package_system_metadata(ctx, coll): ] -def package_provenance_log(ctx, system_metadata): +def package_provenance_log(ctx: rule.Context, system_metadata: List) -> List: """Retrieve provenance log from system metadata. 
:param ctx: Combined type of a callback and rei struct @@ -54,7 +55,7 @@ def package_provenance_log(ctx, system_metadata): :returns: List of dicts with provenance log """ - def key(item): + def key(item: Dict) -> int: return int(item["time"]) provenance_log = [] @@ -69,15 +70,16 @@ def key(item): return sorted(provenance_log, key=key) -def package_archive_path(ctx, coll): +def package_archive_path(ctx: rule.Context, coll: str) -> Optional[str]: for row in genquery.row_iterator("DATA_PATH", "COLL_NAME = '{}' AND DATA_NAME = 'archive.tar'".format(coll), genquery.AS_LIST, ctx): return row[0] + return None -def vault_archivable(ctx, coll): +def vault_archivable(ctx: rule.Context, coll: str) -> bool: minimum = int(config.data_package_archive_minimum) maximum = int(config.data_package_archive_maximum) @@ -101,11 +103,11 @@ def vault_archivable(ctx, coll): return False -def vault_archival_status(ctx, coll): +def vault_archival_status(ctx: rule.Context, coll: str) -> str: return bagit.status(ctx, coll) -def create_archive(ctx, coll): +def create_archive(ctx: rule.Context, coll: str) -> None: log.write(ctx, "Creating archive of data package <{}>".format(coll)) user_metadata = meta.get_latest_vault_metadata_path(ctx, coll) system_metadata = package_system_metadata(ctx, coll) @@ -130,7 +132,7 @@ def create_archive(ctx, coll): ctx.dmput(package_archive_path(ctx, coll), config.data_package_archive_fqdn, "REG") -def extract_archive(ctx, coll): +def extract_archive(ctx: rule.Context, coll: str) -> None: while True: state = ctx.dmattr(package_archive_path(ctx, coll), config.data_package_archive_fqdn, "")["arguments"][2] if state not in ("UNM", "MIG"): @@ -144,7 +146,7 @@ def extract_archive(ctx, coll): bagit.extract(ctx, coll + "/archive.tar", coll + "/archive", resource=config.resource_vault) -def vault_archive(ctx, actor, coll): +def vault_archive(ctx: rule.Context, actor: str, coll: str) -> str: try: # Prepare for archival. 
avu.set_on_coll(ctx, coll, constants.IIARCHIVEATTRNAME, "archive") @@ -165,7 +167,7 @@ def vault_archive(ctx, actor, coll): return "Failure" -def vault_create_archive(ctx, coll): +def vault_create_archive(ctx: rule.Context, coll: str) -> str: if vault_archival_status(ctx, coll) != "archive": return "Invalid" try: @@ -202,7 +204,7 @@ def vault_create_archive(ctx, coll): return "Failure" -def vault_unarchive(ctx, actor, coll): +def vault_unarchive(ctx: rule.Context, actor: str, coll: str) -> str: try: # Prepare for unarchival. avu.set_on_coll(ctx, coll, constants.IIARCHIVEATTRNAME, "extract") @@ -225,7 +227,7 @@ def vault_unarchive(ctx, actor, coll): return "Failure" -def vault_extract_archive(ctx, coll): +def vault_extract_archive(ctx: rule.Context, coll: str) -> str: if vault_archival_status(ctx, coll) != "extract": return "Invalid" try: @@ -251,13 +253,13 @@ def vault_extract_archive(ctx, coll): return "Failure" -def update(ctx, coll, attr): +def update(ctx: rule.Context, coll: str, attr: Optional[str]) -> None: if pathutil.info(coll).space == pathutil.Space.VAULT and attr not in (constants.IIARCHIVEATTRNAME, constants.UUPROVENANCELOG) and vault_archival_status(ctx, coll) == "archived": avu.set_on_coll(ctx, coll, constants.IIARCHIVEATTRNAME, "update") ctx.dmget(package_archive_path(ctx, coll), config.data_package_archive_fqdn, "OFL") -def vault_update_archive(ctx, coll): +def vault_update_archive(ctx: rule.Context, coll: str) -> str: try: log.write(ctx, "Start update of archived data package <{}>".format(coll)) avu.set_on_coll(ctx, coll, constants.IIARCHIVEATTRNAME, "updating") @@ -279,7 +281,7 @@ def vault_update_archive(ctx, coll): @api.make() -def api_vault_archive(ctx, coll): +def api_vault_archive(ctx: rule.Context, coll: str) -> api.Result: """Request to archive vault data package. 
:param ctx: Combined type of a callback and rei struct @@ -305,7 +307,7 @@ def api_vault_archive(ctx, coll): @api.make() -def api_vault_archival_status(ctx, coll): +def api_vault_archival_status(ctx: rule.Context, coll: str) -> api.Result: """Request archival status of vault data package. :param ctx: Combined type of a callback and rei struct @@ -317,7 +319,7 @@ def api_vault_archival_status(ctx, coll): @api.make() -def api_vault_extract(ctx, coll): +def api_vault_extract(ctx: rule.Context, coll: str) -> api.Result: """Request to unarchive an archived vault data package. :param ctx: Combined type of a callback and rei struct @@ -343,23 +345,25 @@ def api_vault_extract(ctx, coll): @rule.make(inputs=[0, 1, 2], outputs=[3]) -def rule_vault_archive(ctx, actor, coll, action): +def rule_vault_archive(ctx: rule.Context, actor: str, coll: str, action: str) -> str: if action == "archive": return vault_archive(ctx, actor, coll) elif action == "extract": return vault_unarchive(ctx, actor, coll) + else: + return "Failure" @rule.make(inputs=[0], outputs=[1]) -def rule_vault_create_archive(ctx, coll): +def rule_vault_create_archive(ctx: rule.Context, coll: str) -> str: return vault_create_archive(ctx, coll) @rule.make(inputs=[0], outputs=[1]) -def rule_vault_extract_archive(ctx, coll): +def rule_vault_extract_archive(ctx: rule.Context, coll: str) -> str: return vault_extract_archive(ctx, coll) @rule.make(inputs=[0], outputs=[1]) -def rule_vault_update_archive(ctx, coll): +def rule_vault_update_archive(ctx: rule.Context, coll: str) -> str: return vault_update_archive(ctx, coll) diff --git a/vault_download.py b/vault_download.py index ba99dd1a6..36d5ad710 100644 --- a/vault_download.py +++ b/vault_download.py @@ -14,7 +14,7 @@ 'rule_vault_download_archive'] -def vault_downloadable(ctx, coll): +def vault_downloadable(ctx: rule.Context, coll: str) -> bool: if coll.endswith("/original"): return False @@ -32,17 +32,17 @@ def vault_downloadable(ctx, coll): return False -def 
vault_bagitor(ctx, coll): +def vault_bagitor(ctx: rule.Context, coll: str) -> str: for row in genquery.row_iterator("META_COLL_ATTR_VALUE", "COLL_NAME = '{}' AND META_COLL_ATTR_NAME = '{}'".format(coll, constants.IIBAGITOR), genquery.AS_LIST, ctx): return row[0] - return False + return "" -def vault_download(ctx, actor, coll): +def vault_download(ctx: rule.Context, actor: str, coll: str) -> str: try: # Prepare for download. avu.set_on_coll(ctx, coll, constants.IIARCHIVEATTRNAME, "bagit") @@ -54,7 +54,7 @@ def vault_download(ctx, actor, coll): return "Failure" -def vault_download_archive(ctx, coll): +def vault_download_archive(ctx: rule.Context, coll: str) -> str: if bagit.status(ctx, coll) != "bagit": return "Invalid" try: @@ -84,7 +84,7 @@ def vault_download_archive(ctx, coll): @api.make() -def api_vault_download(ctx, coll): +def api_vault_download(ctx: rule.Context, coll: str) -> api.Result: """Request to download a vault data package. :param ctx: Combined type of a callback and rei struct @@ -109,10 +109,10 @@ def api_vault_download(ctx, coll): @rule.make(inputs=[0, 1], outputs=[2]) -def rule_vault_download(ctx, actor, coll): +def rule_vault_download(ctx: rule.Context, actor: str, coll: str) -> str: return vault_download(ctx, actor, coll) @rule.make(inputs=[0], outputs=[1]) -def rule_vault_download_archive(ctx, coll): +def rule_vault_download_archive(ctx: rule.Context, coll: str) -> str: return vault_download_archive(ctx, coll)