From bbfbaf0e90ee9544ea23a5bd8a9d7da73a97fa77 Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Tue, 19 Nov 2024 15:47:16 +0100 Subject: [PATCH 01/27] YDA-5992: remove Python 2 lint workflow --- .github/workflows/python2.yml | 35 ----------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 .github/workflows/python2.yml diff --git a/.github/workflows/python2.yml b/.github/workflows/python2.yml deleted file mode 100644 index 45b3e90d9..000000000 --- a/.github/workflows/python2.yml +++ /dev/null @@ -1,35 +0,0 @@ -name: "Python 2 lint" - -on: - push: - paths-ignore: - - 'tests/**' - pull_request: - paths-ignore: - - 'tests/**' - -jobs: - flake8: - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [2.7] - steps: - - uses: actions/checkout@v4 - - - name: Set up Python - # setup-python stopped supporting Python 2.7, use https://github.com/MatteoH2O1999/setup-python - uses: MatteoH2O1999/setup-python@v3.2.1 - with: - python-version: ${{ matrix.python-version }} - allow-build: info - cache-build: true - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install flake8 flake8-import-order codespell - - - name: Lint with flake8 - run: | - flake8 --statistics From 72dcf3328edb5c8bdc3dbcc109eeb94596117b64 Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Tue, 19 Nov 2024 15:49:17 +0100 Subject: [PATCH 02/27] YDA-5992: Python3 simplifies UTF-8 handling --- json_landing_page.py | 4 +-- research.py | 14 ++++---- util/api.py | 4 +-- util/jsonutil.py | 77 +++----------------------------------------- 4 files changed, 15 insertions(+), 84 deletions(-) diff --git a/json_landing_page.py b/json_landing_page.py index 5a67f9114..03cae032e 100644 --- a/json_landing_page.py +++ b/json_landing_page.py @@ -57,9 +57,7 @@ def json_landing_page_create_json_landing_page(ctx, zone, template_name, combi_j """ # Landing page creation is part of the publication process # Read user & system metadata from corresponding combi JSON file - # (Python2) 'want_bytes=False': Do not encode embedded unicode strings as - # UTF-8, as that will trip up jinja2. - json_data = jsonutil.read(ctx, combi_json_path, want_bytes=False) + json_data = jsonutil.read(ctx, combi_json_path) # Remove empty objects to prevent empty fields on landingpage. json_data = misc.remove_empty_objects(json_data) diff --git a/research.py b/research.py index ff61fd41e..60960e0ef 100644 --- a/research.py +++ b/research.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """Functions for the research space.""" -__copyright__ = 'Copyright (c) 2019-2023, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import genquery @@ -64,7 +64,7 @@ def api_research_folder_add(ctx, coll, new_folder_name): return error_response try: - validate_filepath(coll_target.decode('utf-8')) + validate_filepath(coll_target) except ValidationError: return api.Error('invalid_foldername', 'This is not a valid folder name. Please choose another name for your folder') @@ -125,7 +125,7 @@ def folder_copy_check(ctx, folder_path, new_folder_path, overwrite, copy=True): return False, api.Error('invalid_foldername', 'It is not allowed to use apostrophes in a folder name') try: - validate_filepath(new_folder_path.decode('utf-8')) + validate_filepath(new_folder_path) except ValidationError: return False, api.Error('invalid_foldername', 'This is not a valid folder name. 
Please choose another name for your folder') @@ -232,7 +232,7 @@ def api_research_folder_rename(ctx, new_folder_name, coll, org_folder_name): return error_response try: - validate_filepath(coll_target.decode('utf-8')) + validate_filepath(coll_target) except ValidationError: return api.Error('invalid_foldername', 'This is not a valid folder name. Please choose another name for your folder') @@ -376,7 +376,7 @@ def api_research_file_copy(ctx, filepath, new_filepath, overwrite=False): # These are of the NEW filepath coll, data_name = pathutil.chop(new_filepath) try: - validate_filename(data_name.decode('utf-8')) + validate_filename(data_name) except Exception: return api.Error('invalid_filename', 'This is not a valid file name. Please choose another name') @@ -439,7 +439,7 @@ def api_research_file_rename(ctx, new_file_name, coll, org_file_name): return api.Error('missing_filename', 'Missing filename. Please add a file name') try: - validate_filename(new_file_name.decode('utf-8')) + validate_filename(new_file_name) except Exception: return api.Error('invalid_filename', 'This is not a valid file name. Please choose another name') @@ -515,7 +515,7 @@ def api_research_file_move(ctx, filepath, new_filepath, overwrite=False): # These are of the NEW filepath coll, data_name = pathutil.chop(new_filepath) try: - validate_filename(data_name.decode('utf-8')) + validate_filename(data_name) except Exception: return api.Error('invalid_filename', 'This is not a valid file name. Please choose another name') diff --git a/util/api.py b/util/api.py index 78cacdbd7..cc2d87b03 100644 --- a/util/api.py +++ b/util/api.py @@ -4,7 +4,7 @@ For example usage, see make(). """ -__copyright__ = 'Copyright (c) 2019-2023, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import base64 @@ -127,7 +127,7 @@ def bad_request(debug_info=None): try: base64_decoded = base64.b64decode(inp) decompressed_data = zlib.decompress(base64_decoded) - data = jsonutil.parse(decompressed_data.decode('utf-8')) + data = jsonutil.parse(decompressed_data) if type(data) is not OrderedDict: raise jsonutil.ParseError('Argument is not a JSON object') except base64.binascii.Error: diff --git a/util/jsonutil.py b/util/jsonutil.py index 8c627add8..fdc975c36 100644 --- a/util/jsonutil.py +++ b/util/jsonutil.py @@ -17,85 +17,21 @@ class ParseError(error.UUError): - """ - Exception for unparsable JSON text. - - Python2's JSON lib raises ValueError on bad parses, which is ambiguous. - Use this exception (together with the functions below) instead. - (this can also ease transitions to python3, since python3's json already - uses a different, unambiguous exception type: json.JSONDecodeError) - """ - - -def _fold(x, **alg): - """Fold over a JSON structure. - - Calls functions from 'alg', indexed by the type of value, to transform values recursively. - - :param x: JSON structure - :param **alg: Functions to fold over JSON structure - - :returns: Function f folded over a JSON structure - """ - f = alg.get(type(x).__name__, lambda y: y) - if type(x) in [dict, OrderedDict]: - return f(OrderedDict([(k, _fold(v, **alg)) for k, v in x.items()])) - elif type(x) is list: - return f([_fold(v, **alg) for v in x]) - else: - return f(x) - - -def _demote_strings(json_data): - """Transform unicode -> UTF-8 encoded strings recursively, for a given JSON structure. - - Needed for handling unicode in JSON as long as we are still using Python2. - Both JSON string values and JSON object (dict) keys are transformed. 
+ """Exception for unparsable JSON text.""" - :param json_data: JSON structure to transform - :returns: JSON structure with unicode strings transformed to UTF-8 encoded strings - """ - return _fold(json_data, - unicode=lambda x: x.encode('utf-8'), - OrderedDict=lambda x: OrderedDict([(k.encode('utf-8'), v) for k, v in x.items()])) - - -def _promote_strings(json_data): - """Transform UTF-8 encoded strings -> unicode recursively, for a given JSON structure. - - Needed for handling unicode in JSON as long as we are still using Python2. - Both JSON string values and JSON object (dict) keys are transformed. - - May raise UnicodeDecodeError if strings are not proper UTF-8. - - :param json_data: JSON structure to transform - - :returns: JSON structure with UTF-8 encoded strings transformed to unicode strings - """ - return _fold(json_data, - str=lambda x: x.decode('utf-8', errors='replace'), - OrderedDict=lambda x: OrderedDict([(k.decode('utf-8'), v) for k, v in x.items()]), - dict=lambda x: OrderedDict([(k.decode('utf-8'), v) for k, v in x.items()])) - - -def parse(text, want_bytes=True): +def parse(text): """Parse JSON into an OrderedDict. - All strings are UTF-8 encoded with Python2 in mind. - This behavior is disabled if want_bytes is False. - :param text: JSON to parse into an OrderedDict - :param want_bytes: Should strings be UTF-8 encoded? :raises ParseError: JSON file format error :returns: JSON string as OrderedDict """ try: - x = json.loads(text, object_pairs_hook=OrderedDict) - return _demote_strings(x) if want_bytes else x - except ValueError: + return json.loads(text, object_pairs_hook=OrderedDict) + except json.JSONDecodeError: raise ParseError('JSON file format error') @@ -103,12 +39,9 @@ def dump(data, **options): """Dump an object to a JSON string.""" # json.dumps seems to not like mixed str/unicode input, so make sure # everything is of the same type first. - data = _promote_strings(data) return json.dumps(data, ensure_ascii=False, # Don't unnecessarily use \u0000 escapes. 
- encoding='utf-8', - **({'indent': 4} if options == {} else options)) \ - .encode('utf-8') # turn unicode json string back into an encoded str + **({'indent': 4} if options == {} else options)) def read(callback, path, **options): From 3f572993d83bf93795413fb7d18d666fb90ed150 Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Tue, 19 Nov 2024 15:57:21 +0100 Subject: [PATCH 03/27] YDA-5992: as of PEP 3120, the default encoding is UTF-8 --- __init__.py | 1 - admin.py | 1 - browse.py | 1 - data_access_token.py | 3 +-- datacite.py | 3 +-- datarequest.py | 1 - deposit.py | 1 - epic.py | 3 +-- folder.py | 1 - groups.py | 1 - groups_import.py | 1 - integration_tests.py | 1 - json_datacite.py | 1 - json_landing_page.py | 1 - mail.py | 3 +-- meta.py | 1 - meta_form.py | 1 - notifications.py | 3 +-- policies.py | 1 - policies_datamanager.py | 3 +-- policies_datapackage_status.py | 3 +-- policies_datarequest_status.py | 3 +-- policies_folder_status.py | 1 - policies_intake.py | 1 - policies_utils.py | 1 - provenance.py | 3 +-- publication.py | 1 - publication_troubleshoot.py | 1 - replication.py | 1 - research.py | 1 - resources.py | 1 - revision_strategies.py | 3 +-- revision_utils.py | 1 - revisions.py | 1 - schema.py | 3 +-- schema_transformation.py | 3 +-- schema_transformations.py | 1 - schema_transformations_utils.py | 1 - settings.py | 3 +-- sram.py | 1 - tools/pep/pep_prototypes.py | 1 - unit-tests/test_group_import.py | 2 -- unit-tests/test_policies.py | 1 - unit-tests/test_revisions.py | 1 - unit-tests/test_schema_transformations.py | 1 - unit-tests/test_util_misc.py | 1 - unit-tests/test_util_pathutil.py | 3 +-- unit-tests/test_util_yoda_names.py | 3 +-- unit-tests/unit_tests.py | 4 +--- util/__init__.py | 1 - util/api.py | 1 - util/arb_data_manager.py | 3 +-- util/avu.py | 1 - util/bagit.py | 3 +-- util/cached_data_manager.py | 4 +--- util/collection.py | 3 +-- util/config.py | 1 - util/constants.py | 1 - util/data_object.py | 1 - util/error.py | 3 +-- util/genquery_col_constants.py | 1 - util/group.py | 3 +-- util/irods_type_info.py | 1 - util/jsonutil.py | 1 - util/log.py | 3 +-- util/misc.py | 1 - util/msi.py | 1 - util/pathutil.py | 3 +-- util/policy.py | 3 +-- util/resource.py | 3 +-- util/rule.py | 3 +-- util/user.py | 1 - util/yoda_names.py | 4 +--- vault.py | 1 - vault_archive.py | 1 - vault_download.py | 1 - 76 files changed, 28 insertions(+), 108 deletions(-) diff --git a/__init__.py b/__init__.py index 92c63a291..1172214ca 100644 --- a/__init__.py +++ b/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Yoda core ruleset containing iRODS and Python rules and policies useful for all Yoda environments.""" __version__ = '1.10.0' diff --git a/admin.py b/admin.py index 3bd77571e..b869cf880 100644 --- a/admin.py +++ b/admin.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions for admin module.""" __copyright__ = 'Copyright 2024, Utrecht University' diff --git a/browse.py b/browse.py index 117a5670c..e3bfae8bb 100644 --- a/browse.py +++ b/browse.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions for listing collection information.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/data_access_token.py b/data_access_token.py index 8410d48f1..29afd5537 100644 --- a/data_access_token.py +++ b/data_access_token.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Functions for token management.""" -__copyright__ = 'Copyright (c) 2021, Utrecht University' +__copyright__ = 'Copyright (c) 2021-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' 
import os diff --git a/datacite.py b/datacite.py index 51a7a95d1..4dd2f47c4 100644 --- a/datacite.py +++ b/datacite.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Functions for communicating with DataCite and some utilities.""" -__copyright__ = 'Copyright (c) 2019-2022, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import random diff --git a/datarequest.py b/datarequest.py index 02d482ce9..521d91b80 100644 --- a/datarequest.py +++ b/datarequest.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions to handle data requests.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/deposit.py b/deposit.py index fd2596762..038461111 100644 --- a/deposit.py +++ b/deposit.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions for deposit module.""" __copyright__ = 'Copyright (c) 2021-2024, Utrecht University' diff --git a/epic.py b/epic.py index 4333c32b1..335c9288b 100644 --- a/epic.py +++ b/epic.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Functions for communicating with EPIC and some utilities.""" -__copyright__ = 'Copyright (c) 2019, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import uuid diff --git a/folder.py b/folder.py index 519069e18..9278a2798 100644 --- a/folder.py +++ b/folder.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions to act on user-visible folders in the research or vault area.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/groups.py b/groups.py index f4dfbd69d..7bb383344 100644 --- a/groups.py +++ b/groups.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions for group management and group queries.""" __copyright__ = 'Copyright (c) 2018-2024, Utrecht University' diff --git a/groups_import.py b/groups_import.py index c1c876d68..1933c95fd 100644 --- a/groups_import.py +++ b/groups_import.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions related to importing group data.""" __copyright__ = 'Copyright (c) 2018-2024, Utrecht University' diff --git a/integration_tests.py b/integration_tests.py index f92f4c0dd..696e3a107 100644 --- a/integration_tests.py +++ b/integration_tests.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Integration tests for the development environment.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/json_datacite.py b/json_datacite.py index 04bbefafa..2ee72f0dc 100644 --- a/json_datacite.py +++ b/json_datacite.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions for transforming Yoda JSON to DataCite 4.4 JSON.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/json_landing_page.py b/json_landing_page.py index 03cae032e..87062cf06 100644 --- a/json_landing_page.py +++ b/json_landing_page.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions for transforming JSON to landingpage HTML.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/mail.py b/mail.py index fc1a99c00..bf8f6afad 100644 --- a/mail.py +++ b/mail.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Rules for sending e-mails.""" -__copyright__ = 'Copyright (c) 2020-2022, Utrecht University' +__copyright__ = 'Copyright (c) 2020-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import email diff --git a/meta.py b/meta.py index 797162ec5..bba63b57b 100644 --- a/meta.py +++ b/meta.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """JSON metadata handling.""" __copyright__ = 'Copyright (c) 
2019-2024, Utrecht University' diff --git a/meta_form.py b/meta_form.py index 2de46d63f..abbc274ec 100644 --- a/meta_form.py +++ b/meta_form.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """JSON metadata form handling.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/notifications.py b/notifications.py index babf34997..20623026c 100644 --- a/notifications.py +++ b/notifications.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Functions for user notifications.""" -__copyright__ = 'Copyright (c) 2021-2023, Utrecht University' +__copyright__ = 'Copyright (c) 2021-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' diff --git a/policies.py b/policies.py index a2048a119..b02f39056 100644 --- a/policies.py +++ b/policies.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """iRODS policy implementations.""" __copyright__ = 'Copyright (c) 2020-2024, Utrecht University' diff --git a/policies_datamanager.py b/policies_datamanager.py index 65007b7f6..cfa51f54f 100644 --- a/policies_datamanager.py +++ b/policies_datamanager.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Policy check functions for datamanager actions.""" -__copyright__ = 'Copyright (c) 2019-2022, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' from util import * diff --git a/policies_datapackage_status.py b/policies_datapackage_status.py index d1a8c7ea0..e4d5bbd34 100644 --- a/policies_datapackage_status.py +++ b/policies_datapackage_status.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Policy check functions for data package status transitions.""" -__copyright__ = 'Copyright (c) 2019-2022, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import folder diff --git a/policies_datarequest_status.py b/policies_datarequest_status.py index 6a41b6a83..16e3bc482 100644 --- a/policies_datarequest_status.py +++ b/policies_datarequest_status.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Policy check functions for datarequest status transitions.""" -__copyright__ = "Copyright (c) 2019-2020, Utrecht University" +__copyright__ = "Copyright (c) 2019-2024, Utrecht University" __license__ = "GPLv3, see LICENSE" import re diff --git a/policies_folder_status.py b/policies_folder_status.py index 9f96adb8c..f9483928f 100644 --- a/policies_folder_status.py +++ b/policies_folder_status.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Policy check functions for folder status transitions.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/policies_intake.py b/policies_intake.py index 159ddbca9..3981bbfeb 100644 --- a/policies_intake.py +++ b/policies_intake.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Policies for intake.""" __copyright__ = 'Copyright (c) 2021-2024, Utrecht University' diff --git a/policies_utils.py b/policies_utils.py index a85424eb9..a3f9e98eb 100644 --- a/policies_utils.py +++ b/policies_utils.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """iRODS policy utility functions""" __copyright__ = 'Copyright (c) 2024, Utrecht University' diff --git a/provenance.py b/provenance.py index f0485fe5b..171cb8493 100644 --- a/provenance.py +++ b/provenance.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Functions for provenance handling.""" -__copyright__ = 'Copyright (c) 2019-2021, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import json diff --git a/publication.py b/publication.py index 
d97507197..c86faceb4 100644 --- a/publication.py +++ b/publication.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions for publication.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/publication_troubleshoot.py b/publication_troubleshoot.py index 6ceafe737..4350888fc 100644 --- a/publication_troubleshoot.py +++ b/publication_troubleshoot.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions and rules for troubleshooting published data packages.""" __copyright__ = 'Copyright (c) 2024, Utrecht University' diff --git a/replication.py b/replication.py index eac2e93d1..492c5077a 100644 --- a/replication.py +++ b/replication.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions for replication management.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/research.py b/research.py index 60960e0ef..a906abba3 100644 --- a/research.py +++ b/research.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions for the research space.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/resources.py b/resources.py index e0bc935b7..f93ea0e07 100644 --- a/resources.py +++ b/resources.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions for statistics module.""" __copyright__ = 'Copyright (c) 2018-2024, Utrecht University' diff --git a/revision_strategies.py b/revision_strategies.py index bfd8b5fb7..dbae94b0c 100644 --- a/revision_strategies.py +++ b/revision_strategies.py @@ -1,8 +1,7 @@ -# -*- coding: utf-8 -*- """Functions for revision strategies, which control which revisions are kept and which ones are to be discarded.""" -__copyright__ = 'Copyright (c) 2019-2023, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' diff --git a/revision_utils.py b/revision_utils.py index 57646712b..3aee91890 100644 --- a/revision_utils.py +++ b/revision_utils.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Utility functions for revision management.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/revisions.py b/revisions.py index e1e3d4d8a..217bedc5b 100644 --- a/revisions.py +++ b/revisions.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions for revision management.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/schema.py b/schema.py index fe621e8c0..6ce9d4707 100644 --- a/schema.py +++ b/schema.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Functions for finding the active schema.""" -__copyright__ = 'Copyright (c) 2018-2023, Utrecht University' +__copyright__ = 'Copyright (c) 2018-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import re diff --git a/schema_transformation.py b/schema_transformation.py index 77299ada3..59758c75a 100644 --- a/schema_transformation.py +++ b/schema_transformation.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Functions for handling schema updates within any yoda-metadata file.""" -__copyright__ = 'Copyright (c) 2018-2023, Utrecht University' +__copyright__ = 'Copyright (c) 2018-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' __all__ = ['rule_batch_transform_vault_metadata', diff --git a/schema_transformations.py b/schema_transformations.py index 5e6bd9ad9..0ec48d0ad 100644 --- a/schema_transformations.py +++ b/schema_transformations.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """JSON schema transformation functions.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/schema_transformations_utils.py 
b/schema_transformations_utils.py index d5cf58f68..16207dd96 100644 --- a/schema_transformations_utils.py +++ b/schema_transformations_utils.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """JSON schema transformation utility functions.""" __copyright__ = 'Copyright (c) 2024, Utrecht University' diff --git a/settings.py b/settings.py index bd0cbe8d8..24bc673ab 100644 --- a/settings.py +++ b/settings.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Functions for user settings.""" -__copyright__ = 'Copyright (c) 2021, Utrecht University' +__copyright__ = 'Copyright (c) 2021-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' from genquery import Query diff --git a/sram.py b/sram.py index 3bd0e8c71..13fbbe827 100644 --- a/sram.py +++ b/sram.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions for communicating with SRAM and some utilities.""" __copyright__ = 'Copyright (c) 2023-2024, Utrecht University' diff --git a/tools/pep/pep_prototypes.py b/tools/pep/pep_prototypes.py index 6fceedc8f..ea82eb070 100644 --- a/tools/pep/pep_prototypes.py +++ b/tools/pep/pep_prototypes.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- from util import policy # Use below prototypes as a basis for PEP rule implementations in policies.py diff --git a/unit-tests/test_group_import.py b/unit-tests/test_group_import.py index 53ab77e58..ddfcf2fa7 100644 --- a/unit-tests/test_group_import.py +++ b/unit-tests/test_group_import.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Unit tests for the groups functionality """ diff --git a/unit-tests/test_policies.py b/unit-tests/test_policies.py index 1a9e61000..4d8ac896b 100644 --- a/unit-tests/test_policies.py +++ b/unit-tests/test_policies.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Unit tests for the policies""" __copyright__ = 'Copyright (c) 2023-2024, Utrecht University' diff --git a/unit-tests/test_revisions.py b/unit-tests/test_revisions.py index 522aab1cb..3fbfca485 100644 --- a/unit-tests/test_revisions.py +++ b/unit-tests/test_revisions.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Unit tests for the revision functions""" __copyright__ = 'Copyright (c) 2023-2024, Utrecht University' diff --git a/unit-tests/test_schema_transformations.py b/unit-tests/test_schema_transformations.py index d273365ca..0fe6dd888 100644 --- a/unit-tests/test_schema_transformations.py +++ b/unit-tests/test_schema_transformations.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Unit tests for the correctify functions in schema_transformations""" __copyright__ = 'Copyright (c) 2024, Utrecht University' diff --git a/unit-tests/test_util_misc.py b/unit-tests/test_util_misc.py index 428fa33e8..51c63fe99 100644 --- a/unit-tests/test_util_misc.py +++ b/unit-tests/test_util_misc.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Unit tests for the misc utils module""" __copyright__ = 'Copyright (c) 2023-2024, Utrecht University' diff --git a/unit-tests/test_util_pathutil.py b/unit-tests/test_util_pathutil.py index 179a21a5f..9e6d406ca 100644 --- a/unit-tests/test_util_pathutil.py +++ b/unit-tests/test_util_pathutil.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Unit tests for the pathutil utils module""" -__copyright__ = 'Copyright (c) 2023, Utrecht University' +__copyright__ = 'Copyright (c) 2023-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import sys diff --git a/unit-tests/test_util_yoda_names.py b/unit-tests/test_util_yoda_names.py index 4201aee6a..4d2b79dcf 100644 --- a/unit-tests/test_util_yoda_names.py +++ b/unit-tests/test_util_yoda_names.py @@ -1,7 +1,6 @@ -# -*- coding: 
utf-8 -*- """Unit tests for the yoda_names utils functions""" -__copyright__ = 'Copyright (c) 2023, Utrecht University' +__copyright__ = 'Copyright (c) 2023-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import sys diff --git a/unit-tests/unit_tests.py b/unit-tests/unit_tests.py index 8af940d91..4d6cb011c 100644 --- a/unit-tests/unit_tests.py +++ b/unit-tests/unit_tests.py @@ -1,6 +1,4 @@ -# -*- coding: utf-8 -*- - -__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' +ยท__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' from unittest import makeSuite, TestSuite diff --git a/util/__init__.py b/util/__init__.py index 8828ec69d..2353fecbb 100644 --- a/util/__init__.py +++ b/util/__init__.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Generic UU ruleset utility functions and types. This subpackage does not export any callable rules by itself. diff --git a/util/api.py b/util/api.py index cc2d87b03..a31e1910d 100644 --- a/util/api.py +++ b/util/api.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions for creating API rules. For example usage, see make(). diff --git a/util/arb_data_manager.py b/util/arb_data_manager.py index a059f4bd3..769af9801 100644 --- a/util/arb_data_manager.py +++ b/util/arb_data_manager.py @@ -1,10 +1,9 @@ -# -*- coding: utf-8 -*- """This file contain functions that implement cached data storage for automatic resource balancing, which takes care of ensuring that new data objects are put on resources that have enough space available. """ -__copyright__ = 'Copyright (c) 2019-2023, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import genquery diff --git a/util/avu.py b/util/avu.py index 0098fcea4..1ccb9f7e5 100644 --- a/util/avu.py +++ b/util/avu.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Utility / convenience functions for dealing with AVUs.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/util/bagit.py b/util/bagit.py index 187e50953..528c2ad7d 100644 --- a/util/bagit.py +++ b/util/bagit.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Functions to copy packages to the vault and manage permissions of vault packages.""" -__copyright__ = 'Copyright (c) 2023, Utrecht University' +__copyright__ = 'Copyright (c) 2023-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import itertools diff --git a/util/cached_data_manager.py b/util/cached_data_manager.py index 6c8a9666d..157e2c8cb 100644 --- a/util/cached_data_manager.py +++ b/util/cached_data_manager.py @@ -1,6 +1,4 @@ -# -*- coding: utf-8 -*- - -__copyright__ = 'Copyright (c) 2019-2023, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import traceback diff --git a/util/collection.py b/util/collection.py index f5e3a7e52..23d0ae0dc 100644 --- a/util/collection.py +++ b/util/collection.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Utility / convenience functions for dealing with collections.""" -__copyright__ = 'Copyright (c) 2019-2021, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import itertools diff --git a/util/config.py b/util/config.py index 4827294c1..88af1c625 100644 --- a/util/config.py +++ b/util/config.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Yoda ruleset configuration.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/util/constants.py b/util/constants.py index 
a50949100..0b76ebaee 100644 --- a/util/constants.py +++ b/util/constants.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Constants that apply to all Yoda environments.""" __copyright__ = 'Copyright (c) 2016-2024, Utrecht University' diff --git a/util/data_object.py b/util/data_object.py index 73fed02c2..f8c3cb2ae 100644 --- a/util/data_object.py +++ b/util/data_object.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Utility / convenience functions for data object IO.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/util/error.py b/util/error.py index ee0cda852..44b9b0e01 100644 --- a/util/error.py +++ b/util/error.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Common UU Error/Exception types.""" -__copyright__ = 'Copyright (c) 2019, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' diff --git a/util/genquery_col_constants.py b/util/genquery_col_constants.py index 19fd610d2..a58ad5133 100644 --- a/util/genquery_col_constants.py +++ b/util/genquery_col_constants.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """GenQuery column constants Adapted from ./lib/core/include/rodsGenQuery.h in main iRODS repository.""" diff --git a/util/group.py b/util/group.py index 7ea8d4c10..ee6de5c88 100644 --- a/util/group.py +++ b/util/group.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Utility / convenience functions for querying group info.""" -__copyright__ = 'Copyright (c) 2019-2023, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import genquery diff --git a/util/irods_type_info.py b/util/irods_type_info.py index e9d2f4939..b39aedcee 100644 --- a/util/irods_type_info.py +++ b/util/irods_type_info.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Allows converting certain irods types to string representation for debugging purposes. Importing this module (anywhere) adds stringifyability to some frequently-used diff --git a/util/jsonutil.py b/util/jsonutil.py index fdc975c36..93ae7729e 100644 --- a/util/jsonutil.py +++ b/util/jsonutil.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Utility / convenience functions for dealing with JSON.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/util/log.py b/util/log.py index 545e626ca..9b5d70e36 100644 --- a/util/log.py +++ b/util/log.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Logging facilities.""" -__copyright__ = 'Copyright (c) 2019-2022, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import inspect diff --git a/util/misc.py b/util/misc.py index 73b05d2e6..005a45f3a 100644 --- a/util/misc.py +++ b/util/misc.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Miscellaneous util functions.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/util/msi.py b/util/msi.py index f18d28781..3e9826313 100644 --- a/util/msi.py +++ b/util/msi.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """iRODS microservice wrappers that provide primitive error handling. Microservices may fail and indicate failure in a number of different ways. 
diff --git a/util/pathutil.py b/util/pathutil.py index 32d2b6164..5b2ab2fda 100644 --- a/util/pathutil.py +++ b/util/pathutil.py @@ -1,9 +1,8 @@ -# -*- coding: utf-8 -*- """Utility / convenience functions for dealing with paths.""" # (ideally this module would be named 'path', but name conflicts cause too much pain) -__copyright__ = 'Copyright (c) 2019-2023, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import re diff --git a/util/policy.py b/util/policy.py index d4f8b4306..0916e6c64 100644 --- a/util/policy.py +++ b/util/policy.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Utilities for creating PEP rules.""" -__copyright__ = 'Copyright (c) 2019, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import api diff --git a/util/resource.py b/util/resource.py index f54faca5a..66ed0183d 100644 --- a/util/resource.py +++ b/util/resource.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Utility / convenience functions for dealing with resources.""" -__copyright__ = 'Copyright (c) 2019-2023, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import genquery diff --git a/util/rule.py b/util/rule.py index 702234d74..983661e24 100644 --- a/util/rule.py +++ b/util/rule.py @@ -1,7 +1,6 @@ -# -*- coding: utf-8 -*- """Experimental Python/Rule interface code.""" -__copyright__ = 'Copyright (c) 2019, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import json diff --git a/util/user.py b/util/user.py index 28725672b..887cc96e3 100644 --- a/util/user.py +++ b/util/user.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Utility / convenience functions for querying user info.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/util/yoda_names.py b/util/yoda_names.py index 5d51296fc..f1f295565 100644 --- a/util/yoda_names.py +++ b/util/yoda_names.py @@ -1,9 +1,7 @@ -# -*- coding: utf-8 -*- - """This class contains utility functions that process names of Yoda entities (e.g. category names, user names, etc.) 
""" -__copyright__ = 'Copyright (c) 2019-2023, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' import re diff --git a/vault.py b/vault.py index aa288a30a..8e1b1df4e 100644 --- a/vault.py +++ b/vault.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions to copy packages to the vault and manage permissions of vault packages.""" __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' diff --git a/vault_archive.py b/vault_archive.py index ef4e46c9b..96be89a0f 100644 --- a/vault_archive.py +++ b/vault_archive.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions to archive vault data packages.""" __copyright__ = 'Copyright (c) 2023-2024, Utrecht University' diff --git a/vault_download.py b/vault_download.py index 3e180135b..ba99dd1a6 100644 --- a/vault_download.py +++ b/vault_download.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- """Functions to download vault data packages.""" __copyright__ = 'Copyright (c) 2023-2024, Utrecht University' From 75408637713e307fbd1152768e05ffc1b06a376b Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Tue, 19 Nov 2024 16:17:21 +0100 Subject: [PATCH 04/27] YDA-5992: remove unicode literals, percent string formatting and update regex --- browse.py | 12 ++--- datarequest.py | 92 ++++++++++++++++----------------- deposit.py | 4 +- integration_tests.py | 4 +- mail.py | 2 +- meta.py | 2 +- meta_form.py | 2 +- replication.py | 2 +- revision_strategies.py | 2 +- revisions.py | 2 +- schema_transformation.py | 8 +-- schema_transformations.py | 4 +- schema_transformations_utils.py | 2 +- sram.py | 2 +- unit-tests/unit_tests.py | 2 +- util/api.py | 2 +- util/arb_data_manager.py | 4 +- util/cached_data_manager.py | 2 +- util/config.py | 8 +-- util/msi.py | 2 +- util/policy.py | 4 +- util/rule.py | 2 +- vault.py | 4 +- 23 files changed, 85 insertions(+), 85 deletions(-) diff --git a/browse.py b/browse.py index e3bfae8bb..5b3d4ccee 100644 --- a/browse.py +++ b/browse.py @@ -39,7 +39,7 @@ def api_browse_folder(ctx, """ def transform(row): # Remove ORDER_BY etc. wrappers from column names. - x = {re.sub('.*\((.*)\)', '\\1', k): v for k, v in row.items()} + x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} if 'DATA_NAME' in x and 'META_DATA_ATTR_VALUE' in x: return {x['DATA_NAME']: x['META_DATA_ATTR_VALUE']} elif 'DATA_NAME' in x: @@ -128,7 +128,7 @@ def api_browse_collections(ctx, """ def transform(row): # Remove ORDER_BY etc. wrappers from column names. - x = {re.sub('.*\((.*)\)', '\\1', k): v for k, v in row.items()} + x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} if 'DATA_NAME' in x: return {'name': x['DATA_NAME'], @@ -205,7 +205,7 @@ def api_search(ctx, """ def transform(row): # Remove ORDER_BY etc. wrappers from column names. - x = {re.sub('.*\((.*)\)', '\\1', k): v for k, v in row.items()} + x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} if 'DATA_NAME' in x: _, _, path, subpath = pathutil.info(x['COLL_NAME']) @@ -231,8 +231,8 @@ def transform(row): # Status description must be kept in tact. 
if search_type != 'status': search_string = search_string.replace("\\", "\\\\") - search_string = search_string.replace("%", "\%") - search_string = search_string.replace("_", "\_") + search_string = search_string.replace("%", r"\%") + search_string = search_string.replace("_", r"\_") zone = user.zone(ctx) @@ -295,7 +295,7 @@ def _filter_vault_deposit_index(row): :returns: boolean value that indicates whether row should be displayed """ # Remove ORDER_BY etc. wrappers from column names. - x = {re.sub('.*\((.*)\)', '\\1', k): v for k, v in row.items()} + x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} # Filter out deposit vault index collection return not re.match("^/[^/]+/home/vault-[^/]+/deposit-[^/]+/index$", x['COLL_NAME']) diff --git a/datarequest.py b/datarequest.py index 521d91b80..df1d2611d 100644 --- a/datarequest.py +++ b/datarequest.py @@ -615,7 +615,7 @@ def datarequest_provenance_write(ctx, request_id, request_status): :returns: Nothing """ # Check if request ID is valid - if re.search("^\d+$", request_id) is None: + if re.search(r"^\d+$", request_id) is None: return api.Error("input_error", "Invalid request ID supplied: {}.".format(request_id)) # Check if status parameter is valid @@ -766,7 +766,7 @@ def api_datarequest_browse(ctx, sort_on='name', sort_order='asc', offset=0, limi def transform(row): # Remove ORDER_BY etc. wrappers from column names. - x = {re.sub('.*\((.*)\)', '\\1', k): v for k, v in row.items()} + x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} return {'id': x['COLL_NAME'].split('/')[-1], 'name': x['COLL_OWNER_NAME'], @@ -775,14 +775,14 @@ def transform(row): def transform_title(row): # Remove ORDER_BY etc. wrappers from column names. - x = {re.sub('.*\((.*)\)', '\\1', k): v for k, v in row.items()} + x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} return {'id': x['COLL_NAME'].split('/')[-1], 'title': x['META_DATA_ATTR_VALUE']} def transform_status(row): # Remove ORDER_BY etc. wrappers from column names. - x = {re.sub('.*\((.*)\)', '\\1', k): v for k, v in row.items()} + x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} return {'id': x['COLL_NAME'].split('/')[-1], 'status': x['META_DATA_ATTR_VALUE']} @@ -2393,14 +2393,14 @@ def data_ready_emails(ctx, request_id): def mail_datarequest_researcher(ctx, truncated_title, resubmission, researcher_email, researcher_name, request_id, cc, dao): - subject = u"YOUth data request {} (\"{}\") (data assessment only): {}".format(request_id, truncated_title, "resubmitted" if resubmission else "submitted") if dao else u"YOUth data request {} (\"{}\"): {}".format(request_id, truncated_title, "resubmitted" if resubmission else "submitted") + subject = "YOUth data request {} (\"{}\") (data assessment only): {}".format(request_id, truncated_title, "resubmitted" if resubmission else "submitted") if dao else "YOUth data request {} (\"{}\"): {}".format(request_id, truncated_title, "resubmitted" if resubmission else "submitted") return mail.send(ctx, to=researcher_email, cc=cc, actor=user.full_name(ctx), subject=subject, - body=u"""Dear {}, + body="""Dear {}, Your data request has been submitted. 
@@ -2419,8 +2419,8 @@ def mail_datarequest_pm(ctx, truncated_title, resubmission, pm_email, request_id return mail.send(ctx, to=pm_email, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): {}".format(request_id, truncated_title, "resubmitted" if resubmission else "submitted"), - body=u"""Dear project manager, + subject="YOUth data request {} (\"{}\"): {}".format(request_id, truncated_title, "resubmitted" if resubmission else "submitted"), + body="""Dear project manager, A new data request has been submitted. @@ -2444,8 +2444,8 @@ def mail_datarequest_dao_pm(ctx, truncated_title, resubmission, pm_email, reques return mail.send(ctx, to=pm_email, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\") (data assessment only): {}".format(request_id, truncated_title, "resubmitted" if resubmission else "submitted"), - body=u"""Dear project manager, + subject="YOUth data request {} (\"{}\") (data assessment only): {}".format(request_id, truncated_title, "resubmitted" if resubmission else "submitted"), + body="""Dear project manager, A new data request (for the purpose of data assessment only) has been submitted. @@ -2467,8 +2467,8 @@ def mail_preliminary_review_accepted(ctx, truncated_title, datamanager_email, re return mail.send(ctx, to=datamanager_email, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): accepted for data manager review".format(request_id, truncated_title), - body=u"""Dear data manager, + subject="YOUth data request {} (\"{}\"): accepted for data manager review".format(request_id, truncated_title), + body="""Dear data manager, Data request {} has been approved for review by the YOUth project manager. @@ -2485,8 +2485,8 @@ def mail_datamanager_review_accepted(ctx, truncated_title, pm_email, request_id) return mail.send(ctx, to=pm_email, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): accepted by data manager".format(request_id, truncated_title), - body=u"""Dear project manager, + subject="YOUth data request {} (\"{}\"): accepted by data manager".format(request_id, truncated_title), + body="""Dear project manager, Data request {} has been accepted by the data manager. 
@@ -2501,8 +2501,8 @@ def mail_datamanager_review_resubmit(ctx, truncated_title, pm_email, datamanager return mail.send(ctx, to=pm_email, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): rejected (resubmit) by data manager".format(request_id, truncated_title), - body=u"""Dear project manager, + subject="YOUth data request {} (\"{}\"): rejected (resubmit) by data manager".format(request_id, truncated_title), + body="""Dear project manager, Data request {} has been rejected (resubmission allowed) by the data manager for the following reason(s): @@ -2519,8 +2519,8 @@ def mail_datamanager_review_rejected(ctx, truncated_title, pm_email, datamanager return mail.send(ctx, to=pm_email, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): rejected by data manager".format(request_id, truncated_title), - body=u"""Dear project manager, + subject="YOUth data request {} (\"{}\"): rejected by data manager".format(request_id, truncated_title), + body="""Dear project manager, Data request {} has been rejected by the data manager for the following reason(s): @@ -2538,8 +2538,8 @@ def mail_assignment_accepted_researcher(ctx, truncated_title, researcher_email, to=researcher_email, cc=cc, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): under review".format(request_id, truncated_title), - body=u"""Dear {}, + subject="YOUth data request {} (\"{}\"): under review".format(request_id, truncated_title), + body="""Dear {}, Your data request has passed a preliminary assessment and is now under review. @@ -2555,8 +2555,8 @@ def mail_assignment_accepted_assignee(ctx, truncated_title, assignee_email, prop return mail.send(ctx, to=assignee_email, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): assigned".format(request_id, truncated_title), - body=u"""Dear DAC member, + subject="YOUth data request {} (\"{}\"): assigned".format(request_id, truncated_title), + body="""Dear DAC member, Data request {} (proposal title: \"{}\") has been assigned to you for review. Please sign in to Yoda to view the data request and submit your review within {} days. @@ -2572,8 +2572,8 @@ def mail_review_researcher(ctx, truncated_title, researcher_email, researcher_na to=researcher_email, cc=cc, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): reviewed".format(request_id, truncated_title), - body=u"""Dear {}, + subject="YOUth data request {} (\"{}\"): reviewed".format(request_id, truncated_title), + body="""Dear {}, Your data request been reviewed by the YOUth Data Access Committee and is awaiting final evaluation by the YOUth project manager. @@ -2588,8 +2588,8 @@ def mail_review_pm(ctx, truncated_title, pm_email, request_id): return mail.send(ctx, to=pm_email, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): reviewed".format(request_id, truncated_title), - body=u"""Dear project manager, + subject="YOUth data request {} (\"{}\"): reviewed".format(request_id, truncated_title), + body="""Dear project manager, Data request {} has been reviewed by the YOUth Data Access Committee and is awaiting your final evaluation. @@ -2606,8 +2606,8 @@ def mail_evaluation_approved_researcher(ctx, truncated_title, researcher_email, to=researcher_email, cc=cc, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): approved".format(request_id, truncated_title), - body=u"""Dear {}, + subject="YOUth data request {} (\"{}\"): approved".format(request_id, truncated_title), + body="""Dear {}, Congratulations! 
Your data request has been approved. You are now asked to preregister your study in the YOUth Open Science Framework preregistry. To do so, please navigate to the preregistration form using this link: https://{}/datarequest/preregister/{}. @@ -2620,8 +2620,8 @@ def mail_preregistration_submit(ctx, truncated_title, pm_email, request_id): return mail.send(ctx, to=pm_email, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): preregistration submitted".format(request_id, truncated_title), - body=u"""Dear project manager, + subject="YOUth data request {} (\"{}\"): preregistration submitted".format(request_id, truncated_title), + body="""Dear project manager, Data request {} has been preregistered by the researcher. You are now asked to review and confirm the preregistration. The following link will take you directly to the data request, where you may confirm the preregistration: https://{}/datarequest/view/{}. @@ -2634,8 +2634,8 @@ def mail_datarequest_approved_dm(ctx, truncated_title, reviewing_dm, datamanager return mail.send(ctx, to=datamanager_email, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): approved".format(request_id, truncated_title), - body=u"""Dear data manager, + subject="YOUth data request {} (\"{}\"): approved".format(request_id, truncated_title), + body="""Dear data manager, Data request {} has been approved by the YOUth project manager (and has passed the data manager review of {}). Please sign in to Yoda to upload a Data Transfer Agreement for the researcher. @@ -2650,8 +2650,8 @@ def mail_datarequest_approved_dao_dm(ctx, truncated_title, datamanager_email, re return mail.send(ctx, to=datamanager_email, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\") (data assessment only): approved".format(request_id, truncated_title), - body=u"""Dear data manager, + subject="YOUth data request {} (\"{}\") (data assessment only): approved".format(request_id, truncated_title), + body="""Dear data manager, Data request {} has been approved by the YOUth project manager. Please sign in to Yoda to upload a Data Transfer Agreement for the researcher. @@ -2667,8 +2667,8 @@ def mail_datarequest_approved_researcher(ctx, truncated_title, researcher_email, to=researcher_email, cc=cc, actor=user.full_name(ctx), - subject=(u"YOUth data request {} (\"{}\") (data assessment only): approved".format(request_id, truncated_title) if dao else "YOUth data request {} (\"{}\"): preregistration approved".format(request_id, truncated_title)), - body=u"""Dear {}, + subject=("YOUth data request {} (\"{}\") (data assessment only): approved".format(request_id, truncated_title) if dao else "YOUth data request {} (\"{}\"): preregistration approved".format(request_id, truncated_title)), + body="""Dear {}, The preregistration of your data request has been approved. The YOUth data manager will now create a Data Transfer Agreement for you to sign. You will be notified when it is ready. 
@@ -2685,8 +2685,8 @@ def mail_resubmit(ctx, truncated_title, researcher_email, researcher_name, feedb to=researcher_email, cc=cc, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): rejected (resubmit)".format(request_id, truncated_title), - body=u"""Dear {}, + subject="YOUth data request {} (\"{}\"): rejected (resubmit)".format(request_id, truncated_title), + body="""Dear {}, Your data request has been rejected for the following reason(s): @@ -2710,8 +2710,8 @@ def mail_rejected(ctx, truncated_title, researcher_email, researcher_name, feedb to=researcher_email, cc=cc, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): rejected".format(request_id, truncated_title), - body=u"""Dear {}, + subject="YOUth data request {} (\"{}\"): rejected".format(request_id, truncated_title), + body="""Dear {}, Your data request has been rejected for the following reason(s): @@ -2731,8 +2731,8 @@ def mail_dta(ctx, truncated_title, researcher_email, researcher_name, request_id to=researcher_email, cc=cc, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): DTA ready".format(request_id, truncated_title), - body=u"""Dear {}, + subject="YOUth data request {} (\"{}\"): DTA ready".format(request_id, truncated_title), + body="""Dear {}, The YOUth data manager has created a Data Transfer Agreement to formalize the transfer of the data you have requested. Please sign in to Yoda to download and read the Data Transfer Agreement. @@ -2750,8 +2750,8 @@ def mail_signed_dta(ctx, truncated_title, authoring_dm, datamanager_email, reque to=datamanager_email, cc=cc, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): DTA signed".format(request_id, truncated_title), - body=u"""Dear data manager, + subject="YOUth data request {} (\"{}\"): DTA signed".format(request_id, truncated_title), + body="""Dear data manager, The researcher has uploaded a signed copy of the Data Transfer Agreement for data request {}. The DTA was authored by {}. @@ -2769,8 +2769,8 @@ def mail_data_ready(ctx, truncated_title, researcher_email, researcher_name, req to=researcher_email, cc=cc, actor=user.full_name(ctx), - subject=u"YOUth data request {} (\"{}\"): data ready".format(request_id, truncated_title), - body=u"""Dear {}, + subject="YOUth data request {} (\"{}\"): data ready".format(request_id, truncated_title), + body="""Dear {}, The data you have requested has been made available to you within a new folder in Yoda. You can access the data through the webportal in the "research" area or you can connect Yoda as a network drive and access the data through your file explorer. For information on how to access the data, see https://www.uu.nl/en/research/yoda/guide-to-yoda/i-want-to-start-using-yoda diff --git a/deposit.py b/deposit.py index 038461111..f8b2d1973 100644 --- a/deposit.py +++ b/deposit.py @@ -75,7 +75,7 @@ def api_deposit_copy_data_package(ctx, reference, deposit_group): # Register to delayed rule queue. ctx.delayExec( "1s", - "iiCopyFolderToResearch('%s', '%s')" % (coll_data_package, coll_target), + "iiCopyFolderToResearch('{}', '{}')".format(coll_data_package, coll_target), "") return {"data": new_deposit_path} @@ -217,7 +217,7 @@ def api_deposit_overview(ctx, """ def transform(row): # Remove ORDER_BY etc. wrappers from column names. 
- x = {re.sub('.*\((.*)\)', '\\1', k): v for k, v in row.items()} + x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} deposit_size = collection.size(ctx, x['COLL_NAME']) diff --git a/integration_tests.py b/integration_tests.py index 696e3a107..c65cea82d 100644 --- a/integration_tests.py +++ b/integration_tests.py @@ -553,10 +553,10 @@ def _test_folder_secure_func(ctx, func): "check": lambda x: x}, {"name": "folder.determine_new_vault_target.research", "test": lambda ctx: folder.determine_new_vault_target(ctx, "/tempZone/home/research-initial/testdata"), - "check": lambda x: re.match("^\/tempZone\/home\/vault-initial\/testdata\[[0-9]*\]$", x) is not None}, + "check": lambda x: re.match(r"^\/tempZone\/home\/vault-initial\/testdata\[[0-9]*\]$", x) is not None}, {"name": "folder.determine_new_vault_target.deposit", "test": lambda ctx: folder.determine_new_vault_target(ctx, "/tempZone/home/deposit-pilot/deposit-hi[123123]"), - "check": lambda x: re.match("^\/tempZone\/home\/vault-pilot\/deposit-hi\[[0-9]*\]\[[0-9]*\]$", x) is not None}, + "check": lambda x: re.match(r"^\/tempZone\/home\/vault-pilot\/deposit-hi\[[0-9]*\]\[[0-9]*\]$", x) is not None}, {"name": "folder.determine_new_vault_target.invalid", "test": lambda ctx: folder.determine_new_vault_target(ctx, "/tempZone/home/not-research-group-not-exist/folder-not-exist"), "check": lambda x: x == ""}, diff --git a/mail.py b/mail.py index bf8f6afad..5c8435d76 100644 --- a/mail.py +++ b/mail.py @@ -36,7 +36,7 @@ def send(ctx, to, actor, subject, body, cc=None): log.write(ctx, 'Ignoring invalid destination <{}>'.format(to)) return # Silently ignore obviously invalid destinations (mimic old behavior). - log.write(ctx, u'Sending mail for <{}> to <{}>, subject <{}>'.format(actor, to, subject)) + log.write(ctx, 'Sending mail for <{}> to <{}>, subject <{}>'.format(actor, to, subject)) cfg = {k: getattr(config, v) for k, v in [('from', 'notifications_sender_email'), diff --git a/meta.py b/meta.py index bba63b57b..19805c338 100644 --- a/meta.py +++ b/meta.py @@ -687,7 +687,7 @@ def set_result(msg_short, msg_long): keys = meta_diff[i].keys() if keys: for item in keys: - m = re.match("root\['(.*?)'\]", item) + m = re.match(r"root\['(.*?)'\]", item) if m: item_list[action].append(m.group(1).replace('_', ' ')) diff --git a/meta_form.py b/meta_form.py index abbc274ec..add877018 100644 --- a/meta_form.py +++ b/meta_form.py @@ -88,7 +88,7 @@ def humanize_validation_error(e): # Get the names of disallowed extra fields. # (the jsonschema library isn't of much help here - we must extract it from the message) - if e['validator'] == u'additionalProperties' and len(path_out) == 0: + if e['validator'] == 'additionalProperties' and len(path_out) == 0: m = re.search('[\'\"]([^\"\']+)[\'\"] was unexpected', e['message']) if m: return 'This extra field is not allowed: ' + m.group(1) diff --git a/replication.py b/replication.py index 492c5077a..0dc7e07ac 100644 --- a/replication.py +++ b/replication.py @@ -66,7 +66,7 @@ def replicate_asynchronously(ctx, path, source_resource, target_resource): # CAT_SQL_ERROR: this AVU is already present. No need to set it anymore. 
pass else: - error_status = re.search("status \[(.*?)\]", str(e)) + error_status = re.search(r"status \[(.*?)\]", str(e)) log.write(ctx, "Schedule replication of data object {} failed with error {}".format(path, error_status.group(1))) diff --git a/revision_strategies.py b/revision_strategies.py index dbae94b0c..567cc16b0 100644 --- a/revision_strategies.py +++ b/revision_strategies.py @@ -63,7 +63,7 @@ def get_revision_strategy(strategy_name): raise ValueError('Strategy "{}" is not supported'.format(strategy_name)) -class RevisionStrategy(object): +class RevisionStrategy: HOURS = 3600 DAYS = 86400 WEEKS = 604800 diff --git a/revisions.py b/revisions.py index 217bedc5b..063d925a3 100644 --- a/revisions.py +++ b/revisions.py @@ -309,7 +309,7 @@ def resource_modified_post_revision(ctx, resource, zone, path): # CAT_SQL_ERROR: this AVU is already present. No need to set it anymore. pass else: - error_status = re.search("status \[(.*?)\]", str(e)) + error_status = re.search(r"status \[(.*?)\]", str(e)) log.write(ctx, "Schedule revision of data object {} failed with error {}".format(path, error_status.group(1))) diff --git a/schema_transformation.py b/schema_transformation.py index 59758c75a..81bef3270 100644 --- a/schema_transformation.py +++ b/schema_transformation.py @@ -202,7 +202,7 @@ def rule_batch_transform_vault_metadata(rule_args, callback, rei): log.write(callback, "[METADATA] Executing transformation for: " + metadata_path) execute_transformation(callback, metadata_path, transform) except Exception as e: - log.write(callback, "[METADATA] Exception occurred during schema transformation of %s: %s" % (coll_name, str(type(e)) + ":" + str(e))) + log.write(callback, "[METADATA] Exception occurred during schema transformation of {}: {}".format(coll_name, str(type(e)) + ":" + str(e))) # Sleep briefly between checks. time.sleep(pause) @@ -290,7 +290,7 @@ def rule_batch_vault_metadata_correct_orcid_format(rule_args, callback, rei): log.write(callback, "Would have transformed ORCIDs for: %s if dry run mode was disabled." % (metadata_path)) except Exception as e: - log.write(callback, "Exception occurred during ORCID transformation of %s: %s" % (coll_name, str(type(e)) + ":" + str(e))) + log.write(callback, "Exception occurred during ORCID transformation of {}: {}".format(coll_name, str(type(e)) + ":" + str(e))) # Sleep briefly between checks. time.sleep(pause) @@ -335,7 +335,7 @@ def transform_orcid(ctx, m): if corrected_orcid is None: log.write(ctx, "Warning: unable to automatically fix ORCID '%s'" % (original_orcid)) elif corrected_orcid != original_orcid: - log.write(ctx, "Updating ORCID '%s' to '%s'" % (original_orcid, corrected_orcid)) + log.write(ctx, "Updating ORCID '{}' to '{}'".format(original_orcid, corrected_orcid)) pi['Name_Identifier'] = corrected_orcid data_changed = True @@ -370,7 +370,7 @@ def html(f): """ description = '\n'.join(map(lambda paragraph: '
<p>{}</p>
'.format( # Trim whitespace. - re.sub('\s+', ' ', paragraph).strip()), + re.sub(r'\s+', ' ', paragraph).strip()), # Docstring paragraphs are separated by blank lines. re.split('\n{2,}', f.__doc__))) diff --git a/schema_transformations.py b/schema_transformations.py index 0ec48d0ad..d61f324eb 100644 --- a/schema_transformations.py +++ b/schema_transformations.py @@ -143,7 +143,7 @@ def _default2_default3(ctx, m): # Check Scopus elif person_identifier.get('Name_Identifier_Scheme', None) == 'Author identifier (Scopus)': # Check for incorrect Scopus format. - if not re.search("^\d{1,11}$", person_identifier.get('Name_Identifier', None)): + if not re.search(r"^\d{1,11}$", person_identifier.get('Name_Identifier', None)): corrected_scopus = correctify_scopus(person_identifier['Name_Identifier']) # Only if an actual correction took place change the value and mark this data as 'changed'. if corrected_scopus is None: @@ -202,7 +202,7 @@ def _default2_default3(ctx, m): # Check Scopus elif person_identifier.get('Name_Identifier_Scheme', None) == 'Author identifier (Scopus)': # Check for incorrect Scopus format. - if not re.search("^\d{1,11}$", person_identifier.get('Name_Identifier', None)): + if not re.search(r"^\d{1,11}$", person_identifier.get('Name_Identifier', None)): corrected_scopus = correctify_scopus(person_identifier['Name_Identifier']) # Only if an actual correction took place change the value and mark this data as 'changed'. if corrected_scopus is None: diff --git a/schema_transformations_utils.py b/schema_transformations_utils.py index 16207dd96..2e5e499b5 100644 --- a/schema_transformations_utils.py +++ b/schema_transformations_utils.py @@ -28,7 +28,7 @@ def correctify_scopus(org_scopus): # Get rid of all spaces. new_scopus = org_scopus.replace(' ', '') - if not re.search("^\d{1,11}$", new_scopus): + if not re.search(r"^\d{1,11}$", new_scopus): return None return new_scopus diff --git a/sram.py b/sram.py index 13fbbe827..ad06db8dc 100644 --- a/sram.py +++ b/sram.py @@ -223,7 +223,7 @@ def invitation_mail_group_add_user(ctx, group_name, username, co_identifier): to=username, cc='', actor=user.full_name(ctx), - subject=(u"Invitation to join collaboration {}".format(group_name)), + subject=("Invitation to join collaboration {}".format(group_name)), body="""Dear {}, You have been invited by {} to join a collaboration page. 
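For reviewers: the string and formatting hunks above all follow from the same Python 3 semantics. A minimal sketch of the three patterns, with illustrative values only, not part of any patch:

    # Illustrative example, not part of the patch series.
    import re

    # str literals are Unicode by default in Python 3, so the u'' prefix is redundant.
    subject = "Invitation to join collaboration {}".format("research-initial")

    # str.format() replaces the older %-interpolation used in the rule strings.
    rule_call = "iiCopyFolderToResearch('{}', '{}')".format("/zone/src", "/zone/dst")

    # Raw strings keep backslashes literal: '\d' in a plain string is an invalid
    # escape sequence (a DeprecationWarning since 3.6, a SyntaxWarning in 3.12).
    assert re.match(r"^\d{1,11}$", "12345678901") is not None
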
diff --git a/unit-tests/unit_tests.py b/unit-tests/unit_tests.py index 4d6cb011c..d3ba6a4f0 100644 --- a/unit-tests/unit_tests.py +++ b/unit-tests/unit_tests.py @@ -1,4 +1,4 @@ -ยท__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' +__copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' from unittest import makeSuite, TestSuite diff --git a/util/api.py b/util/api.py index a31e1910d..fd893fc62 100644 --- a/util/api.py +++ b/util/api.py @@ -19,7 +19,7 @@ from error import * -class Result(object): +class Result: """API result.""" def __init__(self, data=None, status='ok', info=None, debug_info=None): diff --git a/util/arb_data_manager.py b/util/arb_data_manager.py index 769af9801..b8beb012a 100644 --- a/util/arb_data_manager.py +++ b/util/arb_data_manager.py @@ -26,7 +26,7 @@ def get(self, ctx, keyname): :returns: data for this key (arb_status) """ - value = super(ARBDataManager, self).get(ctx, keyname) + value = super().get(ctx, keyname) return constants.arb_status[value] def put(self, ctx, keyname, data): @@ -36,7 +36,7 @@ def put(self, ctx, keyname, data): :param keyname: name of the key :param data: data for this key (arb_status) """ - super(ARBDataManager, self).put(ctx, keyname, data.value) + super().put(ctx, keyname, data.value) def _get_context_string(self): """ :returns: a string that identifies the particular type of data manager diff --git a/util/cached_data_manager.py b/util/cached_data_manager.py index 157e2c8cb..f5c415cc5 100644 --- a/util/cached_data_manager.py +++ b/util/cached_data_manager.py @@ -6,7 +6,7 @@ import redis -class CachedDataManager(object): +class CachedDataManager: """This class contains a framework that subclasses can use to create a manager for cached data. The basic idea is that the subclass defines functions to access some data (e.g. in AVUs diff --git a/util/config.py b/util/config.py index 88af1c625..762b94366 100644 --- a/util/config.py +++ b/util/config.py @@ -6,7 +6,7 @@ # Config class {{{ -class Config(object): +class Config: """Stores configuration info, accessible through attributes (config.foo). Valid options are determined at __init__ time. @@ -33,7 +33,7 @@ def freeze(self): def __setattr__(self, k, v): if k.startswith('_'): - return super(Config, self).__setattr__(k, v) + return super().__setattr__(k, v) if self._frozen: print('Ruleset configuration error: No config changes possible to \'{}\''.format(k)) return @@ -45,7 +45,7 @@ def __setattr__(self, k, v): def __getattr__(self, k): if k.startswith('_'): - return super(Config, self).__getattr__(k) + return super().__getattr__(k) try: return self._items[k] except KeyError: @@ -182,7 +182,7 @@ def __repr__(self): else: setattr(config, *m.groups()) -except IOError: +except OSError: # Ignore, config file is optional. pass diff --git a/util/msi.py b/util/msi.py index 3e9826313..42170467c 100644 --- a/util/msi.py +++ b/util/msi.py @@ -17,7 +17,7 @@ class Error(error.UUError): """Error for microservice failure.""" def __init__(self, message, msi_status, msi_code, msi_args, src_exception): - super(Error, self).__init__(message) + super().__init__(message) # Store msi result, if any. # These may be None when an msi aborts in an abnormal way. self.msi_status = msi_status diff --git a/util/policy.py b/util/policy.py index 0916e6c64..edf3523ac 100644 --- a/util/policy.py +++ b/util/policy.py @@ -8,7 +8,7 @@ import rule -class Succeed(object): +class Succeed: """Policy function result, indicates success. Evaluates to True in boolean context. 
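The class hunks in util/ are the same cleanup in three guises: Python 3 classes are always new-style, super() needs no arguments inside a method, and IOError is an alias of OSError. A short illustration, with hypothetical class names not taken from the ruleset:

    # Illustrative example, not part of the patch series.
    class Manager:
        # Listing (object) as a base is redundant in Python 3.
        def get(self, key):
            return key.upper()

    class CachedManager(Manager):
        def get(self, key):
            # Zero-argument super() replaces super(CachedManager, self).
            return "cached:" + super().get(key)

    try:
        open("/nonexistent/config")
    except OSError:  # also catches what Python 2 called IOError
        pass

    print(CachedManager().get("arb"))  # cached:ARB
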
@@ -22,7 +22,7 @@ def __bool__(self): __nonzero__ = __bool__ -class Fail(object): +class Fail: """Policy function result, indicates failure. As a result, the PEP-instrumented operation will be aborted, and diff --git a/util/rule.py b/util/rule.py index 983661e24..05f3a186c 100644 --- a/util/rule.py +++ b/util/rule.py @@ -7,7 +7,7 @@ from enum import Enum -class Context(object): +class Context: """Combined type of a callback and rei struct. `Context` can be treated as a rule engine callback for all intents and purposes. diff --git a/vault.py b/vault.py index 8e1b1df4e..275567ffd 100644 --- a/vault.py +++ b/vault.py @@ -233,7 +233,7 @@ def api_vault_copy_to_research(ctx, coll_origin, coll_target): ctx.delayExec( "%ds" % delay, - "iiCopyFolderToResearch('%s', '%s')" % (coll_origin, coll_target), + "iiCopyFolderToResearch('{}', '{}')".format(coll_origin, coll_target), "") # TODO: response nog veranderen @@ -291,7 +291,7 @@ def api_vault_unpreservable_files(ctx, coll, list_name): # Data names -> lowercase extensions, without the dot. exts = set(list(itertools.imap(lambda x: os.path.splitext(x)[1][1:].lower(), data_names))) - exts -= set(['']) + exts -= {''} # Return any ext that is not in the preservable list. return list(exts - preservable_formats) From d67314ef8fb3ca5fbcf7e93bf59335939d1af512 Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Tue, 19 Nov 2024 16:20:44 +0100 Subject: [PATCH 05/27] YDA-5992: put underscores in access_types --- folder.py | 2 +- iiVault.r | 6 +++--- meta.py | 2 +- revisions.py | 2 +- schema_transformation.py | 4 ++-- tools/check-acl.r | 8 ++++---- tools/check-revisions-acl.r | 8 ++++---- tools/create-revision-stores.r | 6 +++--- tools/edit-vault-metadata.py | 2 +- tools/process-datarequest-actions.r | 4 ++-- tools/process-vault-status-transitions.r | 4 ++-- uuBatch.r | 2 +- vault.py | 4 ++-- 13 files changed, 27 insertions(+), 27 deletions(-) diff --git a/folder.py b/folder.py index 9278a2798..bec8e82d5 100644 --- a/folder.py +++ b/folder.py @@ -354,7 +354,7 @@ def set_last_run_time(ctx, coll): def set_can_modify(ctx, coll): """Check if have permission to modify, set if necessary""" - check_access_result = msi.check_access(ctx, coll, 'modify object', irods_types.BytesBuf()) + check_access_result = msi.check_access(ctx, coll, 'modify_object', irods_types.BytesBuf()) modify_access = check_access_result['arguments'][2] if modify_access != b'\x01': # This allows us permission to copy the files diff --git a/iiVault.r b/iiVault.r index 570c5ef0e..132ad078d 100644 --- a/iiVault.r +++ b/iiVault.r @@ -16,7 +16,7 @@ # iiIngestObject(*itemParent, *itemName, *itemIsCollection, *buffer, *error) { *sourcePath = "*itemParent/*itemName"; - msiCheckAccess(*sourcePath, "read object", *readAccess); + msiCheckAccess(*sourcePath, "read_object", *readAccess); if (*readAccess != 1) { *error = errorcode(msiSetACL("default", "admin:read", uuClientFullName, *sourcePath)); if (*error < 0) { @@ -144,10 +144,10 @@ iiCopyACLsFromParent(*path, *recursiveFlag) { if (*accessName == "own") { writeLine("serverLog", "iiCopyACLsFromParent: granting own to <*userName> on <*path> with recursiveFlag <*recursiveFlag>"); msiSetACL(*recursiveFlag, "own", *userName, *path); - } else if (*accessName == "read object") { + } else if (*accessName == "read_object") { writeLine("serverLog", "iiCopyACLsFromParent: granting read to <*userName> on <*path> with recursiveFlag <*recursiveFlag>"); msiSetACL(*recursiveFlag, "read", *userName, *path); - } else if (*accessName == "modify object") { + } else if 
(*accessName == "modify_object") { writeLine("serverLog", "iiCopyACLsFromParent: granting write to <*userName> on <*path> with recursiveFlag <*recursiveFlag>"); msiSetACL(*recursiveFlag, "write", *userName, *path); } diff --git a/meta.py b/meta.py index 19805c338..499f1e2bf 100644 --- a/meta.py +++ b/meta.py @@ -627,7 +627,7 @@ def set_result(msg_short, msg_long): prev_json_data = json.loads(json.dumps(prev_json)) try: - ret = msi.check_access(ctx, json_path, 'modify object', irods_types.BytesBuf()) + ret = msi.check_access(ctx, json_path, 'modify_object', irods_types.BytesBuf()) if ret['arguments'][2] != b'\x01': msi.set_acl(ctx, 'default', 'admin:own', client_full_name, json_path) except error.UUError: diff --git a/revisions.py b/revisions.py index 063d925a3..0a874da58 100644 --- a/revisions.py +++ b/revisions.py @@ -586,7 +586,7 @@ def revision_create(ctx, print_verbose, data_id, resource, group_name, revision_ rev_filename = basename + "_" + iso8601 + data_owner rev_coll = revision_store + "/" + coll_id - read_access = msi.check_access(ctx, path, 'read object', irods_types.BytesBuf())['arguments'][2] + read_access = msi.check_access(ctx, path, 'read_object', irods_types.BytesBuf())['arguments'][2] if read_access != b'\x01': try: msi.set_acl(ctx, "default", "read", "rods#{}".format(user.zone(ctx)), path) diff --git a/schema_transformation.py b/schema_transformation.py index 81bef3270..6cd3470ab 100644 --- a/schema_transformation.py +++ b/schema_transformation.py @@ -144,10 +144,10 @@ def copy_acls_from_parent(ctx, path, recursive_flag): if access_name == "own": log.write(ctx, "iiCopyACLsFromParent: granting own to <" + user_name + "> on <" + path + "> with recursiveFlag <" + recursive_flag + ">") msi.set_acl(ctx, recursive_flag, "own", user_name, path) - elif access_name == "read object": + elif access_name == "read_object": log.write(ctx, "iiCopyACLsFromParent: granting read to <" + user_name + "> on <" + path + "> with recursiveFlag <" + recursive_flag + ">") msi.set_acl(ctx, recursive_flag, "read", user_name, path) - elif access_name == "modify object": + elif access_name == "modify_object": log.write(ctx, "iiCopyACLsFromParent: granting write to <" + user_name + "> on <" + path + "> with recursiveFlag <" + recursive_flag + ">") msi.set_acl(ctx, recursive_flag, "write", user_name, path) diff --git a/tools/check-acl.r b/tools/check-acl.r index 1c85be9ef..35784a842 100644 --- a/tools/check-acl.r +++ b/tools/check-acl.r @@ -74,9 +74,9 @@ getCollAccess(*coll) { if (*name == "own") { *access.own = *access.own ++ "%" ++ *userId; *access."*userId" = "own"; - } else if (*name == "read object") { + } else if (*name == "read_object") { *access."*userId" = "read"; - } else if (*name == "modify object") { + } else if (*name == "modify_object") { *access."*userId" = "write"; } } @@ -96,9 +96,9 @@ getDataAccess(*coll, *data) { if (*name == "own") { *access.own = *access.own ++ "%" ++ *userId; *access."*userId" = "own"; - } else if (*name == "read object") { + } else if (*name == "read_object") { *access."*userId" = "read"; - } else if (*name == "modify object") { + } else if (*name == "modify_object") { *access."*userId" = "write"; } } diff --git a/tools/check-revisions-acl.r b/tools/check-revisions-acl.r index 5a147c82c..47bbd7b50 100644 --- a/tools/check-revisions-acl.r +++ b/tools/check-revisions-acl.r @@ -68,9 +68,9 @@ getCollAccess(*coll) { if (*name == "own") { *access.own = *access.own ++ "%" ++ *userId; *access."*userId" = "own"; - } else if (*name == "read object") { + } else if (*name == 
"read_object") { *access."*userId" = "read"; - } else if (*name == "modify object") { + } else if (*name == "modify_object") { *access."*userId" = "write"; } } @@ -90,9 +90,9 @@ getDataAccess(*coll, *data) { if (*name == "own") { *access.own = *access.own ++ "%" ++ *userId; *access."*userId" = "own"; - } else if (*name == "read object") { + } else if (*name == "read_object") { *access."*userId" = "read"; - } else if (*name == "modify object") { + } else if (*name == "modify_object") { *access."*userId" = "write"; } } diff --git a/tools/create-revision-stores.r b/tools/create-revision-stores.r index f77a433db..38893c685 100755 --- a/tools/create-revision-stores.r +++ b/tools/create-revision-stores.r @@ -22,7 +22,7 @@ createRevisionStores { # rodsadmin needs at least read access to research group to copy data # unfortunately msiCheckAccess does not check for group membership, but it won't be a problem # when we add a user level ACL. - msiCheckAccess(*groupColl, "read object", *readPermission); + msiCheckAccess(*groupColl, "read_object", *readPermission); if (*readPermission == 0) { writeLine("stdout", "Granting read access to *groupColl"); @@ -56,7 +56,7 @@ createRevisionStores { # We need read access on the original object # unfortunately msiCheckAccess does not check for group membership, but it won't be a problem # when we add a user level ACL. - msiCheckAccess(*path, "read object", *objectReadPermission); + msiCheckAccess(*path, "read_object", *objectReadPermission); if (*objectReadPermission == 0) { writeLine("stdout", "Granting read access to *path"); msiSetACL("default", "admin:read", uuClientFullName, *path); @@ -98,7 +98,7 @@ createRevisionStores { } if (!*skip) { - msiCheckAccess(*path, "read object", *objectReadPermission); + msiCheckAccess(*path, "read_object", *objectReadPermission); if (*objectReadPermission == 0) { writeLine("stdout", "Granting read access to *path"); msiSetACL("default", "admin:read", uuClientFullName, *path); diff --git a/tools/edit-vault-metadata.py b/tools/edit-vault-metadata.py index d099f7ebb..21552bc7a 100755 --- a/tools/edit-vault-metadata.py +++ b/tools/edit-vault-metadata.py @@ -111,7 +111,7 @@ def get_dataobject_acls(path: str) -> List[Tuple[str, str]]: match = re.search(r"^ ACL - ([\S\s]+)$", line) if match: acl_line = match.group(1) - for acl_entry in acl_line.replace("read object", "read").replace("g:", "").split(): + for acl_entry in acl_line.replace("read_object", "read").replace("g:", "").split(): (acl_group, acl_priv) = acl_entry.split(":") acl_clean_group = acl_group.split("#")[0] results.append((acl_clean_group, acl_priv)) diff --git a/tools/process-datarequest-actions.r b/tools/process-datarequest-actions.r index 638bfd510..f62f825e1 100644 --- a/tools/process-datarequest-actions.r +++ b/tools/process-datarequest-actions.r @@ -54,8 +54,8 @@ processDatarequestActions() { *statusInfo = ""; } - # Check if rods can modify metadata and grant temporary write ACL if necessary - msiCheckAccess(*collName, "modify metadata", *modifyPermission); + # Check if rods can modify_metadata and grant temporary write ACL if necessary + msiCheckAccess(*collName, "modify_metadata", *modifyPermission); if (*modifyPermission == 0) { writeLine("stdout", "Granting write access to *collName"); msiSetACL("default", "admin:write", uuClientFullName, *collName); diff --git a/tools/process-vault-status-transitions.r b/tools/process-vault-status-transitions.r index ee35a86cb..ddce4228e 100644 --- a/tools/process-vault-status-transitions.r +++ 
b/tools/process-vault-status-transitions.r @@ -55,8 +55,8 @@ processVaultActions() { rule_vault_process_status_transitions(*folder, *action, *actor, *previous_version, *status, *statusInfo); *status = 'Success'; - # Check if rods can modify metadata and grant temporary write ACL if necessary. - msiCheckAccess(*collName, "modify metadata", *modifyPermission); + # Check if rods can modify_metadata and grant temporary write ACL if necessary. + msiCheckAccess(*collName, "modify_metadata", *modifyPermission); if (*modifyPermission == 0) { writeLine("stdout", "Granting write access to *collName"); msiSetACL("default", "admin:write", uuClientFullName, *collName); diff --git a/uuBatch.r b/uuBatch.r index 78116e887..24586fc8a 100644 --- a/uuBatch.r +++ b/uuBatch.r @@ -24,7 +24,7 @@ verifyChecksumBatch(*start, *max, *update) { } verifyChecksumData(*path, *chksum, *update) { - msiCheckAccess(*path, "read object", *access); + msiCheckAccess(*path, "read_object", *access); if (*access == 0) { msiSetACL("default", "admin:read", uuClientFullName, *path); } diff --git a/vault.py b/vault.py index 275567ffd..db10f10ef 100644 --- a/vault.py +++ b/vault.py @@ -1022,7 +1022,7 @@ def treewalk_and_ingest(ctx, folder, target, origin, error): def ingest_object(ctx, parent, item, item_is_collection, destination, origin): source_path = parent + "/" + item - read_access = msi.check_access(ctx, source_path, 'read object', irods_types.BytesBuf())['arguments'][2] + read_access = msi.check_access(ctx, source_path, 'read_object', irods_types.BytesBuf())['arguments'][2] # TODO use set_acl_check? if read_access != b'\x01': @@ -1119,7 +1119,7 @@ def set_vault_permissions(ctx, coll, target): for row in iter: access_name = row[0] - if access_name != "read object": + if access_name != "read_object": # Grant the research group read-only access to the collection to enable browsing through the vault. for name in valid_read_groups: try: From 1397ecf9026e1beb74c694760f7b59b27ccb3c2e Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Wed, 20 Nov 2024 11:02:59 +0100 Subject: [PATCH 06/27] YDA-5992: Python 3 returns iterators from map() and filter() --- browse.py | 8 ++++---- datarequest.py | 34 +++++++++++++++++----------------- deposit.py | 2 +- json_landing_page.py | 4 ++-- meta.py | 16 ++++++++-------- policies_utils.py | 2 +- util/irods_type_info.py | 2 +- util/rule.py | 4 ++-- vault.py | 2 +- 9 files changed, 37 insertions(+), 37 deletions(-) diff --git a/browse.py b/browse.py index 5b3d4ccee..f9c02f0aa 100644 --- a/browse.py +++ b/browse.py @@ -88,11 +88,11 @@ def transform(row): qcoll = Query(ctx, ccols, "COLL_PARENT_NAME = '{}'".format(coll), offset=offset, limit=limit, output=AS_DICT) - colls = map(transform, [c for c in list(qcoll) if _filter_vault_deposit_index(c)]) + colls = list(map(transform, [c for c in list(qcoll) if _filter_vault_deposit_index(c)])) qdata = Query(ctx, dcols, "COLL_NAME = '{}' AND DATA_REPL_STATUS n> '0'".format(coll), offset=max(0, offset - qcoll.total_rows()), limit=limit - len(colls), output=AS_DICT) - datas = map(transform, list(qdata)) + datas = list(map(transform, list(qdata))) # No results at all? Make sure the collection actually exists. 
if len(colls) + len(datas) == 0 and not collection.exists(ctx, coll): @@ -172,7 +172,7 @@ def transform(row): qcoll = Query(ctx, ccols, "COLL_PARENT_NAME = '{}'".format(coll), offset=offset, limit=limit, output=AS_DICT) - colls = map(transform, [d for d in list(qcoll) if _filter_vault_deposit_index(d)]) + colls = list(map(transform, [d for d in list(qcoll) if _filter_vault_deposit_index(d)])) # No results at all? Make sure the collection actually exists. if len(colls) == 0 and not collection.exists(ctx, coll): @@ -279,7 +279,7 @@ def transform(row): qdata = Query(ctx, cols, where, offset=max(0, int(offset)), limit=int(limit), case_sensitive=query_is_case_sensitive, output=AS_DICT) - datas = map(transform, [d for d in list(qdata) if _filter_vault_deposit_index(d)]) + datas = list(map(transform, [d for d in list(qdata) if _filter_vault_deposit_index(d)])) return OrderedDict([('total', qdata.total_rows()), ('items', datas)]) diff --git a/datarequest.py b/datarequest.py index df1d2611d..50e1aaff8 100644 --- a/datarequest.py +++ b/datarequest.py @@ -379,7 +379,7 @@ def api_datarequest_action_permitted(ctx, request_id, roles, statuses): if statuses is not None: def get_status(stat): return status[stat] - statuses = map(get_status, statuses) + statuses = list(map(get_status, statuses)) return datarequest_action_permitted(ctx, request_id, roles, statuses) @@ -830,9 +830,9 @@ def transform_status(row): return OrderedDict([('total', 0), ('items', [])]) # Merge datarequest title and status into results. - colls = map(transform, list(qcoll)) - colls_title = map(transform_title, list(qcoll_title)) - colls_status = map(transform_status, list(qcoll_status)) + colls = list(map(transform, list(qcoll))) + colls_title = list(map(transform_title, list(qcoll_title))) + colls_status = list(map(transform_status, list(qcoll_status))) for datarequest in colls: for datarequest_title in colls_title: if datarequest_title['id'] == datarequest['id']: @@ -1183,7 +1183,7 @@ def get_filename(file_path): # Return list of attachment filepaths coll_path = "/{}/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id, ATTACHMENTS_PATHNAME) - return map(get_filename, list(collection.data_objects(ctx, coll_path))) + return list(map(get_filename, list(collection.data_objects(ctx, coll_path)))) @api.make() @@ -1397,7 +1397,7 @@ def datarequest_dac_members_get(ctx, request_id): :returns: List of DAC members """ - dac_members = map(lambda member: member[0], group.members(ctx, GROUP_DAC)) + dac_members = list(map(lambda member: member[0], group.members(ctx, GROUP_DAC))) request_owner = datarequest_owner_get(ctx, request_id) if request_owner in dac_members: dac_members.remove(request_owner) @@ -1487,9 +1487,9 @@ def assign_request(ctx, assignees, request_id): # Grant read permissions on relevant files of data request attachments = datarequest_attachments_get(ctx, request_id) - attachments = map(lambda attachment: ATTACHMENTS_PATHNAME + "/" + attachment, attachments) + attachments = list(map(lambda attachment: ATTACHMENTS_PATHNAME + "/" + attachment, attachments)) for assignee in json.loads(assignees): - for doc in map(lambda filename: filename + JSON_EXT, [DATAREQUEST, PR_REVIEW, DM_REVIEW]) + attachments: + for doc in list(map(lambda filename: filename + JSON_EXT, [DATAREQUEST, PR_REVIEW, DM_REVIEW])) + attachments: file_path = "{}/{}".format(coll_path, doc) ctx.adminTempWritePermission(file_path, "grantread", "{}#{}".format(assignee, user.zone(ctx))) @@ -1575,8 +1575,8 @@ def api_datarequest_review_submit(ctx, data, request_id): # 
Write form data to disk try: - readers = [GROUP_PM] + map(lambda reviewer: reviewer + "#" + user.zone(ctx), - datarequest_reviewers_get(ctx, request_id)) + readers = [GROUP_PM] + list(map(lambda reviewer: reviewer + "#" + user.zone(ctx), + datarequest_reviewers_get(ctx, request_id))) file_write_and_lock(ctx, coll_path, REVIEW + "_{}".format(user.name(ctx)) + JSON_EXT, data, readers) except error.UUError as e: return api.Error('write_error', 'Could not write review data to disk: {}.'.format(e)) @@ -1683,8 +1683,8 @@ def api_datarequest_evaluation_submit(ctx, data, request_id): # Write form data to disk try: - readers = [GROUP_PM] + map(lambda reviewer: reviewer + "#" + user.zone(ctx), - datarequest_reviewers_get(ctx, request_id)) + readers = [GROUP_PM] + list(map(lambda reviewer: reviewer + "#" + user.zone(ctx), + datarequest_reviewers_get(ctx, request_id))) file_write_and_lock(ctx, coll_path, EVALUATION + JSON_EXT, data, readers) except error.UUError: return api.Error('write_error', 'Could not write evaluation data to disk') @@ -2209,7 +2209,7 @@ def preliminary_review_emails(ctx, request_id, datarequest_status): researcher = datarequest['contact']['principal_investigator'] researcher_email = datarequest_owner_get(ctx, request_id) cc = cc_email_addresses_get(datarequest['contact']) - pm_email, _ = filter(lambda x: x[0] != "rods", group.members(ctx, GROUP_PM))[0] + pm_email, _ = list(filter(lambda x: x[0] != "rods", group.members(ctx, GROUP_PM)))[0] preliminary_review = json.loads(datarequest_preliminary_review_get(ctx, request_id)) feedback_for_researcher = preliminary_review['feedback_for_researcher'] @@ -2264,7 +2264,7 @@ def assignment_emails(ctx, request_id, datarequest_status): status.REJECTED_AFTER_DATAMANAGER_REVIEW): # Get additional email input parameters feedback_for_researcher = assignment['feedback_for_researcher'] - pm_email, _ = filter(lambda x: x[0] != "rods", group.members(ctx, GROUP_PM))[0] + pm_email, _ = list(filter(lambda x: x[0] != "rods", group.members(ctx, GROUP_PM)))[0] # Send emails if datarequest_status == status.RESUBMIT_AFTER_DATAMANAGER_REVIEW: @@ -2300,7 +2300,7 @@ def evaluation_emails(ctx, request_id, datarequest_status): cc = cc_email_addresses_get(datarequest['contact']) evaluation = json.loads(datarequest_evaluation_get(ctx, request_id)) feedback_for_researcher = evaluation.get('feedback_for_researcher', '') - pm_email, _ = filter(lambda x: x[0] != "rods", group.members(ctx, GROUP_PM))[0] + pm_email, _ = list(filter(lambda x: x[0] != "rods", group.members(ctx, GROUP_PM)))[0] truncated_title = truncated_title_get(ctx, request_id) # Send emails @@ -2354,7 +2354,7 @@ def dta_post_upload_actions_emails(ctx, request_id): researcher_email = datarequest_owner_get(ctx, request_id) cc = cc_email_addresses_get(datarequest['contact']) # (Also) cc project manager - pm_email, _ = filter(lambda x: x[0] != "rods", group.members(ctx, GROUP_PM))[0] + pm_email, _ = list(filter(lambda x: x[0] != "rods", group.members(ctx, GROUP_PM)))[0] cc = cc + ',{}'.format(pm_email) if cc else pm_email truncated_title = truncated_title_get(ctx, request_id) @@ -2366,7 +2366,7 @@ def signed_dta_post_upload_actions_emails(ctx, request_id): # Get (source data for) email input parameters datamanager_members = group.members(ctx, GROUP_DM) authoring_dm = data_object.owner(ctx, datarequest_dta_path_get(ctx, request_id))[0] - cc, _ = pm_email, _ = filter(lambda x: x[0] != "rods", group.members(ctx, GROUP_PM))[0] + cc, _ = pm_email, _ = list(filter(lambda x: x[0] != "rods", group.members(ctx, 
GROUP_PM)))[0] truncated_title = truncated_title_get(ctx, request_id) # Send email diff --git a/deposit.py b/deposit.py index f8b2d1973..69161a9f2 100644 --- a/deposit.py +++ b/deposit.py @@ -282,7 +282,7 @@ def transform(row): qcoll = Query(ctx, ccols, "COLL_PARENT_NAME = '{}' AND COLL_NAME not like '/{}/home/vault-%' AND COLL_NAME not like '/{}/home/grp-vault-%'".format(coll_name, zone, zone), offset=offset, limit=limit, output=AS_DICT) - colls = map(transform, list(qcoll)) + colls = list(map(transform, list(qcoll))) all_colls += colls return OrderedDict([('total', len(all_colls)), diff --git a/json_landing_page.py b/json_landing_page.py index 87062cf06..674865a27 100644 --- a/json_landing_page.py +++ b/json_landing_page.py @@ -124,8 +124,8 @@ def json_landing_page_create_json_landing_page(ctx, zone, template_name, combi_j language = "" language_id = json_data["Language"] # Convert just the language schemas to unicode to handle when a language has non-ascii characters (like Volapรผk) - schema_lang_ids = map(lambda x: x.decode("utf-8"), json_schema["definitions"]["optionsISO639-1"]["enum"]) - schema_lang_names = map(lambda x: x.decode("utf-8"), json_schema["definitions"]["optionsISO639-1"]["enumNames"]) + schema_lang_ids = list(map(lambda x: x.decode("utf-8"), json_schema["definitions"]["optionsISO639-1"]["enum"])) + schema_lang_names = list(map(lambda x: x.decode("utf-8"), json_schema["definitions"]["optionsISO639-1"]["enumNames"])) index = schema_lang_ids.index(language_id) # Language variable must be kept in unicode, otherwise landing page fails to build with a language with non-ascii characters language = schema_lang_names[index] diff --git a/meta.py b/meta.py index 499f1e2bf..52b51ff93 100644 --- a/meta.py +++ b/meta.py @@ -31,22 +31,22 @@ def metadata_get_links(metadata): if 'links' not in metadata or type(metadata['links']) is not list: return [] - return filter(lambda x: type(x) in (dict, OrderedDict) - and 'rel' in x - and 'href' in x - and type(x['rel']) is str - and type(x['href']) is str, - metadata['links']) + return list(filter(lambda x: type(x) in (dict, OrderedDict) + and 'rel' in x + and 'href' in x + and type(x['rel']) is str + and type(x['href']) is str, + metadata['links'])) def metadata_get_schema_id(metadata): - desc = filter(lambda x: x['rel'] == 'describedby', metadata_get_links(metadata)) + desc = list(filter(lambda x: x['rel'] == 'describedby', metadata_get_links(metadata))) if len(desc) > 0: return desc[0]['href'] def metadata_set_schema_id(metadata, schema_id): - other_links = filter(lambda x: x['rel'] != 'describedby', metadata_get_links(metadata)) + other_links = list(filter(lambda x: x['rel'] != 'describedby', metadata_get_links(metadata))) metadata['links'] = [OrderedDict([ ['rel', 'describedby'], diff --git a/policies_utils.py b/policies_utils.py index a3f9e98eb..0d97901e0 100644 --- a/policies_utils.py +++ b/policies_utils.py @@ -30,7 +30,7 @@ def _column_in_select_inp(selectInp, columns): def _column_in_cond_inp(sqlCondInp, columns): condition_data = ast.literal_eval(str(sqlCondInp)) - condition_columns = map(lambda c: c[0], condition_data) + condition_columns = list(map(lambda c: c[0], condition_data)) for column in columns: if column in condition_columns: return True diff --git a/util/irods_type_info.py b/util/irods_type_info.py index b39aedcee..a317e8640 100644 --- a/util/irods_type_info.py +++ b/util/irods_type_info.py @@ -43,7 +43,7 @@ def pyify(x): def col_name(i): - return filter(lambda kv: kv[1] == i, cols)[0][0] + return list(filter(lambda kv: 
kv[1] == i, cols))[0][0] cols =\ diff --git a/util/rule.py b/util/rule.py index 05f3a186c..c553e48e6 100644 --- a/util/rule.py +++ b/util/rule.py @@ -92,12 +92,12 @@ def r(rule_args, callback, rei): if result is None: return - result = map(transform, list(result) if type(result) is tuple else [result]) + result = list(map(transform, list(result) if type(result) is tuple else [result])) if handler is Output.STORE: if outputs is None: # outputs not specified? overwrite all arguments. - rule_args[:] = map(encode_val, result) + rule_args[:] = list(map(encode_val, result)) else: # set specific output arguments. for i, x in zip(outputs, result): diff --git a/vault.py b/vault.py index db10f10ef..6f45028af 100644 --- a/vault.py +++ b/vault.py @@ -1665,7 +1665,7 @@ def api_vault_get_published_packages(ctx, path): # Sort by publication date sorted_publ = [sorted(x, key=lambda x: datetime.strptime(x[1], "%Y-%m-%dT%H:%M:%S.%f")) for x in grouped_base_dois] - latest_publ = map(lambda x: x[-1], sorted_publ) + latest_publ = list(map(lambda x: x[-1], sorted_publ)) # Append to data package for items in latest_publ: From 71ea79d39d7bcc64b8668202f79197b4abe995ba Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Wed, 20 Nov 2024 11:15:04 +0100 Subject: [PATCH 07/27] YDA-5992: range() does not return a list --- datarequest.py | 2 +- mail.py | 2 +- notifications.py | 2 +- publication.py | 8 ++++---- vault.py | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/datarequest.py b/datarequest.py index 50e1aaff8..71b98fd14 100644 --- a/datarequest.py +++ b/datarequest.py @@ -694,7 +694,7 @@ def cc_email_addresses_get(contact_object): return None -@rule.make(inputs=range(0), outputs=range(2)) +@rule.make(inputs=[], outputs=[0, 1]) def rule_datarequest_review_period_expiration_check(ctx): coll = "/{}/{}".format(user.zone(ctx), DRCOLLECTION) criteria = "COLL_PARENT_NAME = '{}' AND DATA_NAME = '{}' AND META_DATA_ATTR_NAME = 'endOfReviewPeriod' AND META_DATA_ATTR_VALUE < '{}' AND META_DATA_ATTR_NAME = 'status' AND META_DATA_ATTR_VALUE = 'UNDER_REVIEW'".format(coll, DATAREQUEST + JSON_EXT, int(time.time())) diff --git a/mail.py b/mail.py index 5c8435d76..98e9025f9 100644 --- a/mail.py +++ b/mail.py @@ -116,7 +116,7 @@ def wrapper(ctx, to, actor, subject, body): return '0', '' -@rule.make(inputs=range(1), outputs=range(1, 3)) +@rule.make(inputs=[0], outputs=[1, 2]) def rule_mail_test(ctx, to): if not user.is_admin(ctx): return api.Error('not_allowed', 'Only rodsadmin can send test mail') diff --git a/notifications.py b/notifications.py index 20623026c..ae45d3ee7 100644 --- a/notifications.py +++ b/notifications.py @@ -182,7 +182,7 @@ def send_notification(ctx, to, actor, message): """.format(message, config.yoda_portal_fqdn, config.yoda_portal_fqdn)) -@rule.make(inputs=range(2), outputs=range(2, 4)) +@rule.make(inputs=[0, 1], outputs=[2, 3]) def rule_mail_notification_report(ctx, to, notifications): if not user.is_admin(ctx): return api.Error('not_allowed', 'Only rodsadmin can send test mail') diff --git a/publication.py b/publication.py index c86faceb4..522605497 100644 --- a/publication.py +++ b/publication.py @@ -1314,7 +1314,7 @@ def process_republication(ctx, vault_package): return publication_state["status"] -@rule.make(inputs=range(4)) +@rule.make(inputs=[0, 1, 2, 3]) def rule_update_publication(ctx, vault_package, update_datacite, update_landingpage, update_moai): """Rule interface for updating the publication of a vault package. 
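The list() wrappers above and the inputs/outputs changes below address the same Python 3 shift: map(), filter(), and range() return lazy iterables rather than lists, so code that indexes, measures, concatenates, or re-reads the result must materialize it first. A small demonstration, illustrative only:

    # Illustrative example, not part of the patch series.
    names = map(str.upper, ["vault", "research"])
    assert not isinstance(names, list)        # a one-shot iterator in Python 3
    assert list(names) == ["VAULT", "RESEARCH"]
    assert list(names) == []                  # already exhausted on a second pass

    # Indexing into filter() results requires a list, hence list(filter(...))[0].
    pm = list(filter(lambda u: u != "rods", ["rods", "projectmanager"]))[0]
    assert pm == "projectmanager"

    # range() is likewise lazy; the rule decorators now receive explicit lists.
    assert list(range(2)) == [0, 1]
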
@@ -1565,15 +1565,15 @@ def get_all_versions(ctx, path, doi): """Rule interface for processing publication of a vault package.""" -rule_process_publication = rule.make(inputs=range(1), outputs=range(1, 3))(process_publication) +rule_process_publication = rule.make(inputs=[0], outputs=[1, 2])(process_publication) """Rule interface for processing depublication of a vault package.""" -rule_process_depublication = rule.make(inputs=range(1), outputs=range(1, 3))(process_depublication) +rule_process_depublication = rule.make(inputs=[0], outputs=[1, 2])(process_depublication) """Rule interface for processing republication of a vault package.""" -rule_process_republication = rule.make(inputs=range(1), outputs=range(1, 3))(process_republication) +rule_process_republication = rule.make(inputs=[0], outputs=[1, 2])(process_republication) @rule.make() diff --git a/vault.py b/vault.py index 6f45028af..256636812 100644 --- a/vault.py +++ b/vault.py @@ -1297,7 +1297,7 @@ def rule_vault_grant_readers_vault_access(ctx, dry_run, verbose): return '0' if no_errors else '1' -@rule.make(inputs=range(4), outputs=range(4, 6)) +@rule.make(inputs=[0, 1, 2, 3], outputs=[4, 5]) def rule_vault_process_status_transitions(ctx, coll, new_coll_status, actor, previous_version): """Rule interface for processing vault status transition request. From 99de46b93ff0600eaebbba33a2c6a130924260b7 Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Wed, 20 Nov 2024 11:19:27 +0100 Subject: [PATCH 08/27] YDA-5992: write string to severLog instead of line since logs are in JSON --- groups.py | 12 ++++++------ iiDatamanagerPolicies.r | 6 +++--- iiFolderStatusTransitions.r | 14 +++++++------- iiMetadata.r | 8 ++++---- iiVault.r | 16 ++++++++-------- tools/add-epic-pids.r | 4 ++-- tools/check-metadata-for-schema-updates.r | 2 +- tools/create-revision-stores.r | 4 ++-- tools/generate-data-package-references.r | 14 +++++++------- tools/mail/mail-daily-report.r | 2 +- tools/mail/mail-weekly-report.r | 2 +- tools/metadata/export-vault-metadata.r | 4 ++-- tools/metadata/vault-correct-orcid-format.r | 2 +- tools/modify-data-object.r | 12 ++++++------ tools/process-vault-status-transitions.r | 2 +- tools/remove-orphan-vault-if-empty.r | 8 ++++---- util/log.py | 4 ++-- uuAcl.r | 12 ++++++------ uuBatch.r | 4 ++-- uuFunctions.r | 4 ++-- uuGroup.r | 10 +++++----- uuPolicies.r | 8 ++++---- uuSudoPolicies.r | 20 ++++++++++---------- uuTapeArchive.r | 4 ++-- 24 files changed, 89 insertions(+), 89 deletions(-) diff --git a/groups.py b/groups.py index 7bb383344..ac995813a 100644 --- a/groups.py +++ b/groups.py @@ -866,14 +866,14 @@ def rule_group_remove_external_user(ctx, username, userzone): """ if user.is_admin(ctx): ret = removeExternalUser(ctx, username, userzone) - ctx.writeLine("serverLog", "Status code for removing external user " - + username + "#" + userzone - + " : " + ret) + ctx.writeString("serverLog", "Status code for removing external user " + + username + "#" + userzone + + " : " + ret) return ret else: - ctx.writeLine("serverLog", "Cannot remove external user " - + username + "#" + userzone - + " : need admin permissions.") + ctx.writeString("serverLog", "Cannot remove external user " + + username + "#" + userzone + + " : need admin permissions.") return '0' diff --git a/iiDatamanagerPolicies.r b/iiDatamanagerPolicies.r index 4ebb0fd4a..9e31eec0a 100644 --- a/iiDatamanagerPolicies.r +++ b/iiDatamanagerPolicies.r @@ -36,7 +36,7 @@ iiDatamanagerPreSudoObjAclSet(*recursive, *accessLevel, *otherName, *objPath, *p } 
iiCanDatamanagerAclSet(*objPath, *actor, *otherName, *recursive, *accessLevel, *allowed, *reason); - writeLine("serverLog", "iiDatamanagerPreSudoObjAclSet: *reason"); + writeString("serverLog", "iiDatamanagerPreSudoObjAclSet: *reason"); if (*allowed) { succeed; } @@ -73,7 +73,7 @@ iiDatamanagerGroupFromVaultGroup(*vaultGroup, *datamanagerGroup) { iiCanDatamanagerAclSet(*objPath, *actor, *otherName, *recursive, *accessLevel, *allowed, *reason) { # When the datamanager needs write/read access to the root of a vault package this rule is run on (*otherName like "datamanager-*" && *objPath like regex "/[^/]+/home/" ++ IIVAULTPREFIX ++".*") { - writeLine("serverLog", "iiCanDatamanagerAclSet: <*actor> wants to obtain <*accessLevel> on <*objPath>"); + writeString("serverLog", "iiCanDatamanagerAclSet: <*actor> wants to obtain <*accessLevel> on <*objPath>"); if (*accessLevel != "write" && *accessLevel != "read") { *allowed = false; *reason = "A datamanager can only obtain or revoke write access for the datamanager group to a vault package"; @@ -107,7 +107,7 @@ iiCanDatamanagerAclSet(*objPath, *actor, *otherName, *recursive, *accessLevel, * # When a datamanager wants to grant or revoke read access for a research or read group in the vault, this rule will run on (*objPath like regex "/[^/]+/home/" ++ IIVAULTPREFIX ++".*") { - writeLine("serverLog", "iiCanDatamanagerAclSet: <*actor> wants to set <*accessLevel> for <*otherName> on <*objPath>"); + writeString("serverLog", "iiCanDatamanagerAclSet: <*actor> wants to set <*accessLevel> for <*otherName> on <*objPath>"); if (*accessLevel != "read" && *accessLevel != "null") { *allowed = false; *reason = "A datamanager can only grant write or read access or revoke access in the vault."; diff --git a/iiFolderStatusTransitions.r b/iiFolderStatusTransitions.r index 35d9c3afe..bb7f0bf12 100644 --- a/iiFolderStatusTransitions.r +++ b/iiFolderStatusTransitions.r @@ -140,7 +140,7 @@ iiFolderLockChange(*rootCollection, *lockIt, *status){ msiAddKeyVal(*buffer, IILOCKATTRNAME, *rootCollection) #DEBUG writeLine("ServerLog", "iiFolderLockChange: *buffer"); if (*lockIt == "lock") { - #DEBUG writeLine("serverLog", "iiFolderLockChange: recursive locking of *rootCollection"); + #DEBUG writeString("serverLog", "iiFolderLockChange: recursive locking of *rootCollection"); *direction = "forward"; uuTreeWalk(*direction, *rootCollection, "iiAddMetadataToItem", *buffer, *error); if (*error == 0) { @@ -152,7 +152,7 @@ iiFolderLockChange(*rootCollection, *lockIt, *status){ } } } else { - #DEBUG writeLine("serverLog", "iiFolderLockChange: recursive unlocking of *rootCollection"); + #DEBUG writeString("serverLog", "iiFolderLockChange: recursive unlocking of *rootCollection"); *direction="reverse"; uuTreeWalk(*direction, *rootCollection, "iiRemoveMetadataFromItem", *buffer, *error); if (*error == 0) { @@ -187,7 +187,7 @@ iitypeabbreviation(*itemIsCollection) = if *itemIsCollection then "-C" else "-d iiAddMetadataToItem(*itemParent, *itemName, *itemIsCollection, *buffer, *error) { *objPath = "*itemParent/*itemName"; *objType = iitypeabbreviation(*itemIsCollection); - #DEBUG writeLine("serverLog", "iiAddMetadataToItem: Setting *buffer on *objPath"); + #DEBUG writeString("serverLog", "iiAddMetadataToItem: Setting *buffer on *objPath"); *error = errorcode(msiAssociateKeyValuePairsToObj(*buffer, *objPath, *objType)); } @@ -202,14 +202,14 @@ iiAddMetadataToItem(*itemParent, *itemName, *itemIsCollection, *buffer, *error) iiRemoveMetadataFromItem(*itemParent, *itemName, *itemIsCollection, 
*buffer, *error) { *objPath = "*itemParent/*itemName"; *objType = iitypeabbreviation(*itemIsCollection); - #DEBUG writeLine("serverLog", "iiRemoveMetadataKeyFromItem: Removing *buffer on *objPath"); + #DEBUG writeString("serverLog", "iiRemoveMetadataKeyFromItem: Removing *buffer on *objPath"); *error = errormsg(msiRemoveKeyValuePairsFromObj(*buffer, *objPath, *objType), *msg); if (*error < 0) { - writeLine("serverLog", "iiRemoveMetadataFromItem: removing *buffer from *objPath failed with errorcode: *error"); - writeLine("serverLog", *msg); + writeString("serverLog", "iiRemoveMetadataFromItem: removing *buffer from *objPath failed with errorcode: *error"); + writeString("serverLog", *msg); if (*error == -819000) { # This happens when metadata was already removed or never there. - writeLine("serverLog", "iiRemoveMetadaFromItem: -819000 detected. Keep on trucking, this happens if metadata was already removed"); + writeString("serverLog", "iiRemoveMetadaFromItem: -819000 detected. Keep on trucking, this happens if metadata was already removed"); *error = 0; } } diff --git a/iiMetadata.r b/iiMetadata.r index 0bdfcf8e5..3ecde1642 100644 --- a/iiMetadata.r +++ b/iiMetadata.r @@ -11,7 +11,7 @@ # \param[in] prefix prefix of metadata to remov # iiRemoveAVUs(*coll, *prefix) { - #DEBUG writeLine("serverLog", "iiRemoveAVUs: Remove all AVU's from *coll prefixed with *prefix"); + #DEBUG writeString("serverLog", "iiRemoveAVUs: Remove all AVU's from *coll prefixed with *prefix"); msiString2KeyValPair("", *kvp); *prefix = *prefix ++ "%"; @@ -21,11 +21,11 @@ iiRemoveAVUs(*coll, *prefix) { *attr = *row.META_COLL_ATTR_NAME; *val = *row.META_COLL_ATTR_VALUE; if (*attr == *prev) { - #DEBUG writeLine("serverLog", "iiRemoveAVUs: Duplicate attribute " ++ *attr); + #DEBUG writeString("serverLog", "iiRemoveAVUs: Duplicate attribute " ++ *attr); *duplicates = cons((*attr, *val), *duplicates); } else { msiAddKeyVal(*kvp, *attr, *val); - #DEBUG writeLine("serverLog", "iiRemoveAVUs: Attribute=\"*attr\", Value=\"*val\" from *coll will be removed"); + #DEBUG writeString("serverLog", "iiRemoveAVUs: Attribute=\"*attr\", Value=\"*val\" from *coll will be removed"); *prev = *attr; } } @@ -35,7 +35,7 @@ iiRemoveAVUs(*coll, *prefix) { foreach(*pair in *duplicates) { (*attr, *val) = *pair; - #DEBUG writeLine("serverLog", "iiRemoveUserAVUs: Duplicate key Attribute=\"*attr\", Value=\"*val\" from *coll will be removed"); + #DEBUG writeString("serverLog", "iiRemoveUserAVUs: Duplicate key Attribute=\"*attr\", Value=\"*val\" from *coll will be removed"); msiString2KeyValPair("", *kvp); msiAddKeyVal(*kvp, *attr, *val); msiRemoveKeyValuePairsFromObj(*kvp, *coll, "-C"); diff --git a/iiVault.r b/iiVault.r index 132ad078d..41e2c8f6e 100644 --- a/iiVault.r +++ b/iiVault.r @@ -116,9 +116,9 @@ iiGenericSecureCopy(*argv, *origin_path, *err) { if (*intErr < 0 ) { msiGetStderrInExecCmdOut(*cmdExecOut, *stderr); msiGetStdoutInExecCmdOut(*cmdExecOut, *stdout); - writeLine("serverLog", "iiGenericSecureCopy: errorcode *err"); - writeLine("serverLog", *stderr); - writeLine("serverLog", *stdout); + writeString("serverLog", "iiGenericSecureCopy: errorcode *err"); + writeString("serverLog", *stderr); + writeString("serverLog", *stdout); } } @@ -142,13 +142,13 @@ iiCopyACLsFromParent(*path, *recursiveFlag) { if (*userFound) { if (*accessName == "own") { - writeLine("serverLog", "iiCopyACLsFromParent: granting own to <*userName> on <*path> with recursiveFlag <*recursiveFlag>"); + writeString("serverLog", "iiCopyACLsFromParent: granting own to <*userName> 
on <*path> with recursiveFlag <*recursiveFlag>"); msiSetACL(*recursiveFlag, "own", *userName, *path); } else if (*accessName == "read_object") { - writeLine("serverLog", "iiCopyACLsFromParent: granting read to <*userName> on <*path> with recursiveFlag <*recursiveFlag>"); + writeString("serverLog", "iiCopyACLsFromParent: granting read to <*userName> on <*path> with recursiveFlag <*recursiveFlag>"); msiSetACL(*recursiveFlag, "read", *userName, *path); } else if (*accessName == "modify_object") { - writeLine("serverLog", "iiCopyACLsFromParent: granting write to <*userName> on <*path> with recursiveFlag <*recursiveFlag>"); + writeString("serverLog", "iiCopyACLsFromParent: granting write to <*userName> on <*path> with recursiveFlag <*recursiveFlag>"); msiSetACL(*recursiveFlag, "write", *userName, *path); } } @@ -207,9 +207,9 @@ iiVaultGetActionActor(*folder, *actor, *actionActor) { foreach(*row in SELECT ORDER_DESC(META_COLL_MODIFY_TIME), COLL_ID, META_COLL_ATTR_VALUE WHERE META_COLL_ATTR_NAME = "org_vault_action_*collId") { *err = errorcode(msi_json_arrayops(*row.META_COLL_ATTR_VALUE, *actionActor, "get", 2)); if (*err < 0) { - writeLine("serverLog", "iiVaultGetActionActor: org_vault_action_*collId contains invalid JSON"); + writeString("serverLog", "iiVaultGetActionActor: org_vault_action_*collId contains invalid JSON"); } else { - writeLine("serverLog", "iiVaultGetActionActor: org_vault_action_*collId actor is *actionActor"); + writeString("serverLog", "iiVaultGetActionActor: org_vault_action_*collId actor is *actionActor"); } break; } diff --git a/tools/add-epic-pids.r b/tools/add-epic-pids.r index 67e3c2941..30f2f6808 100755 --- a/tools/add-epic-pids.r +++ b/tools/add-epic-pids.r @@ -26,9 +26,9 @@ addEpicPids { iiRegisterEpicPID(*path, *url, *pid, *httpCode); if (*httpCode == "200" || *httpCode == "201") { iiSaveEpicPID(*path, *url, *pid); - writeLine("serverLog", "Registered EPIC PID for *path"); + writeString("serverLog", "Registered EPIC PID for *path"); } else { - writeLine("serverLog", "Failed to register EPIC PID for *path, httpCode=*httpCode"); + writeString("serverLog", "Failed to register EPIC PID for *path, httpCode=*httpCode"); } } } diff --git a/tools/check-metadata-for-schema-updates.r b/tools/check-metadata-for-schema-updates.r index 778684de1..df57d0303 100644 --- a/tools/check-metadata-for-schema-updates.r +++ b/tools/check-metadata-for-schema-updates.r @@ -1,5 +1,5 @@ check { - writeLine("serverLog", "[METADATA] Start updating metadata."); + writeString("serverLog", "[METADATA] Start updating metadata."); rule_batch_transform_vault_metadata("0", *batch, *pause, *delay); } diff --git a/tools/create-revision-stores.r b/tools/create-revision-stores.r index 38893c685..7d549a0ea 100755 --- a/tools/create-revision-stores.r +++ b/tools/create-revision-stores.r @@ -64,7 +64,7 @@ createRevisionStores { iiRevisionCreate(*resource, *path, UUMAXREVISIONSIZE, *id); if (*id != "") { - writeLine("serverLog", "Revision created for *path with id: *id"); + writeString("serverLog", "Revision created for *path with id: *id"); } if (*objectReadPermission == 0) { @@ -106,7 +106,7 @@ createRevisionStores { iiRevisionCreate(*resource, *path, UUMAXREVISIONSIZE, *id); if (*id != "") { - writeLine("serverLog", "Revision created for *path with id: *id"); + writeString("serverLog", "Revision created for *path with id: *id"); } if (*objectReadPermission == 0) { diff --git a/tools/generate-data-package-references.r b/tools/generate-data-package-references.r index 17f972928..66aaac351 100644 --- 
a/tools/generate-data-package-references.r +++ b/tools/generate-data-package-references.r @@ -8,8 +8,8 @@ import genquery def main(rule_args, callback, rei): - callback.writeLine("serverLog", "Start generating Data Package References for vault packages") - callback.writeLine("serverLog", "------------------------------------") + callback.writeString("serverLog", "Start generating Data Package References for vault packages") + callback.writeString("serverLog", "------------------------------------") # Retrieve all vault packages. iter = genquery.row_iterator( @@ -32,20 +32,20 @@ def main(rule_args, callback, rei): # Generate Data Package Reference if data package has no reference. if not has_yoda_reference: - callback.writeLine("serverLog", "Data Package: {}".format(data_package)) + callback.writeString("serverLog", "Data Package: {}".format(data_package)) try: reference = str(uuid.uuid4()) out = callback.msiString2KeyValPair("org_data_package_reference={}".format(reference), 0) kvp = out['arguments'][1] callback.msiSetKeyValuePairsToObj(kvp, data_package, '-C') - callback.writeLine("serverLog", "Data Package Reference: {}".format(reference)) + callback.writeString("serverLog", "Data Package Reference: {}".format(reference)) except Exception: - callback.writeLine("serverLog", "Something went wrong generating the Data Package Reference.") + callback.writeString("serverLog", "Something went wrong generating the Data Package Reference.") - callback.writeLine("serverLog", "------------------------------------") + callback.writeString("serverLog", "------------------------------------") - callback.writeLine("serverLog", "Finished generating Data Package References for vault packages") + callback.writeString("serverLog", "Finished generating Data Package References for vault packages") INPUT null diff --git a/tools/mail/mail-daily-report.r b/tools/mail/mail-daily-report.r index 626b3008f..9155f8346 100644 --- a/tools/mail/mail-daily-report.r +++ b/tools/mail/mail-daily-report.r @@ -15,7 +15,7 @@ mail_daily_report msiCloseGenQuery(*GenQInpCount, *GenQOutCount); if (*count==0) { - writeLine("serverLog", "[EMAIL] No daily notification mail was sent out, no users are subscribed"); + writeString("serverLog", "[EMAIL] No daily notification mail was sent out, no users are subscribed"); succeed; } diff --git a/tools/mail/mail-weekly-report.r b/tools/mail/mail-weekly-report.r index 54d37c244..25a3a2311 100644 --- a/tools/mail/mail-weekly-report.r +++ b/tools/mail/mail-weekly-report.r @@ -15,7 +15,7 @@ mail_weekly_report msiCloseGenQuery(*GenQInpCount, *GenQOutCount); if (*count==0) { - writeLine("serverLog", "[EMAIL] No weekly notification mail was sent out, no users are subscribed"); + writeString("serverLog", "[EMAIL] No weekly notification mail was sent out, no users are subscribed"); succeed; } diff --git a/tools/metadata/export-vault-metadata.r b/tools/metadata/export-vault-metadata.r index 1786121cd..0c455812b 100644 --- a/tools/metadata/export-vault-metadata.r +++ b/tools/metadata/export-vault-metadata.r @@ -173,7 +173,7 @@ def main(rule_args, ctx, rei): metadata_export = OrderedDict([]) for (path, status) in package_statuses: try: - ctx.writeLine("serverLog", "[export] Collecting metadata for vault data package {}".format(path)) + ctx.writeString("serverLog", "[export] Collecting metadata for vault data package {}".format(path)) vault_metadata = OrderedDict() # Path @@ -209,7 +209,7 @@ def main(rule_args, ctx, rei): metadata_export[path] = vault_metadata except Exception: - 
ctx.writeLine("serverLog", "[export] Error collecting metadata for vault data package {}".format(path)) + ctx.writeString("serverLog", "[export] Error collecting metadata for vault data package {}".format(path)) ctx.writeLine("stdout", json.dumps(metadata_export, indent=4)) INPUT null diff --git a/tools/metadata/vault-correct-orcid-format.r b/tools/metadata/vault-correct-orcid-format.r index e57323aee..5f07f4e7f 100644 --- a/tools/metadata/vault-correct-orcid-format.r +++ b/tools/metadata/vault-correct-orcid-format.r @@ -3,7 +3,7 @@ # Correct vault data package metadata containing invalid ORCID person identifiers. # check { - writeLine("serverLog", "[METADATA] Start correcting ORCID format in person identifiers."); + writeString("serverLog", "[METADATA] Start correcting ORCID format in person identifiers."); rule_batch_vault_metadata_correct_orcid_format("0", *batch, *pause, *delay, *dryrun); } diff --git a/tools/modify-data-object.r b/tools/modify-data-object.r index 57abadf85..ce786a974 100644 --- a/tools/modify-data-object.r +++ b/tools/modify-data-object.r @@ -243,7 +243,7 @@ def preconditions_for_data_object(data, run_type, dry_run, callback): callback.writeLine("stdout", "Use case 1 - Data ID: " + data['data_id'] + ", Path: " + logical_path) if dry_run == 'False': - callback.writeLine("serverLog", "Modifying data object: " + data['data_id'] + " on path: " + logical_path + " for use case 1.") + callback.writeString("serverLog", "Modifying data object: " + data['data_id'] + " on path: " + logical_path + " for use case 1.") status = modify_data_object(data['data_id'], data['data_repl_num'], data['expected_data_path'], dry_run, callback) if status == '': @@ -254,7 +254,7 @@ def preconditions_for_data_object(data, run_type, dry_run, callback): callback.writeLine("stdout", "Use case 2 - Data ID: " + data['data_id'] + ", Path: " + logical_path) if dry_run == 'False': - callback.writeLine("serverLog", "Modifying data object: " + data['data_id'] + " on path: " + logical_path + " for use case 2.") + callback.writeString("serverLog", "Modifying data object: " + data['data_id'] + " on path: " + logical_path + " for use case 2.") status = modify_data_object(data['data_id'], data['data_repl_num'], data['expected_data_path'], dry_run, callback) if status == '': @@ -269,7 +269,7 @@ def preconditions_for_data_object(data, run_type, dry_run, callback): callback.writeLine("stdout", "Use case 3 - Data ID: " + data['data_id'] + ", Path: " + logical_path) if dry_run == 'False': - callback.writeLine("serverLog", "Unregistering replica with data id: " + data['data_id'] + " on path: " + logical_path + " for use case 3.") + callback.writeString("serverLog", "Unregistering replica with data id: " + data['data_id'] + " on path: " + logical_path + " for use case 3.") if len(actual_replicas_list) == 1: # Use scope = object for unregistering the replica @@ -294,7 +294,7 @@ def modify_data_object(data_id, repl_num, data_path, dry_run, callback): """Modify the replica with correct data path.""" if dry_run == 'False': try: - callback.writeLine("serverLog", "Subprocess - iadmin - data_path: " + data_path) + callback.writeString("serverLog", "Subprocess - iadmin - data_path: " + data_path) status = subprocess.check_output(['iadmin', 'modrepl', 'data_id', data_id, 'replica_number', repl_num, 'DATA_PATH', data_path], stderr=subprocess.STDOUT) except Exception as e: status = e.output[e.output.find("ERROR:"):].rstrip() @@ -313,13 +313,13 @@ def unregister_replica(repl_num, logical_path, dry_run, callback, scope='replica if 
dry_run == 'False': if scope == 'object': try: - callback.writeLine("serverLog", "Subprocess - iunreg - object scope - logical_path: " + logical_path) + callback.writeString("serverLog", "Subprocess - iunreg - object scope - logical_path: " + logical_path) status = subprocess.check_output(['iunreg', logical_path], stderr=subprocess.STDOUT) except Exception as e: status = e.output[e.output.find("ERROR:"):].rstrip() else: try: - callback.writeLine("serverLog", "Subprocess - iunreg - replica scope - logical_path: " + logical_path) + callback.writeString("serverLog", "Subprocess - iunreg - replica scope - logical_path: " + logical_path) status = subprocess.check_output(['iunreg', '-n', repl_num, '-N', '1', logical_path], stderr=subprocess.STDOUT) except Exception as e: status = e.output[e.output.find("ERROR:"):].rstrip() diff --git a/tools/process-vault-status-transitions.r b/tools/process-vault-status-transitions.r index ddce4228e..1697ef9bd 100644 --- a/tools/process-vault-status-transitions.r +++ b/tools/process-vault-status-transitions.r @@ -44,7 +44,7 @@ processVaultActions() { *pending = true; } } else { - writeLine("serverLog", "Error in vault transition: unable to find folder *folder, which was referred to in metadata from *collName. Ignoring ..."); + writeString("serverLog", "Error in vault transition: unable to find folder *folder, which was referred to in metadata from *collName. Ignoring ..."); } diff --git a/tools/remove-orphan-vault-if-empty.r b/tools/remove-orphan-vault-if-empty.r index 444aa008d..31e625e4e 100644 --- a/tools/remove-orphan-vault-if-empty.r +++ b/tools/remove-orphan-vault-if-empty.r @@ -37,11 +37,11 @@ removeOrphanVaultIfEmpty { # The vault belonged to a research group, of which a revision collection still exists. # Remove the revision coll as well. - writeLine("serverLog", "Orphan revision collection '*revisionColl' will be removed"); + writeString("serverLog", "Orphan revision collection '*revisionColl' will be removed"); # Add ourselves (rods) as an owner. msiSudoObjAclSet("recursive", "own", uuClientFullName, *revisionColl, ""); msiRmColl(*revisionColl, "forceFlag=", *error); - writeLine("serverLog", "Orphan revision collection '*revisionColl' was removed"); + writeString("serverLog", "Orphan revision collection '*revisionColl' was removed"); } # Now remove the vault group, if it is empty. 
@@ -50,9 +50,9 @@ removeOrphanVaultIfEmpty { if (*vaultIsEmpty) { msiSudoGroupRemove(*vaultName, ""); - writeLine("serverLog", "Empty orphan vault '*vaultName' was removed"); + writeString("serverLog", "Empty orphan vault '*vaultName' was removed"); } else { - writeLine("serverLog", "Orphan vault '*vaultName' was not removed as it is non-empty"); + writeString("serverLog", "Orphan vault '*vaultName' was not removed as it is non-empty"); } } diff --git a/util/log.py b/util/log.py index 9b5d70e36..7679b7023 100644 --- a/util/log.py +++ b/util/log.py @@ -39,9 +39,9 @@ def _write(ctx, message): :param message: Message to write to log """ if type(ctx) is rule.Context: - ctx.writeLine('serverLog', '{{{}#{}}} {}'.format(*list(user.user_and_zone(ctx)) + [message])) + ctx.writeString('serverLog', '{{{}#{}}} {}'.format(*list(user.user_and_zone(ctx)) + [message])) else: - ctx.writeLine('serverLog', message) + ctx.writeString('serverLog', message) def debug(ctx, message): diff --git a/uuAcl.r b/uuAcl.r index f7afa1682..4b58452b5 100644 --- a/uuAcl.r +++ b/uuAcl.r @@ -120,25 +120,25 @@ uuEnforceGroupAcl(*path) { uuAclListOfDataObj(*path, *aclList); } - #DEBUG writeLine("serverLog", "uuEnforceGroupAcl: aclList -> *aclList"); + #DEBUG writeString("serverLog", "uuEnforceGroupAcl: aclList -> *aclList"); uuAclListOfColl("/*rodsZone/home/*groupName", *groupAclList); - #DEBUG writeLine("serverLog", "uuEnforceGroupAcl: groupAclList -> *groupAclList"); + #DEBUG writeString("serverLog", "uuEnforceGroupAcl: groupAclList -> *groupAclList"); *aclsToRemove = uuAclListSetDiff(*aclList, *groupAclList, true); - #DEBUG writeLine("serverLog", "uuEnforceGroupAcl: aclsToRemove -> *aclsToRemove"); + #DEBUG writeString("serverLog", "uuEnforceGroupAcl: aclsToRemove -> *aclsToRemove"); *aclsToAdd = uuAclListSetDiff(*groupAclList, *aclList, false); - #DEBUG writeLine("serverLog", "uuEnforceGroupAcl: aclsToAdd -> *aclsToAdd"); + #DEBUG writeString("serverLog", "uuEnforceGroupAcl: aclsToAdd -> *aclsToAdd"); *recurse = if *objType == "-c" then "recursive" else "default" foreach(*acl in *aclsToAdd) { uuAclToStrings(*acl, *userName, *accessLevel); - #DEBUG writeLine("serverLog", "uuEnforceGroupAcl: Setting ACL *accessLevel *userName *path"); + #DEBUG writeString("serverLog", "uuEnforceGroupAcl: Setting ACL *accessLevel *userName *path"); msiSetACL(*recurse, *accessLevel, *userName, *path); } foreach(*acl in *aclsToRemove) { uuAclToStrings(*acl, *userName, *accessLevel); - #DEBUG writeLine("serverLog", "uuEnforceGroupAcl: Removing ACL *accessLevel *userName *path"); + #DEBUG writeString("serverLog", "uuEnforceGroupAcl: Removing ACL *accessLevel *userName *path"); msiSetACL(*recurse, "null", *userName, *path); } } diff --git a/uuBatch.r b/uuBatch.r index 24586fc8a..225469035 100644 --- a/uuBatch.r +++ b/uuBatch.r @@ -30,13 +30,13 @@ verifyChecksumData(*path, *chksum, *update) { } if (*chksum == "") { - writeLine("serverLog", "*path: no checksum"); + writeString("serverLog", "*path: no checksum"); if (*update != 0) { errorcode(msiDataObjChksum(*path, "ChksumAll=", *status)); } } else { msiSubstr(*chksum, "0", "5", *type); - writeLine("serverLog", "*path: *chksum"); + writeString("serverLog", "*path: *chksum"); if (*type == "sha2:") { errorcode(msiDataObjChksum(*path, "verifyChksum=", *status)); } else if (*update != 0) { diff --git a/uuFunctions.r b/uuFunctions.r index b014d8a7a..34f204446 100644 --- a/uuFunctions.r +++ b/uuFunctions.r @@ -90,7 +90,7 @@ uuReplicateBatch(*verbose, *balance_id_min, *balance_id_max, *batch_size_limit, # 
Disable msiSendMail. msiSendMail(*xtoAddr,*xsubjectLine,*xbody){ - writeLine('serverLog','WARNING: msiSendMail is disabled'); + writeString('serverLog','WARNING: msiSendMail is disabled'); } # @@ -105,7 +105,7 @@ wrap_msi_file_checksum(*file, *resc, *sum) { } if (*host == "") { *result = "-1"; - writeLine("serverLog","Could not find resource location for *resc when invoking file checksum microservice. Resource probably does not exist."); + writeString("serverLog","Could not find resource location for *resc when invoking file checksum microservice. Resource probably does not exist."); } else { remote(*host, "null") { *result = errorcode(msi_file_checksum(*file, *resc, *sum)); diff --git a/uuGroup.r b/uuGroup.r index 6430f01b5..b59e3881a 100644 --- a/uuGroup.r +++ b/uuGroup.r @@ -675,10 +675,10 @@ uuGroupGetMemberType(*groupName, *user, *type) { # { #uuGetUserAndZone(*user, *userName, *userZone); - #writeLine("serverLog", "*user -> *userName # *userZone"); + #writeString("serverLog", "*user -> *userName # *userZone"); #uuGroupGetMembers(*groupName, true, true, *members); - #writeLine("serverLog", "*user -> *userName # *userZone"); + #writeString("serverLog", "*user -> *userName # *userZone"); # } # The above call to uuGroupGetMembers OVERWRITES *userName with a different @@ -974,15 +974,15 @@ uuGroupUserAdd(*groupName, *user, *creatorUser, *creatorZone, *status, *message) *message = "" rule_group_provision_external_user(*userName, *creatorUser, *creatorZone, *http_code, *message); if (*message != "") { - writeLine("serverLog", "[EXTERNAL USER] *message"); + writeString("serverLog", "[EXTERNAL USER] *message"); *status = *http_code; succeed; # Return here (fail would ruin the status and error message). } - writeLine("serverLog", "[EXTERNAL USER] User *userName added by $userNameClient on $rodsZoneClient on the behalf of *creatorUser on *creatorZone."); + writeString("serverLog", "[EXTERNAL USER] User *userName added by $userNameClient on $rodsZoneClient on the behalf of *creatorUser on *creatorZone."); } else { # Actor user is not allowed to do this action - writeLine("serverLog", "[EXTERNAL USER] Actor $userNameClient on $rodsZoneClient does not have sufficient permissions to create external user *userName"); + writeString("serverLog", "[EXTERNAL USER] Actor $userNameClient on $rodsZoneClient does not have sufficient permissions to create external user *userName"); succeed; # Return here (fail would ruin the status and error message). } } diff --git a/uuPolicies.r b/uuPolicies.r index 7e8dcba60..3fe5b8f68 100644 --- a/uuPolicies.r +++ b/uuPolicies.r @@ -100,7 +100,7 @@ acPostProcForDeleteUser { # Log removal of user. 
*actor = uuClientFullName; - writeLine("serverLog", "User *userName#*userZone is removed by *actor.") + writeString("serverLog", "User *userName#*userZone is removed by *actor.") } # Log auth requests to server log (reproduce behaviour before https://github.com/irods/irods/commit/70144d8251fdf0528da554d529952823b008211b) @@ -114,7 +114,7 @@ pep_api_auth_request_pre(*instanceName, *comm, *request) { *access_allowed = ''; rule_check_anonymous_access_allowed(*client_addr, *access_allowed); if ( *access_allowed != "true" ) { - writeLine("serverLog", "Refused access to anonymous account from address *client_addr."); + writeString("serverLog", "Refused access to anonymous account from address *client_addr."); failmsg(-1, "Refused access to anonymous account from address *client_addr."); } } @@ -122,11 +122,11 @@ pep_api_auth_request_pre(*instanceName, *comm, *request) { *max_connections_exceeded = ''; rule_check_max_connections_exceeded(*max_connections_exceeded); if ( *max_connections_exceeded == "true" ) { - writeLine("serverLog", "Refused access for *user_name#*zone_name, max connections exceeded."); + writeString("serverLog", "Refused access for *user_name#*zone_name, max connections exceeded."); failmsg(-1, "Refused access for *user_name#*zone_name, max connections exceeded."); } - writeLine("serverLog", "{*user_name#*zone_name} Agent process started from *client_addr"); + writeString("serverLog", "{*user_name#*zone_name} Agent process started from *client_addr"); } # Enforce server to use TLS encryption. diff --git a/uuSudoPolicies.r b/uuSudoPolicies.r index b11c0a33f..1040e0728 100644 --- a/uuSudoPolicies.r +++ b/uuSudoPolicies.r @@ -38,25 +38,25 @@ # Implementation 1: Allow access only to rodsadmin. {{{ acPreSudoUserAdd(*userName, *initialAttr, *initialValue, *initialUnit, *policyKv) { - writeLine("serverLog", "In acPreSudoUserAdd, user is <*userName>, actor is <$userNameClient#$rodsZoneClient>"); + writeString("serverLog", "In acPreSudoUserAdd, user is <*userName>, actor is <$userNameClient#$rodsZoneClient>"); uuGetUserType(uuClientFullName, *userType); if (*userType != "rodsadmin") { fail; } } acPreSudoUserRemove(*userName, *policyKv) { - writeLine("serverLog", "In acPreSudoUserRemove, user is <*userName>, actor is <$userNameClient#$rodsZoneClient>"); + writeString("serverLog", "In acPreSudoUserRemove, user is <*userName>, actor is <$userNameClient#$rodsZoneClient>"); uuGetUserType(uuClientFullName, *userType); if (*userType != "rodsadmin") { fail; } } acPreSudoGroupAdd(*groupName, *initialAttr, *initialValue, *initialUnit, *policyKv) { - writeLine("serverLog", "In acPreSudoGroupAdd, group is <*groupName>, actor is <$userNameClient#$rodsZoneClient>"); + writeString("serverLog", "In acPreSudoGroupAdd, group is <*groupName>, actor is <$userNameClient#$rodsZoneClient>"); uuGetUserType(uuClientFullName, *userType); if (*userType != "rodsadmin") { fail; } } acPreSudoGroupRemove(*groupName, *policyKv) { - writeLine("serverLog", "In acPreSudoGroupRemove, group is <*groupName>, actor is <$userNameClient#$rodsZoneClient>"); + writeString("serverLog", "In acPreSudoGroupRemove, group is <*groupName>, actor is <$userNameClient#$rodsZoneClient>"); uuGetUserType(uuClientFullName, *userType); if (*userType != "rodsadmin") { fail; } # User is admin, check whether the vault has no datapackages. 
@@ -64,37 +64,37 @@ acPreSudoGroupRemove(*groupName, *policyKv) { } acPreSudoGroupMemberAdd(*groupName, *userName, *policyKv) { - writeLine("serverLog", "In acPreSudoGroupMemberAdd, group is <*groupName>, user is <*userName>, actor is <$userNameClient#$rodsZoneClient>"); + writeString("serverLog", "In acPreSudoGroupMemberAdd, group is <*groupName>, user is <*userName>, actor is <$userNameClient#$rodsZoneClient>"); uuGetUserType(uuClientFullName, *userType); if (*userType != "rodsadmin") { fail; } } acPreSudoGroupMemberRemove(*groupName, *userName, *policyKv) { - writeLine("serverLog", "In acPreSudoGroupMemberRemove, group is <*groupName>, user is <*userName>, actor is <$userNameClient#$rodsZoneClient>"); + writeString("serverLog", "In acPreSudoGroupMemberRemove, group is <*groupName>, user is <*userName>, actor is <$userNameClient#$rodsZoneClient>"); uuGetUserType(uuClientFullName, *userType); if (*userType != "rodsadmin") { fail; } } acPreSudoObjAclSet(*recursive, *accessLevel, *otherName, *objPath, *policyKv) { - writeLine("serverLog", "In acPreSudoObjAclSet, recursive is <*recursive>, access level is <*accessLevel>, other name is <*otherName>, object path is <*objPath>, actor is <$userNameClient#$rodsZoneClient>"); + writeString("serverLog", "In acPreSudoObjAclSet, recursive is <*recursive>, access level is <*accessLevel>, other name is <*otherName>, object path is <*objPath>, actor is <$userNameClient#$rodsZoneClient>"); uuGetUserType(uuClientFullName, *userType); if (*userType != "rodsadmin") { fail; } } acPreSudoObjMetaSet(*objName, *objType, *attribute, *value, *unit, *policyKv) { - writeLine("serverLog", "In acPreSudoObjMetaSet, objname is <*objName>, objType is <*objType>, attribute is <*attribute>, value is <*value>, unit is <*unit> actor is <$userNameClient#$rodsZoneClient>"); + writeString("serverLog", "In acPreSudoObjMetaSet, objname is <*objName>, objType is <*objType>, attribute is <*attribute>, value is <*value>, unit is <*unit> actor is <$userNameClient#$rodsZoneClient>"); uuGetUserType(uuClientFullName, *userType); if (*userType != "rodsadmin") { fail; } } acPreSudoObjMetaAdd(*objName, *objType, *attribute, *value, *unit, *policyKv) { - writeLine("serverLog", "In acPreSudoObjMetaAdd, objname is <*objName>, objType is <*objType>, attribute is <*attribute>, value is <*value>, unit is <*unit> actor is <$userNameClient#$rodsZoneClient>"); + writeString("serverLog", "In acPreSudoObjMetaAdd, objname is <*objName>, objType is <*objType>, attribute is <*attribute>, value is <*value>, unit is <*unit> actor is <$userNameClient#$rodsZoneClient>"); uuGetUserType(uuClientFullName, *userType); if (*userType != "rodsadmin") { fail; } } acPreSudoObjMetaRemove(*objName, *objType, *wildcards, *attribute, *value, *unit, *policyKv) { - writeLine("serverLog", "In acPreSudoObjMetaRemove, objname is <*objName>, objType is <*objType>, attribute is <*attribute>, value is <*value>, unit is <*unit> actor is <$userNameClient#$rodsZoneClient>"); + writeString("serverLog", "In acPreSudoObjMetaRemove, objname is <*objName>, objType is <*objType>, attribute is <*attribute>, value is <*value>, unit is <*unit> actor is <$userNameClient#$rodsZoneClient>"); uuGetUserType(uuClientFullName, *userType); if (*userType != "rodsadmin") { fail; } } diff --git a/uuTapeArchive.r b/uuTapeArchive.r index ce26b3d3f..9f1a16506 100644 --- a/uuTapeArchive.r +++ b/uuTapeArchive.r @@ -12,7 +12,7 @@ dmput(*data, *hostAddress, *dmfs) { #if (*dmfs not like "DUL" && *dmfs not like "OFL" && *dmfs not like "UNM" && *dmfs not like 
"MIG") { msiExecCmd("dmput", *data, *hostAddress, "", "", *dmRes); msiGetStdoutInExecCmdOut(*dmRes, *dmStat); - writeLine("serverLog", "DEBUG: $userNameClient:$clientAddr - Archive dmput started: *data. Returned Status - *dmStat."); + writeString("serverLog", "DEBUG: $userNameClient:$clientAddr - Archive dmput started: *data. Returned Status - *dmStat."); #} } @@ -26,7 +26,7 @@ dmget(*data, *hostAddress, *dmfs) { #if (*dmfs not like "DUL" && *dmfs not like "REG" && *dmfs not like "UNM" && *dmfs not like "MIG") { msiExecCmd("dmget", *data, *hostAddress, "", "", *dmRes); msiGetStdoutInExecCmdOut(*dmRes, *dmStat); - writeLine("serverLog", "DEBUG: $userNameClient:$clientAddr - Archive dmget started: *data. Returned Status - *dmStat."); + writeString("serverLog", "DEBUG: $userNameClient:$clientAddr - Archive dmget started: *data. Returned Status - *dmStat."); #} } From 01c72a08ee518afb2b10de091c6ab35098791a06 Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Fri, 22 Nov 2024 10:01:43 +0100 Subject: [PATCH 09/27] YDA-5992: upgrade workflows to Python 3.12 --- .github/workflows/python.yml | 4 +- .github/workflows/unit-tests.yml | 16 ++--- __init__.py | 5 +- requirements.txt | 7 +- unit-tests/test_util_pathutil.py | 102 ++++++++++++++--------------- unit-tests/test_util_yoda_names.py | 60 ++++++++--------- util/api.py | 6 +- 7 files changed, 102 insertions(+), 98 deletions(-) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 2d8a5923d..89a7b1704 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -4,10 +4,10 @@ on: [push, pull_request] jobs: lint: - runs-on: ubuntu-20.04 + runs-on: ubuntu-24.04 strategy: matrix: - python-version: ['3.8', '3.9', '3.10', '3.11'] + python-version: ['3.11', '3.12'] steps: - uses: actions/checkout@v4 - name: Set up Python diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index cf8c6c917..8a3519c1f 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -10,31 +10,29 @@ on: jobs: unit-tests: - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 strategy: matrix: - python-version: [2.7] + python-version: ['3.12'] steps: - uses: actions/checkout@v4 - - name: Set up Python - # setup-python stopped supporting Python 2.7, use https://github.com/MatteoH2O1999/setup-python - uses: MatteoH2O1999/setup-python@v3.2.1 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - allow-build: info - cache-build: true + architecture: x64 - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install -r requirements.txt - python -m pip install coveragepy==1.6.0 + python -m pip install coverage==7.6.7 - name: Run unit tests run: | cd unit-tests - coverage run --omit=test_*.py,unit_tests.py --source=$(cd .. 
; pwd),$(cd ../util ; pwd) -m unittest unit_tests + export PYTHONPATH=$(cd ../util ; pwd):$PYTHONPATH + coverage run --omit=test_*.py,unit_tests.py -m unittest unit_tests - name: Report code coverage run: | diff --git a/__init__.py b/__init__.py index 1172214ca..fc16f3409 100644 --- a/__init__.py +++ b/__init__.py @@ -1,6 +1,6 @@ """Yoda core ruleset containing iRODS and Python rules and policies useful for all Yoda environments.""" -__version__ = '1.10.0' +__version__ = '2.0.0' __copyright__ = 'Copyright (c) 2015-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' @@ -22,6 +22,9 @@ + ', Jelmer Zondergeld') # (in alphabetical order) +import sys +sys.path.extend([ '/etc/irods/rules_uu', '/etc/irods/rules_uu/util' ]) + # Import all modules containing rules into the package namespace, # so that they become visible to iRODS. diff --git a/requirements.txt b/requirements.txt index bfb3f4ba1..58455725c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -setuptools==44.0.0 +setuptools==75.6.0 MarkupSafe==1.1.1 enum34==1.1.10 Jinja2==2.11.3 @@ -8,9 +8,8 @@ configparser==4.0.2 zipp==1.2.0 contextlib2==0.6.0.post1 importlib-metadata==2.1.1 -jsonschema==3.2.0 +jsonschema==4.19.1 pathvalidate==0.29.1 -lxml==4.6.5 requests_cache==0.5.2 irods-avu-json==2.2.0 python2-secrets==1.0.5 @@ -18,7 +17,7 @@ python-dateutil==2.7.0 python-magic==0.4.27 certifi==2021.10.8 execnet==1.9.0 -deepdiff==3.3.0 +deepdiff==8.0.1 persist-queue==0.8.1 redis==3.5.3 psutil==5.9.6 diff --git a/unit-tests/test_util_pathutil.py b/unit-tests/test_util_pathutil.py index 9e6d406ca..3c2d73ded 100644 --- a/unit-tests/test_util_pathutil.py +++ b/unit-tests/test_util_pathutil.py @@ -15,112 +15,112 @@ class UtilPathutilTest(TestCase): def test_chop(self): output = chop("") - self.assertEquals(output, ('/', '')) + self.assertEqual(output, ('/', '')) output = chop("/") - self.assertEquals(output, ('/', '')) + self.assertEqual(output, ('/', '')) output = chop("/tempZone") - self.assertEquals(output, ('', 'tempZone')) + self.assertEqual(output, ('', 'tempZone')) output = chop("/tempZone/yoda") - self.assertEquals(output, ('/tempZone', 'yoda')) + self.assertEqual(output, ('/tempZone', 'yoda')) output = chop("/tempZone/home") - self.assertEquals(output, ('/tempZone', 'home')) + self.assertEqual(output, ('/tempZone', 'home')) output = chop("/tempZone/home/rods") - self.assertEquals(output, ('/tempZone/home', 'rods')) + self.assertEqual(output, ('/tempZone/home', 'rods')) output = chop("/tempZone/home/research-test") - self.assertEquals(output, ('/tempZone/home', 'research-test')) + self.assertEqual(output, ('/tempZone/home', 'research-test')) output = chop("/tempZone/home/research-test/test") - self.assertEquals(output, ('/tempZone/home/research-test', 'test')) + self.assertEqual(output, ('/tempZone/home/research-test', 'test')) output = chop("/tempZone/home/research-test/test/file.txt") - self.assertEquals(output, ('/tempZone/home/research-test/test', 'file.txt')) + self.assertEqual(output, ('/tempZone/home/research-test/test', 'file.txt')) def test_dirname(self): output = dirname("") - self.assertEquals(output, '/') + self.assertEqual(output, '/') output = dirname("/") - self.assertEquals(output, '/') + self.assertEqual(output, '/') output = dirname("/tempZone") - self.assertEquals(output, '') + self.assertEqual(output, '') output = dirname("/tempZone/yoda") - self.assertEquals(output, '/tempZone') + self.assertEqual(output, '/tempZone') output = dirname("/tempZone/home") - self.assertEquals(output, '/tempZone') + 
self.assertEqual(output, '/tempZone') output = dirname("/tempZone/home/rods") - self.assertEquals(output, '/tempZone/home') + self.assertEqual(output, '/tempZone/home') output = dirname("/tempZone/home/research-test") - self.assertEquals(output, '/tempZone/home') + self.assertEqual(output, '/tempZone/home') output = dirname("/tempZone/home/research-test/test") - self.assertEquals(output, '/tempZone/home/research-test') + self.assertEqual(output, '/tempZone/home/research-test') output = dirname("/tempZone/home/research-test/test/file.txt") - self.assertEquals(output, '/tempZone/home/research-test/test') + self.assertEqual(output, '/tempZone/home/research-test/test') def test_basename(self): output = basename("") - self.assertEquals(output, '') + self.assertEqual(output, '') output = basename("/") - self.assertEquals(output, '') + self.assertEqual(output, '') output = basename("/tempZone") - self.assertEquals(output, 'tempZone') + self.assertEqual(output, 'tempZone') output = basename("/tempZone/yoda") - self.assertEquals(output, 'yoda') + self.assertEqual(output, 'yoda') output = basename("/tempZone/home") - self.assertEquals(output, 'home') + self.assertEqual(output, 'home') output = basename("/tempZone/home/rods") - self.assertEquals(output, 'rods') + self.assertEqual(output, 'rods') output = basename("/tempZone/home/research-test") - self.assertEquals(output, 'research-test') + self.assertEqual(output, 'research-test') output = basename("/tempZone/home/research-test/test") - self.assertEquals(output, 'test') + self.assertEqual(output, 'test') output = basename("/tempZone/home/research-test/test/file.txt") - self.assertEquals(output, 'file.txt') + self.assertEqual(output, 'file.txt') def test_chopext(self): output = chopext("") - self.assertEquals(output, ['']) + self.assertEqual(output, ['']) output = chopext("/") - self.assertEquals(output, ['/']) + self.assertEqual(output, ['/']) output = chopext("/tempZone") - self.assertEquals(output, ['/tempZone']) + self.assertEqual(output, ['/tempZone']) output = chopext("/tempZone/yoda") - self.assertEquals(output, ['/tempZone/yoda']) + self.assertEqual(output, ['/tempZone/yoda']) output = chopext("/tempZone/home") - self.assertEquals(output, ['/tempZone/home']) + self.assertEqual(output, ['/tempZone/home']) output = chopext("/tempZone/home/rods") - self.assertEquals(output, ['/tempZone/home/rods']) + self.assertEqual(output, ['/tempZone/home/rods']) output = chopext("/tempZone/home/research-test") - self.assertEquals(output, ['/tempZone/home/research-test']) + self.assertEqual(output, ['/tempZone/home/research-test']) output = chopext("/tempZone/home/research-test/test") - self.assertEquals(output, ['/tempZone/home/research-test/test']) + self.assertEqual(output, ['/tempZone/home/research-test/test']) output = chopext("/tempZone/home/research-test/test/file.txt") - self.assertEquals(output, ['/tempZone/home/research-test/test/file', 'txt']) + self.assertEqual(output, ['/tempZone/home/research-test/test/file', 'txt']) def test_info(self): output = info("") - self.assertEquals(output, (Space.OTHER, '', '', '')) + self.assertEqual(output, (Space.OTHER, '', '', '')) output = info("/") - self.assertEquals(output, (Space.OTHER, '', '', '')) + self.assertEqual(output, (Space.OTHER, '', '', '')) output = info("/tempZone") - self.assertEquals(output, (Space.OTHER, 'tempZone', '', '')) + self.assertEqual(output, (Space.OTHER, 'tempZone', '', '')) output = info("/tempZone/yoda") - self.assertEquals(output, (Space.OTHER, 'tempZone', '', 'yoda')) + 
self.assertEqual(output, (Space.OTHER, 'tempZone', '', 'yoda')) output = info("/tempZone/home") - self.assertEquals(output, (Space.OTHER, 'tempZone', '', 'home')) + self.assertEqual(output, (Space.OTHER, 'tempZone', '', 'home')) output = info("/tempZone/home/rods") - self.assertEquals(output, (Space.OTHER, 'tempZone', 'rods', '')) + self.assertEqual(output, (Space.OTHER, 'tempZone', 'rods', '')) output = info("/tempZone/home/research-test") - self.assertEquals(output, (Space.RESEARCH, 'tempZone', 'research-test', '')) + self.assertEqual(output, (Space.RESEARCH, 'tempZone', 'research-test', '')) output = info("/tempZone/home/research-test/test") - self.assertEquals(output, (Space.RESEARCH, 'tempZone', 'research-test', 'test')) + self.assertEqual(output, (Space.RESEARCH, 'tempZone', 'research-test', 'test')) output = info("/tempZone/home/research-test/test/file.txt") - self.assertEquals(output, (Space.RESEARCH, 'tempZone', 'research-test', 'test/file.txt')) + self.assertEqual(output, (Space.RESEARCH, 'tempZone', 'research-test', 'test/file.txt')) output = info("/tempZone/home/vault-test") - self.assertEquals(output, (Space.VAULT, 'tempZone', 'vault-test', '')) + self.assertEqual(output, (Space.VAULT, 'tempZone', 'vault-test', '')) output = info("/tempZone/home/datamanager-test") - self.assertEquals(output, (Space.DATAMANAGER, 'tempZone', 'datamanager-test', '')) + self.assertEqual(output, (Space.DATAMANAGER, 'tempZone', 'datamanager-test', '')) output = info("/tempZone/home/deposit-test") - self.assertEquals(output, (Space.DEPOSIT, 'tempZone', 'deposit-test', '')) + self.assertEqual(output, (Space.DEPOSIT, 'tempZone', 'deposit-test', '')) output = info("/tempZone/home/intake-test") - self.assertEquals(output, (Space.INTAKE, 'tempZone', 'intake-test', '')) + self.assertEqual(output, (Space.INTAKE, 'tempZone', 'intake-test', '')) output = info("/tempZone/home/grp-intake-test") - self.assertEquals(output, (Space.INTAKE, 'tempZone', 'grp-intake-test', '')) + self.assertEqual(output, (Space.INTAKE, 'tempZone', 'grp-intake-test', '')) output = info("/tempZone/home/datarequests-test") - self.assertEquals(output, (Space.DATAREQUEST, 'tempZone', 'datarequests-test', '')) + self.assertEqual(output, (Space.DATAREQUEST, 'tempZone', 'datarequests-test', '')) diff --git a/unit-tests/test_util_yoda_names.py b/unit-tests/test_util_yoda_names.py index 4d2b79dcf..4dab5fcbd 100644 --- a/unit-tests/test_util_yoda_names.py +++ b/unit-tests/test_util_yoda_names.py @@ -14,41 +14,41 @@ class UtilYodaNamesTest(TestCase): def test_is_valid_category(self): - self.assertEquals(is_valid_category(""), False) - self.assertEquals(is_valid_category("foo"), True) - self.assertEquals(is_valid_category("foo123"), True) - self.assertEquals(is_valid_category("foo-bar"), True) - self.assertEquals(is_valid_category("foo_bar"), True) + self.assertEqual(is_valid_category(""), False) + self.assertEqual(is_valid_category("foo"), True) + self.assertEqual(is_valid_category("foo123"), True) + self.assertEqual(is_valid_category("foo-bar"), True) + self.assertEqual(is_valid_category("foo_bar"), True) def test_is_valid_subcategory(self): - self.assertEquals(is_valid_subcategory(""), False) - self.assertEquals(is_valid_subcategory("foo"), True) - self.assertEquals(is_valid_subcategory("foo123"), True) - self.assertEquals(is_valid_subcategory("foo-bar"), True) - self.assertEquals(is_valid_subcategory("foo_bar"), True) + self.assertEqual(is_valid_subcategory(""), False) + self.assertEqual(is_valid_subcategory("foo"), True) + 
self.assertEqual(is_valid_subcategory("foo123"), True) + self.assertEqual(is_valid_subcategory("foo-bar"), True) + self.assertEqual(is_valid_subcategory("foo_bar"), True) def test_is_valid_groupname(self): - self.assertEquals(is_valid_groupname(""), False) - self.assertEquals(is_valid_groupname("foo"), True) - self.assertEquals(is_valid_groupname("foo123"), True) - self.assertEquals(is_valid_groupname("foo-bar"), True) - self.assertEquals(is_valid_groupname("foo_bar"), False) - self.assertEquals(is_valid_groupname("a" * 63), True) - self.assertEquals(is_valid_groupname("a" * 64), False) + self.assertEqual(is_valid_groupname(""), False) + self.assertEqual(is_valid_groupname("foo"), True) + self.assertEqual(is_valid_groupname("foo123"), True) + self.assertEqual(is_valid_groupname("foo-bar"), True) + self.assertEqual(is_valid_groupname("foo_bar"), False) + self.assertEqual(is_valid_groupname("a" * 63), True) + self.assertEqual(is_valid_groupname("a" * 64), False) def test_is_email_username(self): - self.assertEquals(is_email_username("peter"), False) - self.assertEquals(is_email_username("peter@uu.nl"), True) + self.assertEqual(is_email_username("peter"), False) + self.assertEqual(is_email_username("peter@uu.nl"), True) def test_is_internal_user(self): - self.assertEquals(_is_internal_user("peter", ["uu.nl"]), True) - self.assertEquals(_is_internal_user("peter@uu.nl", ["uu.nl"]), True) - self.assertEquals(_is_internal_user("peter@vu.nl", ["uu.nl"]), False) - self.assertEquals(_is_internal_user("peter@buu.nl", ["uu.nl"]), False) - self.assertEquals(_is_internal_user("peter@uu.nl", ["buu.nl"]), False) - self.assertEquals(_is_internal_user("peter@uu.nl", ["*.uu.nl"]), True) - self.assertEquals(_is_internal_user("peter@vu.nl", ["*.uu.nl"]), False) - self.assertEquals(_is_internal_user("peter@buu.nl", ["*.uu.nl"]), False) - self.assertEquals(_is_internal_user("peter@cs.uu.nl", ["*.uu.nl"]), True) - self.assertEquals(_is_internal_user("peter@ai.cs.uu.nl", ["*.cs.uu.nl"]), True) - self.assertEquals(_is_internal_user("peter@ai.hum.uu.nl", ["*.cs.uu.nl"]), False) + self.assertEqual(_is_internal_user("peter", ["uu.nl"]), True) + self.assertEqual(_is_internal_user("peter@uu.nl", ["uu.nl"]), True) + self.assertEqual(_is_internal_user("peter@vu.nl", ["uu.nl"]), False) + self.assertEqual(_is_internal_user("peter@buu.nl", ["uu.nl"]), False) + self.assertEqual(_is_internal_user("peter@uu.nl", ["buu.nl"]), False) + self.assertEqual(_is_internal_user("peter@uu.nl", ["*.uu.nl"]), True) + self.assertEqual(_is_internal_user("peter@vu.nl", ["*.uu.nl"]), False) + self.assertEqual(_is_internal_user("peter@buu.nl", ["*.uu.nl"]), False) + self.assertEqual(_is_internal_user("peter@cs.uu.nl", ["*.uu.nl"]), True) + self.assertEqual(_is_internal_user("peter@ai.cs.uu.nl", ["*.cs.uu.nl"]), True) + self.assertEqual(_is_internal_user("peter@ai.hum.uu.nl", ["*.cs.uu.nl"]), False) diff --git a/util/api.py b/util/api.py index fd893fc62..d67546254 100644 --- a/util/api.py +++ b/util/api.py @@ -94,7 +94,11 @@ def _api(f): :returns: Wrapper function to turn a Python function into a basic API function """ # Determine required and optional argument names from the function signature. - a_pos, a_var, a_kw, a_defaults = inspect.getargspec(f) + full_argspec = inspect.getfullargspec(f) + a_pos = full_argspec.args + a_kw = full_argspec.varkw + a_defaults = full_argspec.defaults + a_pos = a_pos[1:] # ignore callback/context param. 
required = set(a_pos if a_defaults is None else a_pos[:-len(a_defaults)]) From 6793b4b539f5d3506a15dc74a952d3c331d225ef Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Fri, 22 Nov 2024 12:50:53 +0100 Subject: [PATCH 10/27] YDA-5992: install pysqlcipher3 with ruleset --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 58455725c..6a2da2c5c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,4 +22,4 @@ persist-queue==0.8.1 redis==3.5.3 psutil==5.9.6 iteration_utilities==0.8.0 -#pysqlcipher3==1.2.1 (installed by yoda/roles/yoda_rulesets/tasks/main.yml) +rotki-pysqlcipher3==2024.10.1 From 530f78798d6fca49451b8b9104e7a64569b56cd4 Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Fri, 22 Nov 2024 14:38:24 +0100 Subject: [PATCH 11/27] YDA-5992: fix parsing query columns --- policies_utils.py | 5 ++--- unit-tests/test_policies.py | 12 ++++++------ 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/policies_utils.py b/policies_utils.py index 0d97901e0..c8ebe0eda 100644 --- a/policies_utils.py +++ b/policies_utils.py @@ -16,7 +16,7 @@ def is_safe_genquery_inp(genquery_inp): :returns: boolean value. True if query may be executed; false if query should be rejected for security or safety reasons. """ - return _is_safe_genquery_inp(genquery_inp.selectInp, genquery_inp.sqlCondInp) + return _is_safe_genquery_inp(genquery_inp.selectInp, genquery_inp.sqlCondInp.inx) def _column_in_select_inp(selectInp, columns): @@ -29,8 +29,7 @@ def _column_in_select_inp(selectInp, columns): def _column_in_cond_inp(sqlCondInp, columns): - condition_data = ast.literal_eval(str(sqlCondInp)) - condition_columns = list(map(lambda c: c[0], condition_data)) + condition_columns = ast.literal_eval(str(sqlCondInp)) for column in columns: if column in condition_columns: return True diff --git a/unit-tests/test_policies.py b/unit-tests/test_policies.py index 4d8ac896b..4e91ed816 100644 --- a/unit-tests/test_policies.py +++ b/unit-tests/test_policies.py @@ -17,22 +17,22 @@ def test_is_safe_genquery_inp(self): # Queries that do not pose any problems # select D_DATA_ID where DATA_NAME = 'rods' and COLL_NAME = '/tempZone/home' selectInp = {401: 1} - sqlCondInp = [(403, "= 'rods'"), (501, "= '/tempZone/home'")] + sqlCondInp = [403, 501] self.assertTrue(_is_safe_genquery_inp(selectInp, sqlCondInp)) # select D_CREATE_TIME, D_MODIFY_TIME, DATA_MODE, D_RESC_ID, D_DATA_ID, DATA_SIZE, D_OWNER_NAME, D_OWNER_ZONE, D_REPL_STATUS, D_DATA_CHECKSUM where COLL_NAME ='/tempZone/home' and DATA_NAME ='rods' selectInp = {419: 1, 420: 1, 421: 1, 423: 1, 401: 1, 407: 1, 411: 1, 412: 1, 413: 1, 415: 1} - sqlCondInp = [(501, "='/tempZone/home'"), (403, "='rods'")] + sqlCondInp = [501, 403] self.assertTrue(_is_safe_genquery_inp(selectInp, sqlCondInp)) # select COLL_INFO2, COLL_ID, COLL_NAME, COLL_OWNER_NAME, COLL_OWNER_ZONE, COLL_CREATE_TIME, COLL_MODIFY_TIME, COLL_TYPE, COLL_INFO1 where COLL_NAME ='/tempZone/home/rods' selectInp = {512: 1, 500: 1, 501: 1, 503: 1, 504: 1, 508: 1, 509: 1, 510: 1, 511: 1} - sqlCondInp = [(501, "='/tempZone/home/rods'")] + sqlCondInp = [501] self.assertTrue(_is_safe_genquery_inp(selectInp, sqlCondInp)) # select D_CREATE_TIME, D_MODIFY_TIME, DATA_MODE, D_DATA_ID, DATA_NAME, COLL_NAME, DATA_SIZE where COLL_NAME = '/tempZone/home/rods' selectInp = {419: 1, 420: 1, 421: 1, 401: 1, 403: 1, 501: 1, 407: 1} - sqlCondInp = [(501, " = '/tempZone/home/rods'")] + sqlCondInp = [501] self.assertTrue(_is_safe_genquery_inp(selectInp, 
sqlCondInp))

         # select ZONE_CONNECTION, ZONE_COMMENT, ZONE_NAME, ZONE_TYPE where
@@ -42,7 +42,7 @@ def test_is_safe_genquery_inp(self):

         # select META_COLL_ATTR_VALUE where COLL_NAME = '/a/b/c'
         selectInp = {611: 1}
-        sqlCondInp = [(501, "= '/a/b/c'")]
+        sqlCondInp = [501]
         self.assertTrue(_is_safe_genquery_inp(selectInp, sqlCondInp))

         # select META_COLL_ATTR_VALUE, COLL_NAME where
@@ -67,7 +67,7 @@ def test_is_safe_genquery_inp(self):

         # select META_DATA_ATTR_VALUE where DATA_NAME = 'test.dat'
         selectInp = {601: 1}
-        sqlCondInp = [(403, "= 'test.dat'")]
+        sqlCondInp = [403]
         self.assertTrue(_is_safe_genquery_inp(selectInp, sqlCondInp))

         # select META_DATA_ATTR_VALUE, COLL_NAME where

From 90ed61cb1f5e311f18588f173a2bcf9e6817b561 Mon Sep 17 00:00:00 2001
From: Lazlo Westerhof
Date: Fri, 22 Nov 2024 15:03:11 +0100
Subject: [PATCH 12/27] YDA-5992: convert binary string

---
 util/arb_data_manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/arb_data_manager.py b/util/arb_data_manager.py
index b8beb012a..dd6466da3 100644
--- a/util/arb_data_manager.py
+++ b/util/arb_data_manager.py
@@ -27,7 +27,7 @@ def get(self, ctx, keyname):

         :returns: data for this key (arb_status)
         """
         value = super().get(ctx, keyname)
-        return constants.arb_status[value]
+        return constants.arb_status[value.decode("utf-8")]

     def put(self, ctx, keyname, data):
         """Update both the original value and cached value (if cache is not available, it is not updated)

From dd69f975537b8cfa968a88384232247eceee2e24 Mon Sep 17 00:00:00 2001
From: Lazlo Westerhof
Date: Tue, 26 Nov 2024 16:04:52 +0100
Subject: [PATCH 13/27] YDA-5992: use msiBytesBufToStr to convert iRODS bytes buffer to string

---
 util/data_object.py | 5 ++++-
 util/msi.py | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/util/data_object.py b/util/data_object.py
index f8c3cb2ae..10f1bee39 100644
--- a/util/data_object.py
+++ b/util/data_object.py
@@ -151,9 +151,12 @@ def read(ctx, path, max_size=constants.IIDATA_MAX_SLURP_SIZE):

     buf = ret['arguments'][2]

+    # Convert BytesBuffer to string.
+    ret_val = msi.bytes_buf_to_str(ctx, buf, "")
+    output = ret_val["arguments"][1]
     msi.data_obj_close(ctx, handle, 0)

-    return ''.join(buf.buf[:buf.len])
+    return output


 def copy(ctx, path_org, path_copy, force=True):
diff --git a/util/msi.py b/util/msi.py
index 42170467c..74bfa7c9f 100644
--- a/util/msi.py
+++ b/util/msi.py
@@ -111,6 +111,7 @@ def _make_exception(name, message):
 get_obj_type, GetObjTypeError = make('GetObjType', 'Could not get object type')
 mod_avu_metadata, ModAVUMetadataError = make('ModAVUMetadata', 'Could not modify AVU metadata')
 stat_vault, MSIStatVaultError = make("_stat_vault", 'Could not stat file system object in vault.')
+bytes_buf_to_str, BytesBufToStrError = make('BytesBufToStr', 'Could not convert bytes buffer to string')

 # The file checksum microservice should not be invoked directly. This microservice should be invoked via wrap_file_checksum.r wrapper.
file_checksum, FileChecksumError = make("_file_checksum", 'Could not calculate non-persistent checksum of vault file.')

From c91a0f30ac0071789c7b3341928d579b8bbd2502 Mon Sep 17 00:00:00 2001
From: Lazlo Westerhof
Date: Wed, 27 Nov 2024 10:26:15 +0100
Subject: [PATCH 14/27] YDA-5992: fix type errors

---
 tests/features/api/api_revisions.feature | 26 +++++++++++------------
 tests/step_defs/api/test_api_revisions.py | 2 +-
 tools/async-job.py | 2 +-
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/tests/features/api/api_revisions.feature b/tests/features/api/api_revisions.feature
index 172108afc..5e311ef89 100644
--- a/tests/features/api/api_revisions.feature
+++ b/tests/features/api/api_revisions.feature
@@ -4,32 +4,32 @@ Feature: Revisions API

     Scenario Outline: Search revisions on file name
         Given user <user> is authenticated
         And the Yoda revision search API is queried with <filename>
-        Then the response status code is "200"
-        And <revision_search_result> is found
+        Then the response status code is "200"
+        And <revision_search_result> is found

         Examples:
-        | user | filename | revision_search_result |
-        | researcher | SIPI | SIPI_Jelly_Beans |
+        | user       | filename | revision_search_result |
+        | researcher | SIPI     | SIPI_Jelly_Beans       |

     Scenario Outline: Find actual revisions for one particular data object
         Given user <user> is authenticated
         And the Yoda revision list API is queried with <path>
-        Then the response status code is "200"
-        And revisions list is returned
+        Then the response status code is "200"
+        And revisions list is returned

         Examples:
-        | user | path |
-        | researcher | /tempZone/home/research-initial/testdata/SIPI_Jelly_Beans_4.1.07.tiff |
+        | user       | path                                                                   |
+        | researcher | /tempZone/home/research-initial/testdata/SIPI_Jelly_Beans_4.1.07.tiff |

     Scenario Outline: Restore a revision
         Given user <user> is authenticated
         And the Yoda revision API is requested for first revision for <path>
         And the Yoda revision API is requested to restore revision in collection <coll_target> with name <new_filename> with revision id
-        Then the response status code is "200"
-        And revision is restored successfully
+        Then the response status code is "200"
+        And revision is restored successfully

-        Examples:
-        | user | path | coll_target | new_filename |
-        | researcher | /tempZone/home/research-initial/testdata/SIPI_Jelly_Beans_4.1.07.tiff | /tempZone/home/research-revisions | SIPI_Jelly_Beans_2.tiff |
+        Examples:
+        | user       | path                                                                   | coll_target                       | new_filename            |
+        | researcher | /tempZone/home/research-initial/testdata/SIPI_Jelly_Beans_4.1.07.tiff | /tempZone/home/research-revisions | SIPI_Jelly_Beans_2.tiff |
diff --git a/tests/step_defs/api/test_api_revisions.py b/tests/step_defs/api/test_api_revisions.py
index 62fc8be0d..e8e01c2e0 100644
--- a/tests/step_defs/api/test_api_revisions.py
+++ b/tests/step_defs/api/test_api_revisions.py
@@ -21,7 +21,7 @@ def api_search_revisions_on_filename(user, filename):
     return api_request(
         user,
         "revisions_search_on_filename",
-        {"searchString": filename, "offset": 0, "limit": "10"}
+        {"searchString": filename, "offset": 0, "limit": 10}
     )

diff --git a/tools/async-job.py b/tools/async-job.py
index ac5db51eb..f9612ab8b 100755
--- a/tools/async-job.py
+++ b/tools/async-job.py
@@ -51,7 +51,7 @@ def lock_or_die(balance_id_min, balance_id_max):
             exit(1)
         else:
             raise
-    os.write(fd, str(os.getpid()))
+    os.write(fd, str(os.getpid()).encode())
     os.close(fd)

     # Remove lock no matter how we exit.
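A note on the os.write() fix in tools/async-job.py above: Python 3 strictly separates text (str) from binary data (bytes), and os.write() accepts only bytes, so the PID must be encoded before it is written to the lock file. A minimal sketch of the difference, using a hypothetical lock path that is not taken from the ruleset:

    import os

    fd = os.open("/tmp/example.lock", os.O_CREAT | os.O_EXCL | os.O_WRONLY)  # hypothetical path
    # Python 2 accepted a str here; on Python 3 the commented-out call raises
    # TypeError: a bytes-like object is required, not 'str':
    # os.write(fd, str(os.getpid()))
    os.write(fd, str(os.getpid()).encode())  # encode the PID to UTF-8 bytes first
    os.close(fd)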
From d64a41a551917aed4fd1065fef437b4b354dab65 Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Wed, 27 Nov 2024 11:25:39 +0100 Subject: [PATCH 15/27] YDA-5992: replace itertools.imap() with map() --- util/avu.py | 26 +++++++++++++------------- util/collection.py | 8 ++++---- vault.py | 6 +++--- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/util/avu.py b/util/avu.py index 1ccb9f7e5..585d9756d 100644 --- a/util/avu.py +++ b/util/avu.py @@ -22,16 +22,16 @@ def of_data(ctx, path): """Get (a,v,u) triplets for a given data object.""" - return itertools.imap(lambda x: Avu(*x), - genquery.Query(ctx, "META_DATA_ATTR_NAME, META_DATA_ATTR_VALUE, META_DATA_ATTR_UNITS", - "COLL_NAME = '{}' AND DATA_NAME = '{}'".format(*pathutil.chop(path)))) + return map(lambda x: Avu(*x), + genquery.Query(ctx, "META_DATA_ATTR_NAME, META_DATA_ATTR_VALUE, META_DATA_ATTR_UNITS", + "COLL_NAME = '{}' AND DATA_NAME = '{}'".format(*pathutil.chop(path)))) def of_coll(ctx, coll): """Get (a,v,u) triplets for a given collection.""" - return itertools.imap(lambda x: Avu(*x), - genquery.Query(ctx, "META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE, META_COLL_ATTR_UNITS", - "COLL_NAME = '{}'".format(coll))) + return map(lambda x: Avu(*x), + genquery.Query(ctx, "META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE, META_COLL_ATTR_UNITS", + "COLL_NAME = '{}'".format(coll))) def get_attr_val_of_coll(ctx, coll, attr): @@ -72,13 +72,13 @@ def to_absolute(row, type): "COLL_PARENT_NAME, COLL_NAME, META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE, META_COLL_ATTR_UNITS", "COLL_PARENT_NAME = '{}'".format(path), genquery.AS_LIST, ctx) - collection_root = itertools.imap(lambda x: to_absolute(x, "collection"), collection_root) + collection_root = map(lambda x: to_absolute(x, "collection"), collection_root) data_objects_root = genquery.row_iterator( "COLL_NAME, DATA_NAME, META_DATA_ATTR_NAME, META_DATA_ATTR_VALUE, META_DATA_ATTR_UNITS", "COLL_NAME = '{}'".format(path), genquery.AS_LIST, ctx) - data_objects_root = itertools.imap(lambda x: to_absolute(x, "data_object"), data_objects_root) + data_objects_root = map(lambda x: to_absolute(x, "data_object"), data_objects_root) if not recursive: return itertools.chain(collection_root, data_objects_root) @@ -87,22 +87,22 @@ def to_absolute(row, type): "COLL_PARENT_NAME, COLL_NAME, META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE, META_COLL_ATTR_UNITS", "COLL_PARENT_NAME like '{}/%'".format(path), genquery.AS_LIST, ctx) - collection_sub = itertools.imap(lambda x: to_absolute(x, "collection"), collection_sub) + collection_sub = map(lambda x: to_absolute(x, "collection"), collection_sub) data_objects_sub = genquery.row_iterator( "COLL_NAME, DATA_NAME, META_DATA_ATTR_NAME, META_DATA_ATTR_VALUE, META_DATA_ATTR_UNITS", "COLL_NAME like '{}/%'".format(path), genquery.AS_LIST, ctx) - data_objects_sub = itertools.imap(lambda x: to_absolute(x, "data_object"), data_objects_sub) + data_objects_sub = map(lambda x: to_absolute(x, "data_object"), data_objects_sub) return itertools.chain(collection_root, data_objects_root, collection_sub, data_objects_sub) def of_group(ctx, group): """Get (a,v,u) triplets for a given group.""" - return itertools.imap(lambda x: Avu(*x), - genquery.Query(ctx, "META_USER_ATTR_NAME, META_USER_ATTR_VALUE, META_USER_ATTR_UNITS", - "USER_NAME = '{}' AND USER_TYPE = 'rodsgroup'".format(group))) + return map(lambda x: Avu(*x), + genquery.Query(ctx, "META_USER_ATTR_NAME, META_USER_ATTR_VALUE, META_USER_ATTR_UNITS", + "USER_NAME = '{}' AND USER_TYPE = 'rodsgroup'".format(group))) def set_on_data(ctx, path, 
a, v): diff --git a/util/collection.py b/util/collection.py index 23d0ae0dc..711787bb5 100644 --- a/util/collection.py +++ b/util/collection.py @@ -104,14 +104,14 @@ def to_absolute(row): genquery.AS_LIST, ctx) if not recursive: - return itertools.imap(to_absolute, q_root) + return map(to_absolute, q_root) # Recursive? Return a generator combining both queries. q_sub = genquery.row_iterator("COLL_PARENT_NAME, COLL_NAME", "COLL_PARENT_NAME like '{}/%'".format(path), genquery.AS_LIST, ctx) - return itertools.imap(to_absolute, itertools.chain(q_root, q_sub)) + return map(to_absolute, itertools.chain(q_root, q_sub)) def data_objects(ctx, path, recursive=False): @@ -138,14 +138,14 @@ def to_absolute(row): genquery.AS_LIST, ctx) if not recursive: - return itertools.imap(to_absolute, q_root) + return map(to_absolute, q_root) # Recursive? Return a generator combining both queries. q_sub = genquery.row_iterator("COLL_NAME, DATA_NAME", "COLL_NAME like '{}/%'".format(path), genquery.AS_LIST, ctx) - return itertools.imap(to_absolute, itertools.chain(q_root, q_sub)) + return map(to_absolute, itertools.chain(q_root, q_sub)) def create(ctx, path, entire_tree=''): diff --git a/vault.py b/vault.py index 256636812..4ad36ae42 100644 --- a/vault.py +++ b/vault.py @@ -281,8 +281,8 @@ def api_vault_unpreservable_files(ctx, coll, list_name): preservable_formats = set(list_data['formats']) # Get basenames of all data objects within this collection. - data_names = itertools.imap(lambda x: pathutil.chop(x)[1], - collection.data_objects(ctx, coll, recursive=True)) + data_names = map(lambda x: pathutil.chop(x)[1], + collection.data_objects(ctx, coll, recursive=True)) # Exclude Yoda metadata files data_names = itertools.ifilter(lambda @@ -290,7 +290,7 @@ def api_vault_unpreservable_files(ctx, coll, list_name): data_names) # Data names -> lowercase extensions, without the dot. - exts = set(list(itertools.imap(lambda x: os.path.splitext(x)[1][1:].lower(), data_names))) + exts = set(list(map(lambda x: os.path.splitext(x)[1][1:].lower(), data_names))) exts -= {''} # Return any ext that is not in the preservable list. 
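The itertools.imap() replacements above work because the built-in map() in Python 3 is itself lazy: it returns an iterator that yields items on demand, exactly the behaviour itertools.imap() provided on Python 2. One consequence is that a mapped result can be consumed only once, which is why vault.py materialises the extensions with set(list(map(...))) before reusing them. A small illustration with hypothetical file names, not taken from the ruleset:

    import os

    names = ["a.txt", "b.TIFF", "readme"]  # hypothetical data object names
    exts = map(lambda x: os.path.splitext(x)[1][1:].lower(), names)
    print(list(exts))  # ['txt', 'tiff', '']
    print(list(exts))  # []  (the iterator is exhausted after a single pass)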
From ee1fb916191445a4b27c4123f16a9274c965eb96 Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Thu, 28 Nov 2024 09:29:59 +0100 Subject: [PATCH 16/27] YDA-5992: catch all exceptions when loading text data object --- browse.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/browse.py b/browse.py index f9c02f0aa..e9a80bf86 100644 --- a/browse.py +++ b/browse.py @@ -344,3 +344,5 @@ def api_load_text_obj(ctx, file_path='/'): return api.Error('large_size', 'The given text file is too large to render') except error.UUError: return api.Error('ReadError', 'Could not retrieve file') + except Exception: + return api.Error('not_valid', 'The given data object is not a text file') From 6f57454edc6764930b4c2e02ef75a4ea6b798bce Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Thu, 28 Nov 2024 15:55:41 +0100 Subject: [PATCH 17/27] YDA-5992: fix encoding for DataCite payload --- datacite.py | 4 ++-- publication.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/datacite.py b/datacite.py index 4dd2f47c4..3f5146f56 100644 --- a/datacite.py +++ b/datacite.py @@ -19,7 +19,7 @@ def metadata_post(ctx, payload): response = requests.post(url, auth=auth, - data=payload, + data=payload.encode(), headers=headers, timeout=30, verify=config.datacite_tls_verify) @@ -35,7 +35,7 @@ def metadata_put(ctx, doi, payload): response = requests.put(url, auth=auth, - data=payload, + data=payload.encode(), headers=headers, timeout=30, verify=config.datacite_tls_verify) diff --git a/publication.py b/publication.py index 522605497..c30aca478 100644 --- a/publication.py +++ b/publication.py @@ -605,7 +605,7 @@ def set_access_restrictions(ctx, vault_package, publication_state): """ # Embargo handling combiJsonPath = publication_state["combiJsonPath"] - dictJsonData = jsonutil.read(ctx, combiJsonPath, want_bytes=False) + dictJsonData = jsonutil.read(ctx, combiJsonPath) # Remove empty objects to prevent empty fields on landingpage. 
dictJsonData = misc.remove_empty_objects(dictJsonData)

From dfb2ae91865de802792c559e3c29b7e20eafc56a Mon Sep 17 00:00:00 2001
From: Lazlo Westerhof
Date: Thu, 28 Nov 2024 16:05:24 +0100
Subject: [PATCH 18/27] YDA-5992: don't try to decode Unicode strings

---
 json_landing_page.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/json_landing_page.py b/json_landing_page.py
index 674865a27..38ff78fee 100644
--- a/json_landing_page.py
+++ b/json_landing_page.py
@@ -123,9 +123,8 @@ def json_landing_page_create_json_landing_page(ctx, zone, template_name, combi_j
     try:
         language = ""
         language_id = json_data["Language"]
-        # Convert just the language schemas to unicode to handle when a language has non-ascii characters (like Volapük)
-        schema_lang_ids = list(map(lambda x: x.decode("utf-8"), json_schema["definitions"]["optionsISO639-1"]["enum"]))
-        schema_lang_names = list(map(lambda x: x.decode("utf-8"), json_schema["definitions"]["optionsISO639-1"]["enumNames"]))
+        schema_lang_ids = json_schema["definitions"]["optionsISO639-1"]["enum"]
+        schema_lang_names = json_schema["definitions"]["optionsISO639-1"]["enumNames"]
         index = schema_lang_ids.index(language_id)
         # Language variable must be kept in unicode, otherwise landing page fails to build with a language with non-ascii characters
         language = schema_lang_names[index]

From ff7d99ed4be04859a391cbb605847e79f54ea3db Mon Sep 17 00:00:00 2001
From: Lazlo Westerhof
Date: Fri, 29 Nov 2024 12:44:00 +0100
Subject: [PATCH 19/27] YDA-5992: cleanup and replace itertools.ifilter with filter

---
 util/collection.py | 4 +---
 vault.py | 9 +++------
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/util/collection.py b/util/collection.py
index 711787bb5..74bbb8e79 100644
--- a/util/collection.py
+++ b/util/collection.py
@@ -5,9 +5,7 @@

 import itertools
 import json
-import sys
-if sys.version_info > (2, 7):
-    from functools import reduce
+from functools import reduce

 import genquery
 import irods_types
diff --git a/vault.py b/vault.py
index 4ad36ae42..5a02a994d 100644
--- a/vault.py
+++ b/vault.py
@@ -3,7 +3,6 @@
 __copyright__ = 'Copyright (c) 2019-2024, Utrecht University'
 __license__ = 'GPLv3, see LICENSE'

-import itertools
 import os
 import re
 import subprocess
@@ -264,13 +263,13 @@ def api_vault_preservable_formats_lists(ctx):

 @api.make()
 def api_vault_unpreservable_files(ctx, coll, list_name):
-    """Retrieve the set of unpreservable file formats in a collection.
+    """Retrieve list of unpreservable file formats in a collection.

     :param ctx:       Combined type of a callback and rei struct
     :param coll:      Collection of folder to check
     :param list_name: Name of preservable file format list

-    :returns: Set of unpreservable file formats
+    :returns: List of unpreservable file formats
     """
     space, zone, _, _ = pathutil.info(coll)
     if space not in [pathutil.Space.RESEARCH, pathutil.Space.VAULT]:
@@ -285,9 +284,7 @@ def api_vault_unpreservable_files(ctx, coll, list_name):
                       collection.data_objects(ctx, coll, recursive=True))

     # Exclude Yoda metadata files
-    data_names = itertools.ifilter(lambda
-                                   x: not re.match(r"yoda\-metadata(\[\d+\])?\.(xml|json)", x),
-                                   data_names)
+    data_names = filter(lambda x: not re.match(r"yoda\-metadata(\[\d+\])?\.(xml|json)", x), data_names)

     # Data names -> lowercase extensions, without the dot.
exts = set(list(map(lambda x: os.path.splitext(x)[1][1:].lower(), data_names))) From 72a31af25f1cc81d201233da4fca5cfe74d2ae5b Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Mon, 2 Dec 2024 12:15:17 +0100 Subject: [PATCH 20/27] YDA-5992: rearrange admin scripts --- tools/{ => admin}/admin-datarequest-temp-write-permission.sh | 0 tools/{ => admin}/admin-datarequestactions.sh | 0 tools/{ => admin}/admin-remove-orphan-vault-if-empty.sh | 0 .../admin-scheduled-copytovault.sh} | 0 tools/{ => admin}/admin-vault-archive.sh | 0 tools/{ => admin}/admin-vaultactions.sh | 0 tools/{ => admin}/admin-vaultingest.sh | 0 7 files changed, 0 insertions(+), 0 deletions(-) rename tools/{ => admin}/admin-datarequest-temp-write-permission.sh (100%) rename tools/{ => admin}/admin-datarequestactions.sh (100%) rename tools/{ => admin}/admin-remove-orphan-vault-if-empty.sh (100%) rename tools/{scheduled-copytovault.sh => admin/admin-scheduled-copytovault.sh} (100%) mode change 100755 => 100644 rename tools/{ => admin}/admin-vault-archive.sh (100%) rename tools/{ => admin}/admin-vaultactions.sh (100%) rename tools/{ => admin}/admin-vaultingest.sh (100%) diff --git a/tools/admin-datarequest-temp-write-permission.sh b/tools/admin/admin-datarequest-temp-write-permission.sh similarity index 100% rename from tools/admin-datarequest-temp-write-permission.sh rename to tools/admin/admin-datarequest-temp-write-permission.sh diff --git a/tools/admin-datarequestactions.sh b/tools/admin/admin-datarequestactions.sh similarity index 100% rename from tools/admin-datarequestactions.sh rename to tools/admin/admin-datarequestactions.sh diff --git a/tools/admin-remove-orphan-vault-if-empty.sh b/tools/admin/admin-remove-orphan-vault-if-empty.sh similarity index 100% rename from tools/admin-remove-orphan-vault-if-empty.sh rename to tools/admin/admin-remove-orphan-vault-if-empty.sh diff --git a/tools/scheduled-copytovault.sh b/tools/admin/admin-scheduled-copytovault.sh old mode 100755 new mode 100644 similarity index 100% rename from tools/scheduled-copytovault.sh rename to tools/admin/admin-scheduled-copytovault.sh diff --git a/tools/admin-vault-archive.sh b/tools/admin/admin-vault-archive.sh similarity index 100% rename from tools/admin-vault-archive.sh rename to tools/admin/admin-vault-archive.sh diff --git a/tools/admin-vaultactions.sh b/tools/admin/admin-vaultactions.sh similarity index 100% rename from tools/admin-vaultactions.sh rename to tools/admin/admin-vaultactions.sh diff --git a/tools/admin-vaultingest.sh b/tools/admin/admin-vaultingest.sh similarity index 100% rename from tools/admin-vaultingest.sh rename to tools/admin/admin-vaultingest.sh From 2943ea72f0a7275c6a4630c272bdb12a357d92a8 Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Tue, 3 Dec 2024 10:36:35 +0100 Subject: [PATCH 21/27] YDA-5992: fix urllib quoting --- notifications.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/notifications.py b/notifications.py index ae45d3ee7..c875e9956 100644 --- a/notifications.py +++ b/notifications.py @@ -8,7 +8,7 @@ import random import string import time -import urllib +import urllib.parse from datetime import datetime, timedelta import genquery @@ -86,10 +86,10 @@ def api_notifications_load(ctx, sort_order="desc"): space, _, group, subpath = pathutil.info(notification["target"]) if space is pathutil.Space.RESEARCH: notification["data_package"] = group if subpath == '' else pathutil.basename(subpath) - notification["link"] = "/research/browse?dir=" + urllib.quote("/{}/{}".format(group, 
subpath)) + notification["link"] = "/research/browse?dir=" + urllib.parse.quote(f"/{group}/{subpath}") elif space is pathutil.Space.VAULT: notification["data_package"] = group if subpath == '' else pathutil.basename(subpath) - notification["link"] = "/vault/browse?dir=" + urllib.quote("/{}/{}".format(group, subpath)) + notification["link"] = "/vault/browse?dir=" + urllib.parse.quote(f"/{group}/{subpath}") # Deposit situation required different information to be presented. if subpath.startswith('deposit-'): From e863948a85c7384b78d3bd9241fd1e5e9f7e62b2 Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Tue, 3 Dec 2024 12:11:25 +0100 Subject: [PATCH 22/27] YDA-5992: init UUError message --- util/error.py | 3 +++ util/msi.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/util/error.py b/util/error.py index 44b9b0e01..f4f40d981 100644 --- a/util/error.py +++ b/util/error.py @@ -6,6 +6,9 @@ class UUError(Exception): """Generic Python rule error.""" + def __init__(self, message): + self.message = message + super(UUError, self).__init__(message) class UUFileSizeError(UUError): diff --git a/util/msi.py b/util/msi.py index 74bfa7c9f..7835fd34d 100644 --- a/util/msi.py +++ b/util/msi.py @@ -98,7 +98,7 @@ def _make_exception(name, message): data_obj_write, DataObjWriteError = make('DataObjWrite', 'Could not write data object') data_obj_close, DataObjCloseError = make('DataObjClose', 'Could not close data object') data_obj_copy, DataObjCopyError = make('DataObjCopy', 'Could not copy data object') -data_obj_repl, DataObjReplError = make('DataObjRepl', 'Could not replicate data object') +data_obj_repl, DataObjReplError = make('DataObjRepl', 'Could not replicate data object') data_obj_unlink, DataObjUnlinkError = make('DataObjUnlink', 'Could not remove data object') data_obj_rename, DataObjRenameError = make('DataObjRename', 'Could not rename data object') data_obj_chksum, DataObjChksumError = make('DataObjChksum', 'Could not checksum data object') From 991fc21af9e677bb8e4dd854c4553e1b5663d917 Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Wed, 4 Dec 2024 10:20:35 +0100 Subject: [PATCH 23/27] YDA-5992: limit search for data packages to vault --- publication_troubleshoot.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/publication_troubleshoot.py b/publication_troubleshoot.py index 4350888fc..8d2c7e027 100644 --- a/publication_troubleshoot.py +++ b/publication_troubleshoot.py @@ -31,9 +31,11 @@ def find_full_package_path(ctx, package_name, write_stdout): :returns: The full path of the data package if found, otherwise None. """ + user_zone = user.zone(ctx) + try: query_condition = ( - "COLL_NAME like '%{}%'".format(package_name) + "COLL_NAME like '/{}/home/vault-%{}%'".format(user_zone, package_name) ) query_attributes = "COLL_NAME" iter = genquery.row_iterator(query_attributes, query_condition, genquery.AS_LIST, ctx) From b8797bb9a786b25dbb35d5816fef8668e8e7ea03 Mon Sep 17 00:00:00 2001 From: Lazlo Westerhof Date: Wed, 4 Dec 2024 11:19:07 +0100 Subject: [PATCH 24/27] YDA-5992: clean up scheduled admin jobs --- iiFolderStatusTransitions.r | 11 ++--------- policies.py | 10 +++++----- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/iiFolderStatusTransitions.r b/iiFolderStatusTransitions.r index bb7f0bf12..adccc1360 100644 --- a/iiFolderStatusTransitions.r +++ b/iiFolderStatusTransitions.r @@ -18,13 +18,6 @@ iiFolderStatus(*folder, *folderStatus) { } } -# \brief Schedule copy-to-vault (asynchronously). 
-#
-iiScheduleCopyToVault() {
-	delay ("<INST_NAME>irods_rule_engine_plugin-irods_rule_language-instance</INST_NAME><PLUSET>1s</PLUSET>") {
-		msiExecCmd("scheduled-copytovault.sh", "", "", "", 0, *out);
-	}
-}
 
 # \brief Schedule copy-to-vault for just one coll (asynchronously).
 #
@@ -32,7 +25,7 @@ iiScheduleCopyToVault() {
 iiScheduleCollCopyToVault(*coll) {
 	delay ("<INST_NAME>irods_rule_engine_plugin-irods_rule_language-instance</INST_NAME><PLUSET>1s</PLUSET>") {
-		msiExecCmd("scheduled-copytovault.sh", "'*coll'", "", "", 0, *out);
+		msiExecCmd("admin-scheduled-copytovault.sh", "'*coll'", "", "", 0, *out);
 	}
 }
 
@@ -332,7 +325,7 @@ iiCanTransitionFolderStatus(*folder, *transitionFrom, *transitionTo, *actor, *al
 	}
 
 	# Note: The folders will not transition to SECURED status.
-	if (*transitionFrom == ACCEPTED && *transitionTo == FOLDER) {
+	if (*transitionFrom == ACCEPTED && *transitionTo == FOLDER) {
 		*allowed = false;
 		*reason = "Only a rodsadmin is allowed to secure a folder to the vault";
 		succeed;
diff --git a/policies.py b/policies.py
index b02f39056..0935f8abd 100644
--- a/policies.py
+++ b/policies.py
@@ -503,17 +503,17 @@ def py_acPreProcForExecCmd(ctx, cmd, args, addr, hint):
     if not (hint == addr == ''):
         return policy.fail('Disallowed hint/addr in execcmd')
 
-    # allow 'admin-*' scripts, if first arg is the actor username&zone.
+    # Allow scheduled admin scripts.
+    if cmd.startswith('admin-scheduled-'):
+        return policy.succeed()
+
+    # Allow 'admin-*' scripts, if first arg is the actor username&zone.
     if cmd.startswith('admin-'):
         if args == str(actor) or args.startswith(str(actor) + ' '):
             return policy.succeed()
         else:
             return policy.fail('Actor not given as first arg to admin- execcmd')
 
-    # Allow scheduled scripts.
-    if cmd.startswith('scheduled-'):
-        return policy.succeed()
-
     return policy.fail('No execcmd privileges for this command')
 

From 889378874f2175ee6580a84af5107b9005ac3d08 Mon Sep 17 00:00:00 2001
From: Lazlo Westerhof
Date: Thu, 5 Dec 2024 10:47:42 +0100
Subject: [PATCH 25/27] YDA-5992: increase waiting time for archival of
 deposit

---
 tests/step_defs/api/test_api_deposit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/step_defs/api/test_api_deposit.py b/tests/step_defs/api/test_api_deposit.py
index 621dcc25c..f17cff44e 100644
--- a/tests/step_defs/api/test_api_deposit.py
+++ b/tests/step_defs/api/test_api_deposit.py
@@ -55,7 +55,7 @@ def deposit_exists(user):
 
 @given('deposit is archived')
 def deposit_is_archived(user):
-    time.sleep(15)
+    time.sleep(30)
 
 
 @given('the Yoda deposit status API is queried', target_fixture="api_response")
From 23d6670bf310780aff07ff73c4697241e5d6356e Mon Sep 17 00:00:00 2001
From: Lazlo Westerhof
Date: Tue, 10 Dec 2024 12:57:34 +0100
Subject: [PATCH 26/27] YDA-5992: add type annotations

---
 .../workflows/api-and-integration-tests.yml |   2 +-
 .github/workflows/python.yml                |   6 +-
 admin.py                                    |   2 +-
 browse.py                                   |  58 +--
 data_access_token.py                        |  23 +-
 datacite.py                                 |   9 +-
 datarequest.py                              | 403 +++++++-----------
 deposit.py                                  |  25 +-
 epic.py                                     |   7 +-
 folder.py                                   | 121 +++---
 groups.py                                   | 111 ++---
 groups_import.py                            |  24 +-
 json_datacite.py                            |  99 ++---
 json_landing_page.py                        |  19 +-
 mail.py                                     |  15 +-
 meta.py                                     | 115 +++--
 meta_form.py                                |  13 +-
 notifications.py                            |  27 +-
 policies.py                                 | 131 ++++--
 policies_datamanager.py                     |   7 +-
 policies_datapackage_status.py              |  21 +-
 policies_datarequest_status.py              |   8 +-
 policies_folder_status.py                   |  22 +-
 policies_intake.py                          |  16 +-
 policies_utils.py                           |   9 +-
 provenance.py                               |  15 +-
 publication.py                              | 101 +++--
 publication_troubleshoot.py                 |   2 +-
 replication.py                              |  28 +-
 research.py                                 |  60 +--
 resources.py                                |  45 +-
revision_strategies.py | 48 ++- revision_utils.py | 28 +- revisions.py | 108 +++-- schema.py | 29 +- schema_transformation.py | 26 +- schema_transformations.py | 23 +- schema_transformations_utils.py | 14 +- settings.py | 8 +- setup.cfg | 14 + sram.py | 21 +- vault.py | 292 +++++-------- vault_archive.py | 46 +- vault_download.py | 16 +- 44 files changed, 1080 insertions(+), 1137 deletions(-) diff --git a/.github/workflows/api-and-integration-tests.yml b/.github/workflows/api-and-integration-tests.yml index e69d7b878..530256846 100644 --- a/.github/workflows/api-and-integration-tests.yml +++ b/.github/workflows/api-and-integration-tests.yml @@ -114,7 +114,7 @@ jobs: cd tests nohup bash -c 'while true ; do sleep 5 ; ../yoda/docker/run-cronjob.sh copytovault >> ../copytovault.log 2>&1 ; ../yoda/docker/run-cronjob.sh publication >> ../publication.log 2>&1 ; done' & test -d mycache || mkdir -p mycache - python3 -m pytest --skip-ui --datarequest --deposit -o cache_dir=mycache --environment environments/docker.json + python3 -m pytest --skip-ui --deposit -o cache_dir=mycache --environment environments/docker.json cat ../copytovault.log cat ../publication.log diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 89a7b1704..3a5b5afa7 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -19,12 +19,16 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install flake8==6.0.0 flake8-import-order==0.18.2 darglint==1.8.1 codespell types-requests + python -m pip install flake8==6.0.0 flake8-import-order==0.18.2 darglint==1.8.1 codespell mypy types-requests types-python-dateutil - name: Lint with flake8 run: | flake8 --statistics + - name: Check static typing + run: | + mypy . --explicit-package-bases + - name: Check code for common misspellings run: | codespell -q 3 --skip="*.r,*.xsd,*.json" || true diff --git a/admin.py b/admin.py index b869cf880..988a43096 100644 --- a/admin.py +++ b/admin.py @@ -11,7 +11,7 @@ @api.make() -def api_admin_has_access(ctx): +def api_admin_has_access(ctx: rule.Context) -> api.Result: """ Checks if the user has admin access based on user rights or membership in admin-priv group. diff --git a/browse.py b/browse.py index e9a80bf86..aff9e9e1c 100644 --- a/browse.py +++ b/browse.py @@ -5,6 +5,7 @@ import re from collections import OrderedDict +from typing import Dict import magic from genquery import AS_DICT, Query @@ -18,13 +19,13 @@ @api.make() -def api_browse_folder(ctx, - coll='/', - sort_on='name', - sort_order='asc', - offset=0, - limit=10, - space=pathutil.Space.OTHER.value): +def api_browse_folder(ctx: rule.Context, + coll: str = '/', + sort_on: str = 'name', + sort_order: str = 'asc', + offset: int = 0, + limit: int = 10, + space: str = pathutil.Space.OTHER.value) -> api.Result: """Get paginated collection contents, including size/modify date information. :param ctx: Combined type of a callback and rei struct @@ -37,7 +38,7 @@ def api_browse_folder(ctx, :returns: Dict with paginated collection contents """ - def transform(row): + def transform(row: Dict) -> Dict: # Remove ORDER_BY etc. wrappers from column names. 
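        # (e.g. when results are sorted, a key arrives as 'ORDER_BY(DATA_NAME)'
        # and the substitution below reduces it to the bare column name 'DATA_NAME')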
x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} if 'DATA_NAME' in x and 'META_DATA_ATTR_VALUE' in x: @@ -104,13 +105,13 @@ def transform(row): @api.make() -def api_browse_collections(ctx, - coll='/', - sort_on='name', - sort_order='asc', - offset=0, - limit=10, - space=pathutil.Space.OTHER.value): +def api_browse_collections(ctx: rule.Context, + coll: str = '/', + sort_on: str = 'name', + sort_order: str = 'asc', + offset: int = 0, + limit: int = 10, + space: str = pathutil.Space.OTHER.value) -> api.Result: """Get paginated collection contents, including size/modify date information. This function browses a folder and only looks at the collections in it. No dataobjects. @@ -126,7 +127,7 @@ def api_browse_collections(ctx, :returns: Dict with paginated collection contents """ - def transform(row): + def transform(row: Dict) -> Dict: # Remove ORDER_BY etc. wrappers from column names. x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} @@ -184,13 +185,13 @@ def transform(row): @api.make() -def api_search(ctx, - search_string, - search_type='filename', - sort_on='name', - sort_order='asc', - offset=0, - limit=10): +def api_search(ctx: rule.Context, + search_string: str, + search_type: str = 'filename', + sort_on: str = 'name', + sort_order: str = 'asc', + offset: int = 0, + limit: int = 10) -> api.Result: """Get paginated search results, including size/modify date/location information. :param ctx: Combined type of a callback and rei struct @@ -203,7 +204,7 @@ def api_search(ctx, :returns: Dict with paginated search results """ - def transform(row): + def transform(row: Dict) -> Dict: # Remove ORDER_BY etc. wrappers from column names. x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} @@ -216,8 +217,7 @@ def transform(row): 'type': 'data', 'size': int(x['DATA_SIZE']), 'modify_time': int(x['DATA_MODIFY_TIME'])} - - if 'COLL_NAME' in x: + elif 'COLL_NAME' in x: _, _, path, subpath = pathutil.info(x['COLL_NAME']) if subpath != '': path = path + "/" + subpath @@ -225,6 +225,8 @@ def transform(row): return {'name': "/{}".format(path), 'type': 'coll', 'modify_time': int(x['COLL_MODIFY_TIME'])} + else: + return {} # Replace, %, _ and \ since iRODS does not handle those correctly. # HdR this can only be done in a situation where search_type is NOT status! @@ -285,7 +287,7 @@ def transform(row): ('items', datas)]) -def _filter_vault_deposit_index(row): +def _filter_vault_deposit_index(row: Dict) -> bool: """This internal function filters out index collections in deposit vault collections. These collections are used internally by Yoda for indexing data package metadata, and should not be displayed. @@ -302,7 +304,7 @@ def _filter_vault_deposit_index(row): @api.make() -def api_load_text_obj(ctx, file_path='/'): +def api_load_text_obj(ctx: rule.Context, file_path: str = '/') -> api.Result: """Retrieve a text file (as a string) in either the research, deposit, or vault space. :param ctx: Combined type of a callback and rei struct diff --git a/data_access_token.py b/data_access_token.py index 29afd5537..1425e6384 100644 --- a/data_access_token.py +++ b/data_access_token.py @@ -7,6 +7,7 @@ import secrets from datetime import datetime, timedelta from traceback import print_exc +from typing import List from pysqlcipher3 import dbapi2 as sqlite3 @@ -19,7 +20,7 @@ @api.make() -def api_token_generate(ctx, label=None): +def api_token_generate(ctx: rule.Context, label: str = "") -> api.Result: """Generates a token for user authentication. 
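
    Tokens are generated with secrets.token_urlsafe and truncated to
    config.token_length characters (see generate_token below).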
:param ctx: Combined type of a callback and rei struct @@ -27,7 +28,7 @@ def api_token_generate(ctx, label=None): :returns: Generated token or API error """ - def generate_token(): + def generate_token() -> str: length = int(config.token_length) token = secrets.token_urlsafe(length) return token[:length] @@ -63,14 +64,13 @@ def generate_token(): @api.make() -def api_token_load(ctx): +def api_token_load(ctx: rule.Context) -> api.Result: """Loads valid tokens of user. :param ctx: Combined type of a callback and rei struct :returns: Valid tokens """ - if not token_database_initialized(): return api.Error('DatabaseError', 'Internal error: token database unavailable') @@ -83,8 +83,8 @@ def api_token_load(ctx): conn.execute("PRAGMA key='%s'" % (config.token_database_password)) for row in conn.execute('''SELECT label, exp_time FROM tokens WHERE user=:user_id AND exp_time > :now''', {"user_id": user_id, "now": datetime.now()}): - exp_time = datetime.strptime(row[1], '%Y-%m-%d %H:%M:%S.%f') - exp_time = exp_time.strftime('%Y-%m-%d %H:%M:%S') + date_time = datetime.strptime(row[1], '%Y-%m-%d %H:%M:%S.%f') + exp_time = date_time.strftime('%Y-%m-%d %H:%M:%S') result.append({"label": row[0], "exp_time": exp_time}) except Exception: print_exc() @@ -98,7 +98,7 @@ def api_token_load(ctx): @api.make() -def api_token_delete(ctx, label): +def api_token_delete(ctx: rule.Context, label: str) -> api.Result: """Deletes a token of the user. :param ctx: Combined type of a callback and rei struct @@ -130,10 +130,10 @@ def api_token_delete(ctx, label): @api.make() -def api_token_delete_expired(ctx): +def api_token_delete_expired(ctx: rule.Context) -> api.Result: """Deletes expired tokens of current user - :param ctx: Combined type of a callback and rei struct + :param ctx: Combined type of a callback and rei struct :returns: Status of token deletion """ @@ -160,8 +160,9 @@ def api_token_delete_expired(ctx): return result -def get_all_tokens(ctx): +def get_all_tokens(ctx: rule.Context) -> List: """Retrieve all valid tokens. 
+ :param ctx: Combined type of a callback and rei struct :returns: Valid tokens @@ -192,7 +193,7 @@ def get_all_tokens(ctx): return result -def token_database_initialized(): +def token_database_initialized() -> bool: """Checks whether token database has been initialized :returns: Boolean value diff --git a/datacite.py b/datacite.py index 3f5146f56..4c2605ef3 100644 --- a/datacite.py +++ b/datacite.py @@ -5,13 +5,14 @@ import random import string +from typing import Dict import requests from util import * -def metadata_post(ctx, payload): +def metadata_post(payload: Dict) -> int: """Register DOI metadata with DataCite.""" url = "{}/dois".format(config.datacite_rest_api_url) auth = (config.datacite_username, config.datacite_password) @@ -27,7 +28,7 @@ def metadata_post(ctx, payload): return response.status_code -def metadata_put(ctx, doi, payload): +def metadata_put(doi: str, payload: str) -> int: """Update metadata with DataCite.""" url = "{}/dois/{}".format(config.datacite_rest_api_url, doi) auth = (config.datacite_username, config.datacite_password) @@ -43,7 +44,7 @@ def metadata_put(ctx, doi, payload): return response.status_code -def metadata_get(ctx, doi): +def metadata_get(doi: str) -> int: """Check with DataCite if DOI is available.""" url = "{}/dois/{}".format(config.datacite_rest_api_url, doi) auth = (config.datacite_username, config.datacite_password) @@ -58,7 +59,7 @@ def metadata_get(ctx, doi): return response.status_code -def generate_random_id(ctx, length): +def generate_random_id(length: int) -> str: """Generate random ID for DOI.""" characters = string.ascii_uppercase + string.digits return ''.join(random.choice(characters) for x in range(int(length))) diff --git a/datarequest.py b/datarequest.py index 71b98fd14..d4f5a16fb 100644 --- a/datarequest.py +++ b/datarequest.py @@ -10,6 +10,7 @@ from collections import OrderedDict from datetime import datetime from enum import Enum +from typing import Dict, List import jsonschema from genquery import AS_DICT, AS_LIST, Query, row_iterator @@ -198,14 +199,14 @@ class status(Enum): ('DTA_SIGNED', 'DATA_READY')]] -def status_transition_allowed(ctx, current_status, new_status): +def status_transition_allowed(ctx: rule.Context, current_status: status, new_status: status) -> bool: transition = (current_status, new_status) return transition in status_transitions -def status_set(ctx, request_id, status): - """Set the status of a data request +def status_set(ctx: rule.Context, request_id: str, status: status) -> None: + """Set the status of a data request. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request @@ -214,8 +215,8 @@ def status_set(ctx, request_id, status): metadata_set(ctx, request_id, "status", status.value) -def status_get_from_path(ctx, path): - """Get the status of a datarequest from a path +def status_get_from_path(ctx: rule.Context, path: str) -> status: + """Get the status of a datarequest from a path. :param ctx: Combined type of a callback and rei struct :param path: Path of the datarequest collection @@ -228,8 +229,8 @@ def status_get_from_path(ctx, path): return status_get(ctx, request_id) -def status_get(ctx, request_id): - """Get the status of a data request +def status_get(ctx: rule.Context, request_id: str) -> status: + """Get the status of a data request. 
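
    The status is stored as a 'status' AVU on the data request (set via
    status_set / metadata_set above), so it is looked up with a metadata
    query rather than by reading the request file itself.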
:param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request @@ -256,8 +257,8 @@ def status_get(ctx, request_id): raise error.UUError("Could not unambiguously determine the current status for datarequest <{}>".format(request_id)) -def type_get(ctx, request_id): - """Get the type of a data request +def type_get(ctx: rule.Context, request_id: str) -> type: + """Get the type of a data request. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request @@ -281,8 +282,7 @@ def type_get(ctx, request_id): return datarequest_type -def available_documents_get(ctx, request_id, datarequest_type, datarequest_status): - +def available_documents_get(ctx: rule.Context, request_id: str, datarequest_type: str, datarequest_status: str) -> List: # Construct list of existing documents available_documents = [] if datarequest_type == type.REGULAR.value: @@ -327,15 +327,14 @@ def available_documents_get(ctx, request_id, datarequest_type, datarequest_statu # Helper functions # ################################################### -def metadata_set(ctx, request_id, key, value): - """Set an arbitrary metadata field on a data request +def metadata_set(ctx: rule.Context, request_id: str, key: str, value: str) -> None: + """Set an arbitrary metadata field on a data request. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param key: Key of the metadata field :param value: Value of the metadata field """ - # Construct path to the collection of the data request coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) @@ -349,7 +348,7 @@ def metadata_set(ctx, request_id, key, value): ctx.adminDatarequestActions() -def generate_request_id(ctx): +def generate_request_id(ctx: rule.Context) -> int: coll = "/{}/{}".format(user.zone(ctx), DRCOLLECTION) max_request_id = 0 @@ -362,38 +361,35 @@ def generate_request_id(ctx): @api.make() -def api_datarequest_action_permitted(ctx, request_id, roles, statuses): - """Wrapper around datarequest_action_permitted +def api_datarequest_action_permitted(ctx: rule.Context, request_id: str, roles: List, statuses: List) -> api.Result: + """Wrapper around datarequest_action_permitted. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request + :param roles: List of permitted roles (possible values: PM, ED, DM, DAC, OWN, REV) + :param statuses: List of permitted current data request statuses or None (check skipped) - :param roles: Array of permitted roles (possible values: PM, ED, DM, DAC, OWN, REV) - :param statuses: Array of permitted current data request statuses or None (check skipped) - - :returns: True if permitted, False if not - :rtype: Boolean + :returns: True if permitted, False if not """ # Convert statuses to list of status enumeration elements if statuses is not None: - def get_status(stat): + def get_status(stat: str) -> status: return status[stat] statuses = list(map(get_status, statuses)) return datarequest_action_permitted(ctx, request_id, roles, statuses) -def datarequest_action_permitted(ctx, request_id, roles, statuses): - """Check if current user and data request status meet specified restrictions +def datarequest_action_permitted(ctx: rule.Context, request_id: str, roles: List, statuses: List | None) -> bool: + """Check if current user and data request status meet specified restrictions. 
- :param ctx: Combined type of a callback and rei struct - :param request_id: Unique identifier of the data request - :param roles: Array of permitted roles (possible values: PM, ED, DM, DAC, OWN, REV) - :param statuses: Array of permitted current data request statuses or None (check skipped) + :param ctx: Combined type of a callback and rei struct + :param request_id: Unique identifier of the data request + :param roles: List of permitted roles (possible values: PM, ED, DM, DAC, OWN, REV) + :param statuses: List of permitted current data request statuses or None (check skipped) - :returns: True if permitted, False if not - :rtype: Boolean + :returns: True if permitted, False if not """ try: # Force conversion of request_id to string @@ -428,28 +424,26 @@ def datarequest_action_permitted(ctx, request_id, roles, statuses): @api.make() -def api_datarequest_roles_get(ctx, request_id=None): - """Get roles of invoking user +def api_datarequest_roles_get(ctx: rule.Context, request_id: str | None = None) -> api.Result: + """Get roles of invoking user. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request (OWN and REV roles will not be checked if this parameter is missing) - :returns: Array of user roles - :rtype: Array + :returns: List of user roles """ return datarequest_roles_get(ctx, request_id) -def datarequest_roles_get(ctx, request_id): - """Get roles of invoking user +def datarequest_roles_get(ctx: rule.Context, request_id: str | None = None) -> List: + """Get roles of invoking user. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request (OWN and REV roles will not be checked if this parameter is missing) - :returns: Array of user roles - :rtype: Array + :returns: List of user roles """ roles = [] if user.is_member_of(ctx, GROUP_PM): @@ -467,27 +461,24 @@ def datarequest_roles_get(ctx, request_id): return roles -def datarequest_is_owner(ctx, request_id): - """Check if the invoking user is also the owner of a given data request +def datarequest_is_owner(ctx: rule.Context, request_id: str) -> bool: + """Check if the invoking user is also the owner of a given data request. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request - :return: True if user_name is owner of specified data request else False - :rtype: bool + :return: True if user_name is owner of specified data request else False """ return datarequest_owner_get(ctx, request_id) == user.name(ctx) -def datarequest_owner_get(ctx, request_id): - """Get the account name (i.e. email address) of the owner of a data request +def datarequest_owner_get(ctx: rule.Context, request_id: str) -> str | None: + """Get the account name (i.e. email address) of the owner of a data request. 
:param ctx: Combined type of a callback and a rei struct :param request_id: Unique identifier of the data request - :type request_id: str - :return: Account name of data request owner - :rtype: string + :return: Account name of data request owner """ # Construct path to the data request file_path = "/{}/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id, DATAREQUEST @@ -500,8 +491,8 @@ def datarequest_owner_get(ctx, request_id): return None -def datarequest_is_reviewer(ctx, request_id, pending=False): - """Check if a user is assigned as reviewer to a data request +def datarequest_is_reviewer(ctx: rule.Context, request_id: str, pending: bool = False) -> bool: + """Check if a user is assigned as reviewer to a data request. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request @@ -525,8 +516,8 @@ def datarequest_is_reviewer(ctx, request_id, pending=False): return is_reviewer -def datarequest_reviewers_get(ctx, request_id, pending=False): - """Return a list of users assigned as reviewers to a data request +def datarequest_reviewers_get(ctx: rule.Context, request_id: str, pending: bool = False) -> List[str]: + """Return a list of users assigned as reviewers to a data request. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request @@ -557,12 +548,12 @@ def datarequest_reviewers_get(ctx, request_id, pending=False): @api.make() -def api_datarequest_schema_get(ctx, schema_name, version=SCHEMA_VERSION): +def api_datarequest_schema_get(ctx: rule.Context, schema_name: str, version: str = SCHEMA_VERSION) -> api.Result: return datarequest_schema_get(ctx, schema_name, version) -def datarequest_schema_get(ctx, schema_name, version=SCHEMA_VERSION): - """Get schema and UI schema of a datarequest form +def datarequest_schema_get(ctx: rule.Context, schema_name: str, version: str = SCHEMA_VERSION) -> api.Result: + """Get schema and UI schema of a datarequest form. :param ctx: Combined type of a callback and rei struct :param schema_name: Name of schema @@ -587,13 +578,13 @@ def datarequest_schema_get(ctx, schema_name, version=SCHEMA_VERSION): @api.make() -def api_datarequest_resubmission_id_get(ctx, request_id): - """Given a request ID, get the request ID of the associated resubmitted data request +def api_datarequest_resubmission_id_get(ctx: rule.Context, request_id: str) -> api.Result: + """Given a request ID, get the request ID of the associated resubmitted data request. 
- :param ctx: Combined type of a callback and rei struct - :param request_id: Unique identifier of the data request + :param ctx: Combined type of a callback and rei struct + :param request_id: Unique identifier of the data request - :returns: String containing the request ID of the resubmitted data request + :returns: String containing the request ID of the resubmitted data request """ coll = "/{}/{}".format(user.zone(ctx), DRCOLLECTION) coll_path = list(Query(ctx, ['COLL_NAME'], "COLL_PARENT_NAME = '{}' AND DATA_NAME = '{}' AND META_DATA_ATTR_NAME = 'previous_request_id' AND META_DATA_ATTR_VALUE in '{}'".format(coll, DATAREQUEST + JSON_EXT, request_id), output=AS_DICT)) @@ -605,14 +596,14 @@ def api_datarequest_resubmission_id_get(ctx, request_id): return api.Error("metadata_read_error", "Not exactly 1 match for when searching for data requests with previous_request_id = {}".format(request_id)) -def datarequest_provenance_write(ctx, request_id, request_status): - """Write the timestamp of a status transition to a provenance log +def datarequest_provenance_write(ctx: rule.Context, request_id: str, request_status: status) -> api.Result: + """Write the timestamp of a status transition to a provenance log. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param request_status: Status of which to write a timestamp - :returns: Nothing + :returns: Nothing or API error """ # Check if request ID is valid if re.search(r"^\d+$", request_id) is None: @@ -644,7 +635,7 @@ def datarequest_provenance_write(ctx, request_id, request_status): return api.Error("write_error", "Could not write timestamp to provenance log: {}.".format(e)) -def datarequest_data_valid(ctx, data, schema_name=False, schema=False): +def datarequest_data_valid(ctx: rule.Context, data: Dict, schema_name: str | None = None, schema: str | None = None) -> bool: """Check if form data contains no errors Default mode of operation is to provide schema data and the schema name of the schema against @@ -665,7 +656,7 @@ def datarequest_data_valid(ctx, data, schema_name=False, schema=False): :param schema: JSON schema against which to validate the form data (in case a default schema doesn't suffice) - :returns: Boolean indicating if datarequest is valid or API error + :returns: Boolean indicating if datarequest is valid """ # Check if a schema is specified if not (schema_name or schema): @@ -682,11 +673,10 @@ def datarequest_data_valid(ctx, data, schema_name=False, schema=False): return len(errors) == 0 except error.UUJsonValidationError: # File may be missing or not valid JSON - return api.Error("validation_error", - "{} form data could not be validated against its schema.".format(schema_name)) + return False -def cc_email_addresses_get(contact_object): +def cc_email_addresses_get(contact_object: Dict) -> str | None: try: cc = contact_object['cc_email_addresses'] return cc.replace(' ', '') @@ -695,7 +685,7 @@ def cc_email_addresses_get(contact_object): @rule.make(inputs=[], outputs=[0, 1]) -def rule_datarequest_review_period_expiration_check(ctx): +def rule_datarequest_review_period_expiration_check(ctx: rule.Context) -> None: coll = "/{}/{}".format(user.zone(ctx), DRCOLLECTION) criteria = "COLL_PARENT_NAME = '{}' AND DATA_NAME = '{}' AND META_DATA_ATTR_NAME = 'endOfReviewPeriod' AND META_DATA_ATTR_VALUE < '{}' AND META_DATA_ATTR_NAME = 'status' AND META_DATA_ATTR_VALUE = 'UNDER_REVIEW'".format(coll, DATAREQUEST + JSON_EXT, int(time.time())) ccols = ['COLL_NAME'] @@ -704,7 +694,7 
@@ def rule_datarequest_review_period_expiration_check(ctx): datarequest_process_expired_review_periods(ctx, [result['COLL_NAME'].split('/')[-1] for result in list(qcoll)]) -def datarequest_sync_avus(ctx, request_id): +def datarequest_sync_avus(ctx: rule.Context, request_id: str) -> None: """Sometimes data requests are manually edited in place (e.g. for small textual changes). This in-place editing is done on the datarequest.json file. @@ -741,8 +731,13 @@ def datarequest_sync_avus(ctx, request_id): ################################################### @api.make() -def api_datarequest_browse(ctx, sort_on='name', sort_order='asc', offset=0, limit=10, - archived=False, dacrequests=True): +def api_datarequest_browse(ctx: rule.Context, + sort_on: str = 'name', + sort_order: str = 'asc', + offset: int = 0, + limit: int = 10, + archived: bool = False, + dacrequests: bool = True) -> api.Result: """Get paginated datarequests, including size/modify date information. :param ctx: Combined type of a callback and rei struct @@ -755,7 +750,7 @@ def api_datarequest_browse(ctx, sort_on='name', sort_order='asc', offset=0, limi :param dacrequests: If true, show a DAC member's own data requests (instead of data requests to be reviewed - :returns: Dict with paginated datarequests + :returns: Dict with paginated datarequests """ # Convert parameters that couldn't be passed as actual boolean values to booleans archived = archived == "True" @@ -764,7 +759,7 @@ def api_datarequest_browse(ctx, sort_on='name', sort_order='asc', offset=0, limi dac_member = user.is_member_of(ctx, GROUP_DAC) coll = "/{}/{}".format(user.zone(ctx), DRCOLLECTION) - def transform(row): + def transform(row: Dict) -> Dict: # Remove ORDER_BY etc. wrappers from column names. x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} @@ -773,14 +768,14 @@ def transform(row): 'create_time': int(x['COLL_CREATE_TIME']), 'status': x['META_DATA_ATTR_VALUE']} - def transform_title(row): + def transform_title(row: Dict) -> Dict: # Remove ORDER_BY etc. wrappers from column names. x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} return {'id': x['COLL_NAME'].split('/')[-1], 'title': x['META_DATA_ATTR_VALUE']} - def transform_status(row): + def transform_status(row: Dict) -> Dict: # Remove ORDER_BY etc. wrappers from column names. x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} @@ -852,26 +847,25 @@ def transform_status(row): return OrderedDict([('total', qcoll.total_rows()), ('items', colls)]) -def datarequest_process_expired_review_periods(ctx, request_ids): +def datarequest_process_expired_review_periods(ctx: rule.Context, request_ids: List) -> None: """Process expired review periods by setting their status to REVIEWED. :param ctx: Combined type of a callback and rei struct - :param request_ids: Array of unique data request identifiers + :param request_ids: List of unique data request identifiers """ for request_id in request_ids: status_set(ctx, request_id, status.REVIEWED) -def file_write_and_lock(ctx, coll_path, filename, data, readers): +def file_write_and_lock(ctx: rule.Context, coll_path: str, filename: str, data: Dict, readers: List[str]) -> None: """Grant temporary write permission and write file to disk. 
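
    Write access is granted only for the duration of the write; afterwards
    the file is locked again and the users in readers are given read access.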
:param ctx: Combined type of a callback and rei struct :param coll_path: Path to collection of file :param filename: Name of file :param data: The data to be written to disk - :param readers: Array of user names that should be given read access to the file + :param readers: List of user names that should be given read access to the file """ - file_path = "{}/{}".format(coll_path, filename) # Grant temporary write permission @@ -896,7 +890,7 @@ def file_write_and_lock(ctx, coll_path, filename, data, readers): @api.make() -def api_datarequest_submit(ctx, data, draft, draft_request_id=None): +def api_datarequest_submit(ctx: rule.Context, data: Dict, draft: bool, draft_request_id: str | None = None) -> api.Result: """Persist a data request to disk. :param ctx: Combined type of a callback and rei struct @@ -935,7 +929,7 @@ def api_datarequest_submit(ctx, data, draft, draft_request_id=None): request_id = draft_request_id else: # Generate request ID and construct data request collection path. - request_id = generate_request_id(ctx) + request_id = str(generate_request_id(ctx)) # Construct data request collection and file path. coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) @@ -1030,7 +1024,7 @@ def api_datarequest_submit(ctx, data, draft, draft_request_id=None): @api.make() -def api_datarequest_get(ctx, request_id): +def api_datarequest_get(ctx: rule.Context, request_id: int) -> api.Result: """Retrieve a data request. :param ctx: Combined type of a callback and rei struct @@ -1039,25 +1033,25 @@ def api_datarequest_get(ctx, request_id): :returns: Dict with request JSON and status or API error on failure """ # Force conversion of request_id to string - request_id = str(request_id) + request_id_str = str(request_id) # Permission check - datarequest_action_permitted(ctx, request_id, ["PM", "DM", "DAC", "OWN"], None) + datarequest_action_permitted(ctx, request_id_str, ["PM", "DM", "DAC", "OWN"], None) # Get request type try: - datarequest_type = type_get(ctx, request_id).value + datarequest_type = type_get(ctx, request_id_str).value except Exception as e: return api.Error("datarequest_type_fail", "Error: {}".format(e)) # Get request status - datarequest_status = status_get(ctx, request_id).value + datarequest_status = status_get(ctx, request_id_str).value # Get list of available documents - datarequest_available_documents = available_documents_get(ctx, request_id, datarequest_type, datarequest_status) + datarequest_available_documents = available_documents_get(ctx, request_id_str, datarequest_type, datarequest_status) # Get request - datarequest_json = datarequest_get(ctx, request_id) + datarequest_json = datarequest_get(ctx, request_id_str) datarequest = json.loads(datarequest_json) # Get request schema version @@ -1080,7 +1074,7 @@ def api_datarequest_get(ctx, request_id): 'requestStatus': datarequest_status, 'requestAvailableDocuments': datarequest_available_documents} -def datarequest_get(ctx, request_id): +def datarequest_get(ctx: rule.Context, request_id: str) -> str | api.Error: """Retrieve a data request. 
:param ctx: Combined type of a callback and rei struct @@ -1088,9 +1082,6 @@ def datarequest_get(ctx, request_id): :returns: Datarequest JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Construct filename and filepath coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) file_name = DATAREQUEST + JSON_EXT @@ -1104,18 +1095,15 @@ def datarequest_get(ctx, request_id): @api.make() -def api_datarequest_attachment_upload_permission(ctx, request_id, action): +def api_datarequest_attachment_upload_permission(ctx: rule.Context, request_id: str, action: str) -> api.Result: """ :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param action: String specifying whether write permission must be granted ("grant") or revoked ("grantread" or "revoke") - :returns: Nothing + :returns: Nothing """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["OWN"], [status.PENDING_ATTACHMENTS]) @@ -1131,16 +1119,13 @@ def api_datarequest_attachment_upload_permission(ctx, request_id, action): @api.make() -def api_datarequest_attachment_post_upload_actions(ctx, request_id, filename): +def api_datarequest_attachment_post_upload_actions(ctx: rule.Context, request_id: str, filename: str) -> api.Result: """Grant read permissions on the attachment to the owner of the associated data request. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param filename: Filename of attachment """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["OWN"], [status.PENDING_ATTACHMENTS]) @@ -1152,31 +1137,28 @@ def api_datarequest_attachment_post_upload_actions(ctx, request_id, filename): @api.make() -def api_datarequest_attachments_get(ctx, request_id): - """Get all attachments of a given data request +def api_datarequest_attachments_get(ctx: rule.Context, request_id: str) -> api.Result: + """Get all attachments of a given data request. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request - :returns: List of attachment filenames + :returns: List of attachment filenames """ return datarequest_attachments_get(ctx, request_id) -def datarequest_attachments_get(ctx, request_id): - """Get all attachments of a given data request +def datarequest_attachments_get(ctx: rule.Context, request_id: str) -> List: + """Get all attachments of a given data request. 
:param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request - :returns: List of attachment filenames + :returns: List of attachment filenames """ - def get_filename(file_path): + def get_filename(file_path: str) -> str: return file_path.split('/')[-1] - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM", "DM", "DAC", "OWN"], None) @@ -1187,7 +1169,7 @@ def get_filename(file_path): @api.make() -def api_datarequest_attachments_submit(ctx, request_id): +def api_datarequest_attachments_submit(ctx: rule.Context, request_id: str) -> api.Result: """Finalize the submission of uploaded attachments :param ctx: Combined type of a callback and rei struct @@ -1209,7 +1191,7 @@ def api_datarequest_attachments_submit(ctx, request_id): @api.make() -def api_datarequest_preliminary_review_submit(ctx, data, request_id): +def api_datarequest_preliminary_review_submit(ctx: rule.Context, data: Dict, request_id: str) -> api.Result: """Persist a preliminary review to disk. :param ctx: Combined type of a callback and rei struct @@ -1218,9 +1200,6 @@ def api_datarequest_preliminary_review_submit(ctx, data, request_id): :returns: API status """ - # Force conversion of request_id to string - request_id = str(request_id) - # Validate data against schema if not datarequest_data_valid(ctx, data, PR_REVIEW): return api.Error("validation_fail", @@ -1255,7 +1234,7 @@ def api_datarequest_preliminary_review_submit(ctx, data, request_id): @api.make() -def api_datarequest_preliminary_review_get(ctx, request_id): +def api_datarequest_preliminary_review_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve a preliminary review. :param ctx: Combined type of a callback and rei struct @@ -1263,16 +1242,13 @@ def api_datarequest_preliminary_review_get(ctx, request_id): :returns: Preliminary review JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM", "DM", "REV"], None) return datarequest_preliminary_review_get(ctx, request_id) -def datarequest_preliminary_review_get(ctx, request_id): +def datarequest_preliminary_review_get(ctx: rule.Context, request_id: str) -> str | api.Result: """Retrieve a preliminary review. :param ctx: Combined type of a callback and rei struct @@ -1280,9 +1256,6 @@ def datarequest_preliminary_review_get(ctx, request_id): :returns: Preliminary review JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Construct filename coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) file_name = PR_REVIEW + JSON_EXT @@ -1296,7 +1269,7 @@ def datarequest_preliminary_review_get(ctx, request_id): @api.make() -def api_datarequest_datamanager_review_submit(ctx, data, request_id): +def api_datarequest_datamanager_review_submit(ctx: rule.Context, data: Dict, request_id: str) -> api.Result: """Persist a datamanager review to disk. 
:param ctx: Combined type of a callback and rei struct @@ -1305,9 +1278,6 @@ def api_datarequest_datamanager_review_submit(ctx, data, request_id): :returns: API status """ - # Force conversion of request_id to string - request_id = str(request_id) - # Validate data against schema if not datarequest_data_valid(ctx, data, DM_REVIEW): return api.Error("validation_fail", @@ -1343,7 +1313,7 @@ def api_datarequest_datamanager_review_submit(ctx, data, request_id): @api.make() -def api_datarequest_datamanager_review_get(ctx, request_id): +def api_datarequest_datamanager_review_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve a data manager review. :param ctx: Combined type of a callback and rei struct @@ -1351,9 +1321,6 @@ def api_datarequest_datamanager_review_get(ctx, request_id): :returns: Datamanager review JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM", "DM", "REV"], None) @@ -1361,7 +1328,7 @@ def api_datarequest_datamanager_review_get(ctx, request_id): return datarequest_datamanager_review_get(ctx, request_id) -def datarequest_datamanager_review_get(ctx, request_id): +def datarequest_datamanager_review_get(ctx: rule.Context, request_id: str) -> str | api.Result: """Retrieve a data manager review. :param ctx: Combined type of a callback and rei struct @@ -1369,9 +1336,6 @@ def datarequest_datamanager_review_get(ctx, request_id): :returns: Datamanager review JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Construct filename coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) file_name = DM_REVIEW + JSON_EXT @@ -1385,11 +1349,11 @@ def datarequest_datamanager_review_get(ctx, request_id): @api.make() -def api_datarequest_dac_members_get(ctx, request_id): +def api_datarequest_dac_members_get(ctx: rule.Context, request_id: str) -> api.Result: return datarequest_dac_members_get(ctx, request_id) -def datarequest_dac_members_get(ctx, request_id): +def datarequest_dac_members_get(ctx: rule.Context, request_id: str) -> List: """Get list of DAC members :param ctx: Combined type of a callback and rei struct @@ -1408,7 +1372,7 @@ def datarequest_dac_members_get(ctx, request_id): @api.make() -def api_datarequest_assignment_submit(ctx, data, request_id): +def api_datarequest_assignment_submit(ctx: rule.Context, data: Dict, request_id: str) -> api.Result: """Persist an assignment to disk. :param ctx: Combined type of a callback and rei struct @@ -1417,9 +1381,6 @@ def api_datarequest_assignment_submit(ctx, data, request_id): :returns: API status """ - # Force conversion of request_id to string - request_id = str(request_id) - # Validate data against schema dac_members = datarequest_dac_members_get(ctx, request_id) schema = datarequest_schema_get(ctx, ASSIGNMENT) @@ -1475,7 +1436,7 @@ def api_datarequest_assignment_submit(ctx, data, request_id): return api.Error("InvalidData", "Invalid value for 'decision' key in datamanager review review JSON data.") -def assign_request(ctx, assignees, request_id): +def assign_request(ctx: rule.Context, assignees: str, request_id: str) -> None: """Assign a data request to one or more DAC members for review. 
:param ctx: Combined type of a callback and rei struct @@ -1509,7 +1470,7 @@ def assign_request(ctx, assignees, request_id): @api.make() -def api_datarequest_assignment_get(ctx, request_id): +def api_datarequest_assignment_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve assignment. :param ctx: Combined type of a callback and rei struct @@ -1517,16 +1478,13 @@ def api_datarequest_assignment_get(ctx, request_id): :returns: Datarequest assignment JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM"], None) return datarequest_assignment_get(ctx, request_id) -def datarequest_assignment_get(ctx, request_id): +def datarequest_assignment_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve an assignment :param ctx: Combined type of a callback and rei struct @@ -1550,7 +1508,7 @@ def datarequest_assignment_get(ctx, request_id): @api.make() -def api_datarequest_review_submit(ctx, data, request_id): +def api_datarequest_review_submit(ctx: rule.Context, data: Dict, request_id: str) -> api.Result: """Persist a data request review to disk. :param ctx: Combined type of a callback and rei struct @@ -1559,9 +1517,6 @@ def api_datarequest_review_submit(ctx, data, request_id): :returns: A JSON dict with status info for the front office """ - # Force conversion of request_id to string - request_id = str(request_id) - # Validate data against schema if not datarequest_data_valid(ctx, data, REVIEW): return api.Error("validation_fail", @@ -1616,7 +1571,7 @@ def api_datarequest_review_submit(ctx, data, request_id): @api.make() -def api_datarequest_reviews_get(ctx, request_id): +def api_datarequest_reviews_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve a data request review. :param ctx: Combined type of a callback and rei struct @@ -1624,9 +1579,6 @@ def api_datarequest_reviews_get(ctx, request_id): :returns: Datarequest review JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM", "REV"], None) @@ -1650,7 +1602,7 @@ def api_datarequest_reviews_get(ctx, request_id): @api.make() -def api_datarequest_evaluation_submit(ctx, data, request_id): +def api_datarequest_evaluation_submit(ctx: rule.Context, data: Dict, request_id: str) -> api.Result: """Persist an evaluation to disk. 
     :param ctx:         Combined type of a callback and rei struct
@@ -1658,10 +1610,9 @@ def api_datarequest_evaluation_submit(ctx, data, request_id):
     :param request_id: Unique identifier of the data request
 
     :returns: API status
-    """
-    # Force conversion of request_id to string
-    request_id = str(request_id)
 
+    :raises UUError: If datarequest owner could not be determined
+    """
     # Validate data against schema
     if not datarequest_data_valid(ctx, data, EVALUATION):
         return api.Error("validation_fail",
@@ -1676,8 +1627,11 @@
     # Write approval conditions to disk if applicable
     if 'approval_conditions' in data:
         try:
+            datarequest_owner = datarequest_owner_get(ctx, request_id)
+            if datarequest_owner is None:
+                raise error.UUError("Datarequest owner could not be determined")
             file_write_and_lock(ctx, coll_path, APPROVAL_CONDITIONS + JSON_EXT,
-                                data['approval_conditions'], [datarequest_owner_get(ctx, request_id)])
+                                data['approval_conditions'], [datarequest_owner])
         except error.UUError:
             return api.Error('write_error', 'Could not write approval conditions to disk')
 
@@ -1709,7 +1663,7 @@
 
 @api.make()
-def api_datarequest_approval_conditions_get(ctx, request_id):
+def api_datarequest_approval_conditions_get(ctx: rule.Context, request_id: str) -> api.Result:
     """Retrieve approval conditions
 
     :param ctx:        Combined type of a callback and rei struct
@@ -1717,9 +1671,6 @@
     :returns: Approval conditions JSON or API error on failure
     """
-    # Force conversion of request_id to string
-    request_id = str(request_id)
-
     # Permission check
     datarequest_action_permitted(ctx, request_id, ["OWN"], None)
 
@@ -1741,7 +1692,7 @@
 
 @api.make()
-def api_datarequest_evaluation_get(ctx, request_id):
+def api_datarequest_evaluation_get(ctx: rule.Context, request_id: str) -> api.Result:
     """Retrieve an evaluation.
:param ctx: Combined type of a callback and rei struct @@ -1749,16 +1700,13 @@ def api_datarequest_evaluation_get(ctx, request_id): :returns: Evaluation JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM", "DAC"], None) return datarequest_evaluation_get(ctx, request_id) -def datarequest_evaluation_get(ctx, request_id): +def datarequest_evaluation_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve an evaluation :param ctx: Combined type of a callback and rei struct @@ -1766,9 +1714,6 @@ def datarequest_evaluation_get(ctx, request_id): :returns: Evaluation JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Construct filename coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) file_name = EVALUATION + JSON_EXT @@ -1781,7 +1726,7 @@ def datarequest_evaluation_get(ctx, request_id): return api.Error("ReadError", "Could not get evaluation data.") -def datarequest_feedback_write(ctx, request_id, feedback): +def datarequest_feedback_write(ctx: rule.Context, request_id: str, feedback: str) -> api.Result: """ Write feedback to researcher to a separate file and grant the researcher read access :param ctx: Combined type of a callback and rei struct @@ -1790,9 +1735,6 @@ def datarequest_feedback_write(ctx, request_id, feedback): :returns: API status """ - # Force conversion of request_id to string - request_id = str(request_id) - # Construct path to feedback file coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) @@ -1811,7 +1753,7 @@ def datarequest_feedback_write(ctx, request_id, feedback): @api.make() -def api_datarequest_feedback_get(ctx, request_id): +def api_datarequest_feedback_get(ctx: rule.Context, request_id: str) -> api.Result: """Get feedback for researcher :param ctx: Combined type of a callback and rei struct @@ -1819,9 +1761,6 @@ def api_datarequest_feedback_get(ctx, request_id): :returns: JSON-formatted string containing feedback for researcher """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["OWN"], [status.PRELIMINARY_REJECT, status.PRELIMINARY_RESUBMIT, @@ -1841,7 +1780,7 @@ def api_datarequest_feedback_get(ctx, request_id): @api.make() -def api_datarequest_preregistration_submit(ctx, data, request_id): +def api_datarequest_preregistration_submit(ctx: rule.Context, data: Dict, request_id: str) -> api.Result: """Persist a preregistration to disk. :param ctx: Combined type of a callback and rei struct @@ -1850,9 +1789,6 @@ def api_datarequest_preregistration_submit(ctx, data, request_id): :returns: API status """ - # Force conversion of request_id to string - request_id = str(request_id) - # Validate data against schema if not datarequest_data_valid(ctx, data, PREREGISTRATION): return api.Error("validation_fail", @@ -1875,7 +1811,7 @@ def api_datarequest_preregistration_submit(ctx, data, request_id): @api.make() -def api_datarequest_preregistration_get(ctx, request_id): +def api_datarequest_preregistration_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve a preregistration. 
:param ctx: Combined type of a callback and rei struct @@ -1883,16 +1819,13 @@ def api_datarequest_preregistration_get(ctx, request_id): :returns: Preregistration JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM"], None) return datarequest_preregistration_get(ctx, request_id) -def datarequest_preregistration_get(ctx, request_id): +def datarequest_preregistration_get(ctx: rule.Context, request_id: str) -> api.Result: """Retrieve a preregistration. :param ctx: Combined type of a callback and rei struct @@ -1900,9 +1833,6 @@ def datarequest_preregistration_get(ctx, request_id): :returns: Preregistration JSON or API error on failure """ - # Force conversion of request_id to string - request_id = str(request_id) - # Construct filename coll_path = "/{}/{}/{}".format(user.zone(ctx), DRCOLLECTION, request_id) file_name = PREREGISTRATION + JSON_EXT @@ -1916,15 +1846,12 @@ def datarequest_preregistration_get(ctx, request_id): @api.make() -def api_datarequest_preregistration_confirm(ctx, request_id): +def api_datarequest_preregistration_confirm(ctx: rule.Context, request_id: str) -> api.Result: """Set the status of a submitted datarequest to PREREGISTRATION_CONFIRMED. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM"], [status.PREREGISTRATION_SUBMITTED]) @@ -1932,18 +1859,15 @@ def api_datarequest_preregistration_confirm(ctx, request_id): @api.make() -def api_datarequest_dta_upload_permission(ctx, request_id, action): +def api_datarequest_dta_upload_permission(ctx: rule.Context, request_id: str, action: str) -> api.Result: """ :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param action: String specifying whether write permission must be granted ("grant") or revoked ("revoke") - :returns: Nothing + :returns: API result """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["DM"], [status.APPROVED, status.DAO_APPROVED]) @@ -1958,16 +1882,15 @@ def api_datarequest_dta_upload_permission(ctx, request_id, action): @api.make() -def api_datarequest_dta_post_upload_actions(ctx, request_id, filename): +def api_datarequest_dta_post_upload_actions(ctx: rule.Context, request_id: str, filename: str) -> api.Result: """Grant read permissions on the DTA to the owner of the associated data request. 
:param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param filename: Filename of DTA - """ - # Force conversion of request_id to string - request_id = str(request_id) + :returns: API result + """ # Permission check datarequest_action_permitted(ctx, request_id, ["DM"], [status.APPROVED, status.DAO_APPROVED]) @@ -1981,25 +1904,22 @@ def api_datarequest_dta_post_upload_actions(ctx, request_id, filename): # Set status to dta_ready status_set(ctx, request_id, status.DTA_READY) + return api.OK() @api.make() -def api_datarequest_dta_path_get(ctx, request_id): +def api_datarequest_dta_path_get(ctx: rule.Context, request_id: str) -> api.Result: return datarequest_dta_path_get(ctx, request_id) -def datarequest_dta_path_get(ctx, request_id): - - """Get path to DTA +def datarequest_dta_path_get(ctx: rule.Context, request_id: str) -> api.Result: + """Get path to DTA. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request - :returns: Path to DTA + :returns: Path to DTA """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM", "DM", "OWN"], None) @@ -2008,18 +1928,15 @@ def datarequest_dta_path_get(ctx, request_id): @api.make() -def api_datarequest_signed_dta_upload_permission(ctx, request_id, action): +def api_datarequest_signed_dta_upload_permission(ctx: rule.Context, request_id: str, action: str) -> api.Result: """ :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param action: String specifying whether write permission must be granted ("grant") or revoked ("revoke") - :returns: Nothing + :returns: API result """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["OWN"], [status.DTA_READY]) @@ -2033,16 +1950,15 @@ def api_datarequest_signed_dta_upload_permission(ctx, request_id, action): @api.make() -def api_datarequest_signed_dta_post_upload_actions(ctx, request_id, filename): +def api_datarequest_signed_dta_post_upload_actions(ctx: rule.Context, request_id: str, filename: str) -> api.Result: """Grant read permissions on the signed DTA to the datamanagers group. 
:param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request :param filename: Filename of signed DTA - """ - # Force conversion of request_id to string - request_id = str(request_id) + :return: API result + """ # Permission check datarequest_action_permitted(ctx, request_id, ["OWN"], [status.DTA_READY]) @@ -2055,10 +1971,11 @@ def api_datarequest_signed_dta_post_upload_actions(ctx, request_id, filename): # Set status to dta_signed status_set(ctx, request_id, status.DTA_SIGNED) + return api.OK() @api.make() -def api_datarequest_signed_dta_path_get(ctx, request_id): +def api_datarequest_signed_dta_path_get(ctx: rule.Context, request_id: str) -> api.Result: """Get path to signed DTA :param ctx: Combined type of a callback and rei struct @@ -2066,9 +1983,6 @@ def api_datarequest_signed_dta_path_get(ctx, request_id): :returns: Path to signed DTA """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["PM", "DM", "OWN"], None) @@ -2077,15 +1991,12 @@ def api_datarequest_signed_dta_path_get(ctx, request_id): @api.make() -def api_datarequest_data_ready(ctx, request_id): +def api_datarequest_data_ready(ctx: rule.Context, request_id: str) -> api.Result: """Set the status of a submitted datarequest to DATA_READY. :param ctx: Combined type of a callback and rei struct :param request_id: Unique identifier of the data request """ - # Force conversion of request_id to string - request_id = str(request_id) - # Permission check datarequest_action_permitted(ctx, request_id, ["DM"], [status.DTA_SIGNED]) @@ -2096,14 +2007,14 @@ def api_datarequest_data_ready(ctx, request_id): # Email logic # ################################################### -def truncated_title_get(ctx, request_id): +def truncated_title_get(ctx: rule.Context, request_id: str) -> str: datarequest = json.loads(datarequest_get(ctx, request_id)) study_title = datarequest['datarequest']['study_information']['title'] return study_title if len(study_title) < 16 else study_title[0:15] + "..." 
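
A minimal illustration of the truncation rule above, using made-up titles
(not taken from any actual data request):

    # Titles of up to 15 characters pass through unchanged;
    # longer ones are cut to their first 15 characters plus "...".
    for title in ["Short title", "A much longer study title"]:
        print(title if len(title) < 16 else title[0:15] + "...")
    # -> Short title
    # -> A much longer s...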
-def send_emails(ctx, obj_name, status_to): +def send_emails(ctx: rule.Context, obj_name: str, status_to: str) -> None: # Get request ID temp, _ = pathutil.chop(obj_name) _, request_id = pathutil.chop(temp) @@ -2160,7 +2071,7 @@ def send_emails(ctx, obj_name, status_to): data_ready_emails(ctx, request_id) -def datarequest_submit_emails(ctx, request_id, dao=False): +def datarequest_submit_emails(ctx: rule.Context, request_id: str, dao: bool = False) -> None: # Get (source data for) email input parameters datarequest = json.loads(datarequest_get(ctx, request_id)) researcher = datarequest['contact']['principal_investigator'] @@ -2190,7 +2101,7 @@ def datarequest_submit_emails(ctx, request_id, dao=False): researcher['department'], timestamp, study_title) -def preliminary_review_emails(ctx, request_id, datarequest_status): +def preliminary_review_emails(ctx: rule.Context, request_id: str, datarequest_status: status) -> None: # Get (source data for) email input parameters datamanager_members = group.members(ctx, GROUP_DM) truncated_title = truncated_title_get(ctx, request_id) @@ -2222,7 +2133,7 @@ def preliminary_review_emails(ctx, request_id, datarequest_status): feedback_for_researcher, pm_email, request_id, cc) -def datamanager_review_emails(ctx, request_id, datarequest_status): +def datamanager_review_emails(ctx: rule.Context, request_id: str, datarequest_status: status) -> None: # Get (source data for) email input parameters pm_members = group.members(ctx, GROUP_PM) datamanager_review = json.loads(datarequest_datamanager_review_get(ctx, request_id)) @@ -2242,7 +2153,7 @@ def datamanager_review_emails(ctx, request_id, datarequest_status): request_id) -def assignment_emails(ctx, request_id, datarequest_status): +def assignment_emails(ctx: rule.Context, request_id: str, datarequest_status: status) -> None: # Get (source data for) email input parameters datarequest = json.loads(datarequest_get(ctx, request_id)) researcher = datarequest['contact']['principal_investigator'] @@ -2275,7 +2186,7 @@ def assignment_emails(ctx, request_id, datarequest_status): feedback_for_researcher, pm_email, request_id, cc) -def review_emails(ctx, request_id): +def review_emails(ctx: rule.Context, request_id: str) -> None: # Get (source data for) email input parameters datarequest = json.loads(datarequest_get(ctx, request_id)) researcher = datarequest['contact']['principal_investigator'] @@ -2292,7 +2203,7 @@ def review_emails(ctx, request_id): mail_review_pm(ctx, truncated_title, pm_email, request_id) -def evaluation_emails(ctx, request_id, datarequest_status): +def evaluation_emails(ctx: rule.Context, request_id: str, datarequest_status: status) -> None: # Get (source data for) email input parameters datarequest = json.loads(datarequest_get(ctx, request_id)) researcher = datarequest['contact']['principal_investigator'] @@ -2315,7 +2226,7 @@ def evaluation_emails(ctx, request_id, datarequest_status): feedback_for_researcher, pm_email, request_id, cc) -def preregistration_submit_emails(ctx, request_id): +def preregistration_submit_emails(ctx: rule.Context, request_id: str) -> None: # Get parameters truncated_title = truncated_title_get(ctx, request_id) @@ -2324,7 +2235,7 @@ def preregistration_submit_emails(ctx, request_id): mail_preregistration_submit(ctx, truncated_title, pm_email, request_id) -def datarequest_approved_emails(ctx, request_id, dao=False): +def datarequest_approved_emails(ctx: rule.Context, request_id: str, dao: bool = False) -> None: # Get parameters datarequest = json.loads(datarequest_get(ctx, 
request_id)) researcher = datarequest['contact']['principal_investigator'] @@ -2347,7 +2258,7 @@ def datarequest_approved_emails(ctx, request_id, dao=False): request_id) -def dta_post_upload_actions_emails(ctx, request_id): +def dta_post_upload_actions_emails(ctx: rule.Context, request_id: str) -> None: # Get (source data for) email input parameters datarequest = json.loads(datarequest_get(ctx, request_id)) researcher = datarequest['contact']['principal_investigator'] @@ -2362,7 +2273,7 @@ def dta_post_upload_actions_emails(ctx, request_id): mail_dta(ctx, truncated_title, researcher_email, researcher['name'], request_id, cc) -def signed_dta_post_upload_actions_emails(ctx, request_id): +def signed_dta_post_upload_actions_emails(ctx: rule.Context, request_id: str) -> None: # Get (source data for) email input parameters datamanager_members = group.members(ctx, GROUP_DM) authoring_dm = data_object.owner(ctx, datarequest_dta_path_get(ctx, request_id))[0] @@ -2375,7 +2286,7 @@ def signed_dta_post_upload_actions_emails(ctx, request_id): mail_signed_dta(ctx, truncated_title, authoring_dm, datamanager_email, request_id, cc) -def data_ready_emails(ctx, request_id): +def data_ready_emails(ctx: rule.Context, request_id: str) -> None: # Get (source data for) email input parameters datarequest = json.loads(datarequest_get(ctx, request_id)) researcher = datarequest['contact']['principal_investigator'] diff --git a/deposit.py b/deposit.py index 69161a9f2..efbc0aadc 100644 --- a/deposit.py +++ b/deposit.py @@ -5,6 +5,7 @@ import re from collections import OrderedDict +from typing import Dict import genquery from genquery import AS_DICT, Query @@ -24,7 +25,7 @@ @api.make() -def api_deposit_copy_data_package(ctx, reference, deposit_group): +def api_deposit_copy_data_package(ctx: rule.Context, reference: str, deposit_group: str) -> api.Result: """Create deposit collection and copies selected datapackage into the newly created deposit :param ctx: Combined type of a callback and rei struct @@ -82,7 +83,7 @@ def api_deposit_copy_data_package(ctx, reference, deposit_group): @api.make() -def api_deposit_create(ctx, deposit_group): +def api_deposit_create(ctx: rule.Context, deposit_group: str) -> api.Result: """Create deposit collection through API :param ctx: Combined type of a callback and rei struct @@ -98,7 +99,7 @@ def api_deposit_create(ctx, deposit_group): return {"deposit_path": result["deposit_path"]} -def deposit_create(ctx, deposit_group): +def deposit_create(ctx: rule.Context, deposit_group: str | None) -> Dict: """Create deposit collection. :param ctx: Combined type of a callback and rei struct @@ -139,7 +140,7 @@ def deposit_create(ctx, deposit_group): @api.make() -def api_deposit_status(ctx, path): +def api_deposit_status(ctx: rule.Context, path: str) -> api.Result: """Retrieve status of deposit. :param ctx: Combined type of a callback and rei struct @@ -174,7 +175,7 @@ def api_deposit_status(ctx, path): @api.make() -def api_deposit_submit(ctx, path): +def api_deposit_submit(ctx: rule.Context, path: str) -> api.Result: """Submit deposit collection. 
:param ctx: Combined type of a callback and rei struct @@ -195,12 +196,12 @@ def api_deposit_submit(ctx, path): @api.make() -def api_deposit_overview(ctx, - sort_on='name', - sort_order='asc', - offset=0, - limit=10, - space=pathutil.Space.OTHER.value): +def api_deposit_overview(ctx: rule.Context, + sort_on: str = 'name', + sort_order: str = 'asc', + offset: int = 0, + limit: int = 10, + space: str = pathutil.Space.OTHER.value) -> api.Result: """Get paginated collection contents, including size/modify date information. This function browses a folder and only looks at the collections in it. No dataobjects. @@ -215,7 +216,7 @@ def api_deposit_overview(ctx, :returns: Dict with paginated collection contents """ - def transform(row): + def transform(row: Dict) -> Dict: # Remove ORDER_BY etc. wrappers from column names. x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()} diff --git a/epic.py b/epic.py index 335c9288b..209fb799a 100644 --- a/epic.py +++ b/epic.py @@ -4,6 +4,7 @@ __license__ = 'GPLv3, see LICENSE' import uuid +from typing import Dict import publication from util import * @@ -11,7 +12,7 @@ __all__ = ['rule_generate_uuid'] -def generate_uuid(ctx): +def generate_uuid(ctx: rule.Context) -> str: """Generate random ID for DOI.""" randomuuid = str(uuid.uuid4()) return randomuuid.upper() @@ -20,7 +21,7 @@ def generate_uuid(ctx): rule_generate_uuid = rule.make(inputs=[], outputs=[0])(generate_uuid) -def register_epic_pid(ctx, target): +def register_epic_pid(ctx: rule.Context, target: str) -> Dict: """Create and try to register an EPIC PID. :param ctx: Combined type of a callback and rei struct @@ -43,7 +44,7 @@ def register_epic_pid(ctx, target): 'httpCode': ret['arguments'][2]} -def save_epic_pid(ctx, target, url, pid): +def save_epic_pid(ctx: rule.Context, target: str, url: str, pid: str) -> None: """Save persistent EPIC ID. :param ctx: Combined type of a callback and rei struct diff --git a/folder.py b/folder.py index bec8e82d5..ec922b3b8 100644 --- a/folder.py +++ b/folder.py @@ -5,6 +5,7 @@ import time import uuid +from typing import List, Tuple import genquery import irods_types @@ -28,7 +29,7 @@ 'rule_folder_secure'] -def set_status(ctx, coll, status): +def set_status(ctx: rule.Context, coll: str, status: constants.research_package_state) -> api.Result: """Change a folder's status. Status changes are validated by policy (AVU modify preproc). @@ -68,7 +69,7 @@ def set_status(ctx, coll, status): return api.Result.ok() -def set_status_as_datamanager(ctx, coll, status): +def set_status_as_datamanager(ctx: rule.Context, coll: str, status: constants.research_package_state) -> api.Result: """Change a folder's status as a datamanager. :param ctx: Combined type of a callback and rei struct @@ -87,7 +88,7 @@ def set_status_as_datamanager(ctx, coll, status): @api.make() -def api_folder_lock(ctx, coll): +def api_folder_lock(ctx: rule.Context, coll: str) -> api.Result: """Lock a folder. :param ctx: Combined type of a callback and rei struct @@ -99,7 +100,7 @@ def api_folder_lock(ctx, coll): @api.make() -def api_folder_unlock(ctx, coll): +def api_folder_unlock(ctx: rule.Context, coll: str) -> api.Result: """Unlock a folder. Unlocking is implemented by clearing the folder status. Since this action @@ -119,7 +120,7 @@ def api_folder_unlock(ctx, coll): @api.make() -def api_folder_submit(ctx, coll): +def api_folder_submit(ctx: rule.Context, coll: str) -> api.Result: """Submit a folder. 
:param ctx: Combined type of a callback and rei struct @@ -131,7 +132,7 @@ def api_folder_submit(ctx, coll): @api.make() -def api_folder_unsubmit(ctx, coll): +def api_folder_unsubmit(ctx: rule.Context, coll: str) -> api.Result: """Unsubmit a folder. :param ctx: Combined type of a callback and rei struct @@ -147,7 +148,7 @@ def api_folder_unsubmit(ctx, coll): @api.make() -def api_folder_accept(ctx, coll): +def api_folder_accept(ctx: rule.Context, coll: str) -> api.Result: """Accept a folder. :param ctx: Combined type of a callback and rei struct @@ -159,7 +160,7 @@ def api_folder_accept(ctx, coll): @api.make() -def api_folder_reject(ctx, coll): +def api_folder_reject(ctx: rule.Context, coll: str) -> api.Result: """Reject a folder. :param ctx: Combined type of a callback and rei struct @@ -171,7 +172,7 @@ def api_folder_reject(ctx, coll): @rule.make(inputs=[0], outputs=[1]) -def rule_folder_secure(ctx, coll): +def rule_folder_secure(ctx: rule.Context, coll: str) -> str: """Rule interface for processing vault status transition request. :param ctx: Combined type of a callback and rei struct :param coll: Collection to be copied to vault @@ -188,7 +189,7 @@ def rule_folder_secure(ctx, coll): return '1' -def precheck_folder_secure(ctx, coll): +def precheck_folder_secure(ctx: rule.Context, coll: str) -> bool: """Whether to continue with securing. Should not touch the retry attempts, these are prechecks and don't count toward the retry attempts limit @@ -210,7 +211,7 @@ def precheck_folder_secure(ctx, coll): return True -def folder_secure(ctx, coll): +def folder_secure(ctx: rule.Context, coll: str) -> bool: """Secure a folder to the vault. If the previous copy did not finish, retry This function should only be called by a rodsadmin @@ -292,7 +293,7 @@ def folder_secure(ctx, coll): return True -def check_folder_secure(ctx, coll): +def check_folder_secure(ctx: rule.Context, coll: str) -> bool: """Some initial set up that determines whether folder secure can continue. These WILL affect the retry attempts. @@ -309,7 +310,7 @@ def check_folder_secure(ctx, coll): return True -def correct_copytovault_start_status(ctx, coll): +def correct_copytovault_start_status(ctx: rule.Context, coll: str) -> bool: """Confirm that the copytovault cronjob avu status is correct state to start securing""" cronjob_status = get_cronjob_status(ctx, coll) if cronjob_status in (constants.CRONJOB_STATE['PENDING'], constants.CRONJOB_STATE['RETRY']): @@ -318,7 +319,7 @@ def correct_copytovault_start_status(ctx, coll): return False -def correct_copytovault_start_location(coll): +def correct_copytovault_start_location(coll: str) -> bool: """Confirm that the folder to be copied is in the correct location. For example: in a research or deposit folder and not in the trash. 
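
For context on the hunk above: the start check in correct_copytovault_start_status is a small predicate over the cronjob status AVU. A minimal standalone sketch of that behaviour follows; the state strings are illustrative placeholders for the values in constants.CRONJOB_STATE, and may_start_securing is a hypothetical name, not part of the ruleset:

    from typing import Optional

    # Placeholder values; the ruleset reads these from constants.CRONJOB_STATE.
    CRONJOB_PENDING = 'CRONJOB_PENDING'
    CRONJOB_RETRY = 'CRONJOB_RETRY'

    def may_start_securing(cronjob_status: Optional[str]) -> bool:
        """Securing only starts from the PENDING or RETRY state.

        get_cronjob_status yields None when the AVU is absent, and None is
        not an allowed start state, so such folders do not start securing.
        """
        return cronjob_status in (CRONJOB_PENDING, CRONJOB_RETRY)
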
@@ -330,7 +331,7 @@ def correct_copytovault_start_location(coll): return space in (pathutil.Space.RESEARCH, pathutil.Space.DEPOSIT) -def get_last_run_time(ctx, coll): +def get_last_run_time(ctx: rule.Context, coll: str) -> Tuple[bool, int]: """Get the last run time, if found""" found = False last_run = 1 @@ -346,13 +347,13 @@ def get_last_run_time(ctx, coll): return found, last_run -def set_last_run_time(ctx, coll): +def set_last_run_time(ctx: rule.Context, coll: str) -> bool: """Set last run time, return True for successful set""" now = int(time.time()) return avu.set_on_coll(ctx, coll, constants.IICOPYLASTRUN, str(now), True) -def set_can_modify(ctx, coll): +def set_can_modify(ctx: rule.Context, coll: str) -> bool: """Check if have permission to modify, set if necessary""" check_access_result = msi.check_access(ctx, coll, 'modify_object', irods_types.BytesBuf()) modify_access = check_access_result['arguments'][2] @@ -367,7 +368,7 @@ def set_can_modify(ctx, coll): return True -def get_retry_count(ctx, coll): +def get_retry_count(ctx: rule.Context, coll: str) -> int: """ Get the retry count, if not such AVU, return 0 """ retry_count = 0 iter = genquery.row_iterator( @@ -381,7 +382,7 @@ def get_retry_count(ctx, coll): return retry_count -def retry_attempts(ctx, coll): +def retry_attempts(ctx: rule.Context, coll: str) -> bool: """ Check if there have been too many retries. """ retry_count = get_retry_count(ctx, coll) @@ -391,7 +392,7 @@ def retry_attempts(ctx, coll): return True -def folder_secure_succeed_avus(ctx, coll, group_name): +def folder_secure_succeed_avus(ctx: rule.Context, coll: str, group_name: str) -> bool: """Set/rm AVUs on source folder when successfully secured folder""" attributes = [x[0] for x in get_org_metadata(ctx, coll)] @@ -425,7 +426,7 @@ def folder_secure_succeed_avus(ctx, coll, group_name): return True -def folder_secure_set_retry(ctx, coll): +def folder_secure_set_retry(ctx: rule.Context, coll: str) -> None: # When a folder secure fails, try to set the retry AVU and other applicable AVUs on source folder. # If too many attempts, fail. 
new_retry_count = get_retry_count(ctx, coll) + 1 @@ -436,12 +437,12 @@ def folder_secure_set_retry(ctx, coll): send_folder_secure_notification(ctx, coll, "Failed to set retry state on data package") -def folder_secure_set_retry_avus(ctx, coll, retry_count): +def folder_secure_set_retry_avus(ctx: rule.Context, coll: str, retry_count: int) -> bool: avu.set_on_coll(ctx, coll, constants.IICOPYRETRYCOUNT, str(retry_count), True) return set_cronjob_status(ctx, constants.CRONJOB_STATE['RETRY'], coll) -def folder_secure_fail(ctx, coll): +def folder_secure_fail(ctx: rule.Context, coll: str) -> None: """When there are too many retries, give up, set the AVUs and send notifications""" # Errors are caught here in hopes that will still be able to set UNRECOVERABLE status at least avu.rmw_from_coll(ctx, coll, constants.IICOPYRETRYCOUNT, "%", True) @@ -450,7 +451,7 @@ def folder_secure_fail(ctx, coll): set_cronjob_status(ctx, constants.CRONJOB_STATE['UNRECOVERABLE'], coll) -def send_folder_secure_notification(ctx, coll, message): +def send_folder_secure_notification(ctx: rule.Context, coll: str, message: str) -> None: """Send notification about folder secure to relevant datamanagers""" if datamanager_exists(ctx, coll): datamanagers = get_datamanagers(ctx, coll) @@ -459,7 +460,7 @@ def send_folder_secure_notification(ctx, coll, message): notifications.set(ctx, "system", datamanager, coll, message) -def set_epic_pid(ctx, target): +def set_epic_pid(ctx: rule.Context, target: str) -> bool: """Try to set epic pid, if fails return False""" if config.epic_pid_enabled: ret = epic.register_epic_pid(ctx, target) @@ -479,7 +480,7 @@ def set_epic_pid(ctx, target): return True -def get_cronjob_status(ctx, coll): +def get_cronjob_status(ctx: rule.Context, coll: str) -> str | None: """Get the cronjob status of given collection""" iter = genquery.row_iterator( "META_COLL_ATTR_VALUE", @@ -489,8 +490,10 @@ def get_cronjob_status(ctx, coll): for row in iter: return row[0] + return None -def rm_cronjob_status(ctx, coll): + +def rm_cronjob_status(ctx: rule.Context, coll: str) -> bool: """Remove cronjob_copy_to_vault attribute on source collection :param ctx: Combined type of a callback and rei struct @@ -501,7 +504,7 @@ def rm_cronjob_status(ctx, coll): return avu.rmw_from_coll(ctx, coll, constants.UUORGMETADATAPREFIX + "cronjob_copy_to_vault", "%", True) -def set_cronjob_status(ctx, status, coll): +def set_cronjob_status(ctx: rule.Context, status: str, coll: str) -> bool: """Set cronjob_copy_to_vault attribute on source collection :param ctx: Combined type of a callback and rei struct @@ -513,7 +516,7 @@ def set_cronjob_status(ctx, status, coll): return avu.set_on_coll(ctx, coll, constants.UUORGMETADATAPREFIX + "cronjob_copy_to_vault", status, True) -def set_acl_parents(ctx, acl_recurse, acl_type, coll): +def set_acl_parents(ctx: rule.Context, acl_recurse: str, acl_type: str, coll: str) -> None: """Set ACL for parent collections""" parent, _ = pathutil.chop(coll) while parent != "/" + user.zone(ctx) + "/home": @@ -521,7 +524,7 @@ def set_acl_parents(ctx, acl_recurse, acl_type, coll): parent, _ = pathutil.chop(parent) -def set_acl_check(ctx, acl_recurse, acl_type, coll, error_msg=''): +def set_acl_check(ctx: rule.Context, acl_recurse: str, acl_type: str, coll: str, error_msg: str = '') -> bool: """Set the ACL if possible, log error_msg if it goes wrong""" # TODO turn acl_recurse into a boolean try: @@ -534,7 +537,7 @@ def set_acl_check(ctx, acl_recurse, acl_type, coll, error_msg=''): return True -def 
get_existing_vault_target(ctx, coll): +def get_existing_vault_target(ctx: rule.Context, coll: str) -> Tuple[bool, str]: """Determine vault target on coll, if it was already determined before """ found = False target = "" @@ -550,7 +553,7 @@ def get_existing_vault_target(ctx, coll): return found, target -def set_vault_target(ctx, coll, target): +def set_vault_target(ctx: rule.Context, coll: str, target: str) -> bool: """Create vault target and AVUs""" msi.coll_create(ctx, target, '', irods_types.BytesBuf()) if not avu.set_on_coll(ctx, target, constants.IIVAULTSTATUSATTRNAME, constants.vault_package_state.INCOMPLETE, True): @@ -563,7 +566,7 @@ def set_vault_target(ctx, coll, target): return True -def determine_and_set_vault_target(ctx, coll): +def determine_and_set_vault_target(ctx: rule.Context, coll: str) -> str: """Determine and set target on coll""" found, target = get_existing_vault_target(ctx, coll) @@ -581,7 +584,7 @@ def determine_and_set_vault_target(ctx, coll): return target -def determine_new_vault_target(ctx, folder): +def determine_new_vault_target(ctx: rule.Context, folder: str) -> str: """Determine vault target path for a folder.""" group = collection_group_name(ctx, folder) @@ -614,7 +617,7 @@ def determine_new_vault_target(ctx, folder): return target -def collection_group_name(callback, coll): +def collection_group_name(ctx: rule.Context, coll: str) -> str: """Return the name of the group a collection belongs to.""" if pathutil.info(coll).space is pathutil.Space.DEPOSIT: @@ -624,7 +627,7 @@ def collection_group_name(callback, coll): iter = genquery.row_iterator( "COLL_ACCESS_USER_ID", "COLL_NAME = '{}'".format(coll), - genquery.AS_LIST, callback + genquery.AS_LIST, ctx ) for row in iter: @@ -634,7 +637,7 @@ def collection_group_name(callback, coll): iter2 = genquery.row_iterator( "USER_GROUP_NAME", "USER_GROUP_ID = '{}'".format(id), - genquery.AS_LIST, callback + genquery.AS_LIST, ctx ) for row2 in iter2: @@ -654,14 +657,14 @@ def collection_group_name(callback, coll): return group_name # No results found. 
Not a group folder - log.write(callback, "{} does not belong to a research or intake group or is not available to current user.".format(coll)) + log.write(ctx, "{} does not belong to a research or intake group or is not available to current user.".format(coll)) return "" rule_collection_group_name = rule.make(inputs=[0], outputs=[1])(collection_group_name) -def get_org_metadata(ctx, path, object_type=pathutil.ObjectType.COLL): +def get_org_metadata(ctx: rule.Context, path: str, object_type: pathutil.ObjectType = pathutil.ObjectType.COLL) -> List[Tuple[str, str]]: """Obtain a (k,v) list of all organisation metadata on a given collection or data object.""" typ = 'DATA' if object_type is pathutil.ObjectType.DATA else 'COLL' @@ -673,7 +676,7 @@ def get_org_metadata(ctx, path, object_type=pathutil.ObjectType.COLL): else " AND COLL_NAME = '{}'".format(path)))] -def get_locks(ctx, path, org_metadata=None, object_type=pathutil.ObjectType.COLL): +def get_locks(ctx: rule.Context, path: str, org_metadata: List[Tuple[str, str]] | None = None, object_type: pathutil.ObjectType = pathutil.ObjectType.COLL) -> List[str]: """Return all locks on a collection or data object (includes locks on parents and children).""" if org_metadata is None: org_metadata = get_org_metadata(ctx, path, object_type=object_type) @@ -684,7 +687,7 @@ def get_locks(ctx, path, org_metadata=None, object_type=pathutil.ObjectType.COLL @api.make() -def api_folder_get_locks(ctx, coll): +def api_folder_get_locks(ctx: rule.Context, coll: str) -> api.Result: """Return a list of locks on a collection.""" locks = [] @@ -697,12 +700,12 @@ def api_folder_get_locks(ctx, coll): return locks -def has_locks(ctx, coll, org_metadata=None): +def has_locks(ctx: rule.Context, coll: str, org_metadata: List[Tuple[str, str]] | None = None) -> bool: """Check whether a lock exists on the given collection, its parents or children.""" return len(get_locks(ctx, coll, org_metadata=org_metadata)) > 0 -def is_locked(ctx, coll, org_metadata=None): +def is_locked(ctx: rule.Context, coll: str, org_metadata: List[Tuple[str, str]] | None = None) -> bool: """Check whether a lock exists on the given collection itself or a parent collection. Locks on subcollections are not counted. @@ -719,22 +722,22 @@ def is_locked(ctx, coll, org_metadata=None): return len([x for x in locks if coll.startswith(x)]) > 0 -def is_data_locked(ctx, path, org_metadata=None): +def is_data_locked(ctx: rule.Context, path: str, org_metadata: List[Tuple[str, str]] | None = None) -> bool: """Check whether a lock exists on the given data object.""" locks = get_locks(ctx, path, org_metadata=org_metadata, object_type=pathutil.ObjectType.DATA) return len(locks) > 0 -def get_status(ctx, path, org_metadata=None): +def get_status(ctx: rule.Context, path: str, org_metadata: List[Tuple[str, str]] | None = None) -> constants.research_package_state: """Get the status of a research folder.""" if org_metadata is None: org_metadata = get_org_metadata(ctx, path) # Don't care about duplicate attr names here. 
- org_metadata = dict(org_metadata) - if constants.IISTATUSATTRNAME in org_metadata: - x = org_metadata[constants.IISTATUSATTRNAME] + org_metadata_dict = dict(org_metadata) + if constants.IISTATUSATTRNAME in org_metadata_dict: + x = org_metadata_dict[constants.IISTATUSATTRNAME] try: x = "" if x == "FOLDER" else x return constants.research_package_state(x) @@ -744,7 +747,7 @@ def get_status(ctx, path, org_metadata=None): return constants.research_package_state.FOLDER -def datamanager_exists(ctx, coll): +def datamanager_exists(ctx: rule.Context, coll: str) -> bool: """Check if a datamanager exists for a given collection.""" group_name = collection_group_name(ctx, coll) category = group.get_category(ctx, group_name) @@ -752,7 +755,7 @@ def datamanager_exists(ctx, coll): return group.exists(ctx, "datamanager-" + category) -def get_datamanagers(ctx, coll): +def get_datamanagers(ctx: rule.Context, coll: str) -> List[str]: """Retrieve datamanagers for a given collection.""" group_name = collection_group_name(ctx, coll) category = group.get_category(ctx, group_name) @@ -760,13 +763,13 @@ def get_datamanagers(ctx, coll): return group.members(ctx, "datamanager-" + category) -def set_submitter(ctx, path, actor): +def set_submitter(ctx: rule.Context, path: str, actor: str) -> None: """Set submitter of folder for the vault.""" attribute = constants.UUORGMETADATAPREFIX + "submitted_actor" avu.set_on_coll(ctx, path, attribute, actor) -def get_submitter(ctx, path): +def get_submitter(ctx: rule.Context, path: str) -> str: """Get submitter of folder for the vault.""" attribute = constants.UUORGMETADATAPREFIX + "submitted_actor" org_metadata = dict(get_org_metadata(ctx, path)) @@ -774,16 +777,16 @@ def get_submitter(ctx, path): if attribute in org_metadata: return org_metadata[attribute] else: - return None + return "" -def set_accepter(ctx, path, actor): +def set_accepter(ctx: rule.Context, path: str, actor: str) -> None: """Set accepter of folder for the vault.""" attribute = constants.UUORGMETADATAPREFIX + "accepted_actor" avu.set_on_coll(ctx, path, attribute, actor) -def get_accepter(ctx, path): +def get_accepter(ctx: rule.Context, path: str) -> str: """Get accepter of folder for the vault.""" attribute = constants.UUORGMETADATAPREFIX + "accepted_actor" org_metadata = dict(get_org_metadata(ctx, path)) @@ -791,16 +794,16 @@ def get_accepter(ctx, path): if attribute in org_metadata: return org_metadata[attribute] else: - return None + return "" -def set_vault_data_package(ctx, path, vault): +def set_vault_data_package(ctx: rule.Context, path: str, vault: str) -> None: """Set vault data package for deposit.""" attribute = constants.UUORGMETADATAPREFIX + "vault_data_package" avu.set_on_coll(ctx, path, attribute, vault) -def get_vault_data_package(ctx, path): +def get_vault_data_package(ctx: rule.Context, path: str) -> str: """Get vault data package for deposit.""" attribute = constants.UUORGMETADATAPREFIX + "vault_data_package" org_metadata = dict(get_org_metadata(ctx, path)) @@ -808,4 +811,4 @@ def get_vault_data_package(ctx, path): if attribute in org_metadata: return org_metadata[attribute] else: - return None + return "" diff --git a/groups.py b/groups.py index ac995813a..7009dd074 100644 --- a/groups.py +++ b/groups.py @@ -6,6 +6,7 @@ import time from collections import OrderedDict from datetime import datetime +from typing import Any, Dict, Iterable, List, Tuple import genquery import requests @@ -40,7 +41,7 @@ 'rule_group_sram_sync'] -def getGroupsData(ctx): +def getGroupsData(ctx: rule.Context) -> 
Iterable[Any]: """Return groups and related data.""" groups = {} @@ -117,7 +118,7 @@ def getGroupsData(ctx): return groups.values() -def getGroupData(ctx, name): +def getGroupData(ctx: rule.Context, name: str) -> Dict | None: """Get data for one group.""" group = None @@ -192,7 +193,7 @@ def getGroupData(ctx, name): return group -def getCategories(ctx): +def getCategories(ctx: rule.Context) -> List[str]: """Get a list of all group categories.""" categories = [] @@ -208,7 +209,7 @@ def getCategories(ctx): return categories -def getDatamanagerCategories(ctx): +def getDatamanagerCategories(ctx: rule.Context) -> List: """Get a list of all datamanager group categories.""" categories = [] @@ -229,7 +230,7 @@ def getDatamanagerCategories(ctx): return categories -def getSubcategories(ctx, category): +def getSubcategories(ctx: rule.Context, category: str) -> List: """Get a list of all subcategories within a given group category. :param ctx: Combined type of a ctx and rei struct @@ -272,7 +273,7 @@ def getSubcategories(ctx, category): return list(categories) -def user_role(ctx, username, group_name): +def user_role(ctx: rule.Context, username: str, group_name: str) -> str: """Get role of user in group. :param ctx: Combined type of a ctx and rei struct @@ -300,7 +301,7 @@ def user_role(ctx, username, group_name): api_group_get_user_role = api.make()(user_role) -def user_is_datamanager(ctx, category, user): +def user_is_datamanager(ctx: rule.Context, category: str, user: str) -> bool: """Return if user is datamanager of category. :param ctx: Combined type of a ctx and rei struct @@ -313,7 +314,7 @@ def user_is_datamanager(ctx, category, user): in ('normal', 'manager') -def group_category(ctx, group): +def group_category(ctx: rule.Context, group: str) -> str: """Return category of group. :param ctx: Combined type of a ctx and rei struct @@ -327,7 +328,7 @@ def group_category(ctx, group): @api.make() -def api_group_data(ctx): +def api_group_data(ctx: rule.Context) -> Dict: """Retrieve group data as hierarchy for user. The structure of the group hierarchy parameter is as follows: @@ -355,7 +356,7 @@ def api_group_data(ctx): return (internal_api_group_data(ctx)) -def internal_api_group_data(ctx): +def internal_api_group_data(ctx: rule.Context) -> Dict: # This is the entry point for integration tests against api_group_data if user.is_admin(ctx): groups = getGroupsData(ctx) @@ -460,7 +461,7 @@ def internal_api_group_data(ctx): return {'group_hierarchy': subcat_ordered_group_hierarchy, 'user_type': user.user_type(ctx), 'user_zone': user.zone(ctx)} -def user_is_a_datamanager(ctx): +def user_is_a_datamanager(ctx: rule.Context) -> bool: """Return groups whether current user is datamanager of a group, not specifically of a specific group. :param ctx: Combined type of a ctx and rei struct @@ -485,7 +486,7 @@ def user_is_a_datamanager(ctx): @api.make() -def api_group_process_csv(ctx, csv_header_and_data, allow_update, delete_users): +def api_group_process_csv(ctx: rule.Context, csv_header_and_data: str, allow_update: bool, delete_users: bool) -> api.Result: """Process contents of CSV file containing group definitions. Parsing is stopped immediately when an error is found and the rownumber is returned to the user. 
@@ -520,7 +521,7 @@ def api_group_process_csv(ctx, csv_header_and_data, allow_update, delete_users): return api.Result.ok(info=[status_msg['message']]) -def validate_data(ctx, data, allow_update): +def validate_data(ctx: rule.Context, data: Dict, allow_update: bool) -> List: """Validation of extracted data. :param ctx: Combined type of a ctx and rei struct @@ -551,7 +552,7 @@ def validate_data(ctx, data, allow_update): return errors -def apply_data(ctx, data, allow_update, delete_users): +def apply_data(ctx: rule.Context, data: Dict, allow_update: bool, delete_users: bool) -> Dict: """ Update groups with the validated data :param ctx: Combined type of a ctx and rei struct @@ -581,7 +582,7 @@ def apply_data(ctx, data, allow_update, delete_users): log.write(ctx, 'CSV import - WARNING: group "{}" not created, it already exists'.format(group_name)) message += "Group '{}' already exists.".format(group_name) else: - return {status: 'error', message: "Error while attempting to create group {}. Status/message: {} / {}".format(group_name, response.status, response.status_info)} + return {"status": "error", "message": "Error while attempting to create group {}. Status/message: {} / {}".format(group_name, response.status, response.status_info)} # Now add the users and set their role if other than member allusers = managers + members + viewers @@ -675,7 +676,7 @@ def apply_data(ctx, data, allow_update, delete_users): return {"status": "ok", "message": message} -def _are_roles_equivalent(a, b): +def _are_roles_equivalent(a: str, b: str) -> bool: """Checks whether two roles are equivalent, Yoda and Yoda-clienttools use slightly different names.""" r_role_names = ["viewer", "reader"] m_role_names = ["member", "normal"] @@ -690,7 +691,7 @@ def _are_roles_equivalent(a, b): return False -def group_user_exists(ctx, group_name, username, include_readonly): +def group_user_exists(ctx: rule.Context, group_name: str, username: str, include_readonly: bool) -> bool: group = getGroupData(ctx, group_name) if '#' not in username: username = username + "#" + session_vars.get_map(ctx.rei)["client_user"]["irods_zone"] @@ -705,7 +706,7 @@ def group_user_exists(ctx, group_name, username, include_readonly): @rule.make(inputs=[0], outputs=[1]) -def rule_user_exists(ctx, username): +def rule_user_exists(ctx: rule.Context, username: str) -> str: """Rule wrapper to check if a user exists. :param ctx: Combined type of a ctx and rei struct @@ -716,7 +717,8 @@ def rule_user_exists(ctx, username): return "true" if user.exists(ctx, username) else "false" -def rule_group_user_exists(rule_args, callback, rei): +@rule.make(inputs=[0, 1, 2], outputs=[3]) +def rule_group_user_exists(ctx: rule.Context, group_name: str, user_name: str, include_readonly: bool) -> str: """Check if a user is a member of the given group. rule_group_user_exists(group, user, includeRo, membership) @@ -724,25 +726,25 @@ def rule_group_user_exists(rule_args, callback, rei): considered as well. Otherwise, the user must be a normal member or manager of the given group. - :param rule_args: [0] Group to check for user membership - [1] User to check for membership - [2] Include read-only shadow group users - :param callback: Callback to rule Language - :param rei: The rei struct + :param ctx: Combined type of a ctx and rei struct + :param group_name: Group to check for user membership + :param user_name: User to check for membership + :param include_readonly: Include read-only shadow group users + + :returns: Indicator if user is a member of the given group. 
""" - ctx = rule.Context(callback, rei) - exists = group_user_exists(ctx, rule_args[0], rule_args[1], rule_args[2]) - rule_args[3] = "true" if exists else "false" + exists = group_user_exists(ctx, group_name, user_name, include_readonly) + return "true" if exists else "false" @api.make() -def api_group_categories(ctx): +def api_group_categories(ctx: rule.Context) -> api.Result: """Retrieve category list.""" return getCategories(ctx) @api.make() -def api_group_subcategories(ctx, category): +def api_group_subcategories(ctx: rule.Context, category: str) -> api.Result: """Retrieve subcategory list. :param ctx: Combined type of a ctx and rei struct @@ -753,7 +755,7 @@ def api_group_subcategories(ctx, category): return getSubcategories(ctx, category) -def provisionExternalUser(ctx, username, creatorUser, creatorZone): +def provisionExternalUser(ctx: rule.Context, username: str, creatorUser: str, creatorZone: str) -> int: """Call External User Service API to add new user. :param ctx: Combined type of a ctx and rei struct @@ -824,7 +826,7 @@ def rule_group_provision_external_user(rule_args, ctx, rei): rule_args[4] = message -def removeExternalUser(ctx, username, userzone): +def removeExternalUser(ctx: rule.Context, username: str, userzone: str) -> str: """Call External User Service API to remove user. :param ctx: Combined type of a ctx and rei struct @@ -854,7 +856,7 @@ def removeExternalUser(ctx, username, userzone): @rule.make(inputs=[0, 1], outputs=[]) -def rule_group_remove_external_user(ctx, username, userzone): +def rule_group_remove_external_user(ctx: rule.Context, username: str, userzone: str) -> str: """Remove external user from EUS :param ctx: Combined type of a ctx and rei struct @@ -878,7 +880,7 @@ def rule_group_remove_external_user(ctx, username, userzone): @rule.make(inputs=[0], outputs=[1]) -def rule_group_check_external_user(ctx, username): +def rule_group_check_external_user(ctx: rule.Context, username: str) -> str: """Check that a user is external. :param ctx: Combined type of a ctx and rei struct @@ -896,7 +898,7 @@ def rule_group_check_external_user(ctx, username): @rule.make(inputs=[0], outputs=[1]) -def rule_group_expiration_date_validate(ctx, expiration_date): +def rule_group_expiration_date_validate(ctx: rule.Context, expiration_date: str) -> str: """Validation of expiration date. :param ctx: Combined type of a callback and rei struct @@ -920,7 +922,7 @@ def rule_group_expiration_date_validate(ctx, expiration_date): @api.make() -def api_group_search_users(ctx, pattern): +def api_group_search_users(ctx: rule.Context, pattern: str) -> api.Result: (username, zone_name) = user.from_str(ctx, pattern) userList = list() @@ -942,7 +944,7 @@ def api_group_search_users(ctx, pattern): @api.make() -def api_group_exists(ctx, group_name): +def api_group_exists(ctx: rule.Context, group_name: str) -> api.Result: """Check if group exists. :param ctx: Combined type of a ctx and rei struct @@ -953,7 +955,14 @@ def api_group_exists(ctx, group_name): return group.exists(ctx, group_name) -def group_create(ctx, group_name, category, subcategory, schema_id, expiration_date, description, data_classification): +def group_create(ctx: rule.Context, + group_name: str, + category: str, + subcategory: str, + schema_id: str, + expiration_date: str, + description: str, + data_classification: str) -> api.Result: """Create a new group. 
:param ctx: Combined type of a ctx and rei struct @@ -965,7 +974,7 @@ def group_create(ctx, group_name, category, subcategory, schema_id, expiration_d :param description: Description of the group to create :param data_classification: Data classification of the group to create - :returns: Dict with API status result + :returns: API status result """ try: co_identifier = '' @@ -1005,7 +1014,7 @@ def group_create(ctx, group_name, category, subcategory, schema_id, expiration_d @api.make() -def api_group_update(ctx, group_name, property_name, property_value): +def api_group_update(ctx: rule.Context, group_name: str, property_name: str, property_value: str) -> api.Result: """Update group property. :param ctx: Combined type of a ctx and rei struct @@ -1013,7 +1022,7 @@ def api_group_update(ctx, group_name, property_name, property_value): :param property_name: Name of the property to update :param property_value: Value of the property to update - :returns: Dict with API status result + :returns: API status result """ try: response = ctx.uuGroupModify(group_name, property_name, property_value, '', '')['arguments'] @@ -1028,13 +1037,13 @@ def api_group_update(ctx, group_name, property_name, property_value): @api.make() -def api_group_delete(ctx, group_name): +def api_group_delete(ctx: rule.Context, group_name: str) -> api.Result: """Delete a group. :param ctx: Combined type of a ctx and rei struct :param group_name: Name of the group to delete - :returns: Dict with API status result + :returns: API status result """ try: # Delete SRAM collaboration if group is a SRAM group. @@ -1057,7 +1066,7 @@ def api_group_delete(ctx, group_name): @api.make() -def api_group_get_description(ctx, group_name): +def api_group_get_description(ctx: rule.Context, group_name: str) -> api.Result: """Retrieve description of a group. :param ctx: Combined type of a ctx and rei struct @@ -1072,7 +1081,7 @@ def api_group_get_description(ctx, group_name): @api.make() -def api_group_user_is_member(ctx, username, group_name): +def api_group_user_is_member(ctx: rule.Context, username: str, group_name: str) -> api.Result: """Check if user is member of a group. :param ctx: Combined type of a ctx and rei struct @@ -1084,7 +1093,7 @@ def api_group_user_is_member(ctx, username, group_name): return group_user_exists(ctx, group_name, username, True) -def group_user_add(ctx, username, group_name): +def group_user_add(ctx: rule.Context, username: str, group_name: str) -> api.Result: """Add a user to a group. :param ctx: Combined type of a ctx and rei struct @@ -1127,7 +1136,7 @@ def group_user_add(ctx, username, group_name): api_group_user_add = api.make()(group_user_add) -def group_user_update_role(ctx, username, group_name, new_role): +def group_user_update_role(ctx: rule.Context, username: str, group_name: str, new_role: str) -> api.Result: """Update role of a user in a group. :param ctx: Combined type of a ctx and rei struct @@ -1135,7 +1144,7 @@ def group_user_update_role(ctx, username, group_name, new_role): :param group_name: Name of the group :param new_role: New role of the user - :returns: Dict with API status result + :returns: API status result """ try: if config.enable_sram: @@ -1164,14 +1173,14 @@ def group_user_update_role(ctx, username, group_name, new_role): api_group_user_update_role = api.make()(group_user_update_role) -def group_remove_user_from_group(ctx, username, group_name): +def group_remove_user_from_group(ctx: rule.Context, username: str, group_name: str) -> api.Result: """Remove a user from a group. 
:param ctx: Combined type of a ctx and rei struct :param username: Name of the user :param group_name: Name of the group - :returns: Dict with API status result + :returns: API status result """ try: if config.enable_sram: @@ -1200,7 +1209,7 @@ def group_remove_user_from_group(ctx, username, group_name): api_group_remove_user_from_group = api.make()(group_remove_user_from_group) -def sram_enabled(ctx, group_name): +def sram_enabled(ctx: rule.Context, group_name: str) -> Tuple[bool, str]: """Checks if the group is SRAM enabled :param ctx: Combined type of a ctx and rei struct @@ -1226,7 +1235,7 @@ def sram_enabled(ctx, group_name): @rule.make() -def rule_group_sram_sync(ctx): +def rule_group_sram_sync(ctx: rule.Context) -> None: """Synchronize groups with SRAM. :param ctx: Combined type of a ctx and rei struct diff --git a/groups_import.py b/groups_import.py index 1933c95fd..a95cf846a 100644 --- a/groups_import.py +++ b/groups_import.py @@ -3,18 +3,20 @@ __copyright__ = 'Copyright (c) 2018-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import Dict, List, Set, Tuple + from iteration_utilities import duplicates, unique_everseen from util import * -def process_csv_line(ctx, line): +def process_csv_line(ctx: 'rule.Context', line: Dict) -> Tuple: """Process a line as found in the csv consisting of category, subcategory, groupname, managers, members and viewers, and optionally schema id and expiration date. - :param ctx: Combined type of a ctx and rei struct - :param line: Dictionary of labels and corresponding lists of values + :param ctx: Combined type of a ctx and rei struct + :param line: Dictionary of labels and corresponding lists of values :returns: Tuple of processed row data (None if error), and error message """ @@ -78,29 +80,29 @@ def process_csv_line(ctx, line): return row_data, None -def column_name_is_role_label(column_name): +def column_name_is_role_label(column_name: str) -> bool: return (column_name.lower() in get_role_labels() or column_name.lower().startswith(tuple(map(lambda s: s + ":", get_role_labels())))) -def get_role_labels(): +def get_role_labels() -> List[str]: return ['viewer', 'member', 'manager'] -def get_csv_possible_labels(): +def get_csv_possible_labels() -> List[str]: return ['category', 'subcategory', 'groupname', 'viewer', 'member', 'manager', 'schema_id', 'expiration_date'] -def get_csv_required_labels(): +def get_csv_required_labels() -> List[str]: return ['category', 'subcategory', 'groupname'] -def get_csv_predefined_labels(): +def get_csv_predefined_labels() -> List[str]: """These labels should not repeat""" return ['category', 'subcategory', 'groupname', 'schema_id', 'expiration_date'] -def get_duplicate_columns(fields_list): +def get_duplicate_columns(fields_list: List) -> Set: fields_seen = set() duplicate_fields = set() @@ -152,12 +154,12 @@ def parse_csv_file(ctx): return extracted_data -def get_duplicate_groups(row_data): +def get_duplicate_groups(row_data: List) -> List: group_names = list(map(lambda r: r[2], row_data)) return list(unique_everseen(duplicates(group_names))) -def parse_data(ctx, csv_header_and_data): +def parse_data(ctx: 'rule.Context', csv_header_and_data: str) -> Tuple: """Process contents of csv data consisting of header and rows of data. 
:param ctx: Combined type of a ctx and rei struct diff --git a/json_datacite.py b/json_datacite.py index 2ee72f0dc..5f5efc6ca 100644 --- a/json_datacite.py +++ b/json_datacite.py @@ -3,53 +3,21 @@ __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import Dict, List + from dateutil import parser from util import * -__all__ = ['rule_json_datacite_create_combi_metadata_json', - 'rule_json_datacite_create_datacite_json'] - - -@rule.make() -def rule_json_datacite_create_combi_metadata_json(ctx, - metadataJsonPath, - combiJsonPath, - lastModifiedDateTime, - yodaDOI, - publicationDate, - openAccessLink, - licenseUri): - """Frontend function to add system info to yoda-metadata in json format. - :param ctx: Combined type of a callback and rei struct - :param metadataJsonPath: Path to the most recent vault yoda-metadata.json in the corresponding vault - :param combiJsonPath: Path to where the combined info will be placed so it can be used for DataciteXml & landingpage generation - other are system info parameters - :param lastModifiedDateTime: Last modification time of publication - :param yodaDOI: DOI of publication - :param publicationDate: Date of publication - :param openAccessLink: Open access link to data of publication - :param licenseUri: URI to license of publication - """ - json_datacite_create_combi_metadata_json(ctx, - metadataJsonPath, - combiJsonPath, - lastModifiedDateTime, - yodaDOI, - publicationDate, - openAccessLink, - licenseUri) - - -def json_datacite_create_combi_metadata_json(ctx, - metadataJsonPath, - combiJsonPath, - lastModifiedDateTime, - yodaDOI, - publicationDate, - openAccessLink, - licenseUri): +def json_datacite_create_combi_metadata_json(ctx: rule.Context, + metadataJsonPath: str, + combiJsonPath: str, + lastModifiedDateTime: str, + yodaDOI: str, + publicationDate: str, + openAccessLink: str, + licenseUri: str) -> None: """Frontend function to add system info to yoda-metadata in json format. :param ctx: Combined type of a callback and rei struct @@ -81,12 +49,7 @@ def json_datacite_create_combi_metadata_json(ctx, jsonutil.write(ctx, combiJsonPath, metaDict) -@rule.make(inputs=[0], outputs=[1]) -def rule_json_datacite_create_datacite_json(ctx, landing_page_url, combi_path): - return json_datacite_create_datacite_json(ctx, landing_page_url, combi_path) - - -def json_datacite_create_datacite_json(ctx, landing_page_url, combi_path): +def json_datacite_create_datacite_json(ctx: rule.Context, landing_page_url: str, combi_path: str) -> Dict: """Based on content of combi json, get Datacite metadata as a dict. 
:param ctx: Combined type of a callback and rei struct @@ -134,32 +97,32 @@ def json_datacite_create_datacite_json(ctx, landing_page_url, combi_path): return metadata -def get_DOI(combi): +def get_DOI(combi: Dict) -> str: return combi['System']['Persistent_Identifier_Datapackage']['Identifier'] -def get_identifiers(combi): +def get_identifiers(combi: Dict) -> List: return [{'identifier': combi['System']['Persistent_Identifier_Datapackage']['Identifier'], 'identifierType': 'DOI'}] -def get_titles(combi): +def get_titles(combi: Dict) -> List: return [{'title': combi['Title'], 'language': 'en-us'}] -def get_descriptions(combi): +def get_descriptions(combi: Dict) -> List: return [{'description': combi['Description'], 'descriptionType': 'Abstract'}] -def get_publisher(combi): +def get_publisher(combi: Dict) -> str: return config.datacite_publisher -def get_publication_year(combi): +def get_publication_year(combi: Dict) -> str: return combi['System']['Publication_Date'][0:4] -def get_subjects(combi): +def get_subjects(combi: Dict) -> List: """Get list in DataCite format containing: 1) standard objects like tags/disciplne @@ -204,7 +167,7 @@ def get_subjects(combi): return subjects -def get_funders(combi): +def get_funders(combi: Dict) -> List: funders = [] try: for funder in combi.get('Funding_Reference', []): @@ -216,7 +179,7 @@ def get_funders(combi): return funders -def get_creators(combi): +def get_creators(combi: Dict) -> List: """Return creator information in DataCite format. :param combi: Combined JSON file that holds both user and system metadata @@ -253,7 +216,7 @@ def get_creators(combi): return all_creators -def get_contributors(combi): +def get_contributors(combi: Dict) -> List: """Get string in DataCite format containing contributors, including contact persons if these were added explicitly (GEO). 
@@ -327,7 +290,7 @@ def get_contributors(combi): return all -def get_dates(combi): +def get_dates(combi: Dict) -> List: """Return list of dates in DataCite format.""" # Format last modified date for DataCite: https://support.datacite.org/docs/schema-optional-properties-v41#8-date @@ -339,11 +302,11 @@ def get_dates(combi): dates = [{'date': last_modified_date, 'dateType': 'Updated'}] - embargo_end_date = combi.get('Embargo_End_Date', None) + embargo_end_date = combi.get('Embargo_End_Date') if embargo_end_date is not None: dates.append({'date': embargo_end_date, 'dateType': 'Available'}) - collected = combi.get('Collected', None) + collected = combi.get('Collected') if collected is not None: try: x = collected.get('Start_Date') @@ -356,12 +319,12 @@ def get_dates(combi): return dates -def get_version(combi): +def get_version(combi: Dict) -> str: """Get string in DataCite format containing version info.""" return combi.get('Version', '') -def get_rights_list(combi): +def get_rights_list(combi: Dict) -> List: """Get list in DataCite format containing rights related information.""" options = {'Open': 'info:eu-repo/semantics/openAccess', 'Restricted': 'info:eu-repo/semantics/restrictedAccess', @@ -374,12 +337,12 @@ def get_rights_list(combi): return rights_list -def get_language(combi): +def get_language(combi: Dict) -> str: """Get string in DataCite format containing language.""" return 'en-us' -def get_resource_type(combi): +def get_resource_type(combi: Dict) -> Dict: """Get dict in DataCite format containing Resource type and default handling.""" """ "types": { @@ -409,7 +372,7 @@ def get_resource_type(combi): return {"resourceTypeGeneral": type, "resourceType": descr} -def get_related_resources(combi): +def get_related_resources(combi: Dict) -> List: """Get list in DataCite format containing related datapackages.""" """ "relatedIdentifiers": [ @@ -444,7 +407,7 @@ def get_related_resources(combi): return related_dps -def get_geo_locations(combi): +def get_geo_locations(combi: Dict) -> List: """Get list of geoLocation elements in datacite format containing the information of geo locations. There are two versions of this: @@ -491,6 +454,6 @@ def get_geo_locations(combi): if location: geoLocations.append({'geoLocationPlace': location}) except KeyError: - return + return [] return geoLocations diff --git a/json_landing_page.py b/json_landing_page.py index 38ff78fee..d12cedfca 100644 --- a/json_landing_page.py +++ b/json_landing_page.py @@ -4,6 +4,7 @@ __license__ = 'GPLv3, see LICENSE' from datetime import datetime +from typing import Dict import jinja2 from dateutil import parser @@ -11,7 +12,7 @@ from util import * -def persistent_identifier_to_uri(identifier_scheme, identifier): +def persistent_identifier_to_uri(identifier_scheme: str, identifier: str) -> str: """Transform a persistent identifier to URI. Supported identifier schemes are Handle, DOI, ORCID and URL. @@ -41,7 +42,13 @@ def persistent_identifier_to_uri(identifier_scheme, identifier): return uri -def json_landing_page_create_json_landing_page(ctx, zone, template_name, combi_json_path, json_schema, base_doi, versions): +def json_landing_page_create_json_landing_page(ctx: rule.Context, + zone: str, + template_name: str, + combi_json_path: str, + json_schema: Dict, + base_doi: str, + versions: Dict) -> str: """Get the landing page of published YoDa metadata as a string. 
     :param ctx:             Combined type of a ctx and rei struct
@@ -187,12 +194,12 @@ def json_landing_page_create_json_landing_page(ctx, zone, template_name, combi_j
 
     # Format last modified and publication date.
     # Python 3: https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat
     # last_modified_date = date.fromisoformat(json_data['System']['Last_Modified_Date'])
-    last_modified_date = parser.parse(json_data["System"]["Last_Modified_Date"])
-    last_modified_date = last_modified_date.strftime("%Y-%m-%d %H:%M:%S%z")
+    last_modified_date_time = parser.parse(json_data["System"]["Last_Modified_Date"])
+    last_modified_date = last_modified_date_time.strftime("%Y-%m-%d %H:%M:%S%z")
     # Python 3: https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat
     # publication_date = date.fromisoformat(json_data['System']['Publication_Date'])
-    publication_date = parser.parse(json_data["System"]["Publication_Date"])
-    publication_date = publication_date.strftime("%Y-%m-%d %H:%M:%S%z")
+    publication_date_time = parser.parse(json_data["System"]["Publication_Date"])
+    publication_date = publication_date_time.strftime("%Y-%m-%d %H:%M:%S%z")
 
     tm = Template(template)
     # Add custom function to transform a persistent identifier to URI.
diff --git a/mail.py b/mail.py
index 98e9025f9..07b0c04fc 100644
--- a/mail.py
+++ b/mail.py
@@ -7,13 +7,14 @@
 import re
 import smtplib
 from email.mime.text import MIMEText
+from typing import Tuple
 
 from util import *
 
 __all__ = ['rule_mail_test']
 
 
-def send(ctx, to, actor, subject, body, cc=None):
+def send(ctx: rule.Context, to: str, actor: str, subject: str, body: str, cc: str | None = None) -> api.Result:
     """Send an e-mail with specified recipient, subject and body.
 
     The originating address and mail server credentials are taken from the
@@ -26,6 +27,8 @@
     :param body:    Body of mail
     :param cc:      Comma-separated list of CC recipient(s) of email (optional)
 
+    :raises Exception: When the SMTP server is not configured correctly
+    :returns: API status
     """
 
     if not config.notifications_enabled:
@@ -51,7 +54,12 @@
     try:
         # e.g. 'smtps://smtp.gmail.com:465' for SMTP over TLS, or
         # 'smtp://smtp.gmail.com:587' for STARTTLS on the mail submission port.
-        proto, host, port = re.search(r'^(smtps?)://([^:]+)(?::(\d+))?$', cfg['server']).groups()
+        # re.search returns None when the server address does not match;
+        # fail early with a clear error instead of an AttributeError.
+        smtp_config = re.search(r'^(smtps?)://([^:]+)(?::(\d+))?$', cfg['server'])
+        if smtp_config is None:
+            raise Exception('Invalid SMTP server address: {}'.format(cfg['server']))
+        proto, host, port = smtp_config.groups()
 
         # Default to port 465 for SMTP over TLS, and 587 for standard mail
         # submission with STARTTLS.
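
To make the new guard concrete, here is a minimal standalone sketch of the same parse, reusing the regex and the port defaults from the hunk above; the helper name and example hosts are hypothetical:

    import re
    from typing import Tuple

    def parse_smtp_server(server: str) -> Tuple[str, str, int]:
        # Scheme ('smtp' or 'smtps'), host, and an optional port.
        match = re.search(r'^(smtps?)://([^:]+)(?::(\d+))?$', server)
        if match is None:
            raise Exception('Invalid SMTP server address: {}'.format(server))
        proto, host, port = match.groups()
        if port is None:
            # Default to 465 for SMTP over TLS, 587 for submission with STARTTLS.
            port = '465' if proto == 'smtps' else '587'
        return proto, host, int(port)

    # parse_smtp_server('smtps://smtp.example.org')    == ('smtps', 'smtp.example.org', 465)
    # parse_smtp_server('smtp://smtp.example.org:587') == ('smtp', 'smtp.example.org', 587)
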
@@ -107,7 +114,7 @@ def send(ctx, to, actor, subject, body, cc=None): pass -def wrapper(ctx, to, actor, subject, body): +def wrapper(ctx: rule.Context, to: str, actor: str, subject: str, body: str) -> Tuple[str, str]: """Send mail, returns status/statusinfo in rule-language style.""" x = send(ctx, to, actor, subject, body) @@ -117,7 +124,7 @@ def wrapper(ctx, to, actor, subject, body): @rule.make(inputs=[0], outputs=[1, 2]) -def rule_mail_test(ctx, to): +def rule_mail_test(ctx: rule.Context, to: str) -> Tuple[str, str]: if not user.is_admin(ctx): return api.Error('not_allowed', 'Only rodsadmin can send test mail') diff --git a/meta.py b/meta.py index 52b51ff93..0be0620b6 100644 --- a/meta.py +++ b/meta.py @@ -7,6 +7,7 @@ import re from collections import OrderedDict from datetime import datetime +from typing import Dict, List import genquery import irods_types @@ -28,7 +29,7 @@ 'rule_get_latest_vault_metadata_path'] -def metadata_get_links(metadata): +def metadata_get_links(metadata: Dict) -> List: if 'links' not in metadata or type(metadata['links']) is not list: return [] return list(filter(lambda x: type(x) in (dict, OrderedDict) @@ -39,13 +40,14 @@ def metadata_get_links(metadata): metadata['links'])) -def metadata_get_schema_id(metadata): +def metadata_get_schema_id(metadata: Dict) -> str | None: desc = list(filter(lambda x: x['rel'] == 'describedby', metadata_get_links(metadata))) if len(desc) > 0: return desc[0]['href'] + return None -def metadata_set_schema_id(metadata, schema_id): +def metadata_set_schema_id(metadata: Dict, schema_id: str) -> None: other_links = list(filter(lambda x: x['rel'] != 'describedby', metadata_get_links(metadata))) metadata['links'] = [OrderedDict([ @@ -54,11 +56,11 @@ def metadata_set_schema_id(metadata, schema_id): ])] + other_links -def get_json_metadata_errors(callback, - metadata_path, - metadata=None, - schema=None, - ignore_required=False): +def get_json_metadata_errors(ctx: rule.Context, + metadata_path: str, + metadata: Dict | None = None, + schema: Dict | None = None, + ignore_required: bool = False) -> List: """ Validate JSON metadata, and return a list of errors, if any. @@ -72,7 +74,7 @@ def get_json_metadata_errors(callback, This will throw exceptions on missing metadata / schema files and invalid JSON formats. - :param callback: Combined type of a callback and rei struct + :param ctx: Combined type of a callback and rei struct :param metadata_path: Path to the JSON object :param metadata: Pre-parsed JSON object :param schema: Schema to check against @@ -81,10 +83,10 @@ def get_json_metadata_errors(callback, :returns: List of errors in JSON object """ if schema is None: - schema = schema_.get_active_schema(callback, metadata_path) + schema = schema_.get_active_schema(ctx, metadata_path) if metadata is None: - metadata = jsonutil.read(callback, metadata_path) + metadata = jsonutil.read(ctx, metadata_path) # Perform validation and filter errors. # Validation is handed to a Python 3 interpreter to validate with the Draft201909 validator. @@ -144,22 +146,21 @@ def transform_error(e): # Log metadata errors. for error in errors: - log.write(callback, error) + log.write(ctx, error) return errors -def is_json_metadata_valid(callback, - metadata_path, - metadata=None, - ignore_required=False): - """ - Check if json metadata contains no errors. +def is_json_metadata_valid(ctx: rule.Context, + metadata_path: str, + metadata: Dict | None = None, + ignore_required: bool = False) -> bool: + """Check if json metadata contains no errors. 
Argument 'metadata' may contain a preparsed JSON document, otherwise it is loaded from the provided path. - :param callback: Combined type of a callback and rei struct + :param ctx: Combined type of a callback and rei struct :param metadata_path: Path to the JSON object :param metadata: Pre-parsed JSON object :param ignore_required: Ignore required fields @@ -167,7 +168,7 @@ def is_json_metadata_valid(callback, :returns: Boolean indicating if JSON metadata is valid """ try: - return len(get_json_metadata_errors(callback, + return len(get_json_metadata_errors(ctx, metadata_path, metadata=metadata, ignore_required=ignore_required)) == 0 @@ -176,14 +177,13 @@ def is_json_metadata_valid(callback, return False -def get_collection_metadata_path(ctx, coll): - """ - Check if a collection has a JSON metadata file and provide its path, if any. +def get_collection_metadata_path(ctx: rule.Context, coll: str) -> str | None: + """Check if a collection has a JSON metadata file and provide its path, if any. :param ctx: Combined type of a callback and rei struct :param coll: Path of collection to check for metadata - :returns: String with path to metadata file + :returns: Path to metadata file """ path = '{}/{}'.format(coll, constants.IIJSONMETADATA) if data_object.exists(ctx, path): @@ -192,14 +192,13 @@ def get_collection_metadata_path(ctx, coll): return None -def get_latest_vault_metadata_path(ctx, vault_pkg_coll): - """ - Get the latest vault metadata JSON file. +def get_latest_vault_metadata_path(ctx: rule.Context, vault_pkg_coll: str) -> str | None: + """Get the latest vault metadata JSON file. :param ctx: Combined type of a callback and rei struct :param vault_pkg_coll: Vault package collection - :returns: string -- Metadata JSON path + :returns: Metadata JSON path """ name = None @@ -239,28 +238,27 @@ def rule_meta_validate(rule_args, callback, rei): rule_args[2] = 'metadata validated' -def collection_has_cloneable_metadata(callback, coll): - """ - Check if a collection has metadata, and validate it. +def collection_has_cloneable_metadata(ctx: rule.Context, coll: str) -> str | None: + """Check if a collection has metadata, and validate it. This always ignores 'required' schema attributes, since metadata can only be cloned in the research area. - :param callback: Combined type of a callback and rei struct - :param coll: Path of collection to check for cloneable metadata + :param ctx: Combined type of a callback and rei struct + :param coll: Path of collection to check for cloneable metadata - :returns: String with the parent metadata_path on success, or False otherwise. + :returns: String with the parent metadata_path on success or None otherwise. 
""" - path = get_collection_metadata_path(callback, coll) + path = get_collection_metadata_path(ctx, coll) if path is None: - return False + return None if path.endswith('.json'): - if is_json_metadata_valid(callback, path, ignore_required=True): + if is_json_metadata_valid(ctx, path, ignore_required=True): return path - return False + return None rule_meta_collection_has_cloneable_metadata = ( @@ -270,7 +268,7 @@ def collection_has_cloneable_metadata(callback, coll): @api.make() -def api_meta_remove(ctx, coll): +def api_meta_remove(ctx: rule.Context, coll: str) -> None: """Remove a collection's metadata JSON, if it exists.""" log.write(ctx, 'Remove metadata of coll {}'.format(coll)) @@ -283,34 +281,31 @@ def api_meta_remove(ctx, coll): @api.make() -def api_meta_clone_file(ctx, target_coll): +def api_meta_clone_file(ctx: rule.Context, target_coll: str) -> api.Result: """Clone a metadata file from a parent collection to a subcollection. :param ctx: Combined type of a callback and rei struct :param target_coll: Target collection (where the metadata is copied to) - :returns: None - - :raises Error: The metadata file could not be copied + :returns: API result """ source_coll = pathutil.chop(target_coll)[0] # = parent collection source_data = get_collection_metadata_path(ctx, source_coll) - if source_data.endswith('.json'): + if source_data and source_data.endswith('.json'): target_data = '{}/{}'.format(target_coll, constants.IIJSONMETADATA) else: - # No metadata to clone? Abort. - return + return api.Error('no_metadata', 'No metadata file exists to clone') try: data_object.copy(ctx, source_data, target_data) except msi.Error as e: - raise api.Error('copy_failed', 'The metadata file could not be copied', str(e)) + return api.Error('copy_failed', 'The metadata file could not be copied', str(e)) # Functions that deal with ingesting metadata into AVUs {{{ -def ingest_metadata_research(ctx, path): +def ingest_metadata_research(ctx: rule.Context, path: str) -> None: """Validate JSON metadata (without requiredness) and ingests as AVUs in the research space.""" coll, data = pathutil.chop(path) @@ -334,7 +329,7 @@ def ingest_metadata_research(ctx, path): jsonutil.dump(metadata)) -def ingest_metadata_deposit(ctx, path): +def ingest_metadata_deposit(ctx: rule.Context, path: str) -> None: """Validate JSON metadata (without requiredness) and ingests as AVUs in the deposit space.""" coll, data = pathutil.chop(path) @@ -355,7 +350,7 @@ def ingest_metadata_deposit(ctx, path): avu.associate_to_coll(ctx, coll, 'Data_Access_Restriction', metadata['Data_Access_Restriction']) -def ingest_metadata_staging(ctx, path): +def ingest_metadata_staging(ctx: rule.Context, path: str) -> None: """Set cronjob metadata flag and triggers vault ingest.""" ret = msi.string_2_key_val_pair(ctx, '{}{}{}'.format(constants.UUORGMETADATAPREFIX, @@ -373,7 +368,7 @@ def ingest_metadata_staging(ctx, path): ctx.iiAdminVaultIngest() -def update_index_metadata(ctx, path, metadata, creation_time, data_package): +def update_index_metadata(ctx: rule.Context, path: str, metadata: Dict, creation_time: str, data_package: str) -> None: """Update the index attributes for JSON metadata.""" msi.coll_create(ctx, path, "", irods_types.BytesBuf()) ctx.msi_rmw_avu('-C', path, '%', '%', constants.UUFLATINDEX) @@ -511,7 +506,7 @@ def update_index_metadata(ctx, path, metadata, creation_time, data_package): log.write(ctx, 'update_index_metadata: Metadata index update unsuccessful on path {}'.format(path)) -def ingest_metadata_vault(ctx, path): +def 
ingest_metadata_vault(ctx: rule.Context, path: str) -> None: """Ingest (pre-validated) JSON metadata in the vault.""" # The JSON metadata file has just landed in the vault, required validation / # logging / provenance has already taken place. @@ -560,7 +555,7 @@ def ingest_metadata_vault(ctx, path): @rule.make() -def rule_meta_modified_post(ctx, path, user, zone): +def rule_meta_modified_post(ctx: rule.Context, path: str, user: str, zone: str) -> None: if re.match('^/{}/home/datamanager-[^/]+/vault-[^/]+/.*'.format(zone), path): ingest_metadata_staging(ctx, path) elif re.match('^/{}/home/vault-[^/]+/.*'.format(zone), path): @@ -744,9 +739,8 @@ def set_result(msg_short, msg_long): set_result('Success', '') -def copy_user_metadata(ctx, source, target): - """ - Copy the user metadata (AVUs) of a collection to another collection. +def copy_user_metadata(ctx: rule.Context, source: str, target: str) -> None: + """Copy the user metadata (AVUs) of a collection to another collection. This only copies user metadata, so it ignores system metadata. @@ -759,7 +753,7 @@ def copy_user_metadata(ctx, source, target): user_metadata = list(avu.inside_coll(ctx, source, recursive=True)) # Group AVUs by entity and filter system metadata. - grouped_user_metadata = {} + grouped_user_metadata: Dict = {} for path, type, attribute, value, unit in user_metadata: if not attribute.startswith(constants.UUORGMETADATAPREFIX) and unit != constants.UUFLATINDEX and not unit.startswith(constants.UUUSERMETADATAROOT + '_'): grouped_user_metadata.setdefault(path, {"type": type, "avus": []}) @@ -792,7 +786,7 @@ def copy_user_metadata(ctx, source, target): log.write(ctx, "copy_user_metadata: failed to copy user metadata from <{}> to <{}/original>".format(source, target)) -def vault_metadata_matches_schema(ctx, coll_name, schema_cache, report_name, write_stdout): +def vault_metadata_matches_schema(ctx: rule.Context, coll_name: str, schema_cache: Dict, report_name: str, write_stdout: bool) -> Dict | None: """Process a single data package to retrieve and validate that its metadata conforms to the schema. :param ctx: Combined type of a callback and rei struct @@ -801,7 +795,7 @@ def vault_metadata_matches_schema(ctx, coll_name, schema_cache, report_name, wri :param report_name: Name of report script (for logging) :param write_stdout: A boolean representing whether to write to stdout or rodsLog - :returns: A dictionary result containing if schema matches and the schema short name. + :returns: A dictionary result containing if schema matches and the schema short name. """ metadata_path = get_latest_vault_metadata_path(ctx, coll_name) @@ -818,6 +812,9 @@ def vault_metadata_matches_schema(ctx, coll_name, schema_cache, report_name, wri # Determine schema schema_id = schema_.get_schema_id(ctx, metadata_path) + if schema_id is None: + return None + schema_shortname = schema_id.split("/")[-2] # Retrieve schema and cache it for future use diff --git a/meta_form.py b/meta_form.py index add877018..b4573e9ca 100644 --- a/meta_form.py +++ b/meta_form.py @@ -4,6 +4,7 @@ __license__ = 'GPLv3, see LICENSE' import re +from typing import Dict, List, Tuple import irods_types @@ -19,7 +20,7 @@ 'api_meta_form_save'] -def get_coll_lock(ctx, path, org_metadata=None): +def get_coll_lock(ctx: rule.Context, path: str, org_metadata: List | None = None) -> Tuple[str, str]: """Check for existence of locks on a collection. 
path -> ((no|here|outoftree|ancestor|descendant), rootcoll) @@ -33,7 +34,7 @@ def get_coll_lock(ctx, path, org_metadata=None): if org_metadata is None: org_metadata = folder.get_org_metadata(ctx, path) - ret = ('no', None) + ret = ('no', '') for root in [v for k, v in org_metadata if k == constants.IILOCKATTRNAME]: if root == path: @@ -49,7 +50,7 @@ def get_coll_lock(ctx, path, org_metadata=None): return ret -def get_coll_lock_count(ctx, path, org_metadata=None): +def get_coll_lock_count(ctx: rule.Context, path: str, org_metadata: List | None = None) -> int: """Count locks on a collection. :param ctx: Combined type of a callback and rei struct @@ -69,7 +70,7 @@ def get_coll_lock_count(ctx, path, org_metadata=None): return count -def humanize_validation_error(e): +def humanize_validation_error(e: str) -> str: """Transform a jsonschema validation error such that it is readable by humans. :param e: a jsonschema.exceptions.ValidationError @@ -98,7 +99,7 @@ def humanize_validation_error(e): return 'This field contains an error: ' + ' -> '.join(path_out) -def load(ctx, coll): +def load(ctx: rule.Context, coll: str) -> api.Result: """Retrieve all information required to load a metadata form in either the research or vault space. This produces a JSON struct on stdout. If no transformation is required @@ -278,7 +279,7 @@ def load(ctx, coll): 'is_locked': is_locked} -def save(ctx, coll, metadata): +def save(ctx: rule.Context, coll: str, metadata: Dict) -> api.Result: """Validate and store JSON metadata for a given collection. :param ctx: Combined type of a callback and rei struct diff --git a/notifications.py b/notifications.py index c875e9956..5d0b27db0 100644 --- a/notifications.py +++ b/notifications.py @@ -10,6 +10,7 @@ import time import urllib.parse from datetime import datetime, timedelta +from typing import List, Tuple import genquery from dateutil import relativedelta @@ -34,13 +35,13 @@ NOTIFICATION_KEY = constants.UUORGMETADATAPREFIX + "notification" -def generate_random_id(ctx): +def generate_random_id(ctx: rule.Context) -> str: """Generate random ID for notification.""" characters = string.ascii_lowercase + string.digits return ''.join(random.choice(characters) for x in range(10)) -def set(ctx, actor, receiver, target, message): +def set(ctx: rule.Context, actor: str, receiver: str, target: str, message: str) -> None: """Set user notification and send mail notification when configured. :param ctx: Combined type of a callback and rei struct @@ -63,13 +64,13 @@ def set(ctx, actor, receiver, target, message): @api.make() -def api_notifications_load(ctx, sort_order="desc"): +def api_notifications_load(ctx: rule.Context, sort_order: str = "desc") -> List: """Load user notifications. :param ctx: Combined type of a callback and rei struct :param sort_order: Sort order of notifications on timestamp ("asc" or "desc", default "desc") - :returns: Dict with all notifications + :returns: List with all notifications """ results = [v for v in Query(ctx, "META_USER_ATTR_VALUE", @@ -144,7 +145,7 @@ def api_notifications_load(ctx, sort_order="desc"): @api.make() -def api_notifications_dismiss(ctx, identifier): +def api_notifications_dismiss(ctx: rule.Context, identifier: str) -> api.Result: """Dismiss user notification. :param ctx: Combined type of a callback and rei struct @@ -156,7 +157,7 @@ def api_notifications_dismiss(ctx, identifier): @api.make() -def api_notifications_dismiss_all(ctx): +def api_notifications_dismiss_all(ctx: rule.Context) -> api.Result: """Dismiss all user notifications. 
:param ctx: Combined type of a callback and rei struct @@ -166,7 +167,7 @@ def api_notifications_dismiss_all(ctx): ctx.uuUserMetaRemove(user_name, key, '', '') -def send_notification(ctx, to, actor, message): +def send_notification(ctx: rule.Context, to: str, actor: str, message: str) -> api.Result: return mail.send(ctx, to=to, actor=actor, @@ -183,9 +184,9 @@ def send_notification(ctx, to, actor, message): @rule.make(inputs=[0, 1], outputs=[2, 3]) -def rule_mail_notification_report(ctx, to, notifications): +def rule_mail_notification_report(ctx: rule.Context, to: str, notifications: str) -> Tuple[str, str]: if not user.is_admin(ctx): - return api.Error('not_allowed', 'Only rodsadmin can send test mail') + return '0', 'Only rodsadmin can send test mail' return mail.wrapper(ctx, to=to, @@ -203,7 +204,7 @@ def rule_mail_notification_report(ctx, to, notifications): @rule.make() -def rule_process_ending_retention_packages(ctx): +def rule_process_ending_retention_packages(ctx: rule.Context) -> None: """Rule interface for checking vault packages for ending retention. :param ctx: Combined type of a callback and rei struct @@ -296,7 +297,7 @@ def rule_process_ending_retention_packages(ctx): @rule.make() -def rule_process_groups_expiration_date(ctx): +def rule_process_groups_expiration_date(ctx: rule.Context) -> None: """Rule interface for checking research groups for reaching group expiration date. :param ctx: Combined type of a callback and rei struct @@ -346,7 +347,7 @@ def rule_process_groups_expiration_date(ctx): @rule.make() -def rule_process_inactive_research_groups(ctx): +def rule_process_inactive_research_groups(ctx: rule.Context) -> None: """Rule interface for checking for research groups that have not been modified after a certain amount of months. :param ctx: Combined type of a callback and rei struct @@ -459,7 +460,7 @@ def rule_process_inactive_research_groups(ctx): @rule.make() -def rule_process_data_access_token_expiry(ctx): +def rule_process_data_access_token_expiry(ctx: rule.Context) -> None: """Rule interface for checking for data access tokens that are expiring soon. :param ctx: Combined type of a callback and rei struct diff --git a/policies.py b/policies.py index 0935f8abd..95340ed50 100644 --- a/policies.py +++ b/policies.py @@ -33,7 +33,7 @@ # Separate from ACLs, we deny certain operations on collections and data in # research or deposit folders when paths are locked. 
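
The lock-denial checks that follow all share one shape; a minimal standalone rendering of the pattern (hypothetical `can_do_thing`; `policy`, `user` and `pathutil` come from the `util` package imported above):

    def can_do_thing(ctx: rule.Context, actor: str, path: str) -> policy.Succeed | policy.Fail:
        # Rodsadmins bypass the folder lock checks entirely.
        if user.is_admin(ctx, actor):
            return policy.succeed()
        # Everyone else is denied in the protected spaces.
        if pathutil.info(path).space in [pathutil.Space.RESEARCH, pathutil.Space.DEPOSIT]:
            return policy.fail('Folder is locked')
        return policy.succeed()
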
-def can_coll_create(ctx, actor, coll): +def can_coll_create(ctx: rule.Context, actor: str, coll: str) -> policy.Succeed | policy.Fail: """Disallow creating collections in locked folders.""" log.debug(ctx, 'check coll create <{}>'.format(coll)) @@ -48,7 +48,7 @@ def can_coll_create(ctx, actor, coll): return policy.succeed() -def can_coll_delete(ctx, actor, coll): +def can_coll_delete(ctx: rule.Context, actor: str, coll: str) -> policy.Succeed | policy.Fail: """Disallow deleting collections in locked folders and collections containing locked folders.""" log.debug(ctx, 'check coll delete <{}>'.format(coll)) @@ -66,14 +66,14 @@ def can_coll_delete(ctx, actor, coll): return policy.succeed() -def can_coll_move(ctx, actor, src, dst): +def can_coll_move(ctx: rule.Context, actor: str, src: str, dst: str) -> policy.Succeed | policy.Fail: log.debug(ctx, 'check coll move <{}> -> <{}>'.format(src, dst)) return policy.all(can_coll_delete(ctx, actor, src), can_coll_create(ctx, actor, dst)) -def can_data_create(ctx, actor, path): +def can_data_create(ctx: rule.Context, actor: str, path: str) -> policy.Succeed | policy.Fail: log.debug(ctx, 'check data create <{}>'.format(path)) if pathutil.info(path).space in [pathutil.Space.RESEARCH, pathutil.Space.DEPOSIT]: @@ -95,7 +95,7 @@ def can_data_create(ctx, actor, path): return policy.succeed() -def can_data_write(ctx, actor, path): +def can_data_write(ctx: rule.Context, actor: str, path: str) -> policy.Succeed | policy.Fail: log.debug(ctx, 'check data write <{}>'.format(path)) # Disallow writing to locked objects in research and deposit folders. @@ -111,7 +111,7 @@ def can_data_write(ctx, actor, path): return policy.succeed() -def can_data_delete(ctx, actor, path): +def can_data_delete(ctx: rule.Context, actor: str, path: str) -> policy.Succeed | policy.Fail: if re.match(r'^/[^/]+/home/[^/]+$', path) and not user.is_admin(ctx, actor): return policy.fail('Cannot delete or move data directly under /home') @@ -126,12 +126,12 @@ def can_data_delete(ctx, actor, path): return policy.succeed() -def can_data_copy(ctx, actor, src, dst): +def can_data_copy(ctx: rule.Context, actor: str, src: str, dst: str) -> policy.Succeed | policy.Fail: log.debug(ctx, 'check data copy <{}> -> <{}>'.format(src, dst)) return can_data_create(ctx, actor, dst) -def can_data_move(ctx, actor, src, dst): +def can_data_move(ctx: rule.Context, actor: str, src: str, dst: str) -> policy.Succeed | policy.Fail: log.debug(ctx, 'check data move <{}> -> <{}>'.format(src, dst)) return policy.all(can_data_delete(ctx, actor, src), can_data_create(ctx, actor, dst)) @@ -151,7 +151,7 @@ def can_data_move(ctx, actor, src, dst): # Most of them 'cut' and call identically named Python functions in this file. 
@policy.require() -def py_acPreprocForCollCreate(ctx): +def py_acPreprocForCollCreate(ctx: rule.Context) -> policy.Succeed | policy.Fail: log.debug(ctx, 'py_acPreprocForCollCreate') # print(jsonutil.dump(session_vars.get_map(ctx.rei))) return can_coll_create(ctx, user.user_and_zone(ctx), @@ -159,7 +159,7 @@ def py_acPreprocForCollCreate(ctx): @policy.require() -def py_acPreprocForRmColl(ctx): +def py_acPreprocForRmColl(ctx: rule.Context) -> policy.Succeed | policy.Fail: log.debug(ctx, 'py_acPreprocForRmColl') # print(jsonutil.dump(session_vars.get_map(ctx.rei))) return can_coll_delete(ctx, user.user_and_zone(ctx), @@ -167,7 +167,7 @@ def py_acPreprocForRmColl(ctx): @policy.require() -def py_acPreprocForDataObjOpen(ctx): +def py_acPreprocForDataObjOpen(ctx: rule.Context) -> policy.Succeed | policy.Fail: log.debug(ctx, 'py_acPreprocForDataObjOpen') # data object reads are always allowed. # writes are blocked e.g. when the object is locked (unless actor is a rodsadmin). @@ -179,7 +179,7 @@ def py_acPreprocForDataObjOpen(ctx): @policy.require() -def py_acDataDeletePolicy(ctx): +def py_acDataDeletePolicy(ctx: rule.Context) -> policy.Succeed | policy.Fail: log.debug(ctx, 'py_acDataDeletePolicy') return (policy.succeed() if can_data_delete(ctx, user.user_and_zone(ctx), @@ -188,7 +188,7 @@ def py_acDataDeletePolicy(ctx): @policy.require() -def py_acPreProcForObjRename(ctx, src, dst): +def py_acPreProcForObjRename(ctx: rule.Context, src: str, dst: str) -> policy.Succeed | policy.Fail: log.debug(ctx, 'py_acPreProcForObjRename') # irods/lib/api/include/dataObjInpOut.h @@ -200,11 +200,11 @@ def py_acPreProcForObjRename(ctx, src, dst): elif session_vars.get_map(ctx.rei)['operation_type'] == RENAME_COLL: return can_coll_move(ctx, user.user_and_zone(ctx), src, dst) - # if ($objPath like regex "/[^/]+/home/" ++ IIGROUPPREFIX ++ ".[^/]*/.*") { + return policy.succeed() @policy.require() -def py_acPostProcForPut(ctx): +def py_acPostProcForPut(ctx: rule.Context) -> policy.Succeed | policy.Fail: log.debug(ctx, 'py_acPostProcForPut') # Data object creation cannot be prevented by API dynpeps and static PEPs, # due to how MSIs work. Thus, this ugly workaround specifically for MSIs. @@ -218,7 +218,7 @@ def py_acPostProcForPut(ctx): @policy.require() -def py_acPostProcForCopy(ctx): +def py_acPostProcForCopy(ctx: rule.Context) -> policy.Succeed | policy.Fail: # See py_acPostProcForPut. log.debug(ctx, 'py_acPostProcForCopy') @@ -255,7 +255,10 @@ def py_acPostProcForCopy(ctx): @policy.require() -def pep_api_data_obj_create_pre(ctx, instance_name, rs_comm, data_obj_inp): +def pep_api_data_obj_create_pre(ctx: rule.Context, + instance_name: str, + rs_comm: object, + data_obj_inp: object) -> policy.Succeed | policy.Fail: log.debug(ctx, 'pep_api_data_obj_create_pre') # Catch object creation/overwrite via Davrods and PRC. @@ -268,7 +271,11 @@ def pep_api_data_obj_create_pre(ctx, instance_name, rs_comm, data_obj_inp): @policy.require() -def pep_api_data_obj_create_and_stat_pre(ctx, instance_name, rs_comm, data_obj_inp, open_stat): +def pep_api_data_obj_create_and_stat_pre(ctx: rule.Context, + instance_name: str, + rs_comm: object, + data_obj_inp: object, + open_stat: object) -> policy.Succeed | policy.Fail: log.debug(ctx, 'pep_api_data_obj_create_and_stat_pre') # Not triggered by any of our clients currently, but needed for completeness. 
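
Since these PEP hooks only duck-type their input structs (hence the `object` annotations), a throwaway stand-in suffices to exercise them outside iRODS (hypothetical sketch; `FakeDataObjInp` is not a real iRODS type):

    class FakeDataObjInp:
        # Minimal stand-in for the iRODS DataObjInp struct; these hooks read only objPath.
        def __init__(self, path: str) -> None:
            self.objPath = path

    inp = FakeDataObjInp('/tempZone/home/research-demo/data.txt')
    assert str(inp.objPath) == '/tempZone/home/research-demo/data.txt'
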
@@ -306,14 +313,20 @@ def pep_api_data_obj_create_and_stat_pre(ctx, instance_name, rs_comm, data_obj_i @policy.require() -def pep_api_data_obj_trim_pre(ctx, instance_name, rs_comm, data_obj_inp): +def pep_api_data_obj_trim_pre(ctx: rule.Context, + instance_name: str, + rs_comm: object, + data_obj_inp: object) -> policy.Succeed | policy.Fail: log.debug(ctx, 'pep_api_data_obj_trim_pre') return can_data_write(ctx, user.user_and_zone(ctx), str(data_obj_inp.objPath)) @policy.require() -def pep_api_data_obj_truncate_pre(ctx, instance_name, rs_comm, data_obj_truncate_inp): +def pep_api_data_obj_truncate_pre(ctx: rule.Context, + instance_name: str, + rs_comm: object, + data_obj_truncate_inp: object) -> policy.Succeed | policy.Fail: log.debug(ctx, 'pep_api_data_obj_truncate_pre') return can_data_write(ctx, user.user_and_zone(ctx), str(data_obj_truncate_inp.objPath)) @@ -337,8 +350,13 @@ def pep_api_data_obj_truncate_pre(ctx, instance_name, rs_comm, data_obj_truncate # Policy for most AVU changes @policy.require() -def py_acPreProcForModifyAVUMetadata(ctx, option, obj_type, obj_name, attr, value, unit): - +def py_acPreProcForModifyAVUMetadata(ctx: rule.Context, + option: str, + obj_type: str, + obj_name: str, + attr: str, + value: str, + unit: str) -> policy.Succeed | policy.Fail: actor = user.user_and_zone(ctx) if obj_type not in ['-d', '-C']: @@ -412,21 +430,32 @@ def py_acPreProcForModifyAVUMetadata(ctx, option, obj_type, obj_name, attr, valu # imeta mod @policy.require() -def py_acPreProcForModifyAVUMetadata_mod(ctx, *args): +def py_acPreProcForModifyAVUMetadata_mod(ctx: rule.Context, + option: str, + obj_type: str, + obj_name: str, + a_attr: str, + a_value: str, + a_unit: str, + b_name: str, + b_value: str, + b_unit: str) -> policy.Succeed | policy.Fail: actor = user.user_and_zone(ctx) if user.is_admin(ctx, actor): return policy.succeed() - if t_dst not in ['-d', '-C']: + if obj_type not in ['-d', '-C']: return policy.succeed() - if pathutil.info(dst).space in [pathutil.Space.RESEARCH, pathutil.Space.DEPOSIT, pathutil.Space.VAULT]: + if pathutil.info(obj_name).space in [pathutil.Space.RESEARCH, pathutil.Space.DEPOSIT, pathutil.Space.VAULT]: return policy.fail('Metadata mod not allowed') + return policy.succeed() + # imeta cp @policy.require() -def py_acPreProcForModifyAVUMetadata_cp(ctx, _, t_src, t_dst, src, dst): +def py_acPreProcForModifyAVUMetadata_cp(ctx: rule.Context, option: str, t_src: str, t_dst: str, src: str, dst: str) -> policy.Succeed | policy.Fail: actor = user.user_and_zone(ctx) if user.is_admin(ctx, actor): return policy.succeed() @@ -449,7 +478,13 @@ def py_acPreProcForModifyAVUMetadata_cp(ctx, _, t_src, t_dst, src, dst): # conditions defined in folder.py and iiVaultTransitions.r # are called here. 
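
The post-proc dispatch below reduces to a predicate on the changed attribute and the space the object lives in; a rough standalone approximation (stand-in values; the real code compares against constants.IISTATUSATTRNAME and pathutil.Space):

    from enum import Enum

    class Space(Enum):
        RESEARCH = 'research'
        DEPOSIT = 'deposit'
        VAULT = 'vault'

    def triggers_folder_transition(attr: str, space: Space) -> bool:
        # Only status AVU changes on research/deposit collections fire the transition hooks.
        return attr == 'org_status' and space in (Space.RESEARCH, Space.DEPOSIT)

    assert triggers_folder_transition('org_status', Space.RESEARCH)
    assert not triggers_folder_transition('org_status', Space.VAULT)
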
@rule.make() -def py_acPostProcForModifyAVUMetadata(ctx, option, obj_type, obj_name, attr, value, unit): +def py_acPostProcForModifyAVUMetadata(ctx: rule.Context, + option: str, + obj_type: str, + obj_name: str, + attr: str, + value: str, + unit: str) -> None: info = pathutil.info(obj_name) if attr == constants.IISTATUSATTRNAME and info.space in [pathutil.Space.RESEARCH, pathutil.Space.DEPOSIT]: @@ -472,7 +507,10 @@ def py_acPostProcForModifyAVUMetadata(ctx, option, obj_type, obj_name, attr, val # ichmod @policy.require() -def pep_api_mod_access_control_pre(ctx, instance_name, rs_comm, mod_access_control_inp): +def pep_api_mod_access_control_pre(ctx: rule.Context, + instance_name: str, + rs_comm: object, + mod_access_control_inp: object) -> policy.Succeed | policy.Fail: log.debug(ctx, 'pep_api_mod_access_control_pre') actor = user.user_and_zone(ctx) if user.is_admin(ctx, actor): @@ -490,7 +528,11 @@ def pep_api_mod_access_control_pre(ctx, instance_name, rs_comm, mod_access_contr # ExecCmd {{{ @policy.require() -def py_acPreProcForExecCmd(ctx, cmd, args, addr, hint): +def py_acPreProcForExecCmd(ctx: rule.Context, + cmd: str, + args: str, + addr: str, + hint: str) -> policy.Succeed | policy.Fail: actor = user.user_and_zone(ctx) # No restrictions for rodsadmin and priv group. @@ -519,7 +561,7 @@ def py_acPreProcForExecCmd(ctx, cmd, args, addr, hint): # Internal function to determine whether changes to data objects on a particular # resource need to trigger policies (e.g. asynchronous replication) by default. -def resource_should_trigger_policies(resource): +def resource_should_trigger_policies(resource: str) -> bool: if resource in config.resource_primary: return True @@ -534,7 +576,10 @@ def resource_should_trigger_policies(resource): @rule.make() -def pep_resource_modified_post(ctx, instance_name, _ctx, out): +def pep_resource_modified_post(ctx: rule.Context, + instance_name: str, + _ctx: rule.Context, + out: str) -> None: if not resource_should_trigger_policies(instance_name): return @@ -578,7 +623,7 @@ def pep_resource_modified_post(ctx, instance_name, _ctx, out): @rule.make() -def py_acPostProcForObjRename(ctx, src, dst): +def py_acPostProcForObjRename(ctx: rule.Context, src: str, dst: str) -> None: # Update ACLs to give correct group ownership when an object is moved into # a different research- or grp- collection. info = pathutil.info(dst) @@ -588,9 +633,16 @@ def py_acPostProcForObjRename(ctx, src, dst): @rule.make(inputs=[0, 1, 2, 3, 4, 5, 6], outputs=[2]) -def pep_resource_resolve_hierarchy_pre(ctx, resource, _ctx, out, operation, host, parser, vote): +def pep_resource_resolve_hierarchy_pre(ctx: rule.Context, + resource: str, + _ctx: rule.Context, + out: str, + operation: str, + host: str, + parser: str, + vote: str) -> str | None: if not config.arb_enabled or operation != "CREATE": - return + return None arb_data = arb_data_manager.ARBDataManager() arb_status = arb_data.get(ctx, resource) @@ -602,7 +654,7 @@ def pep_resource_resolve_hierarchy_pre(ctx, resource, _ctx, out, operation, host @rule.make(inputs=[0], outputs=[1]) -def rule_check_anonymous_access_allowed(ctx, address): +def rule_check_anonymous_access_allowed(ctx: rule.Context, address: str) -> str: """Check if access to the anonymous account is allowed from a particular network address. Non-local access to the anonymous account should only be allowed from DavRODS servers, for security reasons. 
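
That rule amounts to an allow-list on the peer address; a hedged standalone rendering (hypothetical values; the real rule takes the trusted DavRODS addresses from the Yoda configuration):

    def anonymous_access_allowed(address: str, davrods_servers: list) -> str:
        # Local connections are always allowed; remote ones must come from DavRODS.
        if address in ('127.0.0.1', '::1') or address in davrods_servers:
            return 'true'
        return 'false'

    assert anonymous_access_allowed('127.0.0.1', []) == 'true'
    assert anonymous_access_allowed('192.0.2.7', ['192.0.2.7']) == 'true'
    assert anonymous_access_allowed('203.0.113.5', []) == 'false'
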
@@ -617,7 +669,7 @@ def rule_check_anonymous_access_allowed(ctx, address):


 @rule.make(inputs=[], outputs=[0])
-def rule_check_max_connections_exceeded(ctx):
+def rule_check_max_connections_exceeded(ctx: rule.Context) -> str:
     """Check if user exceeds the maximum number of connections.

     :param ctx: Combined type of a callback and rei struct
@@ -636,7 +688,12 @@ def rule_check_max_connections_exceeded(ctx):


 @rule.make(inputs=[0, 1, 2, 3, 4], outputs=[])
-def pep_database_gen_query_pre(ctx, dbtype, _ctx, results, genquery_inp, genquery_out):
+def pep_database_gen_query_pre(ctx: rule.Context,
+                               dbtype: str,
+                               _ctx: rule.Context,
+                               results: str,
+                               genquery_inp: object,
+                               genquery_out: object) -> None:
     if not is_safe_genquery_inp(genquery_inp):
         # We can't use log here, because the REI is not (always) available.
         print("Refused unsafe query: " + str(genquery_inp))
diff --git a/policies_datamanager.py b/policies_datamanager.py
index cfa51f54f..3800ba6ff 100644
--- a/policies_datamanager.py
+++ b/policies_datamanager.py
@@ -6,7 +6,12 @@
 from util import *


-def can_datamanager_acl_set(ctx, obj, actor, other_name, recursive, access):
+def can_datamanager_acl_set(ctx: rule.Context,
+                            obj: str,
+                            actor: str,
+                            other_name: str,
+                            recursive: str,
+                            access: str) -> policy.Succeed | policy.Fail:
     x = ctx.iiCanDatamanagerAclSet(obj, actor, other_name, recursive, access, '', '')
     if x['arguments'][5] == '\x01':
         return policy.succeed()
diff --git a/policies_datapackage_status.py b/policies_datapackage_status.py
index e4d5bbd34..16666251b 100644
--- a/policies_datapackage_status.py
+++ b/policies_datapackage_status.py
@@ -11,7 +11,10 @@
 from util import *


-def pre_status_transition(ctx, coll, current, new):
+def pre_status_transition(ctx: rule.Context,
+                          coll: str,
+                          current: constants.vault_package_state,
+                          new: constants.vault_package_state) -> policy.Succeed | policy.Fail:
     """Action taken before status transition."""
     if current is constants.vault_package_state.SUBMITTED_FOR_PUBLICATION \
        and new is constants.vault_package_state.UNPUBLISHED:
@@ -21,7 +24,11 @@ def pre_status_transition(ctx, coll, current, new):
     return policy.succeed()


-def can_transition_datapackage_status(ctx, actor, coll, status_from, status_to):
+def can_transition_datapackage_status(ctx: rule.Context,
+                                      actor: str,
+                                      coll: str,
+                                      status_from: str,
+                                      status_to: str) -> policy.Succeed | policy.Fail:
     transition = (constants.vault_package_state(status_from),
                   constants.vault_package_state(status_to))
     if transition not in constants.datapackage_transitions:
@@ -38,7 +45,10 @@ def can_transition_datapackage_status(ctx, actor, coll, status_from, status_to):
     return policy.succeed()


-def can_set_datapackage_status_attr(ctx, actor, coll, status):
+def can_set_datapackage_status_attr(ctx: rule.Context,
+                                    actor: str,
+                                    coll: str,
+                                    status: str) -> policy.Succeed | policy.Fail:
     try:
         new = constants.vault_package_state(status)
     except ValueError:
@@ -53,7 +63,10 @@ def can_set_datapackage_status_attr(ctx, actor, coll, status):
     return (current, new)


-def post_status_transition(ctx, path, actor, status):
+def post_status_transition(ctx: rule.Context,
+                           path: str,
+                           actor: str,
+                           status: str) -> None:
     """Post data package status transition actions."""
     status = constants.vault_package_state(status)
     actor = ctx.iiVaultGetActionActor(path, actor, '')['arguments'][2]
diff --git a/policies_datarequest_status.py b/policies_datarequest_status.py
index 16e3bc482..c7381b760 100644
--- a/policies_datarequest_status.py
+++ b/policies_datarequest_status.py
@@ -9,8 
+9,9 @@ from util import * -def can_set_datarequest_status(ctx, obj_name, status_to): - +def can_set_datarequest_status(ctx: rule.Context, + obj_name: str, + status_to: str) -> policy.Succeed | policy.Fail: # Get current status. try: status_from = datarequest.status_get_from_path(ctx, obj_name) @@ -26,8 +27,7 @@ def can_set_datarequest_status(ctx, obj_name, status_to): return policy.succeed() -def post_status_transition(ctx, obj_name, value): - +def post_status_transition(ctx: rule.Context, obj_name: str, value: str) -> None: # Write timestamp to provenance log request_id = re.sub(r"^[^0-9]*/(\d+).*", r"\1", obj_name) status = datarequest.status[value] diff --git a/policies_folder_status.py b/policies_folder_status.py index f9483928f..2afae97e3 100644 --- a/policies_folder_status.py +++ b/policies_folder_status.py @@ -12,7 +12,10 @@ from util import * -def pre_status_transition(ctx, coll, current, new): +def pre_status_transition(ctx: rule.Context, + coll: str, + current: constants.research_package_state, + new: constants.research_package_state) -> policy.Succeed | policy.Fail: """Action taken before status transition.""" if current != constants.research_package_state.LOCKED \ and new in [constants.research_package_state.LOCKED, @@ -50,7 +53,11 @@ def pre_status_transition(ctx, coll, current, new): return policy.succeed() -def can_transition_folder_status(ctx, actor, coll, status_from, status_to): +def can_transition_folder_status(ctx: rule.Context, + actor: str, + coll: str, + status_from: str, + status_to: str) -> policy.Succeed | policy.Fail: transition = (constants.research_package_state(status_from), constants.research_package_state(status_to)) if transition not in constants.folder_transitions: @@ -84,7 +91,10 @@ def can_transition_folder_status(ctx, actor, coll, status_from, status_to): return policy.succeed() -def can_set_folder_status_attr(ctx, actor, coll, status): +def can_set_folder_status_attr(ctx: rule.Context, + actor: str, + coll: str, + status: str) -> policy.Succeed | policy.Fail: try: status = "" if status == "FOLDER" else status new = constants.research_package_state(status) @@ -100,9 +110,11 @@ def can_set_folder_status_attr(ctx, actor, coll, status): return (current, new) -def post_status_transition(ctx, path, actor, status): +def post_status_transition(ctx: rule.Context, + path: str, + actor: str, + status: str) -> None: """Post folder status transition actions.""" - status = "" if status == "FOLDER" else status status = constants.research_package_state(status) diff --git a/policies_intake.py b/policies_intake.py index 3981bbfeb..aaca62c57 100644 --- a/policies_intake.py +++ b/policies_intake.py @@ -3,12 +3,14 @@ __copyright__ = 'Copyright (c) 2021-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import Dict + import genquery from util import * -def object_is_locked(ctx, path, is_collection): +def object_is_locked(ctx: rule.Context, path: str, is_collection: bool) -> Dict: """Returns whether given object in path (collection or dataobject) is locked or frozen :param ctx: Combined type of a callback and rei struct @@ -48,7 +50,7 @@ def object_is_locked(ctx, path, is_collection): return locked_state -def is_data_in_locked_dataset(ctx, actor, path): +def is_data_in_locked_dataset(ctx: rule.Context, actor: str, path: str) -> bool: """ Check whether given data object is within a locked dataset """ dataset_id = '' coll = pathutil.chop(path)[0] @@ -114,7 +116,7 @@ def is_data_in_locked_dataset(ctx, actor, path): return False -def 
is_coll_in_locked_dataset(ctx, actor, coll):
+def is_coll_in_locked_dataset(ctx: rule.Context, actor: str, coll: str) -> bool:
     """ Check whether given collection is within a locked dataset """
     dataset_id = ''
     intake_group_prefix = _get_intake_group_prefix(coll)
@@ -160,14 +162,14 @@
             return (locked_state['locked'] or locked_state['frozen']) and not user.is_admin(ctx, actor)
         else:
             # Lock status could not be determined. Assume collection is not locked.
-            log.debug(ctx, "Could not determine lock state of data object " + path)
+            log.debug(ctx, "Could not determine lock state of collection " + coll)
             return False

     log.debug(ctx, 'After check for datasetid - no dataset found')
     return False


-def coll_in_path_of_locked_dataset(ctx, actor, coll):
+def coll_in_path_of_locked_dataset(ctx: rule.Context, actor: str, coll: str) -> bool:
     """ If collection is part of a locked dataset, or holds one on a deeper level, then deletion is not allowed """
     dataset_id = ''
     intake_group_prefix = _get_intake_group_prefix(coll)
@@ -211,7 +213,7 @@
             log.debug(ctx, locked_state)
             return (locked_state['locked'] or locked_state['frozen']) and not user.is_admin(ctx, actor)
         else:
-            log.debug(ctx, "Could not determine lock state of data object " + path)
+            log.debug(ctx, "Could not determine lock state of collection " + coll)
             # Pretend presence of a lock so no unwanted data gets deleted
             return True
     else:
@@ -242,7 +244,7 @@
     return False


-def _get_intake_group_prefix(coll):
+def _get_intake_group_prefix(coll: str) -> str:
     """ Get the group prefix of an intake collection name: 'grp-intake' or 'intake' """
     parts = coll.split('/')[3].split('-')
     del parts[-1]
diff --git a/policies_utils.py b/policies_utils.py
index c8ebe0eda..a4b4dec19 100644
--- a/policies_utils.py
+++ b/policies_utils.py
@@ -4,11 +4,12 @@
 __license__ = 'GPLv3, see LICENSE'

 import ast
+from typing import Set

 from util.genquery_col_constants import *


-def is_safe_genquery_inp(genquery_inp):
+def is_safe_genquery_inp(genquery_inp: object) -> bool:
     """Checks if a GenQuery input matches Yoda policies

     :param genquery_inp: GenQueryInp object containing query information
@@ -19,7 +20,7 @@ def is_safe_genquery_inp(genquery_inp):
     return _is_safe_genquery_inp(genquery_inp.selectInp, genquery_inp.sqlCondInp.inx)


-def _column_in_select_inp(selectInp, columns):
+def _column_in_select_inp(selectInp: Set[int], columns: Set[int]) -> bool:
     selectedInpHash = ast.literal_eval(str(selectInp))
     selected_columns = selectedInpHash.keys()
     for column in columns:
@@ -28,7 +29,7 @@ def _column_in_select_inp(selectInp, columns):
     return False


-def _column_in_cond_inp(sqlCondInp, columns):
+def _column_in_cond_inp(sqlCondInp: Set[int], columns: Set[int]) -> bool:
     condition_columns = ast.literal_eval(str(sqlCondInp))
     for column in columns:
         if column in condition_columns:
@@ -36,7 +37,7 @@ def _column_in_cond_inp(sqlCondInp, columns):
     return False


-def _is_safe_genquery_inp(selectInp, sqlCondInp):
+def _is_safe_genquery_inp(selectInp: Set[int], sqlCondInp: Set[int]) -> bool:
     # Defines groups of GenQuery columns
     dataobject_columns = {COL_D_DATA_ID, COL_D_COLL_ID, COL_DATA_NAME,
                           COL_DATA_REPL_NUM, COL_DATA_VERSION, COL_DATA_TYPE_NAME, COL_DATA_SIZE,
diff --git a/provenance.py b/provenance.py
index 171cb8493..0ba487cee 100644
--- a/provenance.py
+++ b/provenance.py
@@ -5,6 +5,7 @@

 import json
 import time
+from typing import List

 import genquery

@@ 
-17,7 +18,7 @@ @rule.make() -def rule_provenance_log_action(ctx, actor, coll, action): +def rule_provenance_log_action(ctx: rule.Context, actor: str, coll: str, action: str) -> None: """Function to add action log record to provenance of specific folder. :param ctx: Combined type of a callback and rei struct @@ -34,7 +35,7 @@ def rule_provenance_log_action(ctx, actor, coll, action): log.write(ctx, "rule_provenance_log_action: failed to log action <{}> to provenance".format(action)) -def log_action(ctx, actor, coll, action, update=True): +def log_action(ctx: rule.Context, actor: str, coll: str, action: str, update: bool = True) -> None: """Function to add action log record to provenance of specific folder. :param ctx: Combined type of a callback and rei struct @@ -54,7 +55,7 @@ def log_action(ctx, actor, coll, action, update=True): @rule.make() -def rule_copy_provenance_log(ctx, source, target): +def rule_copy_provenance_log(ctx: rule.Context, source: str, target: str) -> None: """Copy the provenance log of a collection to another collection. :param ctx: Combined type of a callback and rei struct @@ -64,7 +65,7 @@ def rule_copy_provenance_log(ctx, source, target): provenance_copy_log(ctx, source, target) -def provenance_copy_log(ctx, source, target): +def provenance_copy_log(ctx: rule.Context, source: str, target: str) -> None: """Copy the provenance log of a collection to another collection. :param ctx: Combined type of a callback and rei struct @@ -88,7 +89,7 @@ def provenance_copy_log(ctx, source, target): log.write(ctx, "rule_copy_provenance_log: failed to copy provenance log from <{}> to <{}>".format(source, target)) -def get_provenance_log(ctx, coll): +def get_provenance_log(ctx: rule.Context, coll: str) -> List: """Return provenance log of a collection. :param ctx: Combined type of a callback and rei struct @@ -113,7 +114,7 @@ def get_provenance_log(ctx, coll): @api.make() -def api_provenance_log(ctx, coll): +def api_provenance_log(ctx: rule.Context, coll: str) -> api.Result: """Return formatted provenance log of a collection. :param ctx: Combined type of a callback and rei struct @@ -134,7 +135,7 @@ def api_provenance_log(ctx, coll): return output -def latest_action_actor(ctx, path): +def latest_action_actor(ctx: rule.Context, path: str) -> str: """Return the actor of the latest provenance action. :param ctx: Combined type of a callback and rei struct diff --git a/publication.py b/publication.py index c30aca478..b3b5c9fef 100644 --- a/publication.py +++ b/publication.py @@ -3,8 +3,10 @@ __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +import json import re from datetime import datetime +from typing import Dict, List, Tuple import genquery from requests.exceptions import ReadTimeout @@ -25,7 +27,7 @@ 'rule_lift_embargos_on_data_access'] -def get_publication_config(ctx): +def get_publication_config(ctx: rule.Context) -> Dict: """Get all publication config keys and their values and report any missing keys.""" zone = user.zone(ctx) system_coll = "/" + zone + constants.UUSYSTEMCOLLECTION @@ -70,12 +72,14 @@ def get_publication_config(ctx): return config_keys -def generate_combi_json(ctx, publication_config, publication_state): +def generate_combi_json(ctx: rule.Context, publication_config: Dict, publication_state: Dict) -> None: """Join system metadata with the user metadata in yoda-metadata.json. 
     :param ctx:                Combined type of a callback and rei struct
     :param publication_config: Dict with publication configuration
     :param publication_state:  Dict with state of the publication process
+
+    :raises Exception: When latest metadata is not found
     """
     temp_coll = "/" + user.zone(ctx) + constants.IIPUBLICATIONCOLLECTION
     davrodsAnonymousVHost = publication_config["davrodsAnonymousVHost"]
@@ -100,6 +104,8 @@

     # metadataJsonPath contains latest json
     metadataJsonPath = meta.get_latest_vault_metadata_path(ctx, vaultPackage)
+    if metadataJsonPath is None:
+        raise Exception('Latest vault metadata not found for ' + vaultPackage)

     # Combine content of current *metadataJsonPath with system info and create a new file in *combiJsonPath:
     json_datacite.json_datacite_create_combi_metadata_json(ctx, metadataJsonPath, combiJsonPath, lastModifiedDateTime, versionDOI, publicationDate, openAccessLink, licenseUri)
@@ -107,7 +113,7 @@
     publication_state["combiJsonPath"] = combiJsonPath


-def generate_system_json(ctx, publication_state):
+def generate_system_json(ctx: rule.Context, publication_state: Dict) -> None:
     """Overwrite combi metadata json with system-only metadata.

     :param ctx: Combined type of a callback and rei struct
@@ -135,7 +141,7 @@ def generate_system_json(ctx, publication_state):
     publication_state["combiJsonPath"] = system_json_path


-def get_publication_state(ctx, vault_package):
+def get_publication_state(ctx: rule.Context, vault_package: str) -> Dict:
     """The publication state is kept as metadata on the vault package.

     :param ctx: Combined type of a callback and rei struct
@@ -191,7 +197,7 @@ def get_publication_state(ctx, vault_package):
     return publication_state


-def save_publication_state(ctx, vault_package, publication_state):
+def save_publication_state(ctx: rule.Context, vault_package: str, publication_state: Dict) -> None:
     """Save the publication state key-value pairs to AVUs on the vault package.

     :param ctx: Combined type of a callback and rei struct
@@ -204,7 +210,7 @@ def save_publication_state(ctx, vault_package, publication_state):
             avu.set_on_coll(ctx, vault_package, constants.UUORGMETADATAPREFIX + 'publication_' + key, publication_state[key])


-def set_update_publication_state(ctx, vault_package):
+def set_update_publication_state(ctx: rule.Context, vault_package: str) -> str:
     """Routine to set publication state of vault package pending to update.

     :param ctx: Combined type of a callback and rei struct
@@ -251,7 +257,7 @@ def set_update_publication_state(ctx, vault_package):
     return ""


-def get_publication_date(ctx, vault_package):
+def get_publication_date(ctx: rule.Context, vault_package: str) -> str:
     """Determine the time of publication as a datetime with UTC offset.

     First try action_log. Then icat-time.
@@ -279,7 +285,7 @@ def get_publication_date(ctx, vault_package):

     return my_date.strftime('%Y-%m-%dT%H:%M:%S.%f%z')


-def get_last_modified_datetime(ctx, vault_package):
+def get_last_modified_datetime(ctx: rule.Context, vault_package: str) -> str:
     """Determine the time of last modification as a datetime with UTC offset.
     :param ctx: Combined type of a callback and rei struct
@@ -294,13 +300,17 @@
     )
     for row in iter:
         log_item_list = jsonutil.parse(row[1])
         my_date = datetime.fromtimestamp(int(log_item_list[0]))
         return my_date.strftime('%Y-%m-%dT%H:%M:%S.%f%z')

+    # Fall back to the current time when the package has no action log.
+    my_date = datetime.now()
+    return my_date.strftime('%Y-%m-%dT%H:%M:%S.%f%z')
+

-def generate_preliminary_doi(ctx, publication_config, publication_state):
+def generate_preliminary_doi(ctx: rule.Context, publication_config: Dict, publication_state: Dict) -> None:
     """Generate a Preliminary DOI. Preliminary, because we check for collision later.

     :param ctx: Combined type of a callback and rei struct
@@ -310,13 +317,13 @@
     dataCitePrefix = publication_config["dataCitePrefix"]
     yodaPrefix = publication_config["yodaPrefix"]

-    randomId = datacite.generate_random_id(ctx, publication_config["randomIdLength"])
+    randomId = datacite.generate_random_id(publication_config["randomIdLength"])

     publication_state["randomId"] = randomId
     publication_state["versionDOI"] = dataCitePrefix + "/" + yodaPrefix + "-" + randomId


-def generate_base_doi(ctx, publication_config, publication_state):
+def generate_base_doi(ctx: rule.Context, publication_config: Dict, publication_state: Dict) -> None:
     """Generate a base DOI.

     :param ctx: Combined type of a callback and rei struct
@@ -326,13 +333,13 @@
     dataCitePrefix = publication_config["dataCitePrefix"]
     yodaPrefix = publication_config["yodaPrefix"]

-    randomId = datacite.generate_random_id(ctx, publication_config["randomIdLength"])
+    randomId = datacite.generate_random_id(publication_config["randomIdLength"])

     publication_state["baseRandomId"] = randomId
     publication_state["baseDOI"] = dataCitePrefix + "/" + yodaPrefix + "-" + randomId


-def generate_datacite_json(ctx, publication_state):
+def generate_datacite_json(ctx: rule.Context, publication_state: Dict) -> None:
     """Generate a DataCite compliant JSON based on yoda-metadata.json.

     :param ctx: Combined type of a callback and rei struct
@@ -353,7 +360,7 @@

     publication_state["dataCiteJsonPath"] = datacite_json_path


-def post_metadata_to_datacite(ctx, publication_state, doi, send_method, base_doi=False):
+def post_metadata_to_datacite(ctx: rule.Context, publication_state: Dict, doi: str, send_method: str, base_doi: bool = False) -> None:
     """Upload DataCite JSON to DataCite. This will register the DOI, without minting it.

     :param ctx: Combined type of a callback and rei struct
@@ -370,9 +377,9 @@ def post_metadata_to_datacite(ctx, publication_state, doi, send_method, base_doi

     try:
         if send_method == 'post':
-            httpCode = datacite.metadata_post(ctx, datacite_json)
+            httpCode = datacite.metadata_post(datacite_json)
         else:
-            httpCode = datacite.metadata_put(ctx, doi, datacite_json)
+            httpCode = datacite.metadata_put(doi, datacite_json)

         if (send_method == 'post' and httpCode == 201) or (send_method == 'put' and httpCode == 200):
             publication_state["dataCiteMetadataPosted"] = "yes"
@@ -389,7 +396,7 @@
         publication_state["status"] = "Retry"


-def post_draft_doi_to_datacite(ctx, publication_state):
+def post_draft_doi_to_datacite(ctx: rule.Context, publication_state: Dict) -> None:
     """Upload DOI to DataCite. This will register the DOI as a draft.
    This function is also a draft, and will have to be reworked!
@@ -401,7 +408,7 @@ def post_draft_doi_to_datacite(ctx, publication_state):

     try:
         # post the DOI only
-        httpCode = datacite.metadata_post(ctx, {
+        httpCode = datacite.metadata_post({
             'data': {
                 'type': 'dois',
                 'attributes': {
@@ -425,18 +432,17 @@
         publication_state["status"] = "Retry"


-def remove_metadata_from_datacite(ctx, publication_state, type_flag):
+def remove_metadata_from_datacite(ctx: rule.Context, publication_state: Dict, type_flag: str) -> None:
     """Remove metadata XML from DataCite.

     :param ctx:               Combined type of a callback and rei struct
     :param publication_state: Dict with state of the publication process
     :param type_flag:         Determine whether it is base DOI or version DOI
     """
-    import json
     payload = json.dumps({"data": {"attributes": {"event": "hide"}}})

     try:
-        httpCode = datacite.metadata_put(ctx, publication_state[type_flag + "DOI"], payload)
+        httpCode = datacite.metadata_put(publication_state[type_flag + "DOI"], payload)

         if httpCode == 200:
             publication_state["dataCiteMetadataPosted"] = "yes"
@@ -457,18 +463,17 @@
         publication_state["status"] = "Retry"


-def mint_doi(ctx, publication_state, type_flag):
+def mint_doi(ctx: rule.Context, publication_state: Dict, type_flag: str) -> None:
     """Announce the landing page URL for a DOI to DataCite. This will mint the DOI.

     :param ctx:               Combined type of a callback and rei struct
     :param publication_state: Dict with state of the publication process
     :param type_flag:         Flag indicating DOI type ('version' or 'base')
     """
-    import json
     payload = json.dumps({"data": {"attributes": {"url": publication_state["landingPageUrl"]}}})

     try:
-        httpCode = datacite.metadata_put(ctx, publication_state[type_flag + "DOI"], payload)
+        httpCode = datacite.metadata_put(publication_state[type_flag + "DOI"], payload)

         if httpCode == 200:  # 201:
             publication_state[type_flag + "DOIMinted"] = "yes"
@@ -488,7 +493,7 @@
         publication_state["status"] = "Retry"


-def generate_landing_page_url(ctx, publication_config, publication_state):
+def generate_landing_page_url(ctx: rule.Context, publication_config: Dict, publication_state: Dict) -> None:
     """Generate a URL for the landing page.

     :param ctx:                Combined type of a callback and rei struct
     :param publication_config: Dict with publication configuration
@@ -505,7 +510,7 @@

     publication_state["landingPageUrl"] = landingPageUrl


-def generate_landing_page(ctx, publication_state, publish):
+def generate_landing_page(ctx: rule.Context, publication_state: Dict, publish: str) -> None:
     """Generate a landing page based on yoda-metadata.json.

     :param ctx:               Combined type of a callback and rei struct
     :param publication_state: Dict with state of the publication process
     :param publish:           Publication or depublication
@@ -540,7 +545,7 @@

     publication_state["landingPagePath"] = landing_page_path


-def copy_landingpage_to_public_host(ctx, random_id, publication_config, publication_state):
+def copy_landingpage_to_public_host(ctx: rule.Context, random_id: str, publication_config: Dict, publication_state: Dict) -> None:
     """Copy the resulting landing page to configured public host.
:param ctx: Combined type of a callback and rei struct @@ -565,7 +570,7 @@ def copy_landingpage_to_public_host(ctx, random_id, publication_config, publicat log.write(ctx, "copy_landingpage_to_public: " + error) -def copy_metadata_to_moai(ctx, random_id, publication_config, publication_state): +def copy_metadata_to_moai(ctx: rule.Context, random_id: str, publication_config: Dict, publication_state: Dict) -> None: """Copy the metadata json file to configured MOAI. :param ctx: Combined type of a callback and rei struct @@ -589,7 +594,7 @@ def copy_metadata_to_moai(ctx, random_id, publication_config, publication_state) log.write(ctx, "copy_metadata_to_public: " + error) -def set_access_restrictions(ctx, vault_package, publication_state): +def set_access_restrictions(ctx: rule.Context, vault_package: str, publication_state: Dict) -> None: """Set access restriction for vault package. This function is called when (re)publishing a vault package. @@ -600,8 +605,6 @@ def set_access_restrictions(ctx, vault_package, publication_state): :param ctx: Combined type of a callback and rei struct :param vault_package: Path to the package in the vault :param publication_state: Dict with state of the publication process - - :returns: None """ # Embargo handling combiJsonPath = publication_state["combiJsonPath"] @@ -658,7 +661,7 @@ def set_access_restrictions(ctx, vault_package, publication_state): publication_state["anonymousAccess"] = "yes" -def check_doi_availability(ctx, publication_state, type_flag): +def check_doi_availability(ctx: rule.Context, publication_state: Dict, type_flag: str) -> None: """Request DOI to check on availability. We want a 404 as return code. :param ctx: Combined type of a callback and rei struct @@ -668,7 +671,7 @@ def check_doi_availability(ctx, publication_state, type_flag): doi = publication_state[type_flag + "DOI"] try: - http_code = datacite.metadata_get(ctx, doi) + http_code = datacite.metadata_get(doi) if http_code == 404: publication_state[type_flag + "DOIAvailable"] = "yes" @@ -685,7 +688,7 @@ def check_doi_availability(ctx, publication_state, type_flag): publication_state["status"] = "Retry" -def process_publication(ctx, vault_package): +def process_publication(ctx: rule.Context, vault_package: str) -> str: """Handling of publication of vault_package. :param ctx: Combined type of a callback and rei struct @@ -999,7 +1002,7 @@ def process_publication(ctx, vault_package): return publication_state["status"] -def process_depublication(ctx, vault_package): +def process_depublication(ctx: rule.Context, vault_package: str) -> str: status = "Unknown" log.write(ctx, "Process depublication of vault package <{}>".format(vault_package)) @@ -1145,7 +1148,7 @@ def process_depublication(ctx, vault_package): return publication_state["status"] -def process_republication(ctx, vault_package): +def process_republication(ctx: rule.Context, vault_package: str) -> str: """Routine to process a republication with sanity checks at every step.""" publication_state = {} @@ -1315,7 +1318,11 @@ def process_republication(ctx, vault_package): @rule.make(inputs=[0, 1, 2, 3]) -def rule_update_publication(ctx, vault_package, update_datacite, update_landingpage, update_moai): +def rule_update_publication(ctx: rule.Context, + vault_package: str, + update_datacite: str, + update_landingpage: str, + update_moai: str) -> None: """Rule interface for updating the publication of a vault package. 
     :param ctx: Combined type of a callback and rei struct
@@ -1352,7 +1359,11 @@ def rule_update_publication(ctx, vault_package, update_datacite, update_landingp
         log.write(ctx, "[UPDATE PUBLICATIONS] Finished for {}".format(vault_package), True)


-def update_publication(ctx, vault_package, update_datacite=False, update_landingpage=False, update_moai=False):
+def update_publication(ctx: rule.Context,
+                       vault_package: str,
+                       update_datacite: bool = False,
+                       update_landingpage: bool = False,
+                       update_moai: bool = False) -> str:
     """Routine to update a publication with sanity checks at every step.

     :param ctx: Combined type of a callback and rei struct
@@ -1365,7 +1376,7 @@ def update_publication(ctx, vault_package, update_datacite=False, update_landing
     """
     publication_state = {}

-    def _check_return_if_publication_status(return_statuses, location):
+    def _check_return_if_publication_status(return_statuses: List[str], location: str) -> bool:
         # Used to check whether we need to return early because of an
         # unexpected publication status, and log a message for troubleshooting
         # purposes.
@@ -1509,7 +1520,7 @@ def _check_return_if_publication_status(return_statuses, location):
     return publication_state["status"]


-def get_collection_metadata(ctx, coll, prefix):
+def get_collection_metadata(ctx: rule.Context, coll: str, prefix: str) -> Dict:
     """Retrieve all collection metadata.

     :param ctx: Combined type of a callback and rei struct
@@ -1531,14 +1542,14 @@ def get_collection_metadata(ctx, coll, prefix):
     return coll_metadata


-def get_all_versions(ctx, path, doi):
+def get_all_versions(ctx: rule.Context, path: str, doi: str) -> Tuple[List, List]:
     """Get all the version DOIs of a published data package in a vault.

     :param ctx:  Combined type of a callback and rei struct
     :param path: Path of the published data package
     :param doi:  Base DOI of the selected publication

-    :return: Dict of related version DOIs
+    :return: Tuple with version DOIs and previous version DOIs
     """
     coll_parent_name = path.rsplit('/', 1)[0]
@@ -1577,7 +1588,7 @@ def get_all_versions(ctx, path, doi):


 @rule.make()
-def rule_lift_embargos_on_data_access(ctx):
+def rule_lift_embargos_on_data_access(ctx: rule.Context) -> str:
     """Find vault packages that have a data access embargo that can be lifted as the embargo expires.

     If lift_embargo_date <= now, update publication.
diff --git a/publication_troubleshoot.py b/publication_troubleshoot.py
index 8d2c7e027..04e6bb69d 100644
--- a/publication_troubleshoot.py
+++ b/publication_troubleshoot.py
@@ -108,7 +108,7 @@ def check_one_datacite_doi_reg(ctx, data_package, doi_name, write_stdout):
         log.write(ctx, "check_datacite_doi_registration: Error while trying to get {} - {}".format(doi_name, e), write_stdout)
         return False

-    status_code = datacite.metadata_get(ctx, doi)
+    status_code = datacite.metadata_get(doi)
     return status_code == 200


diff --git a/replication.py b/replication.py
index 0dc7e07ac..062870770 100644
--- a/replication.py
+++ b/replication.py
@@ -16,7 +16,7 @@
 __all__ = ['rule_replicate_batch']


-def replicate_asynchronously(ctx, path, source_resource, target_resource):
+def replicate_asynchronously(ctx: rule.Context, path: str, source_resource: str, target_resource: str) -> None:
     """Schedule replication of a data object.
:param ctx: Combined type of a callback and rei struct @@ -71,7 +71,7 @@ def replicate_asynchronously(ctx, path, source_resource, target_resource): @rule.make() -def rule_replicate_batch(ctx, verbose, balance_id_min, balance_id_max, batch_size_limit, dry_run): +def rule_replicate_batch(ctx: rule.Context, verbose: str, balance_id_min: int, balance_id_max: int, batch_size_limit: int, dry_run: str) -> None: """Scheduled replication batch job. Performs replication for all data objects marked with 'org_replication_scheduled' metadata. @@ -87,7 +87,6 @@ def rule_replicate_batch(ctx, verbose, balance_id_min, balance_id_max, batch_siz :param balance_id_max: Maximum balance id for batch jobs (value 1-64) :param batch_size_limit: Maximum number of items to be processed within one batch :param dry_run: When '1' do not actually replicate, only log what would have replicated - """ count = 0 count_ok = 0 @@ -191,7 +190,7 @@ def rule_replicate_batch(ctx, verbose, balance_id_min, balance_id_max, batch_siz # Mark as correctly replicated count_ok += 1 except msi.Error as e: - log.write(ctx, 'ERROR - The file {} could not be replicated from {} to {}: {}'.format(file, from_path, to_path, str(e))) + log.write(ctx, 'ERROR - The file {} could not be replicated from {} to {}: {}'.format(path, from_path, to_path, str(e))) if print_verbose: log.write(ctx, "Batch replication retry: copying {} from {} to {}".format(path, data_resc_name, to_path)) @@ -250,7 +249,7 @@ def rule_replicate_batch(ctx, verbose, balance_id_min, balance_id_max, batch_siz log.write(ctx, "Batch replication job finished. {}/{} objects replicated successfully.".format(count_ok, count)) -def is_replication_blocked_by_admin(ctx): +def is_replication_blocked_by_admin(ctx: rule.Context) -> bool: """Admin can put the replication process on hold by adding a file called 'stop_replication' in collection /yoda/flags. :param ctx: Combined type of a callback and rei struct @@ -262,24 +261,19 @@ def is_replication_blocked_by_admin(ctx): return collection.exists(ctx, path) -def memory_rss_usage(): - """ - The RSS (resident) memory size in bytes for the current process. - """ +def memory_rss_usage() -> int: + """The RSS (resident) memory size in bytes for the current process.""" p = psutil.Process() return p.memory_info().rss -def show_memory_usage(ctx): - """ - For debug purposes show the current RSS usage. - """ +def show_memory_usage(ctx: rule.Context) -> None: + """For debug purposes show the current RSS usage.""" log.write(ctx, "current RSS usage: {} bytes".format(memory_rss_usage())) -def memory_limit_exceeded(rss_limit): - """ - True when a limit other than 0 was specified and memory usage is currently +def memory_limit_exceeded(rss_limit: int) -> bool: + """True when a limit other than 0 was specified and memory usage is currently above this limit. Otherwise False. 
:param rss_limit: Max memory usage in bytes @@ -287,4 +281,4 @@ def memory_limit_exceeded(rss_limit): :returns: Boolean indicating if memory limited exceeded """ rss_limit = int(rss_limit) - return rss_limit and memory_rss_usage() > rss_limit + return rss_limit > 0 and memory_rss_usage() > rss_limit diff --git a/research.py b/research.py index a906abba3..0f35c426f 100644 --- a/research.py +++ b/research.py @@ -3,6 +3,8 @@ __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import Tuple + import genquery from pathvalidate import validate_filename, validate_filepath, ValidationError @@ -26,7 +28,7 @@ 'api_research_manifest'] -def folder_new_name_check(folder_name): +def folder_new_name_check(folder_name: str) -> Tuple[bool, str]: if len(folder_name) == 0: return False, api.Error('missing_foldername', 'Missing folder name. Please add a folder name') @@ -47,14 +49,14 @@ def folder_new_name_check(folder_name): @api.make() -def api_research_folder_add(ctx, coll, new_folder_name): +def api_research_folder_add(ctx: rule.Context, coll: str, new_folder_name: str) -> api.Result: """Add a new folder to a research folder. :param ctx: Combined type of a callback and rei struct :param coll: Collection to create new folder in :param new_folder_name: Name of the new folder - :returns: Dict with API status result + :returns: API status result """ coll_target = coll + '/' + new_folder_name @@ -102,7 +104,7 @@ def api_research_folder_add(ctx, coll, new_folder_name): return api.Result.ok() -def folder_copy_check(ctx, folder_path, new_folder_path, overwrite, copy=True): +def folder_copy_check(ctx: rule.Context, folder_path: str, new_folder_path: str, overwrite: bool, copy: bool = True) -> Tuple[bool, str]: """Check whether can copy (or move) folder to new folder location. :param ctx: Combined type of a callback and rei struct @@ -166,7 +168,7 @@ def folder_copy_check(ctx, folder_path, new_folder_path, overwrite, copy=True): @api.make() -def api_research_folder_copy(ctx, folder_path, new_folder_path, overwrite=False): +def api_research_folder_copy(ctx: rule.Context, folder_path: str, new_folder_path: str, overwrite: bool = False) -> api.Result: """Copy a folder in a research folder. :param ctx: Combined type of a callback and rei struct @@ -174,7 +176,7 @@ def api_research_folder_copy(ctx, folder_path, new_folder_path, overwrite=False) :param new_folder_path: Path to the new copy of the folder :param overwrite: Overwrite folder if it already exists - :returns: Dict with API status result + :returns: API status result """ valid, errorResponse = folder_copy_check(ctx, folder_path, new_folder_path, overwrite, True) if not valid: @@ -190,7 +192,7 @@ def api_research_folder_copy(ctx, folder_path, new_folder_path, overwrite=False) @api.make() -def api_research_folder_move(ctx, folder_path, new_folder_path, overwrite=False): +def api_research_folder_move(ctx: rule.Context, folder_path: str, new_folder_path: str, overwrite: bool = False) -> api.Result: """Move a folder in a research folder. 
:param ctx: Combined type of a callback and rei struct @@ -198,7 +200,7 @@ def api_research_folder_move(ctx, folder_path, new_folder_path, overwrite=False) :param new_folder_path: Path to the new folder :param overwrite: Overwrite folder if it already exists - :returns: Dict with API status result + :returns: API status result """ valid, errorResponse = folder_copy_check(ctx, folder_path, new_folder_path, overwrite, False) if not valid: @@ -214,7 +216,7 @@ def api_research_folder_move(ctx, folder_path, new_folder_path, overwrite=False) @api.make() -def api_research_folder_rename(ctx, new_folder_name, coll, org_folder_name): +def api_research_folder_rename(ctx: rule.Context, new_folder_name: str, coll: str, org_folder_name: str) -> api.Result: """Rename an existing research folder. :param ctx: Combined type of a callback and rei struct @@ -222,7 +224,7 @@ def api_research_folder_rename(ctx, new_folder_name, coll, org_folder_name): :param coll: Parent collection of folder :param org_folder_name: Current name of the folder - :returns: Dict with API status result + :returns: API status result """ coll_target = coll + '/' + new_folder_name @@ -275,14 +277,14 @@ def api_research_folder_rename(ctx, new_folder_name, coll, org_folder_name): @api.make() -def api_research_folder_delete(ctx, coll, folder_name): +def api_research_folder_delete(ctx: rule.Context, coll: str, folder_name: str) -> api.Result: """Delete a research folder. :param ctx: Combined type of a callback and rei struct :param coll: Parent collection of folder to delete :param folder_name: Name of folder to delete - :returns: Dict with API status result + :returns: API status result """ coll_target = coll + '/' + folder_name @@ -322,7 +324,7 @@ def api_research_folder_delete(ctx, coll, folder_name): @api.make() -def api_research_list_temporary_files(ctx, coll): +def api_research_list_temporary_files(ctx: rule.Context, coll: str) -> api.Result: """Get list of temporary files to be cleaned up. :param ctx: Combined type of a callback and rei struct @@ -354,7 +356,7 @@ def api_research_list_temporary_files(ctx, coll): @api.make() -def api_research_file_copy(ctx, filepath, new_filepath, overwrite=False): +def api_research_file_copy(ctx: rule.Context, filepath: str, new_filepath: str, overwrite: bool = False) -> api.Result: """Copy a file in a research folder. :param ctx: Combined type of a callback and rei struct @@ -362,7 +364,7 @@ def api_research_file_copy(ctx, filepath, new_filepath, overwrite=False): :param new_filepath: Path to the new copy of the file :param overwrite: Overwrite file if it already exists - :returns: Dict with API status result + :returns: API status result """ if len(new_filepath) == 0: return api.Error('missing_filepath', 'Missing file path. Please add a file path') @@ -424,7 +426,7 @@ def api_research_file_copy(ctx, filepath, new_filepath, overwrite=False): @api.make() -def api_research_file_rename(ctx, new_file_name, coll, org_file_name): +def api_research_file_rename(ctx: rule.Context, new_file_name: str, coll: str, org_file_name: str) -> api.Result: """Rename a file in a research folder. :param ctx: Combined type of a callback and rei struct @@ -432,7 +434,7 @@ def api_research_file_rename(ctx, new_file_name, coll, org_file_name): :param coll: Parent collection of file :param org_file_name: Current name of the file - :returns: Dict with API status result + :returns: API status result """ if len(new_file_name) == 0: return api.Error('missing_filename', 'Missing filename. 
Please add a file name') @@ -494,7 +496,7 @@ def api_research_file_rename(ctx, new_file_name, coll, org_file_name): @api.make() -def api_research_file_move(ctx, filepath, new_filepath, overwrite=False): +def api_research_file_move(ctx: rule.Context, filepath: str, new_filepath: str, overwrite: bool = False) -> api.Result: """Move a file in a research folder. :param ctx: Combined type of a callback and rei struct @@ -502,7 +504,7 @@ def api_research_file_move(ctx, filepath, new_filepath, overwrite=False): :param new_filepath: Path to the new location of the file :param overwrite: Overwrite file if it already exists - :returns: Dict with API status result + :returns: API status result """ if len(new_filepath) == 0: return api.Error('missing_filepath', 'Missing file path. Please add a file path') @@ -567,14 +569,14 @@ def api_research_file_move(ctx, filepath, new_filepath, overwrite=False): @api.make() -def api_research_file_delete(ctx, coll, file_name): +def api_research_file_delete(ctx: rule.Context, coll: str, file_name: str) -> api.Result: """Delete a file in a research folder. :param ctx: Combined type of a callback and rei struct :param coll: Parent collection of file to delete :param file_name: Name of file to delete - :returns: Dict with API status result + :returns: API status result """ path_target = coll + '/' + file_name @@ -610,13 +612,13 @@ def api_research_file_delete(ctx, coll, file_name): @api.make() -def api_research_system_metadata(ctx, coll): +def api_research_system_metadata(ctx: rule.Context, coll: str) -> api.Result: """Return collection statistics as JSON. :param ctx: Combined type of a callback and rei struct :param coll: Research collection - :returns: Dict with research system metadata + :returns: API status result """ data_count = collection.data_count(ctx, coll) collection_count = collection.collection_count(ctx, coll) @@ -629,8 +631,14 @@ def api_research_system_metadata(ctx, coll): @api.make() -def api_research_collection_details(ctx, path): - """Return details of a research collection.""" +def api_research_collection_details(ctx: rule.Context, path: str) -> api.Result: + """Return details of a research collection. 
+ + :param ctx: Combined type of a callback and rei struct + :param path: Path to research collection + + :returns: API status result + """ if not collection.exists(ctx, path): return api.Error('nonexistent', 'The given path does not exist') @@ -664,7 +672,7 @@ def api_research_collection_details(ctx, path): @api.make() -def api_research_manifest(ctx, coll): +def api_research_manifest(ctx: rule.Context, coll: str) -> api.Result: """Produce a manifest of data objects in a collection :param ctx: Combined type of a callback and rei struct diff --git a/resources.py b/resources.py index f93ea0e07..830ef953e 100644 --- a/resources.py +++ b/resources.py @@ -4,6 +4,7 @@ __license__ = 'GPLv3, see LICENSE' from datetime import datetime +from typing import Dict, List import genquery @@ -22,12 +23,12 @@ @api.make() -def api_resource_browse_group_data(ctx, - sort_on='name', - sort_order='asc', - offset=0, - limit=10, - search_groups=""): +def api_resource_browse_group_data(ctx: rule.Context, + sort_on: str = 'name', + sort_order: str = 'asc', + offset: int = 0, + limit: int = 10, + search_groups: str = "") -> api.Result: """Get paginated group data groupname / size :param ctx: Combined type of a callback and rei struct @@ -88,10 +89,8 @@ def api_resource_browse_group_data(ctx, @api.make() -def api_resource_full_year_differentiated_group_storage(ctx, group_name): - # def api_resource_full_range ... - - """Return the full range of registered storage data differentiated into vault/research/revision/total +def api_resource_full_year_differentiated_group_storage(ctx: rule.Context, group_name: str) -> api.Result: + """Return the full range of registered storage data differentiated into vault/research/revision/total. :param ctx: Combined type of a callback and rei struct :param group_name: Group that is searched for storage data @@ -135,7 +134,7 @@ def api_resource_full_year_differentiated_group_storage(ctx, group_name): @api.make() -def api_resource_category_stats(ctx): +def api_resource_category_stats(ctx: rule.Context) -> api.Result: """Collect storage stats of last month for categories. Storage is summed up for each category. @@ -235,7 +234,7 @@ def api_resource_category_stats(ctx): @api.make() -def api_resource_monthly_category_stats(ctx): +def api_resource_monthly_category_stats(ctx: rule.Context) -> api.Result: """Collect storage stats for all twelve months based upon categories a user is datamanager of. Statistics gathered: @@ -329,7 +328,7 @@ def api_resource_monthly_category_stats(ctx): return {'storage': all_storage, 'dates': storage_dates} -def get_group_category_info(ctx, groupName): +def get_group_category_info(ctx: rule.Context, groupName: str) -> Dict: """Get category and subcategory for a group. :param ctx: Combined type of a callback and rei struct @@ -358,7 +357,7 @@ def get_group_category_info(ctx, groupName): return {'category': category, 'subcategory': subcategory} -def get_groups_on_categories(ctx, categories, search_groups=""): +def get_groups_on_categories(ctx: rule.Context, categories: List, search_groups: str = "") -> List: """Get all groups belonging to all given categories. :param ctx: Combined type of a callback and rei struct @@ -414,7 +413,7 @@ def get_groups_on_categories(ctx, categories, search_groups=""): @rule.make() -def rule_resource_store_storage_statistics(ctx): +def rule_resource_store_storage_statistics(ctx: rule.Context) -> str: """ For all categories present, store all found storage data for each group belonging to these categories. 
@@ -558,13 +557,13 @@ def rule_resource_store_storage_statistics(ctx): @rule.make(inputs=[0, 1, 2], outputs=[]) -def rule_resource_update_resc_arb_data(ctx, resc_name, bytes_free, bytes_total): +def rule_resource_update_resc_arb_data(ctx: rule.Context, resc_name: str, bytes_free: int, bytes_total: int) -> None: """ Update ARB data for a specific resource - :param ctx: Combined type of a callback and rei struct - :param resc_name: Name of a particular unixfilesystem resource - :param bytes_free: Free size on this resource, in bytes + :param ctx: Combined type of a callback and rei struct + :param resc_name: Name of a particular unixfilesystem resource + :param bytes_free: Free size on this resource, in bytes :param bytes_total: Total size of this resource, in bytes """ if user.user_type(ctx) != 'rodsadmin': @@ -595,7 +594,7 @@ def rule_resource_update_resc_arb_data(ctx, resc_name, bytes_free, bytes_total): @rule.make() -def rule_resource_update_misc_arb_data(ctx): +def rule_resource_update_misc_arb_data(ctx: rule.Context) -> None: """Update ARB data for resources that are not covered by the regular process. That is, all resources that are neither unixfilesystem nor passthrough resources, as well as passthrough resources that do not have a unixfilesystem child resource. @@ -629,7 +628,7 @@ def rule_resource_update_misc_arb_data(ctx): manager.put(ctx, resc, constants.arb_status.IGNORE) -def get_categories(ctx): +def get_categories(ctx: rule.Context) -> List: """Get all categories for current user. :param ctx: Combined type of a callback and rei struct @@ -665,7 +664,7 @@ def get_categories(ctx): return categories -def get_groups_on_category(ctx, category): +def get_groups_on_category(ctx: rule.Context, category: str) -> List: """Get all groups for category.""" groups = [] iter = genquery.row_iterator( @@ -681,7 +680,7 @@ def get_groups_on_category(ctx, category): return groups -def get_group_data_sizes(ctx, group_name, ref_period=None): +def get_group_data_sizes(ctx: rule.Context, group_name: str, ref_period: str | None = None) -> List: """Get group data sizes and return as a list of values. If no reference period is specified return closest to today. diff --git a/revision_strategies.py b/revision_strategies.py index 567cc16b0..3102829b3 100644 --- a/revision_strategies.py +++ b/revision_strategies.py @@ -4,8 +4,32 @@ __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import List -def get_revision_strategy(strategy_name): + +class RevisionStrategy: + HOURS = 3600 + DAYS = 86400 + WEEKS = 604800 + + def __init__(self, strategy_name: str, buckets_configuration: List) -> None: + self._name = strategy_name + self._buckets = buckets_configuration + + def get_name(self) -> str: + return self._name + + def get_buckets(self) -> List: + return self._buckets + + def get_minimum_bucket_size(self) -> int: + return min(map(lambda bucket_timespan_bucket_size_offset: bucket_timespan_bucket_size_offset[1], self.get_buckets())) + + def get_total_bucket_timespan(self) -> int: + return sum(map(lambda bucket_timespan_bucket_size_offset1: bucket_timespan_bucket_size_offset1[0], self.get_buckets())) + + +def get_revision_strategy(strategy_name: str) -> RevisionStrategy: """Returns a revision strategy object for a particular revision strategy name. This object can be used to obtain information about the revision strategy. 
@@ -61,25 +85,3 @@ def get_revision_strategy(strategy_name):
         return RevisionStrategy(strategy_name, buckets_configuration[strategy_name])
     else:
         raise ValueError('Strategy "{}" is not supported'.format(strategy_name))
-
-
-class RevisionStrategy:
-    HOURS = 3600
-    DAYS = 86400
-    WEEKS = 604800
-
-    def __init__(self, strategy_name, buckets_configuration):
-        self._name = strategy_name
-        self._buckets = buckets_configuration
-
-    def get_name(self):
-        return self._name
-
-    def get_buckets(self):
-        return self._buckets
-
-    def get_minimum_bucket_size(self):
-        return min(map(lambda bucket_timespan_bucket_size_offset: bucket_timespan_bucket_size_offset[1], self.get_buckets()))
-
-    def get_total_bucket_timespan(self):
-        return sum(map(lambda bucket_timespan_bucket_size_offset1: bucket_timespan_bucket_size_offset1[0], self.get_buckets()))
diff --git a/revision_utils.py
index 3aee91890..0f879e27c 100644
--- a/revision_utils.py
+++ b/revision_utils.py
@@ -7,12 +7,13 @@
 import datetime
 import hashlib
 import os
+from typing import Dict, List, Tuple
 
-from revision_strategies import get_revision_strategy
-from util import constants, log, pathutil
+from revision_strategies import get_revision_strategy, RevisionStrategy
+from util import constants, log, pathutil, rule
 
 
-def revision_eligible(max_size, data_obj_exists, size, path, groups, revision_store_exists):
+def revision_eligible(max_size: int, data_obj_exists: bool, size: int, path: str, groups: List, revision_store_exists: bool) -> Tuple[bool, str]:
     """Determine whether can create a revision of given data object.
 
     :param max_size: Max size that file can be to create a revision (in bytes)
@@ -55,7 +56,7 @@ def revision_eligible(max_size, data_obj_exists, size, path, groups, revision_st
     return True, ""
 
 
-def calculate_end_of_calendar_day():
+def calculate_end_of_calendar_day() -> int:
     """Calculate the unix timestamp for the end of the current day (Same as start of next day).
 
     :returns: End of calendar day - Timestamp of the end of the current day
@@ -67,7 +68,7 @@ def calculate_end_of_calendar_day():
     return int(tomorrow.strftime("%s"))
 
 
-def get_revision_store_path(zone, trailing_slash=False):
+def get_revision_store_path(zone: str, trailing_slash: bool = False) -> str:
     """Produces the logical path of the revision store
 
     :param zone: zone name
@@ -81,7 +82,12 @@ def get_revision_store_path(zone, trailing_slash=False):
         return os.path.join("/" + zone, constants.UUREVISIONCOLLECTION.lstrip(os.path.sep))
 
 
-def get_deletion_candidates(ctx, revision_strategy, revisions, initial_upper_time_bound, original_exists, verbose):
+def get_deletion_candidates(ctx: 'rule.Context',
+                            revision_strategy: RevisionStrategy,
+                            revisions: List,
+                            initial_upper_time_bound: int,
+                            original_exists: bool,
+                            verbose: bool) -> List:
     """Get revision data objects for a particular versioned data object that should be deleted, as per
        a given revision strategy.
 
@@ -181,7 +187,11 @@
     return deletion_candidates
 
 
-def revision_cleanup_prefilter(ctx, revisions_list, revision_strategy_name, original_exists_dict, verbose):
+def revision_cleanup_prefilter(ctx: 'rule.Context',
+                               revisions_list: List,
+                               revision_strategy_name: str,
+                               original_exists_dict: Dict,
+                               verbose: bool) -> List:
     """Filters out revisioned data objects from a list if we can easily determine that they don't meet
        criteria for being removed, for example if the number of revisions of an existing versioned
        data object is at most one.
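For illustration, the relocated RevisionStrategy class can be exercised directly. The bucket values below are assumptions for the sketch, not the ruleset's configured strategies; each bucket is read as a [timespan, max_revisions, offset] triple:

    from revision_strategies import RevisionStrategy

    buckets = [
        [RevisionStrategy.HOURS * 6, 1, 0],  # one revision kept per 6 hours
        [RevisionStrategy.DAYS, 1, 0],       # one revision kept per day
        [RevisionStrategy.WEEKS, 1, 0],      # one revision kept per week
    ]
    strategy = RevisionStrategy("example", buckets)
    strategy.get_minimum_bucket_size()    # 1: minimum over the max_revisions column
    strategy.get_total_bucket_timespan()  # 712800: sum over the timespan column, in seconds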
@@ -231,7 +241,7 @@ def revision_cleanup_prefilter(ctx, revisions_list, revision_strategy_name, orig return results -def get_resc(row): +def get_resc(row: List) -> str: """Get the resc id for a data object given the metadata provided (for revision job). :param row: metadata for the data object @@ -246,7 +256,7 @@ def get_resc(row): return row[3] -def get_balance_id(row, path): +def get_balance_id(row: List, path: str) -> int: """Get the balance id for a data object given the metadata provided (for revision job). :param row: metadata for the data object diff --git a/revisions.py b/revisions.py index 0a874da58..1d533ec07 100644 --- a/revisions.py +++ b/revisions.py @@ -8,6 +8,7 @@ import random import re import time +from typing import Dict, Iterator, List, Tuple import genquery import irods_types @@ -30,7 +31,7 @@ @api.make() -def api_revisions_search_on_filename(ctx, searchString, offset=0, limit=10): +def api_revisions_search_on_filename(ctx: rule.Context, searchString: str, offset: int = 0, limit: int = 10) -> api.Result: """Search revisions of a file in a research folder and return list of corresponding revisions. :param ctx: Combined type of a callback and rei struct @@ -114,7 +115,7 @@ def api_revisions_search_on_filename(ctx, searchString, offset=0, limit=10): @api.make() -def api_revisions_list(ctx, path): +def api_revisions_list(ctx: rule.Context, path: str) -> api.Result: """Get list revisions of a file in a research folder. :param ctx: Combined type of a callback and rei struct @@ -161,7 +162,7 @@ def api_revisions_list(ctx, path): @api.make() -def api_revisions_restore(ctx, revision_id, overwrite, coll_target, new_filename): +def api_revisions_restore(ctx: rule.Context, revision_id: str, overwrite: str, coll_target: str, new_filename: str) -> api.Result: """Copy selected revision to target collection with given name. :param ctx: Combined type of a callback and rei struct @@ -244,7 +245,7 @@ def api_revisions_restore(ctx, revision_id, overwrite, coll_target, new_filename return api.Result.ok() -def resource_modified_post_revision(ctx, resource, zone, path): +def resource_modified_post_revision(ctx: rule.Context, resource: str, zone: str, path: str) -> None: """Create revisions on file modifications. This policy should trigger whenever a new file is added or modified @@ -309,12 +310,20 @@ def resource_modified_post_revision(ctx, resource, zone, path): # CAT_SQL_ERROR: this AVU is already present. No need to set it anymore. pass else: + error_msg = "" error_status = re.search(r"status \[(.*?)\]", str(e)) - log.write(ctx, "Schedule revision of data object {} failed with error {}".format(path, error_status.group(1))) + if error_status is not None: + error_msg = error_status.group(1) + log.write(ctx, "Schedule revision of data object {} failed with error {}".format(path, error_msg)) @rule.make() -def rule_revision_batch(ctx, verbose, balance_id_min, balance_id_max, batch_size_limit, dry_run='0'): +def rule_revision_batch(ctx: rule.Context, + verbose: str, + balance_id_min: str, + balance_id_max: str, + batch_size_limit: str, + dry_run: str = '0') -> None: """Scheduled revision creation batch job. Creates revisions for all data objects (in research space) marked with 'org_revision_scheduled' metadata. 
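The balance_id_min/balance_id_max windows used by the batch rules only work because every data object maps deterministically onto the 1-64 range, so concurrent jobs with disjoint windows never pick up the same object. A hypothetical sketch of such a mapping (get_balance_id above is the authoritative version; it derives the id from the data object's metadata row):

    import hashlib

    def path_to_balance_id(path: str) -> int:
        # Fold an MD5 of the logical path onto 1..64; the result is stable
        # across runs, so an object always lands in the same job's window.
        return int(hashlib.md5(path.encode("utf-8")).hexdigest(), 16) % 64 + 1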
@@ -432,7 +441,7 @@ def rule_revision_batch(ctx, verbose, balance_id_min, balance_id_max, batch_size log.write(ctx, "Batch revision job ignored {} data objects in research area, excluding data objects postponed because of delay time.".format(count_ignored)) -def check_eligible_and_create_revision(ctx, print_verbose, attr, errorattr, data_id, resc, path): +def check_eligible_and_create_revision(ctx: rule.Context, print_verbose: bool, attr: str, errorattr: str, data_id: str, resc: str, path: str) -> bool: """ Check that a data object is eligible for a revision, and if so, create a revision. Then remove or add revision flags as appropriate. @@ -475,15 +484,15 @@ def check_eligible_and_create_revision(ctx, print_verbose, attr, errorattr, data return revision_created -def remove_revision_error_flag(ctx, data_id, path, errorattr): +def remove_revision_error_flag(ctx: rule.Context, data_id: str, path: str, errorattr: str) -> None: """Remove revision_error flag""" # Revision creation OK. Remove any existing error indication attribute. - iter2 = genquery.row_iterator( + iter = genquery.row_iterator( "DATA_NAME", "DATA_ID = '{}' AND META_DATA_ATTR_NAME = '{}' AND META_DATA_ATTR_VALUE = 'true'".format(data_id, errorattr), genquery.AS_LIST, ctx ) - for _row in iter2: + for _row in iter: # Only try to remove it if we know for sure it exists, # otherwise we get useless errors in the log. avu.rmw_from_data(ctx, path, errorattr, "%") @@ -491,7 +500,7 @@ def remove_revision_error_flag(ctx, data_id, path, errorattr): break -def remove_revision_scheduled_flag(ctx, print_verbose, path, attr): +def remove_revision_scheduled_flag(ctx: rule.Context, print_verbose: bool, path: str, attr: str) -> None: """Remove revision_scheduled flag (no matter if it succeeded or not).""" # rods should have been given own access via policy to allow AVU # changes. @@ -517,7 +526,7 @@ def remove_revision_scheduled_flag(ctx, print_verbose, path, attr): log.write(ctx, "ERROR - Scheduled revision creation of <{}>: could not remove schedule flag".format(path)) -def is_revision_blocked_by_admin(ctx): +def is_revision_blocked_by_admin(ctx: rule.Context) -> bool: """Admin can put the revision process on a hold by adding a file called 'stop_revisions' in collection /yoda/flags. :param ctx: Combined type of a callback and rei struct @@ -529,7 +538,7 @@ def is_revision_blocked_by_admin(ctx): return collection.exists(ctx, path) -def get_revision_store(ctx, group_name): +def get_revision_store(ctx: rule.Context, group_name: str) -> str | None: """Get path to revision store for group if the path exists. :param ctx: Combined type of a callback and rei struct @@ -545,7 +554,7 @@ def get_revision_store(ctx, group_name): return revision_store if revision_store_exists else None -def revision_create(ctx, print_verbose, data_id, resource, group_name, revision_store): +def revision_create(ctx: rule.Context, print_verbose: bool, data_id: str, resource: str, group_name: str, revision_store: str) -> bool: """Create a revision of a data object in a revision folder. :param ctx: Combined type of a callback and rei struct @@ -633,10 +642,10 @@ def revision_create(ctx, print_verbose, data_id, resource, group_name, revision_ return revision_created -def revision_cleanup_scan_revision_objects(ctx, revision_list): +def revision_cleanup_scan_revision_objects(ctx: rule.Context, revision_list: List) -> List: """Obtain information about all revisions. 
- :param ctx: Combined type of a callback and rei struct + :param ctx: Combined type of a callback and rei struct :param revision_list: List of revision data object IDs :returns: Nested list, where the outer list represents revisioned data objects, @@ -699,7 +708,7 @@ def revision_cleanup_scan_revision_objects(ctx, revision_list): return revisions_info -def get_all_revision_data_ids(ctx): +def get_all_revision_data_ids(ctx: rule.Context) -> Iterator[Tuple[str, str]]: """"Returns all data IDs of revision data objects :param ctx: Combined type of a callback and rei struct @@ -718,7 +727,7 @@ def get_all_revision_data_ids(ctx): yield (row[0], row[1]) -def _update_revision_store_acls(ctx): +def _update_revision_store_acls(ctx: rule.Context) -> None: """Sets the revision store ACL to grant present rodsadmin user access :param ctx: Combined type of a callback and rei struct @@ -741,7 +750,7 @@ def _update_revision_store_acls(ctx): @rule.make(inputs=[0], outputs=[1]) -def rule_revisions_cleanup_collect(ctx, target_batch_size): +def rule_revisions_cleanup_collect(ctx: rule.Context, target_batch_size: str) -> str: """Collect a list of revision data object IDs and puts them in the spool system for processing by the revision cleanup scan job. @@ -762,7 +771,6 @@ def rule_revisions_cleanup_collect(ctx, target_batch_size): log.write(ctx, "Starting revision cleanup collect process.") - target_batch_size = int(target_batch_size) ingest_state = { "batch": [], "current_coll": None, @@ -770,7 +778,7 @@ def rule_revisions_cleanup_collect(ctx, target_batch_size): } number_revisions = 0 - def ingest_new_data_id(ctx, coll_id, data_id, ingest_state, target_batch_size): + def ingest_new_data_id(ctx: rule.Context, coll_id: str, data_id: str, ingest_state: Dict, target_batch_size: int) -> None: """Read data object. Store it in ingest state as long as its collection ID is the same as the previous one, so that all data objects in the same collection are part of the same batch. 
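As a simplified, self-contained sketch of the batching behaviour described above (assumed semantics: input rows arrive ordered by collection ID, and a collection is never split across batches):

    from typing import Iterator, List, Tuple

    def cut_batches(rows: Iterator[Tuple[str, str]], target: int) -> Iterator[List[str]]:
        """Yield batches of data IDs of roughly target size, cut only at collection boundaries."""
        batch: List[str] = []    # completed collections awaiting spooling
        current: List[str] = []  # data IDs of the collection currently being read
        prev_coll = None
        for coll_id, data_id in rows:
            if coll_id != prev_coll and current:
                # Collection boundary reached: fold the finished collection
                # into the batch, and spool the batch once it is large enough.
                batch += current
                current = []
                if len(batch) >= target:
                    yield batch
                    batch = []
            current.append(data_id)
            prev_coll = coll_id
        if batch or current:
            yield batch + current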
@@ -804,10 +812,10 @@ def ingest_new_data_id(ctx, coll_id, data_id, ingest_state, target_batch_size): for (coll_id, data_id) in get_all_revision_data_ids(ctx): number_revisions += 1 - ingest_new_data_id(ctx, coll_id, data_id, ingest_state, target_batch_size) + ingest_new_data_id(ctx, coll_id, data_id, ingest_state, int(target_batch_size)) if (len(ingest_state["batch"]) > 0 - and len(ingest_state["batch"]) + len(ingest_state["objects_for_current_coll"]) >= target_batch_size): + and len(ingest_state["batch"]) + len(ingest_state["objects_for_current_coll"]) >= int(target_batch_size)): put_spool_data(constants.PROC_REVISION_CLEANUP_SCAN, [ingest_state["batch"]]) ingest_state["batch"] = [] @@ -820,7 +828,7 @@ def ingest_new_data_id(ctx, coll_id, data_id, ingest_state, target_batch_size): @rule.make(inputs=[0, 1], outputs=[2]) -def rule_revisions_cleanup_scan(ctx, revision_strategy_name, verbose_flag): +def rule_revisions_cleanup_scan(ctx: rule.Context, revision_strategy_name: str, verbose_flag: str) -> str: """Collect revision data and put it in the spool system for processing by the revision cleanup scan jobs @@ -865,11 +873,11 @@ def rule_revisions_cleanup_scan(ctx, revision_strategy_name, verbose_flag): return 'Revision store cleanup scan job completed' -def get_original_exists_dict(ctx, revision_data): +def get_original_exists_dict(ctx: rule.Context, revision_data: List) -> Dict: """Returns a dictionary that indicates which original data objects of revision data still exist - :param ctx: Combined type of a callback and rei struct - :param revision_data: List of lists of revision tuples in (data_id, timestamp, revision_path) format + :param ctx: Combined type of a callback and rei struct + :param revision_data: List of lists of revision tuples in (data_id, timestamp, revision_path) format :returns: dictionary, in which the keys are revision path. The values are booleans, and indicate whether the versioned data object of the revision still exists. If the revision data object does not @@ -891,7 +899,7 @@ def get_original_exists_dict(ctx, revision_data): return result -def versioned_data_object_exists(ctx, revision_path): +def versioned_data_object_exists(ctx: rule.Context, revision_path: str) -> bool: """Checks whether the version data object of a revision still exists :param ctx: Combined type of a callback and rei struct @@ -902,19 +910,7 @@ def versioned_data_object_exists(ctx, revision_path): :raises KeyError: If revision data object does not have revision AVUs that point to versioned data object. - - :raises UnicodeEncodeError: If the revision path cannot be converted to a utf-8 byte string. 
""" - - if isinstance(revision_path, unicode): - try: - # Switch back to bytes for now - # TODO change logic in Python 3 - revision_path = revision_path.encode('utf-8') - except UnicodeEncodeError: - log.write(ctx, "File path {} is not UTF-8 encoded or is not compatible with UTF-8 encoding".format(revision_path)) - raise - revision_avus = avu.of_data(ctx, revision_path) avu_dict = {a: v for (a, v, u) in revision_avus} @@ -931,7 +927,7 @@ def versioned_data_object_exists(ctx, revision_path): @rule.make(inputs=[0, 1, 2], outputs=[3]) -def rule_revisions_cleanup_process(ctx, revision_strategy_name, endOfCalendarDay, verbose_flag): +def rule_revisions_cleanup_process(ctx: rule.Context, revision_strategy_name: str, endOfCalendarDay: str, verbose_flag: str) -> str: """Applies the selected revision strategy to a batch of spooled revision data :param ctx: Combined type of a callback and rei struct @@ -999,7 +995,7 @@ def rule_revisions_cleanup_process(ctx, revision_strategy_name, endOfCalendarDay return 'Revision store cleanup processing job completed' -def revision_remove(ctx, revision_id, revision_path): +def revision_remove(ctx: rule.Context, revision_id: str, revision_path: str) -> bool: """Remove a revision from the revision store. Called by revision-cleanup.r cronjob. @@ -1031,24 +1027,19 @@ def revision_remove(ctx, revision_id, revision_path): return False -def memory_rss_usage(): - """ - The RSS (resident) memory size in bytes for the current process. - """ +def memory_rss_usage() -> int: + """The RSS (resident) memory size in bytes for the current process.""" p = psutil.Process() return p.memory_info().rss -def show_memory_usage(ctx): - """ - For debug purposes show the current RSS usage. - """ +def show_memory_usage(ctx: rule.Context) -> None: + """For debug purposes show the current RSS usage.""" log.write(ctx, "current RSS usage: {} bytes".format(memory_rss_usage())) -def memory_limit_exceeded(rss_limit): - """ - True when a limit other than 0 was specified and memory usage is currently +def memory_limit_exceeded(rss_limit: int) -> bool: + """True when a limit other than 0 was specified and memory usage is currently above this limit. Otherwise False. :param rss_limit: Max memory usage in bytes @@ -1056,15 +1047,14 @@ def memory_limit_exceeded(rss_limit): :returns: Boolean indicating if memory limited exceeded """ rss_limit = int(rss_limit) - return rss_limit and memory_rss_usage() > rss_limit + return rss_limit > 0 and memory_rss_usage() > rss_limit -def remove_revision_creation_avu_from_deleted_data_objects(ctx, print_verbose): - """ - Removes revision creation AVUs from deleted data objects [marked with 'org_revision_scheduled' metadata]. +def remove_revision_creation_avu_from_deleted_data_objects(ctx: rule.Context, print_verbose: bool) -> None: + """Removes revision creation AVUs from deleted data objects [marked with 'org_revision_scheduled' metadata]. 
- :param ctx: Combined type of a callback and rei struct - :param print_verbose: Whether to log verbose messages for troubleshooting (Boolean) + :param ctx: Combined type of a callback and rei struct + :param print_verbose: Whether to log verbose messages for troubleshooting """ revision_avu_name = constants.UUORGMETADATAPREFIX + "revision_scheduled" diff --git a/schema.py b/schema.py index 6ce9d4707..bfea19aa2 100644 --- a/schema.py +++ b/schema.py @@ -4,6 +4,7 @@ __license__ = 'GPLv3, see LICENSE' import re +from typing import Dict, Tuple import genquery @@ -14,12 +15,12 @@ @api.make() -def api_schema_get_schemas(ctx): +def api_schema_get_schemas(ctx: rule.Context) -> api.Result: """Retrieve selectable schemas and default schema. :param ctx: Combined type of a callback and rei struct - :returns: Dit with schemas and default schema. + :returns: Dict with schemas and default schema. """ schemas = [] @@ -42,7 +43,7 @@ def api_schema_get_schemas(ctx): 'schema_default': schema_default} -def get_schema_collection(ctx, rods_zone, group_name): +def get_schema_collection(ctx: rule.Context, rods_zone: str, group_name: str) -> str: """Determine schema collection based upon rods zone and name of the group. If there is no schema id set on group level and @@ -87,7 +88,7 @@ def get_schema_collection(ctx, rods_zone, group_name): return config.default_yoda_schema -def get_schema_id_from_group(ctx, group_name): +def get_schema_id_from_group(ctx: rule.Context, group_name: str) -> str | None: """Returns the schema_id value that has been set on an iRODS group :param ctx: Combined type of a callback and rei struct @@ -107,7 +108,7 @@ def get_schema_id_from_group(ctx, group_name): return None -def get_active_schema_path(ctx, path): +def get_active_schema_path(ctx: rule.Context, path: str) -> str: """Get the iRODS path to a schema file from a deposit, research or vault path. The schema collection is determined from group name of the path. @@ -143,7 +144,7 @@ def get_active_schema_path(ctx, path): return '/{}/yoda/schemas/{}/metadata.json'.format(rods_zone, schema_coll) -def get_active_schema(ctx, path): +def get_active_schema(ctx: rule.Context, path: str) -> Dict: """Get a schema object from a research or vault path. :param ctx: Combined type of a callback and rei struct @@ -155,7 +156,7 @@ def get_active_schema(ctx, path): return jsonutil.read(ctx, get_active_schema_path(ctx, path)) -def get_active_schema_uischema(ctx, path): +def get_active_schema_uischema(ctx: rule.Context, path: str) -> Tuple[Dict, Dict]: """Get a schema and uischema object from a research or vault path. :param ctx: Combined type of a callback and rei struct @@ -171,7 +172,7 @@ def get_active_schema_uischema(ctx, path): jsonutil.read(ctx, uischema_path) -def get_active_schema_id(ctx, path): +def get_active_schema_id(ctx: rule.Context, path: str) -> str: """Get the active schema id from a research or vault path. 
:param ctx: Combined type of a callback and rei struct @@ -183,14 +184,14 @@ def get_active_schema_id(ctx, path): return get_active_schema(ctx, path)['$id'] -def get_schema_id(ctx, metadata_path, metadata=None): +def get_schema_id(ctx: rule.Context, metadata_path: str, metadata: Dict | None = None) -> str | None: """Get the current schema id from a path to a metadata json.""" if metadata is None: metadata = jsonutil.read(ctx, metadata_path) return meta.metadata_get_schema_id(metadata) -def get_schema_path_by_id(ctx, path, schema_id): +def get_schema_path_by_id(ctx: rule.Context, path: str, schema_id: str) -> str | None: """Get a schema path from a schema id.""" _, zone, _2, _3 = pathutil.info(path) @@ -203,7 +204,7 @@ def get_schema_path_by_id(ctx, path, schema_id): return None -def get_schema_by_id(ctx, path, schema_id): +def get_schema_by_id(ctx: rule.Context, path: str, schema_id: str) -> Dict | None: """ Get a schema from a schema id. @@ -216,7 +217,7 @@ def get_schema_by_id(ctx, path, schema_id): :returns: Schema object (parsed from JSON) """ - path = get_schema_path_by_id(ctx, path, schema_id) - if path is None: + schema_path = get_schema_path_by_id(ctx, path, schema_id) + if schema_path is None: return None - return jsonutil.read(ctx, path) + return jsonutil.read(ctx, schema_path) diff --git a/schema_transformation.py b/schema_transformation.py index 6cd3470ab..0bcd322f5 100644 --- a/schema_transformation.py +++ b/schema_transformation.py @@ -13,6 +13,7 @@ import os import re import time +from typing import Callable, Dict import genquery import session_vars @@ -23,7 +24,7 @@ from util import * -def execute_transformation(ctx, metadata_path, transform, keep_metadata_backup=True): +def execute_transformation(ctx: rule.Context, metadata_path: str, transform: Callable, keep_metadata_backup: bool = True) -> None: """Transform a metadata file with the given transformation function.""" coll, data = os.path.split(metadata_path) @@ -50,10 +51,10 @@ def execute_transformation(ctx, metadata_path, transform, keep_metadata_backup=T @api.make() -def api_transform_metadata(ctx, coll, keep_metadata_backup=True): +def api_transform_metadata(ctx: rule.Context, coll: str, keep_metadata_backup: bool = True) -> api.Result: """Transform a yoda-metadata file in the given collection to the active schema.""" metadata_path = meta.get_collection_metadata_path(ctx, coll) - if metadata_path.endswith('.json'): + if metadata_path and metadata_path.endswith('.json'): # JSON metadata. log.write(ctx, 'Transforming JSON metadata in the research space: <{}>'.format(metadata_path)) transform = get(ctx, metadata_path) @@ -64,10 +65,9 @@ def api_transform_metadata(ctx, coll, keep_metadata_backup=True): execute_transformation(ctx, metadata_path, transform, keep_metadata_backup) else: return api.Error('no_metadata', 'No metadata file found') - return None -def get(ctx, metadata_path, metadata=None): +def get(ctx: rule.Context, metadata_path: str, metadata: Dict | None = None) -> Callable | None: """Find a transformation that can be executed on the given metadata JSON. :param ctx: Combined type of a ctx and rei struct @@ -82,8 +82,9 @@ def get(ctx, metadata_path, metadata=None): # Ideally, we would check that the metadata is valid in its current # schema before claiming that we can transform it... 
- # print('{} -> {}'.format(src,dst)) + if src is None: + return None return schema_transformations.get(src, dst) except KeyError: @@ -114,7 +115,7 @@ def rule_get_transformation_info(rule_args, callback, rei): rule_args[1:3] = 'true', transformation_html(transform) -def copy_acls_from_parent(ctx, path, recursive_flag): +def copy_acls_from_parent(ctx: rule.Context, path: str, recursive_flag: str) -> None: """ When inheritance is missing we need to copy ACLs when introducing new data in vault package. @@ -310,7 +311,7 @@ def rule_batch_vault_metadata_correct_orcid_format(rule_args, callback, rei): "") -def transform_orcid(ctx, m): +def transform_orcid(ctx: rule.Context, m: Dict) -> Dict: """ Transform all present orcid's into the correct format. If possible! @@ -342,7 +343,7 @@ def transform_orcid(ctx, m): return {'metadata': m, 'data_changed': data_changed} -def correctify_orcid(org_orcid): +def correctify_orcid(org_orcid: str) -> str | None: """Function to correct illformatted ORCIDs. Returns None if value cannot be fixed.""" # Get rid of all spaces. orcid = org_orcid.replace(' ', '') @@ -359,7 +360,7 @@ def correctify_orcid(org_orcid): return "https://orcid.org/{}".format(orcs[-1]) -def html(f): +def html(f: Callable) -> str: """Get a human-readable HTML description of a transformation function. The text is derived from the function's docstring. @@ -368,18 +369,19 @@ def html(f): :returns: Human-readable HTML description of a transformation function """ + docstring = "" if f.__doc__ is None else f.__doc__ description = '\n'.join(map(lambda paragraph: '
<p>{}</p>
'.format( # Trim whitespace. re.sub(r'\s+', ' ', paragraph).strip()), # Docstring paragraphs are separated by blank lines. - re.split('\n{2,}', f.__doc__))) + re.split('\n{2,}', docstring))) # Remove docstring. return re.sub('((:param).*)|((:returns:).*)', ' ', description) @rule.make(inputs=[], outputs=[0]) -def rule_batch_vault_metadata_schema_report(ctx): +def rule_batch_vault_metadata_schema_report(ctx: rule.Context) -> str: """Show vault metadata schema about each data package in vault :param ctx: Combined type of a callback and rei struct diff --git a/schema_transformations.py b/schema_transformations.py index d61f324eb..82437679d 100644 --- a/schema_transformations.py +++ b/schema_transformations.py @@ -4,6 +4,7 @@ __license__ = 'GPLv3, see LICENSE' import re +from typing import Callable, Dict from schema_transformations_utils import correctify_isni, correctify_orcid, correctify_researcher_id, correctify_scopus @@ -25,7 +26,7 @@ # The docstring of a transformation function should describe the transformation # in a human-readable manner: it is provided to the user executing the transformation. -def _default0_default1(ctx, m): +def _default0_default1(ctx: rule.Context, m: Dict) -> Dict: """ A Data type field is added to be used for publication purposes to DataCite. @@ -45,7 +46,7 @@ def _default0_default1(ctx, m): :returns: Transformed (default-1) JSON object """ - def fixup_name(n): + def fixup_name(n: str) -> Dict: """Split a name into a first and last name, error-prone, but acceptable.""" n.strip() # Trim whitespace, if any. @@ -76,7 +77,7 @@ def fixup_name(n): return m -def _default1_default2(ctx, m): +def _default1_default2(ctx: rule.Context, m: Dict) -> Dict: """ Metadata fields Discipline, Language and Tags have become required fields. @@ -108,7 +109,7 @@ def _default1_default2(ctx, m): return m -def _default2_default3(ctx, m): +def _default2_default3(ctx: rule.Context, m: Dict) -> Dict: """ Add affiliation identifiers to creators and contributors. @@ -265,7 +266,7 @@ def _default2_default3(ctx, m): return m -def _core1_core2(ctx, m): +def _core1_core2(ctx: rule.Context, m: Dict) -> Dict: """ Add affiliation identifiers to creators. @@ -297,7 +298,7 @@ def _core1_core2(ctx, m): return m -def _dag0_default2(ctx, m): +def _dag0_default2(ctx: rule.Context, m: Dict) -> Dict: """ Transform dag-0 data to the default-2 schema definition @@ -361,7 +362,7 @@ def _dag0_default2(ctx, m): return m -def _default1_teclab0(ctx, m): +def _default1_teclab0(ctx: rule.Context, m: Dict) -> Dict: """ Transform Default-1 data to the teclab-0 schema definition @@ -482,7 +483,7 @@ def _default1_teclab0(ctx, m): return m -def _default1_hptlab0(ctx, m): +def _default1_hptlab0(ctx: rule.Context, m: Dict) -> Dict: """ Transform Default-1 data to the hptlab-0 schema definition @@ -601,7 +602,7 @@ def _default1_hptlab0(ctx, m): return m -def _hptlab0_hptlab1(ctx, m): +def _hptlab0_hptlab1(ctx: rule.Context, m: Dict) -> Dict: """ Transform hptlab-0 data to the hptlab-1 schema definition which holds better qualified lists. @@ -657,7 +658,7 @@ def _hptlab0_hptlab1(ctx, m): return m -def _teclab0_teclab1(ctx, m): +def _teclab0_teclab1(ctx: rule.Context, m: Dict) -> Dict: """ Transform teclab-0 data to the teclab-1 schema definition which holds better qualified lists. @@ -716,7 +717,7 @@ def _teclab0_teclab1(ctx, m): # }}} -def get(src_id, dst_id): +def get(src_id: str, dst_id: str) -> Callable | None: """ Get a transformation function that maps metadata from the given src schema id to the dst schema id. 
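For illustration, the expected behaviour of the correctify_* helpers changed below; the ORCID results assume the upstream hyphenated-identifier check, while the ISNI result follows directly from the regex visible in the diff:

    from schema_transformations_utils import correctify_isni, correctify_orcid

    correctify_orcid(" 0000-0002-1825-0097 ")
    # "https://orcid.org/0000-0002-1825-0097": spaces stripped, URL prefix added
    correctify_orcid("0000000218250097")
    # None: missing hyphens cannot be reconstructed
    correctify_isni("isni.org/isni/0000 0001 2281 955X")
    # "https://isni.org/isni/000000012281955X": 15 digits plus a final digit or X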
diff --git a/schema_transformations_utils.py b/schema_transformations_utils.py index 2e5e499b5..d01df318b 100644 --- a/schema_transformations_utils.py +++ b/schema_transformations_utils.py @@ -6,7 +6,7 @@ import re -def correctify_orcid(org_orcid): +def correctify_orcid(org_orcid: str) -> str | None: """Correct illformatted ORCID.""" # Get rid of all spaces. orcid = org_orcid.replace(' ', '') @@ -23,7 +23,7 @@ def correctify_orcid(org_orcid): return "https://orcid.org/{}".format(orcs[-1]) -def correctify_scopus(org_scopus): +def correctify_scopus(org_scopus: str) -> str | None: """Correct illformatted Scopus.""" # Get rid of all spaces. new_scopus = org_scopus.replace(' ', '') @@ -34,7 +34,7 @@ def correctify_scopus(org_scopus): return new_scopus -def correctify_isni(org_isni): +def correctify_isni(org_isni: str) -> str | None: """Correct ill-formatted ISNI.""" # Remove all spaces. new_isni = org_isni.replace(' ', '') @@ -44,14 +44,14 @@ def correctify_isni(org_isni): # The last part should hold a valid id like eg: 123412341234123X. # If not, it is impossible to correct it to the valid isni format - new_isni = new_isni.split('/') - if not re.search("^[0-9]{15}[0-9X]$", new_isni[-1]): + new_isni_split = new_isni.split('/') + if not re.search("^[0-9]{15}[0-9X]$", new_isni_split[-1]): return None - return "https://isni.org/isni/{}".format(new_isni[-1]) + return "https://isni.org/isni/{}".format(new_isni_split[-1]) -def correctify_researcher_id(org_researcher_id): +def correctify_researcher_id(org_researcher_id: str) -> str: """Correct illformatted ResearcherID.""" # Get rid of all spaces. researcher_id = org_researcher_id.replace(' ', '') diff --git a/settings.py b/settings.py index 24bc673ab..ba133eef3 100644 --- a/settings.py +++ b/settings.py @@ -3,6 +3,8 @@ __copyright__ = 'Copyright (c) 2021-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import Dict, Sequence + from genquery import Query from util import * @@ -21,7 +23,7 @@ SETTINGS_KEY = constants.UUORGMETADATAPREFIX + "settings_" -def load(ctx, setting, username=None): +def load(ctx: rule.Context, setting: str, username: str | None = None) -> Sequence[str]: """Load user setting. :param ctx: Combined type of a callback and rei struct @@ -44,7 +46,7 @@ def load(ctx, setting, username=None): @api.make() -def api_settings_load(ctx): +def api_settings_load(ctx: rule.Context) -> api.Result: """Load user settings. :param ctx: Combined type of a callback and rei struct @@ -64,7 +66,7 @@ def api_settings_load(ctx): @api.make() -def api_settings_save(ctx, settings): +def api_settings_save(ctx: rule.Context, settings: Dict) -> api.Result: """Save user settings. 
:param ctx: Combined type of a callback and rei struct diff --git a/setup.cfg b/setup.cfg index a9ef75804..51c0fb4fe 100644 --- a/setup.cfg +++ b/setup.cfg @@ -6,3 +6,17 @@ docstring_style=sphinx max-line-length=127 exclude=__init__.py,tools,tests/env/ application-import-names=avu,conftest,util,api,config,constants,data_access_token,datacite,datarequest,data_object,epic,error,folder,groups,groups_import,json_datacite,json_landing_page,jsonutil,log,mail,meta,meta_form,msi,notifications,schema,schema_transformation,schema_transformations,settings,pathutil,provenance,policies_intake,policies_datamanager,policies_datapackage_status,policies_folder_status,policies_datarequest_status,publication,query,replication,revisions,revision_strategies,revision_utils,rule,user,vault,sram,arb_data_manager,cached_data_manager,resource,yoda_names,policies_utils + +[mypy] +exclude = tools|unit-tests +disable_error_code = arg-type, attr-defined, index, method-assign, misc, no-redef, operator, union-attr, unreachable, var-annotated +ignore_missing_imports = True +warn_unreachable = True +no_implicit_optional = True +check_untyped_defs = False +disallow_any_generics = False +disallow_incomplete_defs = True +disallow_untyped_calls = False +disallow_untyped_defs = True +show_error_codes = True +show_error_context = True diff --git a/sram.py b/sram.py index ad06db8dc..33caa17a2 100644 --- a/sram.py +++ b/sram.py @@ -5,6 +5,7 @@ import datetime import time +from typing import Dict, List import requests import session_vars @@ -13,7 +14,7 @@ from util import * -def sram_post_collaboration(ctx, group_name, description): +def sram_post_collaboration(ctx: rule.Context, group_name: str, description: str) -> Dict: """Create SRAM Collaborative Organisation Identifier. :param ctx: Combined type of a callback and rei struct @@ -57,7 +58,7 @@ def sram_post_collaboration(ctx, group_name, description): return data -def sram_get_uid(ctx, co_identifier, user_name): +def sram_get_uid(ctx: rule.Context, co_identifier: str, user_name: str) -> str: """Get SRAM Collaboration member uid. :param ctx: Combined type of a callback and rei struct @@ -91,7 +92,7 @@ def sram_get_uid(ctx, co_identifier, user_name): return uid -def sram_delete_collaboration(ctx, co_identifier): +def sram_delete_collaboration(ctx: rule.Context, co_identifier: str) -> bool: """Delete SRAM Collaborative Organisation. :param ctx: Combined type of a callback and rei struct @@ -113,7 +114,7 @@ def sram_delete_collaboration(ctx, co_identifier): return response.status_code == 204 -def sram_delete_collaboration_membership(ctx, co_identifier, uuid): +def sram_delete_collaboration_membership(ctx: rule.Context, co_identifier: str, uuid: str) -> bool: """Delete SRAM Collaborative Organisation membership. :param ctx: Combined type of a callback and rei struct @@ -136,7 +137,7 @@ def sram_delete_collaboration_membership(ctx, co_identifier, uuid): return response.status_code == 204 -def sram_put_collaboration_invitation(ctx, group_name, username, co_identifier): +def sram_put_collaboration_invitation(ctx: rule.Context, group_name: str, username: str, co_identifier: str) -> bool: """Create SRAM Collaborative Organisation Identifier. 
:param ctx: Combined type of a ctx and rei struct @@ -180,7 +181,7 @@ def sram_put_collaboration_invitation(ctx, group_name, username, co_identifier): return response.status_code == 201 -def sram_connect_service_collaboration(ctx, short_name): +def sram_connect_service_collaboration(ctx: rule.Context, short_name: str) -> bool: """Connect a service to an existing SRAM collaboration. :param ctx: Combined type of a ctx and rei struct @@ -208,7 +209,7 @@ def sram_connect_service_collaboration(ctx, short_name): return response.status_code == 201 -def invitation_mail_group_add_user(ctx, group_name, username, co_identifier): +def invitation_mail_group_add_user(ctx: rule.Context, group_name: str, username: str, co_identifier: str) -> str: """Send invitation email to newly added user to the group. :param ctx: Combined type of a ctx and rei struct @@ -235,7 +236,7 @@ def invitation_mail_group_add_user(ctx, group_name, username, co_identifier): """.format(username.split('@')[0], session_vars.get_map(ctx.rei)["client_user"]["user_name"], config.sram_rest_api_url, co_identifier)) -def sram_update_collaboration_membership(ctx, co_identifier, uuid, new_role): +def sram_update_collaboration_membership(ctx: rule.Context, co_identifier: str, uuid: str, new_role: str) -> bool: """Update SRAM Collaborative Organisation membership. :param ctx: Combined type of a callback and rei struct @@ -269,13 +270,13 @@ def sram_update_collaboration_membership(ctx, co_identifier, uuid, new_role): return response.status_code == 201 -def sram_get_co_members(ctx, co_identifier): +def sram_get_co_members(ctx: rule.Context, co_identifier: str) -> List[str]: """Get SRAM Collaboration members. :param ctx: Combined type of a callback and rei struct :param co_identifier: SRAM CO identifier - :returns: Email of the user + :returns: List of emails of the SRAM Collaboration members """ url = "{}/api/collaborations/v1/{}".format(config.sram_rest_api_url, co_identifier) headers = {'Content-Type': 'application/json', 'charset': 'UTF-8', 'Authorization': 'bearer ' + config.sram_api_key} diff --git a/vault.py b/vault.py index 5a02a994d..d4bb203e0 100644 --- a/vault.py +++ b/vault.py @@ -8,9 +8,9 @@ import subprocess import time from datetime import datetime +from typing import Dict, List, Tuple import genquery -import irods_types from dateutil import parser import folder @@ -48,7 +48,7 @@ @api.make() -def api_vault_submit(ctx, coll, previous_version=None): +def api_vault_submit(ctx: rule.Context, coll: str, previous_version: str | None = None) -> api.Result: """Submit data package for publication. :param ctx: Combined type of a callback and rei struct @@ -72,7 +72,7 @@ def api_vault_submit(ctx, coll, previous_version=None): @api.make() -def api_vault_approve(ctx, coll): +def api_vault_approve(ctx: rule.Context, coll: str) -> api.Result: """Approve data package for publication. :param ctx: Combined type of a callback and rei struct @@ -102,7 +102,7 @@ def api_vault_approve(ctx, coll): @api.make() -def api_vault_cancel(ctx, coll): +def api_vault_cancel(ctx: rule.Context, coll: str) -> api.Result: """Cancel submit of data package. :param ctx: Combined type of a callback and rei struct @@ -125,7 +125,7 @@ def api_vault_cancel(ctx, coll): @api.make() -def api_vault_depublish(ctx, coll): +def api_vault_depublish(ctx: rule.Context, coll: str) -> api.Result: """Depublish data package. 
:param ctx: Combined type of a callback and rei struct @@ -148,7 +148,7 @@ def api_vault_depublish(ctx, coll): @api.make() -def api_vault_republish(ctx, coll): +def api_vault_republish(ctx: rule.Context, coll: str) -> api.Result: """Republish data package. :param ctx: Combined type of a callback and rei struct @@ -171,7 +171,7 @@ def api_vault_republish(ctx, coll): @api.make() -def api_vault_copy_to_research(ctx, coll_origin, coll_target): +def api_vault_copy_to_research(ctx: rule.Context, coll_origin: str, coll_target: str) -> api.Result: """Copy data package from vault to research space. :param ctx: Combined type of a callback and rei struct @@ -242,7 +242,7 @@ def api_vault_copy_to_research(ctx, coll_origin, coll_target): @api.make() -def api_vault_preservable_formats_lists(ctx): +def api_vault_preservable_formats_lists(ctx: rule.Context) -> api.Result: """Retrieve lists of preservable file formats on the system. :param ctx: Combined type of a callback and rei struct @@ -262,7 +262,7 @@ def api_vault_preservable_formats_lists(ctx): @api.make() -def api_vault_unpreservable_files(ctx, coll, list_name): +def api_vault_unpreservable_files(ctx: rule.Context, coll: str, list_name: str) -> api.Result: """Retrieve list of unpreservable file formats in a collection. :param ctx: Combined type of a callback and rei struct @@ -284,36 +284,35 @@ def api_vault_unpreservable_files(ctx, coll, list_name): collection.data_objects(ctx, coll, recursive=True)) # Exclude Yoda metadata files - data_names = filter(lambda x: not re.match(r"yoda\-metadata(\[\d+\])?\.(xml|json)", x), data_names) + data_names_filtered = filter(lambda x: not re.match(r"yoda\-metadata(\[\d+\])?\.(xml|json)", x), data_names) # Data names -> lowercase extensions, without the dot. - exts = set(list(map(lambda x: os.path.splitext(x)[1][1:].lower(), data_names))) + exts = set(list(map(lambda x: os.path.splitext(x)[1][1:].lower(), data_names_filtered))) exts -= {''} # Return any ext that is not in the preservable list. return list(exts - preservable_formats) -def rule_vault_copy_original_metadata_to_vault(rule_args, callback, rei): +@rule.make(inputs=[0], outputs=[]) +def rule_vault_copy_original_metadata_to_vault(ctx: rule.Context, vault_package: str) -> None: """Copy the original metadata JSON into the root of the package. - :param rule_args: [0] Path of a new package in the vault - :param callback: Callback to rule Language - :param rei: The rei struct + :param ctx: Combined type of a callback and rei struct + :param vault_package: Path of a package in the vault """ - vault_package = rule_args[0] - vault_copy_original_metadata_to_vault(callback, vault_package) + vault_copy_original_metadata_to_vault(ctx, vault_package) -def get_vault_copy_numthreads(ctx): +def get_vault_copy_numthreads(ctx: rule.Context) -> int: # numThreads should be 0 if want multithreading with no specified amount of threads return 0 if config.vault_copy_multithread_enabled else 1 -def vault_copy_original_metadata_to_vault(ctx, vault_package_path): +def vault_copy_original_metadata_to_vault(ctx: rule.Context, vault_package_path: str) -> None: """Copy original metadata to the vault package root. 
- :param ctx: Combined type of a callback and rei struct + :param ctx: Combined type of a callback and rei struct :param vault_package_path: Path of a package in the vault """ original_metadata = vault_package_path + "/original/" + constants.IIJSONMETADATA @@ -325,22 +324,20 @@ def vault_copy_original_metadata_to_vault(ctx, vault_package_path): # msi.data_obj_copy(ctx, original_metadata, copied_metadata, 'verifyChksum=', irods_types.BytesBuf()) -def rule_vault_write_license(rule_args, callback, rei): +@rule.make(inputs=[0], outputs=[]) +def rule_vault_write_license(ctx: rule.Context, vault_pkg_coll: str) -> None: """Write the license as a text file into the root of the vault package. - :param rule_args: [0] Path of a package in the vault - :param callback: Callback to rule Language - :param rei: The rei struct + :param ctx: Combined type of a callback and rei struct + :param vault_pkg_coll: Path of a package in the vault """ + vault_write_license(ctx, vault_pkg_coll) - vault_pkg_coll = rule_args[0] - vault_write_license(callback, vault_pkg_coll) - -def vault_write_license(ctx, vault_pkg_coll): +def vault_write_license(ctx: rule.Context, vault_pkg_coll: str) -> None: """Write the license as a text file into the root of the vault package. - :param ctx: Combined type of a callback and rei struct + :param ctx: Combined type of a callback and rei struct :param vault_pkg_coll: Path of a package in the vault """ zone = user.zone(ctx) @@ -398,30 +395,31 @@ def vault_write_license(ctx, vault_pkg_coll): @rule.make(inputs=[0], outputs=[1]) -def rule_vault_enable_indexing(ctx, coll): +def rule_vault_enable_indexing(ctx: rule.Context, coll: str) -> str: vault_enable_indexing(ctx, coll) return "Success" -def vault_enable_indexing(ctx, coll): +def vault_enable_indexing(ctx: rule.Context, coll: str) -> None: if config.enable_open_search: if not collection.exists(ctx, coll + "/index"): # index collection does not exist yet path = meta.get_latest_vault_metadata_path(ctx, coll) - ctx.msi_rmw_avu('-d', path, '%', '%', constants.UUFLATINDEX) - meta.ingest_metadata_vault(ctx, path) + if path: + ctx.msi_rmw_avu('-d', path, '%', '%', constants.UUFLATINDEX) + meta.ingest_metadata_vault(ctx, path) # add indexing attribute and update opensearch subprocess.call(["imeta", "add", "-C", coll + "/index", "irods::indexing::index", "yoda::metadata", "elasticsearch"]) @rule.make(inputs=[0], outputs=[1]) -def rule_vault_disable_indexing(ctx, coll): +def rule_vault_disable_indexing(ctx: rule.Context, coll: str) -> str: vault_disable_indexing(ctx, coll) return "Success" -def vault_disable_indexing(ctx, coll): +def vault_disable_indexing(ctx: rule.Context, coll: str) -> None: if config.enable_open_search: if collection.exists(ctx, coll + "/index"): coll = coll + "/index" @@ -434,7 +432,7 @@ def vault_disable_indexing(ctx, coll): @api.make() -def api_vault_system_metadata(ctx, coll): +def api_vault_system_metadata(ctx: rule.Context, coll: str) -> api.Result: """Return system metadata of a vault collection. 
:param ctx: Combined type of a callback and rei struct @@ -466,8 +464,8 @@ def api_vault_system_metadata(ctx, coll): # Python 3: https://docs.python.org/3/library/datetime.html#datetime.date.fromisoformat # modified_date = date.fromisoformat(row[0]) modified_date = parser.parse(row[0]) - modified_date = modified_date.strftime('%Y-%m-%d %H:%M:%S%z') - system_metadata["Modified date"] = "{}".format(modified_date) + modified_date_time = modified_date.strftime('%Y-%m-%d %H:%M:%S%z') + system_metadata["Modified date"] = "{}".format(modified_date_time) # Landingpage URL. landinpage_url = "" @@ -524,15 +522,15 @@ def api_vault_system_metadata(ctx, coll): return system_metadata -def get_coll_vault_status(ctx, path, org_metadata=None): +def get_coll_vault_status(ctx: rule.Context, path: str, org_metadata: List | None = None) -> constants.vault_package_state: """Get the status of a vault folder.""" if org_metadata is None: org_metadata = folder.get_org_metadata(ctx, path) # Don't care about duplicate attr names here. - org_metadata = dict(org_metadata) - if constants.IIVAULTSTATUSATTRNAME in org_metadata: - x = org_metadata[constants.IIVAULTSTATUSATTRNAME] + org_metadata_dict = dict(org_metadata) + if constants.IIVAULTSTATUSATTRNAME in org_metadata_dict: + x = org_metadata_dict[constants.IIVAULTSTATUSATTRNAME] try: return constants.vault_package_state(x) except Exception: @@ -541,7 +539,7 @@ def get_coll_vault_status(ctx, path, org_metadata=None): return constants.vault_package_state.EMPTY -def get_all_published_versions(ctx, path): +def get_all_published_versions(ctx: rule.Context, path: str) -> Tuple[str | None, str | None, List]: """Get all published versions of a data package.""" base_doi = get_doi(ctx, path, 'base') package_doi = get_doi(ctx, path) @@ -582,7 +580,7 @@ def get_all_published_versions(ctx, path): @api.make() -def api_vault_collection_details(ctx, path): +def api_vault_collection_details(ctx: rule.Context, path: str) -> api.Result: """Return details of a vault collection. :param ctx: Combined type of a callback and rei struct @@ -694,7 +692,7 @@ def api_vault_collection_details(ctx, path): @api.make() -def api_vault_get_package_by_reference(ctx, reference): +def api_vault_get_package_by_reference(ctx: rule.Context, reference: str) -> api.Result: """Return path to data package with provided reference (UUID4). :param ctx: Combined type of a callback and rei struct @@ -719,7 +717,7 @@ def api_vault_get_package_by_reference(ctx, reference): @api.make() -def api_vault_get_landingpage_data(ctx, coll): +def api_vault_get_landingpage_data(ctx: rule.Context, coll: str) -> api.Result: """Retrieve landingpage data of data package. Landinpage data consists of metadata and system metadata. @@ -767,7 +765,7 @@ def api_vault_get_landingpage_data(ctx, coll): @api.make() -def api_vault_get_publication_terms(ctx): +def api_vault_get_publication_terms(ctx: rule.Context) -> api.Result: """Retrieve the publication terms.""" zone = user.zone(ctx) terms_collection = "/{}{}".format(zone, constants.IITERMSCOLLECTION) @@ -791,7 +789,7 @@ def api_vault_get_publication_terms(ctx): return api.Error('TermsReadFailed', 'Could not open Terms and Agreements.') -def change_read_access_group(ctx, coll, actor, group, grant=True): +def change_read_access_group(ctx: rule.Context, coll: str, actor: str, group: str, grant: bool = True) -> Tuple[bool, api.Result]: """Grant/revoke research group read access to vault package. 
:param ctx: Combined type of a callback and rei struct @@ -818,7 +816,7 @@ def change_read_access_group(ctx, coll, actor, group, grant=True): return True, '' -def check_change_read_access_research_group(ctx, coll, grant=True): +def check_change_read_access_research_group(ctx: rule.Context, coll: str, grant: bool = True) -> Tuple[bool, api.Result]: """Initial checks when changing read rights of research group for datapackage in vault. :param ctx: Combined type of a callback and rei struct @@ -843,7 +841,7 @@ def check_change_read_access_research_group(ctx, coll, grant=True): return True, '' -def change_read_access_research_group(ctx, coll, grant=True): +def change_read_access_research_group(ctx: rule.Context, coll: str, grant: bool = True) -> api.Result: """Grant/revoke read rights of members of research group to a datapackage in vault. This operation also includes read only members. @@ -884,7 +882,7 @@ def change_read_access_research_group(ctx, coll, grant=True): @api.make() -def api_grant_read_access_research_group(ctx, coll): +def api_grant_read_access_research_group(ctx: rule.Context, coll: str) -> api.Result: """Grant read rights of research group for datapackage in vault. :param ctx: Combined type of a callback and rei struct @@ -896,7 +894,7 @@ def api_grant_read_access_research_group(ctx, coll): @api.make() -def api_revoke_read_access_research_group(ctx, coll): +def api_revoke_read_access_research_group(ctx: rule.Context, coll: str) -> api.Result: """Revoke read rights of research group for datapackage in vault. :param ctx: Combined type of a callback and rei struct @@ -908,17 +906,17 @@ def api_revoke_read_access_research_group(ctx, coll): @rule.make() -def rule_vault_retry_copy_to_vault(ctx): +def rule_vault_retry_copy_to_vault(ctx: rule.Context) -> None: copy_to_vault(ctx, constants.CRONJOB_STATE["PENDING"]) copy_to_vault(ctx, constants.CRONJOB_STATE["RETRY"]) -def copy_to_vault(ctx, state): +def copy_to_vault(ctx: rule.Context, state: str) -> None: """ Collect all folders with a given cronjob state and try to copy them to the vault. - :param ctx: Combined type of a callback and rei struct - :param state: one of constants.CRONJOB_STATE + :param ctx: Combined type of a callback and rei struct + :param state: One of constants.CRONJOB_STATE """ iter = get_copy_to_vault_colls(ctx, state) for row in iter: @@ -933,7 +931,7 @@ def copy_to_vault(ctx, state): folder.folder_secure_set_retry(ctx, coll) -def get_copy_to_vault_colls(ctx, cronjob_state): +def get_copy_to_vault_colls(ctx: rule.Context, cronjob_state: str) -> List: iter = list(genquery.Query(ctx, ['COLL_NAME'], "META_COLL_ATTR_NAME = '{}' AND META_COLL_ATTR_VALUE = '{}'".format( @@ -943,7 +941,7 @@ def get_copy_to_vault_colls(ctx, cronjob_state): return iter -def copy_folder_to_vault(ctx, coll, target): +def copy_folder_to_vault(ctx: rule.Context, coll: str, target: str) -> bool: """Copy folder and all its contents to target in vault using irsync. The data will reside under folder '/original' within the vault. 
@@ -958,7 +956,7 @@ def copy_folder_to_vault(ctx, coll, target): try: returncode = subprocess.call(["irsync", "-rK", "i:{}/".format(coll), "i:{}/original".format(target)]) except Exception as e: - log.write(ctx, "irsync failure: " + e) + log.write(ctx, "irsync failure: " + str(e)) log.write(ctx, "irsync failure for coll <{}> and target <{}>".format(coll, target)) return False @@ -969,103 +967,7 @@ def copy_folder_to_vault(ctx, coll, target): return True -def treewalk_and_ingest(ctx, folder, target, origin, error): - """Treewalk folder and ingest. - - :param ctx: Combined type of a callback and rei struct - :param folder: Will change every time as it represents every folder that has to be copied to vault - :param target: Target of ingest - :param origin: Origin of treewalk - :param error: 0/1 indicating if treewalk or ingest failed - - :returns: Error status (which should remain 0 for further processing in iterative manner) - """ - parent_coll, coll = pathutil.chop(folder) - - # 1. Process this collection itself as a collection. - # INGEST - if error == 0: - # INGEST COLLECTION - error = ingest_object(ctx, parent_coll, coll, True, target, origin) - - # 2. Process dataobjects located directly within the collection - if error == 0: - iter = genquery.row_iterator( - "DATA_NAME", - "COLL_NAME = '" + folder + "'", - genquery.AS_LIST, ctx - ) - for row in iter: - # INGEST OBJECT - error = ingest_object(ctx, folder, row[0], False, target, origin) - if error: - break - - if error == 0: - # 3. Process the subfolders - # Loop through subfolders which have folder as parent folder - iter = genquery.row_iterator( - "COLL_NAME", - "COLL_PARENT_NAME = '" + folder + "'", - genquery.AS_LIST, ctx - ) - for row in iter: - error = treewalk_and_ingest(ctx, row[0], target, origin, error) - if error: - break - - return error - - -def ingest_object(ctx, parent, item, item_is_collection, destination, origin): - source_path = parent + "/" + item - read_access = msi.check_access(ctx, source_path, 'read_object', irods_types.BytesBuf())['arguments'][2] - - # TODO use set_acl_check? 
- if read_access != b'\x01': - try: - msi.set_acl(ctx, "default", "admin:read", user.full_name(ctx), source_path) - except msi.Error: - return 1 - - dest_path = destination - - if source_path != origin: - markIncomplete = False - # rewrite path to copy objects that are located underneath the toplevel collection - source_length = len(source_path) - relative_path = source_path[len(origin) + 1: source_length] - dest_path = destination + '/' + relative_path - else: - markIncomplete = True - - if item_is_collection: - # CREATE COLLECTION - try: - msi.coll_create(ctx, dest_path, '', irods_types.BytesBuf()) - except msi.Error: - return 1 - - if markIncomplete: - avu.set_on_coll(ctx, dest_path, constants.IIVAULTSTATUSATTRNAME, constants.vault_package_state.INCOMPLETE) - else: - # CREATE COPY OF DATA OBJECT - try: - # msi.data_obj_copy(ctx, source_path, dest_path, '', irods_types.BytesBuf()) - ctx.msiDataObjCopy(source_path, dest_path, 'numThreads={}++++verifyChksum='.format(get_vault_copy_numthreads(ctx)), 0) - except msi.Error: - return 1 - - if read_access != b'\x01': - try: - msi.set_acl(ctx, "default", "admin:null", user.full_name(ctx), source_path) - except msi.Error: - return 1 - - return 0 - - -def set_vault_permissions(ctx, coll, target): +def set_vault_permissions(ctx: rule.Context, coll: str, target: str) -> bool: """Set permissions in the vault as such that data can be copied to the vault.""" group_name = folder.collection_group_name(ctx, coll) if group_name == '': @@ -1161,7 +1063,7 @@ def set_vault_permissions(ctx, coll, target): return True -def reader_needs_access(ctx, group_name, coll): +def reader_needs_access(ctx: rule.Context, group_name: str, coll: str) -> bool: """Return if research group has access to this group but readers do not""" iter = genquery.row_iterator( "COLL_ACCESS_USER_ID", @@ -1183,7 +1085,7 @@ def reader_needs_access(ctx, group_name, coll): return not reader_found and research_found -def set_reader_vault_permissions(ctx, group_name, zone, dry_run): +def set_reader_vault_permissions(ctx: rule.Context, group_name: str, zone: str, dry_run: bool) -> bool: """Given a research group name, give reader group access to vault packages if they don't have that access already. 
@@ -1240,7 +1142,7 @@
 
 
 @rule.make(inputs=[0, 1], outputs=[2])
-def rule_vault_grant_readers_vault_access(ctx, dry_run, verbose):
+def rule_vault_grant_readers_vault_access(ctx: rule.Context, dry_run: str, verbose: str) -> str:
     """Rule for granting reader members of research groups access to vault
     packages in their group if they don't have access already
 
@@ -1250,8 +1152,8 @@ def rule_vault_grant_readers_vault_access(ctx, dry_run, verbose):
 
     :return: String status of completed successfully ('0') or there were errors ('1')
     """
-    dry_run = (dry_run == '1')
-    verbose = (verbose == '1')
+    dry_run_mode = (dry_run == '1')
+    verbose_mode = (verbose == '1')
     no_errors = True
 
     log.write(ctx, "grant_readers_vault_access started.")
@@ -1260,11 +1162,11 @@ def rule_vault_grant_readers_vault_access(ctx, dry_run, verbose):
         log.write(ctx, "User is not rodsadmin")
         return '1'
 
-    if dry_run or verbose:
+    if dry_run_mode or verbose_mode:
         modes = []
-        if dry_run:
+        if dry_run_mode:
             modes.append("dry run")
-        if verbose:
+        if verbose_mode:
             modes.append("verbose")
         log.write(ctx, "Running grant_readers_vault_access in {} mode.".format((" and ").join(modes)))
 
@@ -1281,7 +1183,7 @@ def rule_vault_grant_readers_vault_access(ctx, dry_run, verbose):
         name = row[0]
-        if verbose:
+        if verbose_mode:
             log.write(ctx, "{}: checking permissions".format(name))
-        if not set_reader_vault_permissions(ctx, name, zone, dry_run):
+        if not set_reader_vault_permissions(ctx, name, zone, dry_run_mode):
             no_errors = False
 
     message = ""
@@ -1295,13 +1197,13 @@
 
 
 @rule.make(inputs=[0, 1, 2, 3], outputs=[4, 5])
-def rule_vault_process_status_transitions(ctx, coll, new_coll_status, actor, previous_version):
+def rule_vault_process_status_transitions(ctx: rule.Context, coll: str, new_coll_status: str, actor: str, previous_version: str) -> str:
     """Rule interface for processing vault status transition request.
 
-    :param ctx: Combined type of a callback and rei struct
-    :param coll: Vault collection to change status for
-    :param new_coll_status: New vault package status
-    :param actor: Actor of the status change
+    :param ctx:              Combined type of a callback and rei struct
+    :param coll:             Vault collection to change status for
+    :param new_coll_status:  New vault package status
+    :param actor:            Actor of the status change
     :param previous_version: Path to previous version of data package in the vault
 
-    :return: Dict with status and statusinfo.
+    :return: String with status of the processing ('Success')
@@ -1311,7 +1213,7 @@ def rule_vault_process_status_transitions(ctx, coll, new_coll_status, actor, pre
     return 'Success'
 
 
-def vault_process_status_transitions(ctx, coll, new_coll_status, actor, previous_version):
+def vault_process_status_transitions(ctx: rule.Context, coll: str, new_coll_status: str, actor: str, previous_version: str) -> List:
     """Processing vault status transition request.
:param ctx: Combined type of a callback and rei struct @@ -1320,7 +1222,7 @@ def vault_process_status_transitions(ctx, coll, new_coll_status, actor, previous :param actor: Actor of the status change :param previous_version: Path to previous version of data package in the vault - :return: Dict with status and statusinfo + :return: List with status and statusinfo """ # check permissions - rodsadmin only if user.user_type(ctx) != 'rodsadmin': @@ -1374,7 +1276,7 @@ def vault_process_status_transitions(ctx, coll, new_coll_status, actor, previous return ['Success', ''] -def vault_request_status_transitions(ctx, coll, new_vault_status, previous_version=None): +def vault_request_status_transitions(ctx: rule.Context, coll: str, new_vault_status: str, previous_version: str | None = None) -> List: """Request vault status transition action. :param ctx: Combined type of a callback and rei struct @@ -1382,7 +1284,7 @@ def vault_request_status_transitions(ctx, coll, new_vault_status, previous_versi :param new_vault_status: New vault status :param previous_version: Path to previous version of data package in the vault - :return: Dict with status and statusinfo + :return: List with status and statusinfo """ # check permissions - rodsadmin only if user.user_type(ctx) != 'rodsadmin': @@ -1444,9 +1346,10 @@ def vault_request_status_transitions(ctx, coll, new_vault_status, previous_versi # Data package is new version of existing data package with a DOI. previous_version_path = "" - doi = get_doi(ctx, previous_version) - if previous_version and doi: - previous_version_path = previous_version + if previous_version: + doi = get_doi(ctx, previous_version) + if doi: + previous_version_path = previous_version # Add vault action request to actor group. avu.set_on_coll(ctx, actor_group_path, constants.UUORGMETADATAPREFIX + 'vault_action_' + coll_id, jsonutil.dump([coll, str(new_vault_status), actor, previous_version_path])) @@ -1458,13 +1361,13 @@ def vault_request_status_transitions(ctx, coll, new_vault_status, previous_versi return ['', ''] -def set_submitter(ctx, path, actor): +def set_submitter(ctx: rule.Context, path: str, actor: str) -> None: """Set submitter of data package for publication.""" attribute = constants.UUORGMETADATAPREFIX + "publication_submission_actor" avu.set_on_coll(ctx, path, attribute, actor) -def get_submitter(ctx, path): +def get_submitter(ctx: rule.Context, path: str) -> str: """Set submitter of data package for publication.""" attribute = constants.UUORGMETADATAPREFIX + "publication_submission_actor" org_metadata = dict(folder.get_org_metadata(ctx, path)) @@ -1472,16 +1375,16 @@ def get_submitter(ctx, path): if attribute in org_metadata: return org_metadata[attribute] else: - return None + return "" -def set_approver(ctx, path, actor): +def set_approver(ctx: rule.Context, path: str, actor: str) -> None: """Set approver of data package for publication.""" attribute = constants.UUORGMETADATAPREFIX + "publication_approval_actor" avu.set_on_coll(ctx, path, attribute, actor) -def get_approver(ctx, path): +def get_approver(ctx: rule.Context, path: str) -> str: """Set approver of data package for publication.""" attribute = constants.UUORGMETADATAPREFIX + "publication_approval_actor" org_metadata = dict(folder.get_org_metadata(ctx, path)) @@ -1489,10 +1392,10 @@ def get_approver(ctx, path): if attribute in org_metadata: return org_metadata[attribute] else: - return None + return "" -def get_doi(ctx, path, doi='version'): +def get_doi(ctx: rule.Context, path: str, doi: str = 'version') -> str | 
None: """Get the DOI of a data package in the vault. :param ctx: Combined type of a callback and rei struct @@ -1516,7 +1419,7 @@ def get_doi(ctx, path, doi='version'): return None -def get_previous_version(ctx, path): +def get_previous_version(ctx: rule.Context, path: str) -> str | None: """Get the previous version of a data package in the vault. :param ctx: Combined type of a callback and rei struct @@ -1536,7 +1439,7 @@ def get_previous_version(ctx, path): return None -def get_title(ctx, path): +def get_title(ctx: rule.Context, path: str) -> str: """Get the title of a data package in the vault. :param ctx: Combined type of a callback and rei struct @@ -1556,7 +1459,7 @@ def get_title(ctx, path): return "(no title)" -def meta_add_new_version(ctx, new_version, previous_version): +def meta_add_new_version(ctx: rule.Context, new_version: str, previous_version: str) -> None: """Add new version as related resource metadata to data package in a vault. :param ctx: Combined type of a callback and rei struct @@ -1604,13 +1507,13 @@ def meta_add_new_version(ctx, new_version, previous_version): meta_form.save(ctx, new_version, metadata) -def get_all_doi_versions(ctx, path): +def get_all_doi_versions(ctx: rule.Context, path: str) -> Tuple[List, List, List]: """Get the path and DOI of latest versions of published data package in a vault. - :param ctx: Combined type of a callback and rei struct - :param path: Path of vault with data packages + :param ctx: Combined type of a callback and rei struct + :param path: Path of vault with data packages - :return: Dict of data packages with DOI + :return: Lists of data packages with DOI """ iter = genquery.row_iterator( @@ -1646,7 +1549,7 @@ def get_all_doi_versions(ctx, path): @api.make() -def api_vault_get_published_packages(ctx, path): +def api_vault_get_published_packages(ctx: rule.Context, path: str) -> Dict: """Get the path and DOI of latest versions of published data package in a vault. :param ctx: Combined type of a callback and rei struct @@ -1676,7 +1579,7 @@ def api_vault_get_published_packages(ctx, path): return published_packages -def update_archive(ctx, coll, attr=None): +def update_archive(ctx: rule.Context, coll: str, attr: str | None = None) -> None: """Potentially update archive after metadata changed. :param ctx: Combined type of a callback and rei struct @@ -1685,10 +1588,9 @@ def update_archive(ctx, coll, attr=None): """ if config.enable_data_package_archive: import vault_archive - vault_archive.update(ctx, coll, attr) @rule.make(inputs=[], outputs=[0]) -def rule_vault_copy_numthreads(ctx): +def rule_vault_copy_numthreads(ctx: rule.Context) -> int: return get_vault_copy_numthreads(ctx) diff --git a/vault_archive.py b/vault_archive.py index 96be89a0f..3a0705520 100644 --- a/vault_archive.py +++ b/vault_archive.py @@ -5,6 +5,7 @@ import json import time +from typing import Dict, List import genquery import irods_types @@ -25,7 +26,7 @@ 'rule_vault_update_archive'] -def package_system_metadata(ctx, coll): +def package_system_metadata(ctx: rule.Context, coll: str) -> List: """Retrieve system metadata of collection. :param ctx: Combined type of a callback and rei struct @@ -46,7 +47,7 @@ def package_system_metadata(ctx, coll): ] -def package_provenance_log(ctx, system_metadata): +def package_provenance_log(ctx: rule.Context, system_metadata: List) -> List: """Retrieve provenance log from system metadata. 
:param ctx: Combined type of a callback and rei struct @@ -54,7 +55,7 @@ def package_provenance_log(ctx, system_metadata): :returns: List of dicts with provenance log """ - def key(item): + def key(item: Dict) -> int: return int(item["time"]) provenance_log = [] @@ -69,15 +70,16 @@ def key(item): return sorted(provenance_log, key=key) -def package_archive_path(ctx, coll): +def package_archive_path(ctx: rule.Context, coll: str) -> str | None: for row in genquery.row_iterator("DATA_PATH", "COLL_NAME = '{}' AND DATA_NAME = 'archive.tar'".format(coll), genquery.AS_LIST, ctx): return row[0] + return None -def vault_archivable(ctx, coll): +def vault_archivable(ctx: rule.Context, coll: str) -> bool: minimum = int(config.data_package_archive_minimum) maximum = int(config.data_package_archive_maximum) @@ -101,11 +103,11 @@ def vault_archivable(ctx, coll): return False -def vault_archival_status(ctx, coll): +def vault_archival_status(ctx: rule.Context, coll: str) -> str: return bagit.status(ctx, coll) -def create_archive(ctx, coll): +def create_archive(ctx: rule.Context, coll: str) -> None: log.write(ctx, "Creating archive of data package <{}>".format(coll)) user_metadata = meta.get_latest_vault_metadata_path(ctx, coll) system_metadata = package_system_metadata(ctx, coll) @@ -130,7 +132,7 @@ def create_archive(ctx, coll): ctx.dmput(package_archive_path(ctx, coll), config.data_package_archive_fqdn, "REG") -def extract_archive(ctx, coll): +def extract_archive(ctx: rule.Context, coll: str) -> None: while True: state = ctx.dmattr(package_archive_path(ctx, coll), config.data_package_archive_fqdn, "")["arguments"][2] if state not in ("UNM", "MIG"): @@ -144,7 +146,7 @@ def extract_archive(ctx, coll): bagit.extract(ctx, coll + "/archive.tar", coll + "/archive", resource=config.resource_vault) -def vault_archive(ctx, actor, coll): +def vault_archive(ctx: rule.Context, actor: str, coll: str) -> str: try: # Prepare for archival. avu.set_on_coll(ctx, coll, constants.IIARCHIVEATTRNAME, "archive") @@ -165,7 +167,7 @@ def vault_archive(ctx, actor, coll): return "Failure" -def vault_create_archive(ctx, coll): +def vault_create_archive(ctx: rule.Context, coll: str) -> str: if vault_archival_status(ctx, coll) != "archive": return "Invalid" try: @@ -202,7 +204,7 @@ def vault_create_archive(ctx, coll): return "Failure" -def vault_unarchive(ctx, actor, coll): +def vault_unarchive(ctx: rule.Context, actor: str, coll: str) -> str: try: # Prepare for unarchival. 
avu.set_on_coll(ctx, coll, constants.IIARCHIVEATTRNAME, "extract") @@ -225,7 +227,7 @@ def vault_unarchive(ctx, actor, coll): return "Failure" -def vault_extract_archive(ctx, coll): +def vault_extract_archive(ctx: rule.Context, coll: str) -> str: if vault_archival_status(ctx, coll) != "extract": return "Invalid" try: @@ -251,13 +253,13 @@ def vault_extract_archive(ctx, coll): return "Failure" -def update(ctx, coll, attr): +def update(ctx: rule.Context, coll: str, attr: str | None) -> None: if pathutil.info(coll).space == pathutil.Space.VAULT and attr not in (constants.IIARCHIVEATTRNAME, constants.UUPROVENANCELOG) and vault_archival_status(ctx, coll) == "archived": avu.set_on_coll(ctx, coll, constants.IIARCHIVEATTRNAME, "update") ctx.dmget(package_archive_path(ctx, coll), config.data_package_archive_fqdn, "OFL") -def vault_update_archive(ctx, coll): +def vault_update_archive(ctx: rule.Context, coll: str) -> str: try: log.write(ctx, "Start update of archived data package <{}>".format(coll)) avu.set_on_coll(ctx, coll, constants.IIARCHIVEATTRNAME, "updating") @@ -279,7 +281,7 @@ def vault_update_archive(ctx, coll): @api.make() -def api_vault_archive(ctx, coll): +def api_vault_archive(ctx: rule.Context, coll: str) -> api.Result: """Request to archive vault data package. :param ctx: Combined type of a callback and rei struct @@ -305,7 +307,7 @@ def api_vault_archive(ctx, coll): @api.make() -def api_vault_archival_status(ctx, coll): +def api_vault_archival_status(ctx: rule.Context, coll: str) -> api.Result: """Request archival status of vault data package. :param ctx: Combined type of a callback and rei struct @@ -317,7 +319,7 @@ def api_vault_archival_status(ctx, coll): @api.make() -def api_vault_extract(ctx, coll): +def api_vault_extract(ctx: rule.Context, coll: str) -> api.Result: """Request to unarchive an archived vault data package. 
:param ctx: Combined type of a callback and rei struct @@ -343,23 +345,25 @@ def api_vault_extract(ctx, coll): @rule.make(inputs=[0, 1, 2], outputs=[3]) -def rule_vault_archive(ctx, actor, coll, action): +def rule_vault_archive(ctx: rule.Context, actor: str, coll: str, action: str) -> str: if action == "archive": return vault_archive(ctx, actor, coll) elif action == "extract": return vault_unarchive(ctx, actor, coll) + else: + return "Failure" @rule.make(inputs=[0], outputs=[1]) -def rule_vault_create_archive(ctx, coll): +def rule_vault_create_archive(ctx: rule.Context, coll: str) -> str: return vault_create_archive(ctx, coll) @rule.make(inputs=[0], outputs=[1]) -def rule_vault_extract_archive(ctx, coll): +def rule_vault_extract_archive(ctx: rule.Context, coll: str) -> str: return vault_extract_archive(ctx, coll) @rule.make(inputs=[0], outputs=[1]) -def rule_vault_update_archive(ctx, coll): +def rule_vault_update_archive(ctx: rule.Context, coll: str) -> str: return vault_update_archive(ctx, coll) diff --git a/vault_download.py b/vault_download.py index ba99dd1a6..36d5ad710 100644 --- a/vault_download.py +++ b/vault_download.py @@ -14,7 +14,7 @@ 'rule_vault_download_archive'] -def vault_downloadable(ctx, coll): +def vault_downloadable(ctx: rule.Context, coll: str) -> bool: if coll.endswith("/original"): return False @@ -32,17 +32,17 @@ def vault_downloadable(ctx, coll): return False -def vault_bagitor(ctx, coll): +def vault_bagitor(ctx: rule.Context, coll: str) -> str: for row in genquery.row_iterator("META_COLL_ATTR_VALUE", "COLL_NAME = '{}' AND META_COLL_ATTR_NAME = '{}'".format(coll, constants.IIBAGITOR), genquery.AS_LIST, ctx): return row[0] - return False + return "" -def vault_download(ctx, actor, coll): +def vault_download(ctx: rule.Context, actor: str, coll: str) -> str: try: # Prepare for download. avu.set_on_coll(ctx, coll, constants.IIARCHIVEATTRNAME, "bagit") @@ -54,7 +54,7 @@ def vault_download(ctx, actor, coll): return "Failure" -def vault_download_archive(ctx, coll): +def vault_download_archive(ctx: rule.Context, coll: str) -> str: if bagit.status(ctx, coll) != "bagit": return "Invalid" try: @@ -84,7 +84,7 @@ def vault_download_archive(ctx, coll): @api.make() -def api_vault_download(ctx, coll): +def api_vault_download(ctx: rule.Context, coll: str) -> api.Result: """Request to download a vault data package. 
     :param ctx: Combined type of a callback and rei struct
@@ -109,10 +109,10 @@ def api_vault_download(ctx, coll):
 
 
 @rule.make(inputs=[0, 1], outputs=[2])
-def rule_vault_download(ctx, actor, coll):
+def rule_vault_download(ctx: rule.Context, actor: str, coll: str) -> str:
     return vault_download(ctx, actor, coll)
 
 
 @rule.make(inputs=[0], outputs=[1])
-def rule_vault_download_archive(ctx, coll):
+def rule_vault_download_archive(ctx: rule.Context, coll: str) -> str:
     return vault_download_archive(ctx, coll)

From 02a658a046e0077bd49183cac121b82b2fab9ce5 Mon Sep 17 00:00:00 2001
From: Lazlo Westerhof
Date: Wed, 11 Dec 2024 12:14:20 +0100
Subject: [PATCH 27/27] YDA-5992: add type annotations to utils

---
 .github/workflows/python.yml |  3 +-
 setup.cfg                    |  2 +-
 util/api.py                  | 31 +++++++-------
 util/arb_data_manager.py     | 60 +++++++++++++--------------
 util/avu.py                  | 54 ++++++++++++------------
 util/bagit.py                |  9 ++--
 util/cached_data_manager.py  | 79 +++++++++++++++++++-----------------
 util/collection.py           | 40 +++++++++---------
 util/config.py               | 14 ++++---
 util/constants.py            |  4 +-
 util/data_object.py          | 44 +++++++++++---------
 util/error.py                |  2 +-
 util/group.py                | 12 ++++--
 util/irods_type_info.py      |  9 +++-
 util/jsonutil.py             | 18 ++++----
 util/log.py                  |  6 +--
 util/misc.py                 | 15 +++----
 util/msi.py                  | 18 ++++----
 util/pathutil.py             | 30 +++++++-------
 util/policy.py               | 20 +++++----
 util/resource.py             | 32 ++++++++------
 util/rule.py                 | 13 +++---
 util/spool.py                | 39 +++++++++---------
 util/user.py                 | 25 +++++++-----
 util/yoda_names.py           | 30 +++++++-------
 25 files changed, 327 insertions(+), 282 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index 3a5b5afa7..a09044c97 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -19,7 +19,8 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install flake8==6.0.0 flake8-import-order==0.18.2 darglint==1.8.1 codespell mypy types-requests types-python-dateutil
+        python -m pip install flake8==6.0.0 flake8-import-order==0.18.2 darglint==1.8.1 codespell
+        python -m pip install mypy types-requests types-python-dateutil types-redis
 
     - name: Lint with flake8
       run: |
diff --git a/setup.cfg b/setup.cfg
index 51c0fb4fe..880b648c2 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -17,6 +17,6 @@ check_untyped_defs = False
 disallow_any_generics = False
 disallow_incomplete_defs = True
 disallow_untyped_calls = False
-disallow_untyped_defs = True
+disallow_untyped_defs = False
 show_error_codes = True
 show_error_context = True
diff --git a/util/api.py b/util/api.py
index d67546254..2161112b7 100644
--- a/util/api.py
+++ b/util/api.py
@@ -11,28 +11,29 @@
 import traceback
 import zlib
 from collections import OrderedDict
+from typing import Any, Callable, Dict
 
+import error
 import jsonutil
 import log
 import rule
 from config import config
-from error import *
 
 
 class Result:
     """API result."""
 
-    def __init__(self, data=None, status='ok', info=None, debug_info=None):
+    def __init__(self, data: Dict | None = None, status: str = 'ok', info: str | None = None, debug_info: str | None = None) -> None:
         self.status = status
         self.status_info = info
         self.data = data
         self.debug_info = debug_info
 
     @staticmethod
-    def ok(**xs):
+    def ok(**xs: Any) -> 'Result':
         return Result(**xs)
 
-    def as_dict(self):
+    def as_dict(self) -> OrderedDict:
         if config.environment == 'development':
             # Emit debug information in dev.
# This may contain stack traces, exception texts, timing info, @@ -46,29 +47,29 @@ def as_dict(self): ('status_info', self.status_info), ('data', self.data)]) - def __bool__(self): + def __bool__(self) -> bool: return self.status == 'ok' __nonzero__ = __bool__ -class Error(Result, UUError): +class Error(Result, error.UUError): """Error with descriptive (user-readable) message. Returned/raised by API functions to produce informative error output. """ - def __init__(self, name, info, debug_info=None, data=None): + def __init__(self, name: str, info: str, debug_info: str | None = None, data: str | None = None) -> None: self.name = name self.info = info self.debug_info = debug_info Result.__init__(self, data, 'error_' + name, info, debug_info) - UUError.__init__(self, 'error_' + name) + error.UUError.__init__(self, 'error_' + name) - def __str__(self): + def __str__(self) -> str: return '{}: {}'.format(self.name, self.info) -def _api(f): +def _api(f: Callable) -> Callable: """Turn a Python function into a basic API function. By itself, this wrapper is not very useful, as the resulting function is @@ -107,7 +108,7 @@ def _api(f): # If the function accepts **kwargs, we do not forbid extra arguments. allow_extra = a_kw is not None - def wrapper(ctx, inp): + def wrapper(ctx: rule.Context, inp: str) -> Dict: """A function that receives a JSON string and calls a wrapped function with unpacked arguments. :param ctx: Combined type of a callback and rei struct @@ -119,10 +120,10 @@ def wrapper(ctx, inp): :returns: Result of the JSON API call """ # Result shorthands. - def error_internal(debug_info=None): + def error_internal(debug_info: str | None = None) -> Error: return Error('internal', 'An internal error occurred', debug_info=debug_info) - def bad_request(debug_info=None): + def bad_request(debug_info: str | None = None) -> Error: return Error('badrequest', 'An internal error occurred', debug_info=debug_info) # Input is base64 encoded and compressed to reduce size (max rule length in iRODS is 20KB) @@ -195,7 +196,7 @@ def bad_request(debug_info=None): return wrapper -def make(): +def make() -> Callable: """Create API functions callable as iRODS rules. This translate between a Python calling convention and the iRODS rule @@ -221,7 +222,7 @@ def api_ping(ctx, foo): :returns: API function callable as iRODS rules """ - def deco(f): + def deco(f: Callable) -> Callable: # The "base" API function, that does handling of arguments and errors. base = _api(f) diff --git a/util/arb_data_manager.py b/util/arb_data_manager.py index dd6466da3..c37aa2732 100644 --- a/util/arb_data_manager.py +++ b/util/arb_data_manager.py @@ -6,53 +6,55 @@ __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import TYPE_CHECKING + import genquery import cached_data_manager import constants import log import msi +if TYPE_CHECKING: + import rule class ARBDataManager(cached_data_manager.CachedDataManager): AVU_NAME = "yoda::arb" - def get(self, ctx, keyname): - """Retrieves data from the cache if possible, otherwise retrieves - the original. + def get(self, ctx: 'rule.Context', keyname: str) -> str: + """Retrieves data from the cache if possible, otherwise retrieves the original. 
- :param ctx: Combined type of a callback and rei struct - :param keyname: name of the key + :param ctx: Combined type of a callback and rei struct + :param keyname: Name of the key - :returns: data for this key (arb_status) + :returns: Data for this key (arb_status) """ value = super().get(ctx, keyname) return constants.arb_status[value.decode("utf-8")] - def put(self, ctx, keyname, data): + def put(self, ctx: 'rule.Context', keyname: str, data: str) -> None: """Update both the original value and cached value (if cache is not available, it is not updated) - :param ctx: Combined type of a callback and rei struct - :param keyname: name of the key - :param data: data for this key (arb_status) + :param ctx: Combined type of a callback and rei struct + :param keyname: Name of the key + :param data: Data for this key (arb_status) """ super().put(ctx, keyname, data.value) - def _get_context_string(self): - """ :returns: a string that identifies the particular type of data manager + def _get_context_string(self) -> str: + """Returns a string that identifies the particular type of data manager. - :returns: context string for this type of data manager + :returns: context string for this type of data manager """ return "arb" - def _get_original_data(self, ctx, keyname): - """This function is called when data needs to be retrieved from the original - (non-cached) location. + def _get_original_data(self, ctx: 'rule.Context', keyname: str) -> str: + """This function is called when data needs to be retrieved from the original (non-cached) location. - :param ctx: Combined type of a callback and rei struct - :param keyname: name of the key + :param ctx: Combined type of a callback and rei struct + :param keyname: Name of the key - :returns: Original data for this key + :returns: Original data for this key """ arb_data = list(genquery.row_iterator( "META_RESC_ATTR_VALUE", @@ -68,21 +70,19 @@ def _get_original_data(self, ctx, keyname): log.write(ctx, "WARNING: multiple ARB AVUs present for resource '{}'. ARB will ignore it.".format(keyname)) return constants.arb_status.IGNORE.value - def _put_original_data(self, ctx, keyname, data): - """This function is called when data needs to be updated in the original - (non-cached) location. + def _put_original_data(self, ctx: 'rule.Context', keyname: str, data: str) -> None: + """This function is called when data needs to be updated in the original (non-cached) location. - :param ctx: Combined type of a callback and rei struct - :param keyname: name of the key - :param data: Data for this key + :param ctx: Combined type of a callback and rei struct + :param keyname: Name of the key + :param data: Data for this key """ msi.mod_avu_metadata(ctx, "-r", keyname, "set", self.AVU_NAME, data, "") - def _should_populate_cache_on_get(self): - """This function controls whether the manager populates the cache - after retrieving original data. + def _should_populate_cache_on_get(self) -> bool: + """This function controls whether the manager populates the cache after retrieving original data. - :returns: Boolean value that states whether the cache should be populated when original data - is retrieved. + :returns: Boolean value that states whether the cache should be populated when original data + is retrieved. 
""" return True diff --git a/util/avu.py b/util/avu.py index 585d9756d..47f3e71d3 100644 --- a/util/avu.py +++ b/util/avu.py @@ -6,6 +6,7 @@ import itertools import json from collections import namedtuple +from typing import Dict, Iterable, List, Tuple import genquery import irods_types @@ -13,6 +14,7 @@ import log import msi import pathutil +import rule Avu = namedtuple('Avu', list('avu')) Avu.attr = Avu.a @@ -20,21 +22,21 @@ Avu.unit = Avu.u -def of_data(ctx, path): +def of_data(ctx: rule.Context, path: str) -> Iterable[Avu]: """Get (a,v,u) triplets for a given data object.""" return map(lambda x: Avu(*x), genquery.Query(ctx, "META_DATA_ATTR_NAME, META_DATA_ATTR_VALUE, META_DATA_ATTR_UNITS", "COLL_NAME = '{}' AND DATA_NAME = '{}'".format(*pathutil.chop(path)))) -def of_coll(ctx, coll): +def of_coll(ctx: rule.Context, coll: str) -> Iterable[Avu]: """Get (a,v,u) triplets for a given collection.""" return map(lambda x: Avu(*x), genquery.Query(ctx, "META_COLL_ATTR_NAME, META_COLL_ATTR_VALUE, META_COLL_ATTR_UNITS", "COLL_NAME = '{}'".format(coll))) -def get_attr_val_of_coll(ctx, coll, attr): +def get_attr_val_of_coll(ctx: rule.Context, coll: str, attr: str) -> Dict: """Get the value corresponding to an attr for a given collection.""" iter = genquery.Query( ctx, @@ -46,7 +48,7 @@ def get_attr_val_of_coll(ctx, coll, attr): raise ValueError("Attribute {} not found in AVUs of collection {}".format(attr, coll)) -def inside_coll(ctx, path, recursive=False): +def inside_coll(ctx: rule.Context, path: str, recursive: bool = False) -> Iterable: """Get a list of all AVUs inside a collection with corresponding paths. Note: the returned value is a generator / lazy list, so that large @@ -62,7 +64,7 @@ def inside_coll(ctx, path, recursive=False): :returns: List of all AVUs inside a collection with corresponding paths """ # coll+name -> path - def to_absolute(row, type): + def to_absolute(row: List, type: str) -> Tuple[str, str, str, str, str]: if type == "collection": return (row[1], type, row[2], row[3], row[4]) else: @@ -98,20 +100,20 @@ def to_absolute(row, type): return itertools.chain(collection_root, data_objects_root, collection_sub, data_objects_sub) -def of_group(ctx, group): +def of_group(ctx: rule.Context, group: str) -> Iterable[Avu]: """Get (a,v,u) triplets for a given group.""" return map(lambda x: Avu(*x), genquery.Query(ctx, "META_USER_ATTR_NAME, META_USER_ATTR_VALUE, META_USER_ATTR_UNITS", "USER_NAME = '{}' AND USER_TYPE = 'rodsgroup'".format(group))) -def set_on_data(ctx, path, a, v): +def set_on_data(ctx: rule.Context, path: str, a: str, v: str) -> None: """Set key/value metadata on a data object.""" x = msi.string_2_key_val_pair(ctx, '{}={}'.format(a, v), irods_types.BytesBuf()) msi.set_key_value_pairs_to_obj(ctx, x['arguments'][1], path, '-d') -def set_on_coll(ctx, coll, a, v, catch=False): +def set_on_coll(ctx: rule.Context, coll: str, a: str, v: str, catch: bool = False) -> bool | None: """Set key/value metadata on a collection. Optionally catch any exceptions that occur. 
:param ctx: Combined type of a callback and rei struct @@ -123,18 +125,18 @@ def set_on_coll(ctx, coll, a, v, catch=False): :returns: True if catch=True and no exceptions occurred during operation """ if catch: - return _set_on_coll_catch(ctx, coll, a, v) + return _set_on_coll_catch(ctx, coll, a, v) # type: ignore[func-returns-value] _set_on_coll(ctx, coll, a, v) return True -def _set_on_coll(ctx, coll, a, v): +def _set_on_coll(ctx: rule.Context, coll: str, a: str, v: str) -> None: x = msi.string_2_key_val_pair(ctx, '{}={}'.format(a, v), irods_types.BytesBuf()) msi.set_key_value_pairs_to_obj(ctx, x['arguments'][1], coll, '-C') -def _set_on_coll_catch(ctx, coll, a, v): +def _set_on_coll_catch(ctx: rule.Context, coll: str, a: str, v: str) -> bool | None: """Set AVU, but catch exception.""" try: _set_on_coll(ctx, coll, a, v) @@ -145,59 +147,59 @@ def _set_on_coll_catch(ctx, coll, a, v): return True -def set_on_resource(ctx, resource, a, v): +def set_on_resource(ctx: rule.Context, resource: str, a: str, v: str) -> None: """Set key/value metadata on a resource.""" x = msi.string_2_key_val_pair(ctx, '{}={}'.format(a, v), irods_types.BytesBuf()) msi.set_key_value_pairs_to_obj(ctx, x['arguments'][1], resource, '-R') -def associate_to_data(ctx, path, a, v): +def associate_to_data(ctx: rule.Context, path: str, a: str, v: str) -> None: """Associate key/value metadata to a data object.""" x = msi.string_2_key_val_pair(ctx, '{}={}'.format(a, v), irods_types.BytesBuf()) msi.associate_key_value_pairs_to_obj(ctx, x['arguments'][1], path, '-d') -def associate_to_coll(ctx, coll, a, v): +def associate_to_coll(ctx: rule.Context, coll: str, a: str, v: str) -> None: """Associate key/value metadata on a collection.""" x = msi.string_2_key_val_pair(ctx, '{}={}'.format(a, v), irods_types.BytesBuf()) msi.associate_key_value_pairs_to_obj(ctx, x['arguments'][1], coll, '-C') -def associate_to_group(ctx, group, a, v): +def associate_to_group(ctx: rule.Context, group: str, a: str, v: str) -> None: """Associate key/value metadata on a group.""" x = msi.string_2_key_val_pair(ctx, '{}={}'.format(a, v), irods_types.BytesBuf()) msi.associate_key_value_pairs_to_obj(ctx, x['arguments'][1], group, '-u') -def associate_to_resource(ctx, resource, a, v): +def associate_to_resource(ctx: rule.Context, resource: str, a: str, v: str) -> None: """Associate key/value metadata on a group.""" x = msi.string_2_key_val_pair(ctx, '{}={}'.format(a, v), irods_types.BytesBuf()) msi.associate_key_value_pairs_to_obj(ctx, x['arguments'][1], resource, '-R') -def rm_from_coll(ctx, coll, a, v): +def rm_from_coll(ctx: rule.Context, coll: str, a: str, v: str) -> None: """Remove key/value metadata from a collection.""" x = msi.string_2_key_val_pair(ctx, '{}={}'.format(a, v), irods_types.BytesBuf()) msi.remove_key_value_pairs_from_obj(ctx, x['arguments'][1], coll, '-C') -def rm_from_data(ctx, coll, a, v): +def rm_from_data(ctx: rule.Context, coll: str, a: str, v: str) -> None: """Remove key/value metadata from a data object.""" x = msi.string_2_key_val_pair(ctx, '{}={}'.format(a, v), irods_types.BytesBuf()) msi.remove_key_value_pairs_from_obj(ctx, x['arguments'][1], coll, '-d') -def rm_from_group(ctx, group, a, v): +def rm_from_group(ctx: rule.Context, group: str, a: str, v: str) -> None: """Remove key/value metadata from a group.""" x = msi.string_2_key_val_pair(ctx, '{}={}'.format(a, v), irods_types.BytesBuf()) msi.remove_key_value_pairs_from_obj(ctx, x['arguments'][1], group, '-u') -def rmw_from_coll(ctx, obj, a, v, catch=False, u=''): +def 
rmw_from_coll(ctx: rule.Context, obj: str, a: str, v: str, catch: bool = False, u: str = '') -> bool:
     """Remove AVU from collection with wildcards.
 
     Optionally catch any exceptions that occur.
 
     :param ctx: Combined type of a callback and rei struct
-    :param obj: Collection to get paginated contents of
+    :param obj:   Collection to remove the AVU from
     :param a: Attribute
     :param v: Value
     :param catch: Whether to catch any exceptions that occur
@@ -212,11 +214,11 @@ def rmw_from_coll(ctx, obj, a, v, catch=False, u=''):
     return True
 
 
-def _rmw_from_coll(ctx, obj, a, v, u=''):
+def _rmw_from_coll(ctx: rule.Context, obj: str, a: str, v: str, u: str = '') -> None:
     msi.rmw_avu(ctx, '-C', obj, a, v, u)
 
 
-def _rmw_from_coll_catch(ctx, obj, a, v, u=''):
+def _rmw_from_coll_catch(ctx: rule.Context, obj: str, a: str, v: str, u: str = '') -> bool:
     try:
         _rmw_from_coll(ctx, obj, a, v, u)
     except Exception:
@@ -226,17 +228,17 @@ def _rmw_from_coll_catch(ctx, obj, a, v, u=''):
     return True
 
 
-def rmw_from_data(ctx, obj, a, v, u=''):
+def rmw_from_data(ctx: rule.Context, obj: str, a: str, v: str, u: str = '') -> None:
     """Remove AVU from data object with wildcards."""
     msi.rmw_avu(ctx, '-d', obj, a, v, u)
 
 
-def rmw_from_group(ctx, group, a, v, u=''):
+def rmw_from_group(ctx: rule.Context, group: str, a: str, v: str, u: str = '') -> None:
     """Remove AVU from group with wildcards."""
     msi.rmw_avu(ctx, '-u', group, a, v, u)
 
 
-def apply_atomic_operations(ctx, operations):
+def apply_atomic_operations(ctx: rule.Context, operations: Dict) -> bool:
     """Sequentially executes all operations as a single transaction.
 
     Operations should be a dict with structure as defined in
diff --git a/util/bagit.py b/util/bagit.py
index 528c2ad7d..7e65ab063 100644
--- a/util/bagit.py
+++ b/util/bagit.py
@@ -12,9 +12,10 @@
 import data_object
 import log
 import msi
+import rule
 
 
-def manifest(ctx, coll):
+def manifest(ctx: rule.Context, coll: str) -> str:
     """Generate a BagIt manifest of collection.
 
     Manifest with a complete listing of each file name along with
@@ -41,7 +42,7 @@ def manifest(ctx, coll):
     ]) + "\n"
 
 
-def status(ctx, coll):
+def status(ctx: rule.Context, coll: str) -> str | bool:
     for row in genquery.row_iterator("META_COLL_ATTR_VALUE",
                                      "COLL_NAME = '{}' AND META_COLL_ATTR_NAME = '{}'".format(coll, constants.IIARCHIVEATTRNAME),
                                      genquery.AS_LIST,
@@ -51,7 +52,7 @@ def status(ctx, coll):
     return False
 
 
-def create(ctx, archive, coll, resource):
+def create(ctx: rule.Context, archive: str, coll: str, resource: str) -> None:
     # Create manifest file.
log.write(ctx, "Creating manifest file for data package <{}>".format(coll)) data_object.write(ctx, coll + "/manifest-sha256.txt", manifest(ctx, coll)) @@ -74,7 +75,7 @@ def create(ctx, archive, coll, resource): ctx.iiCopyACLsFromParent(archive, "default") -def extract(ctx, archive, coll, resource=0): +def extract(ctx: rule.Context, archive: str, coll: str, resource: str = '0') -> None: ret = msi.archive_extract(ctx, archive, coll, 0, resource, 0) if ret < 0: log.write(ctx, "Extracting archive of data package <{}> failed".format(coll)) diff --git a/util/cached_data_manager.py b/util/cached_data_manager.py index f5c415cc5..fc854fa9a 100644 --- a/util/cached_data_manager.py +++ b/util/cached_data_manager.py @@ -2,9 +2,13 @@ __license__ = 'GPLv3, see LICENSE' import traceback +from typing import TYPE_CHECKING import redis +if TYPE_CHECKING: + import rule + class CachedDataManager: """This class contains a framework that subclasses can use @@ -14,26 +18,27 @@ class CachedDataManager: """ # Internal methods to implement by subclass - def _get_context_string(self): - """This function should be implemented by subclasses. It should return - a string that is used in keys to identify the subclass. + def _get_context_string(self) -> None: + """This function should be implemented by subclasses. - :raises Exception: if function has not been implemented in subclass. + It should return a string that is used in keys to identify the subclass. + + :raises Exception: if function has not been implemented in subclass. """ raise Exception("Context string not provided by CacheDataManager.") - def _get_original_data(self, ctx, keyname): + def _get_original_data(self, ctx: 'rule.Context', keyname: str) -> None: """This function is called when data needs to be retrieved from the original (non-cached) location. :param ctx: Combined type of a callback and rei struct - :param keyname: name of the key + :param keyname: Name of the key :raises Exception: if function has not been implemented in subclass. """ raise Exception("Get original data not implemented by CacheDataManager,") - def _put_original_data(self, ctx, keyname, data): + def _put_original_data(self, ctx: 'rule.Context', keyname: str, data: str) -> None: """This function is called when data needs to be updated in the original (non-cached) location. @@ -48,17 +53,17 @@ def _put_original_data(self, ctx, keyname, data): # Internal methods that have a default implementation. Can optionally # be re-implemented by subclass. - def __init__(self, *args, **kwargs): + def __init__(self, *args: str, **kwargs: int) -> None: try: self._connection = redis.Redis(host="localhost") except BaseException: print("Error: opening Redis ARB connection failed with exception: " + traceback.format_exc()) - self._connection = None + self._connection = None # type: ignore[assignment] - def _get_connection(self): + def _get_connection(self) -> redis.Redis: return self._connection - def _cache_available(self): + def _cache_available(self) -> bool: if self._connection is None: return False @@ -67,17 +72,16 @@ def _cache_available(self): except BaseException: return False - def _get_cache_keyname(self, keyname): - return self._get_context_string() + "::" + keyname + def _get_cache_keyname(self, keyname: str) -> str: + return self._get_context_string() + "::" + keyname # type: ignore[func-returns-value] - def get(self, ctx, keyname): - """Retrieves data from the cache if possible, otherwise retrieves - the original. 
+ def get(self, ctx: 'rule.Context', keyname: str) -> str: + """Retrieves data from the cache if possible, otherwise retrieves the original. - :param ctx: Combined type of a callback and rei struct - :param keyname: name of the key + :param ctx: Combined type of a callback and rei struct + :param keyname: Name of the key - :returns: data for this key + :returns: Data for this key """ connection = self._get_connection() cache_keyname = self._get_cache_keyname(keyname) @@ -88,48 +92,47 @@ def get(self, ctx, keyname): cached_result = None if cached_result is None: - original_result = self._get_original_data(ctx, keyname) + original_result = self._get_original_data(ctx, keyname) # type: ignore[func-returns-value] if self._should_populate_cache_on_get() and self._cache_available(): self._update_cache(ctx, keyname, original_result) return original_result else: return cached_result - def put(self, ctx, keyname, data): - """Update both the original value and cached value (if cache is not available, it is not updated) + def put(self, ctx: 'rule.Context', keyname: str, data: str) -> None: + """Update both the original value and cached value (if cache is not available, it is not updated). - :param ctx: Combined type of a callback and rei struct - :param keyname: name of the key - :param data: data for this key + :param ctx: Combined type of a callback and rei struct + :param keyname: Name of the key + :param data: Data for this key """ self._put_original_data(ctx, keyname, data) if self._cache_available(): self._update_cache(ctx, keyname, data) - def _update_cache(self, ctx, keyname, data): - """Update a value in the cache + def _update_cache(self, ctx: 'rule.Context', keyname: str, data: str) -> None: + """Update a value in the cache. - :param ctx: Combined type of a callback and rei struct - :param keyname: name of the key - :param data: data for this key + :param ctx: Combined type of a callback and rei struct + :param keyname: Name of the key + :param data: Data for this key """ cache_keyname = self._get_cache_keyname(keyname) self._get_connection().set(cache_keyname, data) - def clear(self, ctx, keyname): + def clear(self, ctx: 'rule.Context', keyname: str) -> None: """Clears cached data for a key if present. - :param ctx: Combined type of a callback and rei struct - :param keyname: name of the key + :param ctx: Combined type of a callback and rei struct + :param keyname: Name of the key """ cache_keyname = self._get_cache_keyname(keyname) self._get_connection().delete(cache_keyname) - def _should_populate_cache_on_get(self): - """This function controls whether the manager populates the cache - after retrieving original data. + def _should_populate_cache_on_get(self) -> bool: + """This function controls whether the manager populates the cache after retrieving original data. 
- :returns: boolean value that determines whether the data manager populates - the cache after retrieving data + :returns: boolean value that determines whether the data manager populates + the cache after retrieving data """ return False diff --git a/util/collection.py b/util/collection.py index 74bbb8e79..2ea58e9b1 100644 --- a/util/collection.py +++ b/util/collection.py @@ -6,22 +6,24 @@ import itertools import json from functools import reduce +from typing import Iterable, List, Tuple import genquery import irods_types import data_object import msi +import rule -def exists(ctx, path): +def exists(ctx: rule.Context, path: str) -> bool: """Check if a collection with the given path exists.""" return len(list(genquery.row_iterator( "COLL_ID", "COLL_NAME = '{}'".format(path), genquery.AS_LIST, ctx))) > 0 -def owner(ctx, path): +def owner(ctx: rule.Context, path: str) -> Tuple[str, str] | None: """Find the owner of a collection. Returns (name, zone) or None.""" owners = list(genquery.row_iterator( "COLL_OWNER_NAME, COLL_OWNER_ZONE", @@ -30,7 +32,7 @@ def owner(ctx, path): return tuple(owners[0]) if len(owners) > 0 else None -def empty(ctx, path): +def empty(ctx: rule.Context, path: str) -> bool: """Check if a collection contains any data objects.""" return (len(list(genquery.row_iterator( "DATA_ID", @@ -42,9 +44,9 @@ def empty(ctx, path): genquery.AS_LIST, ctx))) == 0) -def size(ctx, path): +def size(ctx: rule.Context, path: str) -> int: """Get a collection's size in bytes.""" - def func(x, row): + def func(x: int, row: List) -> int: return x + int(row[1]) return reduce(func, @@ -56,7 +58,7 @@ def func(x, row): genquery.AS_LIST, ctx)), 0) -def data_count(ctx, path, recursive=True): +def data_count(ctx: rule.Context, path: str, recursive: bool = True) -> int: """Get a collection's data count. :param ctx: Combined type of a callback and rei struct @@ -69,7 +71,7 @@ def data_count(ctx, path, recursive=True): return sum(1 for _ in data_objects(ctx, path, recursive=recursive)) -def collection_count(ctx, path, recursive=True): +def collection_count(ctx: rule.Context, path: str, recursive: bool = True) -> int: """Get a collection's collection count (the amount of collections within a collection).""" return sum(1 for _ in genquery.row_iterator( "COLL_ID", @@ -78,7 +80,7 @@ def collection_count(ctx, path, recursive=True): genquery.AS_LIST, ctx)) -def subcollections(ctx, path, recursive=False): +def subcollections(ctx: rule.Context, path: str, recursive: bool = False) -> Iterable: """Get a list of all subcollections in a collection. Note: the returned value is a generator / lazy list, so that large @@ -94,7 +96,7 @@ def subcollections(ctx, path, recursive=False): :returns: List of all subcollections in a collection """ # coll+subcoll name -> path - def to_absolute(row): + def to_absolute(row: List) -> str: return '{}/{}'.format(*row) q_root = genquery.row_iterator("COLL_PARENT_NAME, COLL_NAME", @@ -112,7 +114,7 @@ def to_absolute(row): return map(to_absolute, itertools.chain(q_root, q_sub)) -def data_objects(ctx, path, recursive=False): +def data_objects(ctx: rule.Context, path: str, recursive: bool = False) -> Iterable: """Get a list of all data objects in a collection. 
Note: the returned value is a generator / lazy list, so that large @@ -128,7 +130,7 @@ def data_objects(ctx, path, recursive=False): :returns: List of all data objects in a collection """ # coll+data name -> path - def to_absolute(row): + def to_absolute(row: List) -> str: return '{}/{}'.format(*row) q_root = genquery.row_iterator("COLL_NAME, DATA_NAME", @@ -146,7 +148,7 @@ def to_absolute(row): return map(to_absolute, itertools.chain(q_root, q_sub)) -def create(ctx, path, entire_tree=''): +def create(ctx: rule.Context, path: str, entire_tree: str = '') -> None: """Create new collection. :param ctx: Combined type of a callback and rei struct @@ -162,7 +164,7 @@ def create(ctx, path, entire_tree=''): irods_types.BytesBuf()) -def copy(ctx, path_org, path_copy, force=True): +def copy(ctx: rule.Context, path_org: str, path_copy: str, force: bool = True) -> None: """Copy a collection. :param ctx: Combined type of a callback and rei struct @@ -197,7 +199,7 @@ def copy(ctx, path_org, path_copy, force=True): msi.touch(ctx, json.dumps(json_inp)) -def move(ctx, path_org, path_move, force=True): +def move(ctx: rule.Context, path_org: str, path_move: str, force: bool = True) -> None: """Move a collection. :param ctx: Combined type of a callback and rei struct @@ -215,7 +217,7 @@ def move(ctx, path_org, path_move, force=True): irods_types.BytesBuf()) -def remove(ctx, path): +def remove(ctx: rule.Context, path: str) -> None: """Delete a collection. :param ctx: Combined type of a callback and rei struct @@ -230,7 +232,7 @@ def remove(ctx, path): irods_types.BytesBuf()) -def rename(ctx, path_org, path_target): +def rename(ctx: rule.Context, path_org: str, path_target: str) -> None: """Rename collection from path_org to path_target. :param ctx: Combined type of a callback and rei struct @@ -247,10 +249,10 @@ def rename(ctx, path_org, path_target): irods_types.BytesBuf()) -def id_from_name(ctx, coll_name): +def id_from_name(ctx: rule.Context, coll_name: str) -> str: """Get collection id from collection name. - :param ctx: Combined type of a callback and rei struct + :param ctx: Combined type of a callback and rei struct :param coll_name: Collection name :returns: Collection id @@ -258,7 +260,7 @@ def id_from_name(ctx, coll_name): return genquery.Query(ctx, "COLL_ID", "COLL_NAME = '{}'".format(coll_name)).first() -def name_from_id(ctx, coll_id): +def name_from_id(ctx: rule.Context, coll_id: str) -> str: """Get collection name from collection id. :param ctx: Combined type of a callback and rei struct diff --git a/util/config.py b/util/config.py index 762b94366..f49f2403c 100644 --- a/util/config.py +++ b/util/config.py @@ -3,6 +3,8 @@ __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import List + # Config class {{{ @@ -22,16 +24,16 @@ class Config: y = config.bar # AttributeError """ - def __init__(self, **kwargs): + def __init__(self, **kwargs: int) -> None: """kwargs must contain all valid options and their default values.""" self._items = kwargs self._frozen = False - def freeze(self): + def freeze(self) -> None: """Prevent further config changes via setattr.""" self._frozen = True - def __setattr__(self, k, v): + def __setattr__(self, k: str, v: int) -> None: if k.startswith('_'): return super().__setattr__(k, v) if self._frozen: @@ -43,7 +45,7 @@ def __setattr__(self, k, v): # Set as config option. 
self._items[k] = v - def __getattr__(self, k): + def __getattr__(self, k: str) -> str | int | bool | List: if k.startswith('_'): return super().__getattr__(k) try: @@ -53,10 +55,10 @@ def __getattr__(self, k): raise AttributeError('Config item <{}> does not exist'.format(k)) # Never dump config values, they may contain sensitive info. - def __str__(self): + def __str__(self) -> str: return 'Config()' - def __repr__(self): + def __repr__(self) -> str: return 'Config()' # def __repr__(self): diff --git a/util/constants.py b/util/constants.py index 0b76ebaee..9911aa0f9 100644 --- a/util/constants.py +++ b/util/constants.py @@ -104,7 +104,7 @@ class vault_package_state(Enum): DEPUBLISHED = 'DEPUBLISHED' PENDING_REPUBLICATION = 'PENDING_REPUBLICATION' - def __str__(self): + def __str__(self) -> str: return self.name @@ -135,7 +135,7 @@ class research_package_state(Enum): REJECTED = 'REJECTED' SECURED = 'SECURED' - def __str__(self): + def __str__(self) -> str: return self.name diff --git a/util/data_object.py b/util/data_object.py index 10f1bee39..d41225685 100644 --- a/util/data_object.py +++ b/util/data_object.py @@ -5,6 +5,7 @@ import binascii import json +from typing import Dict, List, Tuple import genquery import irods_types @@ -13,9 +14,10 @@ import error import msi import pathutil +import rule -def exists(ctx, path): +def exists(ctx: rule.Context, path: str) -> bool: """Check if a data object with the given path exists.""" return len(list(genquery.row_iterator( "DATA_ID", @@ -23,14 +25,14 @@ def exists(ctx, path): genquery.AS_LIST, ctx))) > 0 -def get_properties(ctx, data_id, resource): - """ Retrieves default properties of a data object from iRODS. +def get_properties(ctx: rule.Context, data_id: str, resource: str) -> Dict | None: + """Retrieves default properties of a data object from iRODS. - :param ctx: Combined type of a callback and rei struct - :param data_id: data_id of the data object - :param resource: Name of resource + :param ctx: Combined type of a callback and rei struct + :param data_id: Data ID of the data object + :param resource: Name of resource - :returns: dictionary mapping each requested property to its retrieved value, or None if not found. + :returns: Dictionary mapping each requested property to its retrieved value, or None if not found. """ # Default properties available for retrieva properties = [ @@ -56,7 +58,7 @@ def get_properties(ctx, data_id, resource): return prop_dict -def owner(ctx, path): +def owner(ctx: rule.Context, path: str) -> Tuple[str, str] | None: """Find the owner of a data object. Returns (name, zone) or None.""" owners = list(genquery.row_iterator( "DATA_OWNER_NAME, DATA_OWNER_ZONE", @@ -65,7 +67,7 @@ def owner(ctx, path): return tuple(owners[0]) if len(owners) > 0 else None -def size(ctx, path): +def size(ctx: rule.Context, path: str) -> int | None: """Get a data object's size in bytes. :param ctx: Combined type of a callback and rei struct @@ -83,8 +85,10 @@ def size(ctx, path): for row in iter: return int(row[0]) + return None + -def has_replica_with_status(ctx, path, statuses): +def has_replica_with_status(ctx: rule.Context, path: str, statuses: List) -> bool: """Check if data object has replica with specified replica statuses. :param ctx: Combined type of a callback and rei struct @@ -106,7 +110,7 @@ def has_replica_with_status(ctx, path, statuses): return False -def write(ctx, path, data): +def write(ctx: rule.Context, path: str, data: str) -> None: """Write a string to an iRODS data object. 
This will overwrite the data object if it exists. @@ -126,7 +130,7 @@ def write(ctx, path, data): msi.data_obj_close(ctx, handle, 0) -def read(ctx, path, max_size=constants.IIDATA_MAX_SLURP_SIZE): +def read(ctx: rule.Context, path: str, max_size: int = constants.IIDATA_MAX_SLURP_SIZE) -> str: """Read an entire iRODS data object into a string.""" sz = size(ctx, path) if sz is None: @@ -159,7 +163,7 @@ def read(ctx, path, max_size=constants.IIDATA_MAX_SLURP_SIZE): return output -def copy(ctx, path_org, path_copy, force=True): +def copy(ctx: rule.Context, path_org: str, path_copy: str, force: bool = True) -> None: """Copy a data object. :param ctx: Combined type of a callback and rei struct @@ -180,7 +184,7 @@ def copy(ctx, path_org, path_copy, force=True): msi.touch(ctx, json.dumps(json_inp)) -def remove(ctx, path, force=False): +def remove(ctx: rule.Context, path: str, force: bool = False) -> None: """Delete a data object. :param ctx: Combined type of a callback and rei struct @@ -195,7 +199,7 @@ def remove(ctx, path, force=False): irods_types.BytesBuf()) -def rename(ctx, path_org, path_target): +def rename(ctx: rule.Context, path_org: str, path_target: str) -> None: """Rename data object from path_org to path_target. :param ctx: Combined type of a callback and rei struct @@ -220,7 +224,7 @@ def rename(ctx, path_org, path_target): msi.touch(ctx, json.dumps(json_inp)) -def name_from_id(ctx, data_id): +def name_from_id(ctx: rule.Context, data_id: str) -> str | None: """Get data object name from data object id. :param ctx: Combined type of a callback and rei struct @@ -232,8 +236,10 @@ def name_from_id(ctx, data_id): if x is not None: return '/'.join(x) + return None + -def id_from_path(ctx, path): +def id_from_path(ctx: rule.Context, path: str) -> str: """Get data object id from data object path at its first appearance. :param ctx: Combined type of a callback and rei struct @@ -245,7 +251,7 @@ def id_from_path(ctx, path): "COLL_NAME = '%s' AND DATA_NAME = '%s'" % pathutil.chop(path)).first() -def decode_checksum(checksum): +def decode_checksum(checksum: str) -> str: """Decode data object checksum. :param checksum: Base64 encoded SHA256 checksum @@ -258,7 +264,7 @@ def decode_checksum(checksum): return binascii.hexlify(binascii.a2b_base64(checksum[5:])).decode("UTF-8") -def get_group_owners(ctx, path): +def get_group_owners(ctx: rule.Context, path: str) -> List: """Return list of groups of data object, each entry being name of the group and the zone.""" parent, basename = pathutil.chop(path) groups = list(genquery.row_iterator( diff --git a/util/error.py b/util/error.py index f4f40d981..722af12e6 100644 --- a/util/error.py +++ b/util/error.py @@ -6,7 +6,7 @@ class UUError(Exception): """Generic Python rule error.""" - def __init__(self, message): + def __init__(self, message: str) -> None: self.message = message super(UUError, self).__init__(message) diff --git a/util/group.py b/util/group.py index ee6de5c88..372549051 100644 --- a/util/group.py +++ b/util/group.py @@ -3,12 +3,16 @@ __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import List, TYPE_CHECKING + import genquery import user +if TYPE_CHECKING: + import rule -def exists(ctx, grp): +def exists(ctx: 'rule.Context', grp: str) -> bool: """Check if a group with the given name exists. 
:param ctx: Combined type of a callback and rei struct @@ -20,7 +24,7 @@ def exists(ctx, grp): .format(grp)).first() is not None -def members(ctx, grp): +def members(ctx: 'rule.Context', grp: str) -> List: """Get members of a given group. :param ctx: Combined type of a callback and rei struct @@ -33,7 +37,7 @@ def members(ctx, grp): .format(grp)) -def is_member(ctx, grp, usr=None): +def is_member(ctx: 'rule.Context', grp: str, usr: str | None = None) -> bool: """Check if a group has a certain member. :param ctx: Combined type of a callback and rei struct @@ -45,7 +49,7 @@ def is_member(ctx, grp, usr=None): return user.is_member_of(ctx, grp, usr) -def get_category(ctx, grp): +def get_category(ctx: 'rule.Context', grp: str) -> str | None: """Get the category of a group. :param ctx: Combined type of a callback and rei struct diff --git a/util/irods_type_info.py b/util/irods_type_info.py index a317e8640..70bc42cf3 100644 --- a/util/irods_type_info.py +++ b/util/irods_type_info.py @@ -4,10 +4,15 @@ irods_types types. """ +__copyright__ = 'Copyright (c) 2018-2024, Utrecht University' +__license__ = 'GPLv3, see LICENSE' + +from typing import Any + import irods_types -def pyify(x): +def pyify(x: Any) -> Any: """Turn irods type into equivalent python type, if possible.""" return x._pyify() if '_pyify' in dir(x) else str(x) @@ -42,7 +47,7 @@ def pyify(x): # (add more as needed) -def col_name(i): +def col_name(i: int) -> str: return list(filter(lambda kv: kv[1] == i, cols))[0][0] diff --git a/util/jsonutil.py b/util/jsonutil.py index 93ae7729e..c44766ba3 100644 --- a/util/jsonutil.py +++ b/util/jsonutil.py @@ -5,6 +5,7 @@ import json from collections import OrderedDict +from typing import Dict import jsonavu @@ -13,16 +14,17 @@ import error import log import msi +import rule class ParseError(error.UUError): """Exception for unparsable JSON text.""" -def parse(text): +def parse(text: str) -> OrderedDict: """Parse JSON into an OrderedDict. - :param text: JSON to parse into an OrderedDict + :param text: JSON to parse into an OrderedDict :raises ParseError: JSON file format error @@ -34,7 +36,7 @@ def parse(text): raise ParseError('JSON file format error') -def dump(data, **options): +def dump(data: Dict, **options: int) -> str: """Dump an object to a JSON string.""" # json.dumps seems to not like mixed str/unicode input, so make sure # everything is of the same type first. @@ -43,17 +45,17 @@ def dump(data, **options): **({'indent': 4} if options == {} else options)) -def read(callback, path, **options): +def read(ctx: rule.Context, path: str, **options: int) -> OrderedDict: """Read an iRODS data object and parse it as JSON.""" - return parse(data_object.read(callback, path), **options) + return parse(data_object.read(ctx, path), **options) -def write(callback, path, data, **options): +def write(ctx: rule.Context, path: str, data: Dict, **options: int) -> None: """Write a JSON object to an iRODS data object.""" - return data_object.write(callback, path, dump(data, **options)) + return data_object.write(ctx, path, dump(data, **options)) -def set_on_object(ctx, path, type, namespace, json_string): +def set_on_object(ctx: rule.Context, path: str, type: str, namespace: str, json_string: str) -> bool: """Write a JSON object as AVUs to an iRODS object. 
     :param ctx: Combined type of a callback and rei struct
diff --git a/util/log.py b/util/log.py
index 7679b7023..f669e588b 100644
--- a/util/log.py
+++ b/util/log.py
@@ -16,7 +16,7 @@
 import user
 
 
-def write(ctx, message, write_stdout=False):
+def write(ctx: rule.Context, message: str, write_stdout: bool = False) -> None:
     """Write a message to the log or stdout.
 
     Includes client name and originating module if writing to log.
@@ -32,7 +32,7 @@
         _write(ctx, '[{}] {}'.format(module.__name__.replace("rules_uu.", ""), message))
 
 
-def _write(ctx, message):
+def _write(ctx: rule.Context, message: str) -> None:
     """Write a message to the log, including the client name (intended for internal use).
 
     :param ctx: Combined type of a callback and rei struct
@@ -44,7 +44,7 @@
         ctx.writeString('serverLog', message)
 
 
-def debug(ctx, message):
+def debug(ctx: rule.Context, message: str) -> None:
     """"Write a message to the log, if in a development environment.
 
     :param ctx: Combined type of a callback and rei struct
diff --git a/util/misc.py b/util/misc.py
index 005a45f3a..4e8205047 100644
--- a/util/misc.py
+++ b/util/misc.py
@@ -6,19 +6,20 @@
 import math
 import time
 from collections import OrderedDict
+from typing import Dict
 
 import constants
 
 
-def check_data_package_system_avus(extracted_avus):
-    """
-    Checks whether a data package has the expected system AVUs that start with constants.UUORGMETADATAPREFIX (i.e, 'org_').
+def check_data_package_system_avus(extracted_avus: Dict) -> Dict:
+    """Checks whether a data package has the expected system AVUs that start with constants.UUORGMETADATAPREFIX (i.e., 'org_').
+
     This function compares the AVUs of the provided data package against a set of ground truth AVUs derived from
     a successfully published data package.
 
     :param extracted_avus: AVUs of the data package in AVU form
 
-    :returns: Dictionary of the results of the check
+    :returns: Dictionary of the results of the check
     """
     # Filter those starting with 'org_publication'
     extracted_avs = {}
@@ -90,7 +91,7 @@
     return results
 
 
-def last_run_time_acceptable(found, last_run, config_backoff_time):
+def last_run_time_acceptable(found: bool, last_run: int, config_backoff_time: int) -> bool:
     """Return whether the last run time is acceptable to continue with task."""
     now = int(time.time())
 
@@ -102,7 +103,7 @@
     return True
 
 
-def human_readable_size(size_bytes):
+def human_readable_size(size_bytes: int) -> str:
     if size_bytes == 0:
         return "0 B"
 
@@ -113,7 +114,7 @@
     return '{} {}'.format(s, size_name[i])
 
 
-def remove_empty_objects(d):
+def remove_empty_objects(d: Dict) -> Dict:
     """Remove empty objects (None, '', {}, []) from OrderedDict."""
     if isinstance(d, dict):
         # Create OrderedDict to maintain order.
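For reference, a minimal usage sketch of the remove_empty_objects helper annotated above; the metadata fields are hypothetical, and the expected result follows from its docstring (None, '', {} and [] values are dropped):

    from collections import OrderedDict

    import misc

    metadata = OrderedDict([
        ('Title', 'Example dataset'),  # kept: non-empty value
        ('Description', ''),           # dropped: empty string
        ('Contributors', []),          # dropped: empty list
        ('License', None),             # dropped: None
    ])

    # Expected: OrderedDict([('Title', 'Example dataset')])
    print(misc.remove_empty_objects(metadata))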
diff --git a/util/msi.py b/util/msi.py index 7835fd34d..03dc0491d 100644 --- a/util/msi.py +++ b/util/msi.py @@ -8,15 +8,19 @@ __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import Callable, Tuple, TYPE_CHECKING + import irods_types import error +if TYPE_CHECKING: + import rule class Error(error.UUError): """Error for microservice failure.""" - def __init__(self, message, msi_status, msi_code, msi_args, src_exception): + def __init__(self, message: str, msi_status: str, msi_code: str, msi_args: str, src_exception: str) -> None: super().__init__(message) # Store msi result, if any. # These may be None when an msi aborts in an abnormal way. @@ -25,7 +29,7 @@ def __init__(self, message, msi_status, msi_code, msi_args, src_exception): self.msi_args = msi_args self.src_exception = src_exception - def __str__(self): + def __str__(self) -> str: if self.msi_status is not None: return '{}: error code {}'.format(self.message, self.msi_code) elif self.src_exception is not None: @@ -36,13 +40,13 @@ def __str__(self): # Machinery for wrapping microservices and creating microservice-specific exceptions. {{{ -def make(name, error_text): +def make(name: str, error_text: str) -> Tuple[Callable, type]: """Create msi wrapper function and exception type as a tuple (see functions below).""" e = _make_exception(name, error_text) return (_wrap('msi' + name, e), e) -def _run(msi, exception, *args): +def _run(msi: str, exception: str, *args: str) -> str: """Run an MSI such that it throws an MSI-specific exception on failure.""" try: ret = msi(*args) @@ -60,7 +64,7 @@ def _run(msi, exception, *args): return ret -def _wrap(msi, exception): +def _wrap(msi: str, exception: str) -> Callable: """Wrap an MSI such that it throws an MSI-specific exception on failure. The arguments to the wrapper are the same as that of the msi, only with @@ -77,7 +81,7 @@ def _wrap(msi, exception): return lambda callback, *args: _run(getattr(callback, msi), exception, *args) -def _make_exception(name, message): +def _make_exception(name: str, message: str) -> type: """Create a msi Error subtype for a specific microservice.""" t = type('{}Error'.format(name), (Error,), {}) t.__init__ = lambda self, status, code, args, e = None: \ @@ -151,6 +155,6 @@ def _make_exception(name, message): obj_stat, ObjStatError = make('ObjStat', 'Could not get the stat of data object or collection') -def kvpair(ctx, k, v): +def kvpair(ctx: 'rule.Context', k: str, v: str) -> str: """Create a keyvalpair object, needed by certain msis.""" return string_2_key_val_pair(ctx, '{}={}'.format(k, v), irods_types.BytesBuf())['arguments'][1] diff --git a/util/pathutil.py b/util/pathutil.py index 5b2ab2fda..0e76bf0d5 100644 --- a/util/pathutil.py +++ b/util/pathutil.py @@ -6,7 +6,9 @@ __license__ = 'GPLv3, see LICENSE' import re +from collections import namedtuple from enum import Enum +from typing import List, Tuple class Space(Enum): @@ -20,7 +22,7 @@ class Space(Enum): INTAKE = 5 DEPOSIT = 6 - def __repr__(self): + def __repr__(self) -> str: return 'Space.' + self.name @@ -28,14 +30,14 @@ class ObjectType(Enum): COLL = 0 DATA = 1 - def __repr__(self): + def __repr__(self) -> str: return 'ObjectType.' + self.name - def __str__(self): + def __str__(self) -> str: return '-d' if self is ObjectType.DATA else '-C' -def chop(path): +def chop(path: str) -> Tuple[str, str]: """Split off the rightmost path component of a path. 
/a/b/c -> (/a/b, c) @@ -53,22 +55,22 @@ def chop(path): return '/'.join(x[:-1]), x[-1] -def dirname(path): +def dirname(path: str) -> str: """Return the dirname of a path.""" return chop(path)[0] # chops last component off -def basename(path): +def basename(path: str) -> str: """Return basename of a path.""" return chop(path)[1] # chops everything *but* the last component -def chopext(path): +def chopext(path: str) -> List[str]: """Return the extension of a path.""" return path.rsplit('.', 1) -def info(path): +def info(path: str) -> Tuple[Space, str, str, str]: """Parse a path into a (Space, zone, group, subpath) tuple. Synopsis: space, zone, group, subpath = pathutil.info(path) @@ -94,23 +96,21 @@ def info(path): :returns: Tuple with space, zone, group and subpath """ # Turn empty match groups into empty strings. - def f(x): + def f(x: str) -> str: return '' if x is None else x - def g(m, i): + def g(m: re.Match, i: int) -> str: return '' if i > len(m.groups()) else f(m.group(i)) - def result(s, m): + def result(s: Space, m: re.Match) -> Tuple[Space, str, str, str]: return (s, g(m, 1), g(m, 2), g(m, 3)) # Try a pattern and report success if it matches. - def test(r, space): + def test(r: str, space: Space) -> Tuple[Space, str, str, str] | None: m = re.match(r, path) return m and result(space, m) - from collections import namedtuple - - return (namedtuple('PathInfo', 'space zone group subpath'.split()) + return (namedtuple('PathInfo', ['space', 'zone', 'group', 'subpath']) # type: ignore[call-arg] (*test('^/([^/]+)/home/(vault-[^/]+)(?:/(.+))?$', Space.VAULT) or test('^/([^/]+)/home/(research-[^/]+)(?:/(.+))?$', Space.RESEARCH) or test('^/([^/]+)/home/(deposit-[^/]+)(?:/(.+))?$', Space.DEPOSIT) diff --git a/util/policy.py b/util/policy.py index edf3523ac..149b6b863 100644 --- a/util/policy.py +++ b/util/policy.py @@ -3,6 +3,8 @@ __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import Callable, Tuple + import api import log import rule @@ -14,10 +16,10 @@ class Succeed: Evaluates to True in boolean context. """ - def __str__(self): + def __str__(self) -> str: return 'Action permitted' - def __bool__(self): + def __bool__(self) -> bool: return True __nonzero__ = __bool__ @@ -31,13 +33,13 @@ class Fail: Evaluates to False in boolean context. """ - def __init__(self, reason): + def __init__(self, reason: str) -> None: self.reason = reason - def __str__(self): + def __str__(self) -> str: return 'Action not permitted: ' + self.reason - def __bool__(self): + def __bool__(self) -> bool: return False __nonzero__ = __bool__ @@ -47,14 +49,14 @@ def __bool__(self): succeed = Succeed -def all(*x): +def all(*x: Tuple[Succeed | Fail]) -> Succeed | Fail: for i in x: if not i: return i return succeed() -def require(): +def require() -> Callable: """Turn a function into a PEP rule that fails unless policy.succeed() is returned. The function must explicitly return policy.succeed() or .fail('reason') as a result. @@ -62,9 +64,9 @@ def require(): :returns: Decorator to turn a function into a PEP rule that fails unless policy.succeed() is returned """ - def deco(f): + def deco(f: Callable) -> Callable: @rule.make(outputs=[]) - def r(ctx, *args): + def r(ctx: rule.Context, *args: str) -> None: """Execute a function as a PEP rule. 
:param ctx: Combined type of a callback and rei struct diff --git a/util/resource.py b/util/resource.py index 66ed0183d..982c59d0a 100644 --- a/util/resource.py +++ b/util/resource.py @@ -3,17 +3,21 @@ __copyright__ = 'Copyright (c) 2019-2024, Utrecht University' __license__ = 'GPLv3, see LICENSE' +from typing import List, TYPE_CHECKING + import genquery +if TYPE_CHECKING: + import rule -def exists(ctx, name): +def exists(ctx: 'rule.Context', name: str) -> bool: """Check if a resource with a given name exists.""" return len(list(genquery.row_iterator( "RESC_ID", "RESC_NAME = '{}'".format(name), genquery.AS_LIST, ctx))) > 0 -def id_from_name(ctx, resc_name): +def id_from_name(ctx: 'rule.Context', resc_name: str) -> str: """Get resource ID from resource name. :param ctx: Combined type of a callback and rei struct @@ -24,7 +28,7 @@ def id_from_name(ctx, resc_name): return genquery.Query(ctx, ["RESC_ID"], "RESC_NAME = '{}'".format(resc_name)).first() -def name_from_id(ctx, resc_id): +def name_from_id(ctx: 'rule.Context', resc_id: str) -> str: """Get resource name from resource ID. :param ctx: Combined type of a callback and rei struct @@ -35,7 +39,7 @@ def name_from_id(ctx, resc_id): return genquery.Query(ctx, ["RESC_NAME"], "RESC_ID = '{}'".format(resc_id)).first() -def get_parent_by_id(ctx, resc_id): +def get_parent_by_id(ctx: 'rule.Context', resc_id: str) -> str | None: """Get resource parent ID from resource ID :param ctx: Combined type of a callback and rei struct @@ -47,7 +51,7 @@ def get_parent_by_id(ctx, resc_id): return None if result == "" else result -def get_parent_by_name(ctx, resc_name): +def get_parent_by_name(ctx: 'rule.Context', resc_name: str) -> str | None: """Get resource parent name from resource name :param ctx: Combined type of a callback and rei struct @@ -60,7 +64,7 @@ def get_parent_by_name(ctx, resc_name): return None if parent_resource_id is None else name_from_id(ctx, parent_resource_id) -def get_children_by_id(ctx, resc_id): +def get_children_by_id(ctx: 'rule.Context', resc_id: str) -> List: """Get resource children IDs from resource ID :param ctx: Combined type of a callback and rei struct @@ -77,7 +81,7 @@ def get_children_by_id(ctx, resc_id): return [r[0] for r in result] -def get_children_by_name(ctx, resc_name): +def get_children_by_name(ctx: 'rule.Context', resc_name: str) -> List: """Get resource children names from resource name :param ctx: Combined type of a callback and rei struct @@ -90,7 +94,7 @@ def get_children_by_name(ctx, resc_name): return [name_from_id(ctx, child_id) for child_id in child_resource_ids] -def get_type_by_id(ctx, resc_id): +def get_type_by_id(ctx: 'rule.Context', resc_id: str) -> str: """Get resource type from resource ID :param ctx: Combined type of a callback and rei struct @@ -101,7 +105,7 @@ def get_type_by_id(ctx, resc_id): return genquery.Query(ctx, ["RESC_TYPE_NAME"], "RESC_ID = '{}'".format(resc_id)).first() -def get_type_by_name(ctx, resc_name): +def get_type_by_name(ctx: 'rule.Context', resc_name: str) -> str: """Get resource type from resource name :param ctx: Combined type of a callback and rei struct @@ -112,13 +116,13 @@ def get_type_by_name(ctx, resc_name): return genquery.Query(ctx, ["RESC_TYPE_NAME"], "RESC_NAME = '{}'".format(resc_name)).first() -def get_resource_names_by_type(ctx, resc_type): +def get_resource_names_by_type(ctx: 'rule.Context', resc_type: str) -> List: """Get resource names by type :param ctx: Combined type of a callback and rei struct :param resc_type: Resource type (e.g. 
"passthru" or "unixfilesystem") - :returns: List of matching resource names + :returns: List of matching resource names """ result = list(genquery.row_iterator( "RESC_NAME", @@ -127,12 +131,12 @@ def get_resource_names_by_type(ctx, resc_type): return [r[0] for r in result] -def get_all_resource_names(ctx): +def get_all_resource_names(ctx: 'rule.Context') -> List: """Get a list of all resource names - :param ctx: Combined type of a callback and rei struct + :param ctx: Combined type of a callback and rei struct - :returns: list of all resource names + :returns: list of all resource names """ result = list(genquery.row_iterator( "RESC_NAME", diff --git a/util/rule.py b/util/rule.py index c553e48e6..a1ae4fcb1 100644 --- a/util/rule.py +++ b/util/rule.py @@ -5,6 +5,7 @@ import json from enum import Enum +from typing import Callable, Dict, List class Context: @@ -13,11 +14,11 @@ class Context: `Context` can be treated as a rule engine callback for all intents and purposes. However @rule and @api functions that need access to the rei, can do so through this object. """ - def __init__(self, callback, rei): + def __init__(self, callback: object, rei: object) -> None: self.callback = callback self.rei = rei - def __getattr__(self, name): + def __getattr__(self, name: str) -> object: """Allow accessing the callback directly.""" return getattr(self.callback, name) @@ -29,7 +30,7 @@ class Output(Enum): STDOUT_BIN = 2 # write to stdout, without a trailing newline -def make(inputs=None, outputs=None, transform=lambda x: x, handler=Output.STORE): +def make(inputs: List | None = None, outputs: List | None = None, transform: Callable = lambda x: x, handler: Output = Output.STORE) -> Callable: """Create a rule (with iRODS calling conventions) from a Python function. :param inputs: Optional list of rule_args indices to influence how parameters are passed to the function. @@ -75,7 +76,7 @@ def foo(rule_args, callback, rei): :returns: Decorator to create a rule from a Python function """ - def encode_val(v): + def encode_val(v: str | int | List | Dict) -> str: """Encode a value such that it can be safely transported in rule_args, as output.""" if type(v) is str: return v @@ -84,8 +85,8 @@ def encode_val(v): # note: the result of encoding e.g. int(5) should be equal to str(int(5)). return json.dumps(v) - def deco(f): - def r(rule_args, callback, rei): + def deco(f: Callable) -> Callable: + def r(rule_args: List, callback: object, rei: object) -> None: a = rule_args if inputs is None else [rule_args[i] for i in inputs] result = f(Context(callback, rei), *a) diff --git a/util/spool.py b/util/spool.py index d23ede0ab..59dd41a3c 100644 --- a/util/spool.py +++ b/util/spool.py @@ -11,6 +11,7 @@ """ import os +from typing import Iterable import persistqueue import persistqueue.serializers.json @@ -18,11 +19,11 @@ import constants -def get_spool_data(process): +def get_spool_data(process: str) -> object | None: """Retrieves one data object for a given batch process for processing. This function is non-blocking. - :param process: Spool process name (see util.constants for defined names) + :param process: Spool process name (see util.constants for defined names) :returns: Spool data object, or None if there is no spool data for this process """ @@ -38,12 +39,12 @@ def get_spool_data(process): return result -def put_spool_data(process, data_list): +def put_spool_data(process: str, data_list: Iterable) -> None: """Stores data structures in the spooling subsystem for batch processing. 
-    :param process: Spool process name (see util.constants for defined names)
-    :param data_list: List (or other iterable) of arbitrary serializable data objects to store
-                      in the spooling system
+    :param process: Spool process name (see util.constants for defined names)
+    :param data_list: List (or other iterable) of arbitrary serializable data objects to store
+                      in the spooling system
     """
     _ensure_spool_process_initialized(process)
     q = _get_spool_queue(process)
@@ -51,43 +52,43 @@
         q.put(data)
 
 
-def has_spool_data(process):
-    """ Checks whether there any data objects in the spool system for a given process
+def has_spool_data(process: str) -> bool:
+    """Checks whether there are any data objects in the spool system for a given process.
 
-    :param process: Spool process name (see util.constants for defined names)
+    :param process: Spool process name (see util.constants for defined names)
 
-    :returns: Boolean value that represents whether there is any spool data
-              present for this process
+    :returns: Boolean value that represents whether there is any spool data
+              present for this process
     """
     return num_spool_data(process) > 0
 
 
-def num_spool_data(process):
-    """ Returns the number of items in the spool system for a given process
+def num_spool_data(process: str) -> int:
+    """Returns the number of items in the spool system for a given process.
 
-    :param process: Spool process name (see util.constants for defined names)
+    :param process: Spool process name (see util.constants for defined names)
 
-    :returns: The number of data items in the spool system for this process
+    :returns: The number of data items in the spool system for this process
     """
     _ensure_spool_process_initialized(process)
    return _get_spool_queue(process).qsize()
 
 
-def _get_spool_directory(process):
+def _get_spool_directory(process: str) -> str:
     if process in constants.SPOOL_PROCESSES:
         return os.path.join(constants.SPOOL_MAIN_DIRECTORY, process, "spool")
     else:
         raise Exception("Spool process {} not found.".format(process))
 
 
-def _get_temp_directory(process):
+def _get_temp_directory(process: str) -> str:
     if process in constants.SPOOL_PROCESSES:
         return os.path.join(constants.SPOOL_MAIN_DIRECTORY, process, "tmp")
     else:
         raise Exception("Spool process {} not found.".format(process))
 
 
-def _get_spool_queue(process):
+def _get_spool_queue(process: str) -> persistqueue.Queue:
     directory = _get_spool_directory(process)
     # JSON serialization is used to make it easier to examine spooled objects manually
     return persistqueue.Queue(directory,
@@ -96,7 +97,7 @@
                               chunksize=1)
 
 
-def _ensure_spool_process_initialized(process):
+def _ensure_spool_process_initialized(process: str) -> None:
     if process not in constants.SPOOL_PROCESSES:
         raise Exception("Spool process {} not found.".format(process))
 
diff --git a/util/user.py b/util/user.py
index 887cc96e3..6ca228c3f 100644
--- a/util/user.py
+++ b/util/user.py
@@ -5,39 +5,42 @@
 
 import subprocess
 from collections import namedtuple
+from typing import TYPE_CHECKING
 
 import genquery
 import session_vars
 
 import log
 
+if TYPE_CHECKING:
+    import rule
 
 # User is a tuple consisting of a name and a zone, which stringifies into 'user#zone'.
User = namedtuple('User', ['name', 'zone']) User.__str__ = lambda self: '{}#{}'.format(*self) -def user_and_zone(ctx): +def user_and_zone(ctx: 'rule.Context') -> User: """Obtain client name and zone.""" client = session_vars.get_map(ctx.rei)['client_user'] return User(client['user_name'], client['irods_zone']) -def full_name(ctx): +def full_name(ctx: 'rule.Context') -> str: """Obtain client name and zone, formatted as a 'x#y' string.""" return str(user_and_zone(ctx)) -def name(ctx): +def name(ctx: 'rule.Context') -> str: """Get the name of the client user.""" return session_vars.get_map(ctx.rei)['client_user']['user_name'] -def zone(ctx): +def zone(ctx: 'rule.Context') -> str: """Get the zone of the client user.""" return session_vars.get_map(ctx.rei)['client_user']['irods_zone'] -def from_str(ctx, s): +def from_str(ctx: 'rule.Context', s: str) -> User: """Create a (user,zone) tuple from a user[#zone] string. If no zone is present in the string, the client's zone is used. @@ -55,7 +58,7 @@ def from_str(ctx, s): return User(*parts) -def exists(ctx, user): +def exists(ctx: 'rule.Context', user: str | User) -> bool: """Check if a user ('rodsuser' or 'rodsadmin') exists. :param ctx: Combined type of a callback and rei struct @@ -69,7 +72,7 @@ def exists(ctx, user): return genquery.Query(ctx, "USER_TYPE", "USER_NAME = '{}' AND USER_ZONE = '{}'".format(*user)).first() in ["rodsuser", "rodsadmin"] -def user_type(ctx, user=None): +def user_type(ctx: 'rule.Context', user: str | User | None = None) -> str: """Return the user type ('rodsuser' or 'rodsadmin') for the given user, or the client user if no user is given. If the user does not exist, None is returned. @@ -88,12 +91,12 @@ def user_type(ctx, user=None): "USER_NAME = '{}' AND USER_ZONE = '{}'".format(*user)).first() -def is_admin(ctx, user=None): +def is_admin(ctx: 'rule.Context', user: str | User | None = None) -> bool: """Check if user is an admin.""" return user_type(ctx, user) == 'rodsadmin' -def is_member_of(ctx, group, user=None): +def is_member_of(ctx: 'rule.Context', group: str, user: str | User | None = None) -> bool: """Check if user is member of given group.""" if user is None: user = user_and_zone(ctx) @@ -105,7 +108,7 @@ def is_member_of(ctx, group, user=None): .format(*list(user) + [group])).first() is not None -def name_from_id(ctx, user_id): +def name_from_id(ctx: 'rule.Context', user_id: str) -> str: """Retrieve username from user ID.""" for row in genquery.row_iterator("USER_NAME", "USER_ID = '{}'".format(user_id), @@ -114,7 +117,7 @@ def name_from_id(ctx, user_id): return '' -def number_of_connections(ctx): +def number_of_connections(ctx: 'rule.Context') -> int: """Get number of active connections from client user.""" connections = 0 try: diff --git a/util/yoda_names.py b/util/yoda_names.py index f1f295565..ebc1e6d6e 100644 --- a/util/yoda_names.py +++ b/util/yoda_names.py @@ -6,11 +6,12 @@ import re from datetime import datetime +from typing import List from config import config -def is_valid_category(name): +def is_valid_category(name: str) -> bool: """Is this name a valid category name? :param name: name of the category @@ -20,7 +21,7 @@ def is_valid_category(name): return re.search(r"^[a-zA-Z0-9\-_]+$", name) is not None -def is_valid_subcategory(name): +def is_valid_subcategory(name: str) -> bool: """Is this name a valid subcategory name? 
:param name: name of the subcategory @@ -30,7 +31,7 @@ def is_valid_subcategory(name): return is_valid_category(name) -def is_valid_groupname(name): +def is_valid_groupname(name: str) -> bool: """Is this name a valid group name :param name: name of the group @@ -40,28 +41,27 @@ def is_valid_groupname(name): return re.search(r"^[a-zA-Z0-9\-]+$", name) is not None and len(name) < 64 -def is_email_username(name): +def is_email_username(name: str) -> bool: """Is this name a valid email username? - :param name: name of the user + :param name: name of the user - :returns: boolean value that indicates whether this name is a valid email username + :returns: boolean value that indicates whether this name is a valid email username """ return re.search(r'@.*[^\.]+\.[^\.]+$', name) is not None -def is_internal_user(username): - """Determines if a username refers to an internal user (a user in one of - the internal domains) +def is_internal_user(username: str) -> bool: + """Determines if a username refers to an internal user (a user in one of the internal domains) - :param username: name of the user + :param username: name of the user - :returns: boolean value that indicates whether this username refers to an internal user -""" + :returns: boolean value that indicates whether this username refers to an internal user + """ return _is_internal_user(username, config.external_users_domain_filter) -def _is_internal_user(username, external_domain_filter): +def _is_internal_user(username: str, external_domain_filter: List) -> bool: if '@' not in username: return True @@ -76,7 +76,7 @@ def _is_internal_user(username, external_domain_filter): return False -def is_valid_expiration_date(expiration_date): +def is_valid_expiration_date(expiration_date: str) -> bool: """Validation of expiration date. :param expiration_date: String containing date that has to be validated @@ -99,7 +99,7 @@ def is_valid_expiration_date(expiration_date): return False -def is_valid_schema_id(schema_id): +def is_valid_schema_id(schema_id: str) -> bool: """Is this schema at least a correctly formatted schema-id?""" if schema_id == "": return True
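For reference, a short sketch of the yoda_names validators under their new signatures; the sample names are hypothetical, and the expected results follow from the regexes shown above:

    import yoda_names

    yoda_names.is_valid_category('research-data_2024')  # True: letters, digits, '-' and '_' are allowed
    yoda_names.is_valid_category('research data')       # False: spaces are rejected
    yoda_names.is_valid_groupname('research-lab')       # True: matches the group name pattern
    yoda_names.is_valid_groupname('research_lab')       # False: '_' is not allowed in group names
    yoda_names.is_email_username('a.smith@uu.nl')       # True: '@' followed by a dotted domain
    yoda_names.is_email_username('asmith')              # False: no domain part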