UtrechtUniversity · lwesterhof · Dec 11, 2024 · Nov 19, 2024 · Nov 19, 2024 · Nov 19, 2024
diff --git a/.github/workflows/api-and-integration-tests.yml b/.github/workflows/api-and-integration-tests.yml
@@ -114,7 +114,7 @@ jobs:
         cd tests
         nohup bash -c 'while true ; do sleep 5 ;  ../yoda/docker/run-cronjob.sh copytovault >> ../copytovault.log 2>&1 ; ../yoda/docker/run-cronjob.sh publication >> ../publication.log 2>&1 ; done' &
         test -d mycache || mkdir -p mycache
-        python3 -m pytest --skip-ui --datarequest --deposit -o cache_dir=mycache --environment environments/docker.json
+        python3 -m pytest --skip-ui --deposit -o cache_dir=mycache --environment environments/docker.json
         cat ../copytovault.log
         cat ../publication.log
 

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
@@ -4,10 +4,10 @@ on: [push, pull_request]
 
 jobs:
   lint:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-24.04
     strategy:
       matrix:
-        python-version: ['3.8', '3.9', '3.10', '3.11']
+        python-version: ['3.11', '3.12']
     steps:
       - uses: actions/checkout@v4
       - name: Set up Python
@@ -19,12 +19,17 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          python -m pip install flake8==6.0.0 flake8-import-order==0.18.2 darglint==1.8.1 codespell types-requests
+          python -m pip install flake8==6.0.0 flake8-import-order==0.18.2 darglint==1.8.1 codespell
+          python -m pip install mypy types-requests types-python-dateutil types-redis
 
       - name: Lint with flake8
         run: |
           flake8 --statistics
 
+      - name: Check static typing
+        run: |
+          mypy . --explicit-package-bases
+
       - name: Check code for common misspellings
         run: |
           codespell -q 3 --skip="*.r,*.xsd,*.json" || true

diff --git a/.github/workflows/python2.yml b/.github/workflows/python2.yml
diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml
@@ -10,31 +10,29 @@ on:
 
 jobs:
   unit-tests:
-    runs-on: ubuntu-latest
+    runs-on: ubuntu-24.04
     strategy:
       matrix:
-        python-version: [2.7]
+        python-version: ['3.12']
     steps:
       - uses: actions/checkout@v4
-
       - name: Set up Python
-        # setup-python stopped supporting Python 2.7, use https://github.com/MatteoH2O1999/setup-python
-        uses: MatteoH2O1999/[email protected]
+        uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
-          allow-build: info
-          cache-build: true
+          architecture: x64
 
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           python -m pip install -r requirements.txt
-          python -m pip install coveragepy==1.6.0
+          python -m pip install coverage==7.6.7
 
       - name: Run unit tests
         run: |
           cd unit-tests
-          coverage run --omit=test_*.py,unit_tests.py --source=$(cd .. ; pwd),$(cd ../util ; pwd) -m unittest unit_tests
+          export PYTHONPATH="$(cd ../util && pwd)${PYTHONPATH:+:$PYTHONPATH}"  # prepend util dir; no trailing ':' when PYTHONPATH is unset
+          coverage run --omit=test_*.py,unit_tests.py -m unittest unit_tests
 
       - name: Report code coverage
         run: |

diff --git a/__init__.py b/__init__.py
@@ -1,7 +1,6 @@
-# -*- coding: utf-8 -*-
 """Yoda core ruleset containing iRODS and Python rules and policies useful for all Yoda environments."""
 
-__version__   = '1.10.0'
+__version__   = '2.0.0'
 __copyright__ = 'Copyright (c) 2015-2024, Utrecht University'
 __license__   = 'GPLv3, see LICENSE'
 
@@ -23,6 +22,9 @@
               + ', Jelmer Zondergeld')
 # (in alphabetical order)
 
+import sys
+sys.path.extend(['/etc/irods/rules_uu', '/etc/irods/rules_uu/util'])  # let Python find modules in these directories
+
 # Import all modules containing rules into the package namespace,
 # so that they become visible to iRODS.
 

diff --git a/admin.py b/admin.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """Functions for admin module."""
 
 __copyright__ = 'Copyright 2024, Utrecht University'
@@ -12,7 +11,7 @@
 
 
 @api.make()
-def api_admin_has_access(ctx):
+def api_admin_has_access(ctx: rule.Context) -> api.Result:
     """
     Checks if the user has admin access based on user rights or membership in admin-priv group.
 

diff --git a/browse.py b/browse.py
@@ -1,11 +1,11 @@
-# -*- coding: utf-8 -*-
 """Functions for listing collection information."""
 
 __copyright__ = 'Copyright (c) 2019-2024, Utrecht University'
 __license__   = 'GPLv3, see LICENSE'
 
 import re
 from collections import OrderedDict
+from typing import Dict
 
 import magic
 from genquery import AS_DICT, Query
@@ -19,13 +19,13 @@
 
 
 @api.make()
-def api_browse_folder(ctx,
-                      coll='/',
-                      sort_on='name',
-                      sort_order='asc',
-                      offset=0,
-                      limit=10,
-                      space=pathutil.Space.OTHER.value):
+def api_browse_folder(ctx: rule.Context,
+                      coll: str = '/',
+                      sort_on: str = 'name',
+                      sort_order: str = 'asc',
+                      offset: int = 0,
+                      limit: int = 10,
+                      space: str = pathutil.Space.OTHER.value) -> api.Result:
     """Get paginated collection contents, including size/modify date information.
 
     :param ctx:        Combined type of a callback and rei struct
@@ -38,9 +38,9 @@ def api_browse_folder(ctx,
 
     :returns: Dict with paginated collection contents
     """
-    def transform(row):
+    def transform(row: Dict) -> Dict:
         # Remove ORDER_BY etc. wrappers from column names.
-        x = {re.sub('.*\((.*)\)', '\\1', k): v for k, v in row.items()}
+        x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()}
         if 'DATA_NAME' in x and 'META_DATA_ATTR_VALUE' in x:
             return {x['DATA_NAME']: x['META_DATA_ATTR_VALUE']}
         elif 'DATA_NAME' in x:
@@ -89,11 +89,11 @@ def transform(row):
         qcoll = Query(ctx, ccols, "COLL_PARENT_NAME = '{}'".format(coll),
                       offset=offset, limit=limit, output=AS_DICT)
 
-    colls = map(transform, [c for c in list(qcoll) if _filter_vault_deposit_index(c)])
+    colls = list(map(transform, [c for c in list(qcoll) if _filter_vault_deposit_index(c)]))
 
     qdata = Query(ctx, dcols, "COLL_NAME = '{}' AND DATA_REPL_STATUS n> '0'".format(coll),
                   offset=max(0, offset - qcoll.total_rows()), limit=limit - len(colls), output=AS_DICT)
-    datas = map(transform, list(qdata))
+    datas = list(map(transform, list(qdata)))
 
     # No results at all? Make sure the collection actually exists.
     if len(colls) + len(datas) == 0 and not collection.exists(ctx, coll):
@@ -105,13 +105,13 @@ def transform(row):
 
 
 @api.make()
-def api_browse_collections(ctx,
-                           coll='/',
-                           sort_on='name',
-                           sort_order='asc',
-                           offset=0,
-                           limit=10,
-                           space=pathutil.Space.OTHER.value):
+def api_browse_collections(ctx: rule.Context,
+                           coll: str = '/',
+                           sort_on: str = 'name',
+                           sort_order: str = 'asc',
+                           offset: int = 0,
+                           limit: int = 10,
+                           space: str = pathutil.Space.OTHER.value) -> api.Result:
     """Get paginated collection contents, including size/modify date information.
 
     This function browses a folder and only looks at the collections in it. No dataobjects.
@@ -127,9 +127,9 @@ def api_browse_collections(ctx,
 
     :returns: Dict with paginated collection contents
     """
-    def transform(row):
+    def transform(row: Dict) -> Dict:
         # Remove ORDER_BY etc. wrappers from column names.
-        x = {re.sub('.*\((.*)\)', '\\1', k): v for k, v in row.items()}
+        x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()}
 
         if 'DATA_NAME' in x:
             return {'name':        x['DATA_NAME'],
@@ -173,7 +173,7 @@ def transform(row):
         qcoll = Query(ctx, ccols, "COLL_PARENT_NAME = '{}'".format(coll),
                       offset=offset, limit=limit, output=AS_DICT)
 
-    colls = map(transform, [d for d in list(qcoll) if _filter_vault_deposit_index(d)])
+    colls = list(map(transform, [d for d in list(qcoll) if _filter_vault_deposit_index(d)]))
 
     # No results at all? Make sure the collection actually exists.
     if len(colls) == 0 and not collection.exists(ctx, coll):
@@ -185,13 +185,13 @@ def transform(row):
 
 
 @api.make()
-def api_search(ctx,
-               search_string,
-               search_type='filename',
-               sort_on='name',
-               sort_order='asc',
-               offset=0,
-               limit=10):
+def api_search(ctx: rule.Context,
+               search_string: str,
+               search_type: str = 'filename',
+               sort_on: str = 'name',
+               sort_order: str = 'asc',
+               offset: int = 0,
+               limit: int = 10) -> api.Result:
     """Get paginated search results, including size/modify date/location information.
 
     :param ctx:           Combined type of a callback and rei struct
@@ -204,9 +204,9 @@ def api_search(ctx,
 
     :returns: Dict with paginated search results
     """
-    def transform(row):
+    def transform(row: Dict) -> Dict:
         # Remove ORDER_BY etc. wrappers from column names.
-        x = {re.sub('.*\((.*)\)', '\\1', k): v for k, v in row.items()}
+        x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()}
 
         if 'DATA_NAME' in x:
             _, _, path, subpath = pathutil.info(x['COLL_NAME'])
@@ -217,23 +217,24 @@ def transform(row):
                     'type':        'data',
                     'size':        int(x['DATA_SIZE']),
                     'modify_time': int(x['DATA_MODIFY_TIME'])}
-
-        if 'COLL_NAME' in x:
+        elif 'COLL_NAME' in x:
             _, _, path, subpath = pathutil.info(x['COLL_NAME'])
             if subpath != '':
                 path = path + "/" + subpath
 
             return {'name':        "/{}".format(path),
                     'type':        'coll',
                     'modify_time': int(x['COLL_MODIFY_TIME'])}
+        else:
+            return {}
 
     # Replace, %, _ and \ since iRODS does not handle those correctly.
     # HdR this can only be done in a situation where search_type is NOT status!
     # Status description must be kept in tact.
     if search_type != 'status':
         search_string = search_string.replace("\\", "\\\\")
-        search_string = search_string.replace("%", "\%")
-        search_string = search_string.replace("_", "\_")
+        search_string = search_string.replace("%", r"\%")
+        search_string = search_string.replace("_", r"\_")
 
     zone = user.zone(ctx)
 
@@ -280,13 +281,13 @@ def transform(row):
     qdata = Query(ctx, cols, where, offset=max(0, int(offset)),
                   limit=int(limit), case_sensitive=query_is_case_sensitive, output=AS_DICT)
 
-    datas = map(transform, [d for d in list(qdata) if _filter_vault_deposit_index(d)])
+    datas = list(map(transform, [d for d in list(qdata) if _filter_vault_deposit_index(d)]))
 
     return OrderedDict([('total', qdata.total_rows()),
                         ('items', datas)])
 
 
-def _filter_vault_deposit_index(row):
+def _filter_vault_deposit_index(row: Dict) -> bool:
     """This internal function filters out index collections in deposit vault collections.
        These collections are used internally by Yoda for indexing data package metadata, and
        should not be displayed.
@@ -296,14 +297,14 @@ def _filter_vault_deposit_index(row):
        :returns: boolean value that indicates whether row should be displayed
     """
     # Remove ORDER_BY etc. wrappers from column names.
-    x = {re.sub('.*\((.*)\)', '\\1', k): v for k, v in row.items()}
+    x = {re.sub(r'.*\((.*)\)', '\\1', k): v for k, v in row.items()}
     # Filter out deposit vault index collection
     return not re.match("^/[^/]+/home/vault-[^/]+/deposit-[^/]+/index$",
                         x['COLL_NAME'])
 
 
 @api.make()
-def api_load_text_obj(ctx, file_path='/'):
+def api_load_text_obj(ctx: rule.Context, file_path: str = '/') -> api.Result:
     """Retrieve a text file (as a string) in either the research, deposit, or vault space.
 
     :param ctx:       Combined type of a callback and rei struct
@@ -345,3 +346,5 @@ def api_load_text_obj(ctx, file_path='/'):
         return api.Error('large_size', 'The given text file is too large to render')
     except error.UUError:
         return api.Error('ReadError', 'Could not retrieve file')
+    except Exception:
+        return api.Error('not_valid', 'The given data object is not a text file')