Skip to content

Commit

Permalink
YDA-5506 Extract utility functions for Yoda names
Browse files Browse the repository at this point in the history
Move utility functions for names of Yoda entities (e.g. categories, user
names, etc) to utils and add unit tests.

This also standardizes the function for determining whether a user is
internal or external between the group manager and the statistics
module.
  • Loading branch information
stsnel committed Oct 19, 2023
1 parent 840c43c commit ace5ad9
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 58 deletions.
36 changes: 5 additions & 31 deletions groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,18 +742,6 @@ def _get_duplicate_columns(fields_list):

def _process_csv_line(ctx, line):
"""Process a line as found in the csv consisting of category, subcategory, groupname, managers, members and viewers."""
def is_email(username):
"""Is this email a valid email?"""
return re.search(r'@.*[^\.]+\.[^\.]+$', username) is not None

def is_valid_category(name):
"""Is this name a valid (sub)category name?"""
return re.search(r"^[a-zA-Z0-9\-_]+$", name) is not None

def is_valid_groupname(name):
"""Is this name a valid group name (prefix such as "research-" can be omitted"""
return re.search(r"^[a-zA-Z0-9\-]+$", name) is not None

category = line['category'].strip().lower().replace('.', '')
subcategory = line['subcategory'].strip()
groupname = "research-" + line['groupname'].strip().lower()
Expand All @@ -776,7 +764,7 @@ def is_valid_groupname(name):

if username == '': # empty value
continue
elif not is_email(username):
elif not yoda_names.is_email_username(username):
return None, 'Username "{}" is not a valid email address.'.format(
username)
# elif not is_valid_domain(username.split('@')[1]):
Expand All @@ -794,13 +782,13 @@ def is_valid_groupname(name):
if len(managers) == 0:
return None, "Group must have a group manager"

if not is_valid_category(category):
if not yoda_names.is_valid_category(category):
return None, '"{}" is not a valid category name.'.format(category)

if not is_valid_category(subcategory):
if not yoda_names.is_valid_subcategory(subcategory):
return None, '"{}" is not a valid subcategory name.'.format(subcategory)

if not is_valid_groupname(groupname):
if not yoda_names.is_valid_groupname(groupname):
return None, '"{}" is not a valid group name.'.format(groupname)

row_data = (category, subcategory, groupname, managers, members, viewers)
Expand Down Expand Up @@ -978,20 +966,6 @@ def rule_group_remove_external_user(rule_args, ctx, rei):
log.write(ctx, removeExternalUser(ctx, rule_args[0], rule_args[1]))


def is_internal_user(username):
for domain in config.external_users_domain_filter:
parts = domain.split('.')
if parts[0] == '*':
# Wildcard - search including subdomains
domain_pattern = "\@([0-9a-z]*\.){0,2}" + parts[-2] + "\." + parts[-1]
else:
# No wildcard - search for exact match
domain_pattern = "@{}$".format(domain)
if re.search(domain_pattern, username) is not None:
return True
return False


@rule.make(inputs=[0], outputs=[1])
def rule_group_check_external_user(ctx, username):
"""Check that a user is external.
Expand All @@ -1001,7 +975,7 @@ def rule_group_check_external_user(ctx, username):
:returns: String indicating if user is external ('1': yes, '0': no)
"""
if is_internal_user(username):
if yoda_names.is_internal_user(username):
return '0'
return '1'

Expand Down
28 changes: 2 additions & 26 deletions resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
__copyright__ = 'Copyright (c) 2018-2023, Utrecht University'
__license__ = 'GPLv3, see LICENSE'

import re
from datetime import datetime

import genquery
Expand Down Expand Up @@ -267,34 +266,11 @@ def api_resource_category_stats(ctx):

cat_members['YODA_INSTANCE_TOTAL'] = list(set(members_total))

def is_internal_user(username):
if '@' not in username:
return (username != 'anonymous')
for domain in config.external_users_domain_filter:
parts = domain.split('.')
if parts[0] == '*':
# Wildcard - search including subdomains
domain_pattern = "\@([0-9a-z]*\.){0,2}" + parts[-2] + "\." + parts[-1]
else:
# No wildcard - search for exact match
domain_pattern = "@{}$".format(domain)
if re.search(domain_pattern, username) is not None:
return True
return False

def count_externals(members):
count = 0
for mb in members:
if not is_internal_user(mb):
count += 1
return count
return len([member for member in members if not yoda_names.is_internal_user(member)])

def count_internals(members):
count = 0
for mb in members:
if is_internal_user(mb):
count += 1
return count
return len([member for member in members if yoda_names.is_internal_user(member)])

for category in categories:
storage_humanized = {}
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ strictness=short
docstring_style=sphinx
max-line-length=127
exclude=__init__.py,tools,tests/env/
application-import-names=avu_json,conftest,util,api,config,constants,data_access_token,datacite,datarequest,data_object,epic,error,folder,groups,intake,intake_dataset,intake_lock,intake_scan,intake_utils,intake_vault,json_datacite,json_landing_page,jsonutil,log,mail,meta,meta_form,msi,notifications,schema,schema_transformation,schema_transformations,settings,pathutil,provenance,policies_intake,policies_datamanager,policies_datapackage_status,policies_folder_status,policies_datarequest_status,publication,query,replication,revisions,rule,user,vault,sram,arb_data_manager,cached_data_manager,resource
application-import-names=avu_json,conftest,util,api,config,constants,data_access_token,datacite,datarequest,data_object,epic,error,folder,groups,intake,intake_dataset,intake_lock,intake_scan,intake_utils,intake_vault,json_datacite,json_landing_page,jsonutil,log,mail,meta,meta_form,msi,notifications,schema,schema_transformation,schema_transformations,settings,pathutil,provenance,policies_intake,policies_datamanager,policies_datapackage_status,policies_folder_status,policies_datarequest_status,publication,query,replication,revisions,rule,user,vault,sram,arb_data_manager,cached_data_manager,resource,yoda_names
52 changes: 52 additions & 0 deletions unit-tests/test_util_yoda_names.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# -*- coding: utf-8 -*-
"""Unit tests for the yoda_names utils functions"""

__copyright__ = 'Copyright (c) 2023, Utrecht University'
__license__ = 'GPLv3, see LICENSE'

import sys
from unittest import TestCase

sys.path.append('../util')

from yoda_names import _is_internal_user, is_email_username, is_valid_category, is_valid_groupname, is_valid_subcategory


class UtilYodaNamesTest(TestCase):

def test_is_valid_category(self):
self.assertEquals(is_valid_category(""), False)
self.assertEquals(is_valid_category("foo"), True)
self.assertEquals(is_valid_category("foo123"), True)
self.assertEquals(is_valid_category("foo-bar"), True)
self.assertEquals(is_valid_category("foo_bar"), True)

def test_is_valid_subcategory(self):
self.assertEquals(is_valid_subcategory(""), False)
self.assertEquals(is_valid_subcategory("foo"), True)
self.assertEquals(is_valid_subcategory("foo123"), True)
self.assertEquals(is_valid_subcategory("foo-bar"), True)
self.assertEquals(is_valid_subcategory("foo_bar"), True)

def test_is_valid_groupname(self):
self.assertEquals(is_valid_groupname(""), False)
self.assertEquals(is_valid_groupname("foo"), True)
self.assertEquals(is_valid_groupname("foo123"), True)
self.assertEquals(is_valid_groupname("foo-bar"), True)
self.assertEquals(is_valid_groupname("foo_bar"), False)

def test_is_email_username(self):
self.assertEquals(is_email_username("peter"), False)
self.assertEquals(is_email_username("[email protected]"), True)

def test_is_internal_user(self):
self.assertEquals(_is_internal_user("peter", ["uu.nl"]), False)
self.assertEquals(_is_internal_user("[email protected]", ["uu.nl"]), True)
self.assertEquals(_is_internal_user("[email protected]", ["uu.nl"]), False)
self.assertEquals(_is_internal_user("[email protected]", ["uu.nl"]), False)
self.assertEquals(_is_internal_user("[email protected]", ["buu.nl"]), False)
self.assertEquals(_is_internal_user("[email protected]", ["*.uu.nl"]), True)
self.assertEquals(_is_internal_user("[email protected]", ["*.uu.nl"]), False)
self.assertEquals(_is_internal_user("[email protected]", ["*.uu.nl"]), False)
self.assertEquals(_is_internal_user("[email protected]", ["*.uu.nl"]), True)
self.assertEquals(_is_internal_user("[email protected]", ["*.cs.uu.nl"]), True)
2 changes: 2 additions & 0 deletions unit-tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@

from test_intake import IntakeTest
from test_util_misc import UtilMiscTest
from test_util_yoda_names import UtilYodaNamesTest


def suite():
test_suite = TestSuite()
test_suite.addTest(makeSuite(IntakeTest))
test_suite.addTest(makeSuite(UtilMiscTest))
test_suite.addTest(makeSuite(UtilYodaNamesTest))
return test_suite
1 change: 1 addition & 0 deletions util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import resource
import arb_data_manager
import cached_data_manager
import yoda_names

# Config items can be accessed directly as 'config.foo' by any module
# that imports * from util.
Expand Down

0 comments on commit ace5ad9

Please sign in to comment.