YDA-5512: Remove suffixes in import csv

UtrechtUniversity · Jan 17, 2024 · f8ee2dc · f8ee2dc
1 parent 8bd0e8a
commit f8ee2dc
Show file tree

Hide file tree

Showing 10 changed files with 201 additions and 62 deletions.
diff --git a/groups.py b/groups.py
@@ -473,7 +473,7 @@ def api_group_process_csv(ctx, csv_header_and_data, allow_update, delete_users):
     :param ctx:                 Combined type of a ctx and rei struct
     :param csv_header_and_data: CSV data holding a head conform description and the actual row data
     :param allow_update:        Allow updates in groups
-    :param delete_users:         Allow for deleting of users from groups
+    :param delete_users:        Allow for deleting of users from groups
 
     :returns: Dict containing status, error(s) and the resulting group definitions so the frontend can present the results
 
@@ -501,7 +501,7 @@ def api_group_process_csv(ctx, csv_header_and_data, allow_update, delete_users):
 
 
 def parse_data(ctx, csv_header_and_data):
-    """Process contents of csv data consisting of header and 1 row of data.
+    """Process contents of csv data consisting of header and rows of data.
 
     :param ctx:                 Combined type of a ctx and rei struct
     :param csv_header_and_data: CSV data holding a head conform description and the actual row data
@@ -514,24 +514,30 @@ def parse_data(ctx, csv_header_and_data):
     header = csv_lines[0]
     import_lines = csv_lines[1:]
 
-    # List of dicts each containing label / value pairs.
+    # List of dicts each containing label / list of values pairs.
     lines = []
     header_cols = header.split(',')
     for import_line in import_lines:
         data = import_line.split(',')
         if len(data) != len(header_cols):
             return [], 'Amount of header columns differs from data columns.'
+        # A kind of MultiDict
+        # each key is a header column
+        # each item is a list of items for that header column
         line_dict = {}
         for x in range(0, len(header_cols)):
             if header_cols[x] == '':
                 if x == len(header_cols) - 1:
                     return [], "Header row ends with ','"
                 else:
                     return [], 'Empty column description found in header row.'
-            try:
-                line_dict[header_cols[x]] = data[x]
-            except (KeyError, IndexError):
-                line_dict[header_cols[x]] = ''
+
+            # EVERY row should have all the headers that were listed at the top of the file
+            if header_cols[x] not in line_dict:
+                line_dict[header_cols[x]] = []
+
+            if len(data[x]):
+                line_dict[header_cols[x]].append(data[x])
 
         lines.append(line_dict)
 
@@ -561,7 +567,7 @@ def validate_data(ctx, data, allow_update):
     can_add_category = user.is_member_of(ctx, 'priv-category-add')
     is_admin = user.is_admin(ctx)
 
-    for (category, subcategory, groupname, managers, members, viewers) in data:
+    for (category, subcategory, groupname, managers, members, viewers, _, _) in data:
 
         if group.exists(ctx, groupname) and not allow_update:
             errors.append('Group "{}" already exists'.format(groupname))
@@ -589,13 +595,15 @@ def apply_data(ctx, data, allow_update, delete_users):
     :returns: Errors if found any
     """
 
-    for (category, subcategory, group_name, managers, members, viewers) in data:
+    for (category, subcategory, group_name, managers, members, viewers, schema_id, expiration_date) in data:
         new_group = False
 
         log.write(ctx, 'CSV import - Adding and updating group: {}'.format(group_name))
 
         # First create the group. Note that the actor will become a groupmanager
-        response = group_create(ctx, group_name, category, subcategory, config.default_yoda_schema, '', '', 'unspecified')
+        if not len(schema_id):
+            schema_id = config.default_yoda_schema
+        response = group_create(ctx, group_name, category, subcategory, schema_id, expiration_date, '', 'unspecified')
 
         if response:
             new_group = True
@@ -615,7 +623,7 @@ def apply_data(ctx, data, allow_update, delete_users):
                     log.write(ctx, "CSV import - Notice: added user {} to group {}".format(username, group_name))
                 else:
                     log.write(ctx, "CSV import - Warning: error occurred while attempting to add user {} to group {}".format(username, group_name))
-                    log.write(ctx, "CSV import - Status: {} , Message: {}".format(status, message))
+                    log.write(ctx, "CSV import - Status: {} , Message: {}".format(response.status, response.status_info))
             else:
                 log.write(ctx, "CSV import - Notice: user {} is already present in group {}.".format(username, group_name))
 
@@ -691,12 +699,19 @@ def parse_csv_file(ctx):
 
     # Validate header columns (should be first row in file)
 
-    # are all all required fields present?
-    for label in _get_csv_predefined_labels():
+    # Are all required fields present?
+    for label in _get_csv_required_labels():
         if label not in reader.fieldnames:
             _exit_with_error(
                 'CSV header is missing compulsory field "{}"'.format(label))
 
+    # Check that all header names are valid
+    possible_labels = _get_csv_possible_labels()
+    for label in header:
+        if label not in possible_labels:
+            _exit_with_error(
+                'CSV header contains unknown field "{}"'.format(label))
+
     # duplicate fieldnames present?
     duplicate_columns = _get_duplicate_columns(reader.fieldnames)
     if (len(duplicate_columns) > 0):
@@ -716,16 +731,25 @@ def parse_csv_file(ctx):
     return extracted_data
 
 
-def _get_csv_predefined_labels():
+def _get_csv_possible_labels():
+    return ['category', 'subcategory', 'groupname', 'viewer', 'member', 'manager', 'schema_id', 'expiration_date']
+
+
+def _get_csv_required_labels():
     return ['category', 'subcategory', 'groupname']
 
 
+def _get_csv_predefined_labels():
+    """These labels should not repeat"""
+    return ['category', 'subcategory', 'groupname', 'schema_id', 'expiration_date']
+
+
 def _get_duplicate_columns(fields_list):
     fields_seen = set()
     duplicate_fields = set()
 
     for field in fields_list:
-        if (field in _get_csv_predefined_labels() or field.startswith(("manager:", "viewer:", "member:"))):
+        if field in _get_csv_predefined_labels():
             if field in fields_seen:
                 duplicate_fields.add(field)
             else:
@@ -735,41 +759,51 @@ def _get_duplicate_columns(fields_list):
 
 
 def _process_csv_line(ctx, line):
-    """Process a line as found in the csv consisting of category, subcategory, groupname, managers, members and viewers."""
-    category = line['category'].strip().lower().replace('.', '')
-    subcategory = line['subcategory'].strip()
-    groupname = "research-" + line['groupname'].strip().lower()
+    """Process a line as found in the csv consisting of
+       category, subcategory, groupname, managers, members and viewers,
+       and optionally schema id and expiration date.
+
+    :param ctx:      Combined type of a ctx and rei struct
+    :param line:     Dictionary of labels and corresponding lists of values
+
+    :returns: Tuple of processed row data (None if error), and error message
+    """
+
+    if (not len(line['category'])
+            or not len(line['subcategory'])
+            or not len(line['groupname'])):
+        return None, "Row has a missing group name, category or subcategory"
+
+    category = line['category'][0].strip().lower().replace('.', '')
+    subcategory = line['subcategory'][0].strip()
+    groupname = "research-" + line['groupname'][0].strip().lower()
+    schema_id = line['schema_id'][0] if 'schema_id' in line and len(line['schema_id']) else ''
+    expiration_date = line['expiration_date'][0] if 'expiration_date' in line and len(line['expiration_date']) else ''
     managers = []
     members = []
     viewers = []
 
-    for column_name in line.keys():
+    for column_name, item_list in line.items():
         if column_name == '':
             return None, 'Column cannot have an empty label'
         elif column_name in _get_csv_predefined_labels():
             continue
-
-        username = line.get(column_name)
-
-        if isinstance(username, list):
-            return None, "Data is present in an unlabelled column"
-
-        username = username.strip().lower()
-
-        if username == '':    # empty value
-            continue
-        elif not yoda_names.is_email_username(username):
-            return None, 'Username "{}" is not a valid email address.'.format(
-                username)
-
-        if column_name.lower().startswith('manager:'):
-            managers.append(username)
-        elif column_name.lower().startswith('member:'):
-            members.append(username)
-        elif column_name.lower().startswith('viewer:'):
-            viewers.append(username)
-        else:
-            return None, "Column label '{}' is neither predefined nor a valid role label.".format(column_name)
+        elif column_name not in _get_csv_possible_labels():
+            return None, "Column label '{}' is not a valid label.".format(column_name)
+
+        for i in range(len(item_list)):
+            item_list[i] = item_list[i].strip().lower()
+            username = item_list[i]
+            if not yoda_names.is_email_username(username):
+                return None, 'Username "{}" is not a valid email address.'.format(
+                    username)
+
+        if column_name.lower() == 'manager':
+            managers = item_list
+        elif column_name.lower() == 'member':
+            members = item_list
+        elif column_name.lower() == 'viewer':
+            viewers = item_list
 
     if len(managers) == 0:
         return None, "Group must have a group manager"
@@ -783,7 +817,14 @@ def _process_csv_line(ctx, line):
     if not yoda_names.is_valid_groupname("research-" + groupname):
         return None, '"{}" is not a valid group name.'.format(groupname)
 
-    row_data = (category, subcategory, groupname, managers, members, viewers)
+    if not yoda_names.is_valid_schema_id(schema_id):
+        return None, '"{}" is not a valid schema id.'.format(schema_id)
+
+    if not yoda_names.is_valid_expiration_date(expiration_date):
+        return None, '"{}" is not a valid expiration date.'.format(expiration_date)
+
+    row_data = (category, subcategory, groupname, managers,
+                members, viewers, schema_id, expiration_date)
     return row_data, None
 
 

diff --git a/publication.py b/publication.py
@@ -1510,7 +1510,7 @@ def get_all_versions(ctx, path, doi):
     org_publ_info, data_packages, grouped_base_dois = vault.get_all_doi_versions(ctx, coll_parent_name)
 
     # Sort by publication date
-    sorted_publ = [sorted(x, key=lambda x:datetime.strptime(x[1], "%Y-%m-%dT%H:%M:%S.%f"), reverse=True) for x in grouped_base_dois]
+    sorted_publ = [sorted(x, key=lambda x: datetime.strptime(x[1], "%Y-%m-%dT%H:%M:%S.%f"), reverse=True) for x in grouped_base_dois]
 
     sorted_publ = [element for innerList in sorted_publ for element in innerList]
 

diff --git a/tests/features/api/api_group.feature b/tests/features/api/api_group.feature
@@ -159,7 +159,7 @@ Feature: Group API
 
     Scenario Outline: Group import CSV
         Given user technicaladmin is authenticated
-        And the Yoda API for processing csv group data API is queried
+        And the Yoda API for processing csv group data API is queried for data "csvtestgroup"
         Then the response status code is "200"
         And user "[email protected]" is now a member of the group "research-csvtestgroup"
         And user "[email protected]" is now a member of the group "research-csvtestgroup"
@@ -168,6 +168,24 @@ Feature: Group API
         And user "[email protected]" is now a member of the group "research-csvtestgroup"
 
 
+    Scenario Outline: Group import CSV schema id and expiration date
+        Given user technicaladmin is authenticated
+        And the Yoda API for processing csv group data API is queried for data "csvtestgroup1"
+        Then the response status code is "200"
+        And user "[email protected]" is now a member of the group "research-csvtestgroup1"
+
+
+    Scenario Outline: Group import CSV errors
+        Given user technicaladmin is authenticated
+        And the Yoda API for processing csv group data API is queried for data "<group_name>"
+        Then the response status code is "400"
+
+        Examples:
+            | group_name         |
+            | csv-missing-header |
+            | csv-missing-entry  |
+
+
     Scenario Outline: Group delete
         Given user <user> is authenticated
         And the group "<group_name>" exists
@@ -182,6 +200,7 @@ Feature: Group API
             | functionaladminpriv | research-api-test1-group |
             | technicaladmin      | datamanager-api-test1    |
             | technicaladmin      | research-csvtestgroup    |
+            | technicaladmin      | research-csvtestgroup1   |
             | technicaladmin      | not-a-yoda-group         |
 
 

diff --git a/tests/features/ui/ui_group.feature b/tests/features/ui/ui_group.feature
@@ -76,10 +76,12 @@ Feature: Group UI
         Given user functionaladminpriv is logged in
         And module "group_manager" is shown
         When user opens group import dialog
-        And user clicks upload button
-        And user clicks allow updates checkbox
+        And user clicks upload button and uploads csv "csv-import-test.csv"
+        Then there are 4 groups presented
+        When user clicks allow updates checkbox
         And user clicks allow deletions checkbox
-        Then process csv and check number of rows
+        Then process csv
+        And check number of rows is 4
         And click on imported row 0 and check group properties
         And find group member "[email protected]"
         And user opens group import dialog
@@ -93,6 +95,26 @@ Feature: Group UI
         And find group member "[email protected]"
 
 
+    Scenario: Imports group CSV schema id and expiration date
+        Given user functionaladminpriv is logged in
+        And module "group_manager" is shown
+        When user opens group import dialog
+        And user clicks upload button and uploads csv "csv-import-test-exp-schema.csv"
+        Then there are 2 groups presented
+        When user clicks allow updates checkbox
+        And user clicks allow deletions checkbox
+        Then process csv
+        And check number of rows is 2
+        And click on imported row 0 and check group properties
+        And find group member "[email protected]"
+        And find group member "[email protected]"
+        And user opens group import dialog
+        And click on imported row 1 and check group properties
+        And schema id is "default-3"
+        And expiration date is "2027-01-01"
+        And find group member "[email protected]"
+
+
     Scenario Outline: Group research create with default schema id
         Given user <user> is logged in
         And module "group_manager" is shown
@@ -192,6 +214,8 @@ Feature: Group UI
             | functionaladminpriv | test-automation   | csv-test          | research-csv-test-group2      |
             | functionaladminpriv | test-automation   | csv-test          | research-csv-test-group3      |
             | functionaladminpriv | test-automation   | csv-test          | research-csv-test-group4      |
+            | functionaladminpriv | test-automation   | csv-test          | research-csv-test-group5      |
+            | functionaladminpriv | test-automation   | csv-test          | research-csv-test-group6      |
             | functionaladminpriv | test-datamanager  | test-datamanager  | datamanager-test-datamanager  |
             | functionaladminpriv | test-datamanager  | test-datamanager  | research-test-datamanager     |
             | technicaladmin      | test-datamanager1 | test-datamanager1 | datamanager-test-datamanager1 |

diff --git a/tests/files/csv-import-test-exp-schema.csv b/tests/files/csv-import-test-exp-schema.csv
@@ -0,0 +1,3 @@
+category,subcategory,groupname,manager,member,schema_id,expiration_date
+test-automation,csv-test,csv-test-group5,[email protected],[email protected],,
+test-automation,csv-test,csv-test-group6,[email protected],,default-3,2027-01-01
diff --git a/tests/files/csv-import-test.csv b/tests/files/csv-import-test.csv
@@ -1,4 +1,4 @@
-category,subcategory,groupname,manager:manager,member:member1,member:member2,viewer:viewer1
+category,subcategory,groupname,manager,member,member,viewer
 test-automation,csv-test,csv-test-group1,[email protected],[email protected],[email protected],[email protected]
 test-automation,csv-test,csv-test-group2,[email protected],[email protected],[email protected],[email protected]
 test-automation,csv-test,csv-test-group3,[email protected],[email protected],[email protected],[email protected]

diff --git a/tests/step_defs/api/test_api_group.py b/tests/step_defs/api/test_api_group.py
@@ -294,13 +294,18 @@ def then_user_update_persisted(user, new_user, group_name):
     assert role == "manager"
 
 
-@given('the Yoda API for processing csv group data API is queried', target_fixture="api_response")
-def api_group_import_csv_data(user):
-    header_and_data = "category,subcategory,groupname,manager:manager,member:member1,member:member2,viewer:viewer1,member:member3\rdefault-2,default-2,csvtestgroup,[email protected],[email protected],[email protected],[email protected],[email protected]"
+@given(parsers.parse('the Yoda API for processing csv group data API is queried for data "{data_id}"'), target_fixture="api_response")
+def api_group_import_csv_data(user, data_id):
+    headers_and_data = {
+        "csvtestgroup": "category,subcategory,groupname,manager,member,member,viewer,member\rdefault-2,default-2,csvtestgroup,[email protected],[email protected],[email protected],[email protected],[email protected]",
+        "csvtestgroup1": "category,subcategory,groupname,manager,expiration_date,schema_id\rdefault-2,default-2,csvtestgroup1,[email protected],2030-01-01,default-2",
+        "csv-missing-header": "category,,groupname,manager,expiration_date,schema_id\rdefault-2,default-2,csvtestgroup1,[email protected]",
+        "csv-missing-entry": "category,subcategory,groupname,manager,expiration_date,schema_id\rdefault-2,,csvtestgroup1,[email protected]",
+    }
     return api_request(
         user,
         "group_process_csv",
-        {"csv_header_and_data": header_and_data,
+        {"csv_header_and_data": headers_and_data[data_id],
          "allow_update": True,
          "delete_users": True}
     )