From f8ee2dc7fc10e7d9afaa80913fc0d0964da469c6 Mon Sep 17 00:00:00 2001 From: claravox Date: Wed, 17 Jan 2024 13:13:01 +0100 Subject: [PATCH] YDA-5512: Remove suffixes in import csv --- groups.py | 127 ++++++++++++++------- publication.py | 2 +- tests/features/api/api_group.feature | 21 +++- tests/features/ui/ui_group.feature | 30 ++++- tests/files/csv-import-test-exp-schema.csv | 3 + tests/files/csv-import-test.csv | 2 +- tests/step_defs/api/test_api_group.py | 13 ++- tests/step_defs/ui/test_ui_group.py | 32 ++++-- util/yoda_names.py | 31 +++++ vault.py | 2 +- 10 files changed, 201 insertions(+), 62 deletions(-) create mode 100644 tests/files/csv-import-test-exp-schema.csv diff --git a/groups.py b/groups.py index a55ffec01..1e89cd273 100644 --- a/groups.py +++ b/groups.py @@ -473,7 +473,7 @@ def api_group_process_csv(ctx, csv_header_and_data, allow_update, delete_users): :param ctx: Combined type of a ctx and rei struct :param csv_header_and_data: CSV data holding a head conform description and the actual row data :param allow_update: Allow updates in groups - :param delete_users: Allow for deleting of users from groups + :param delete_users: Allow for deleting of users from groups :returns: Dict containing status, error(s) and the resulting group definitions so the frontend can present the results @@ -501,7 +501,7 @@ def api_group_process_csv(ctx, csv_header_and_data, allow_update, delete_users): def parse_data(ctx, csv_header_and_data): - """Process contents of csv data consisting of header and 1 row of data. + """Process contents of csv data consisting of header and rows of data. :param ctx: Combined type of a ctx and rei struct :param csv_header_and_data: CSV data holding a head conform description and the actual row data @@ -514,13 +514,16 @@ def parse_data(ctx, csv_header_and_data): header = csv_lines[0] import_lines = csv_lines[1:] - # List of dicts each containing label / value pairs. + # List of dicts each containing label / list of values pairs. lines = [] header_cols = header.split(',') for import_line in import_lines: data = import_line.split(',') if len(data) != len(header_cols): return [], 'Amount of header columns differs from data columns.' + # A kind of MultiDict + # each key is a header column + # each item is a list of items for that header column line_dict = {} for x in range(0, len(header_cols)): if header_cols[x] == '': @@ -528,10 +531,13 @@ def parse_data(ctx, csv_header_and_data): return [], "Header row ends with ','" else: return [], 'Empty column description found in header row.' 
- try: - line_dict[header_cols[x]] = data[x] - except (KeyError, IndexError): - line_dict[header_cols[x]] = '' + + # EVERY row should have all the headers that were listed at the top of the file + if header_cols[x] not in line_dict: + line_dict[header_cols[x]] = [] + + if len(data[x]): + line_dict[header_cols[x]].append(data[x]) lines.append(line_dict) @@ -561,7 +567,7 @@ def validate_data(ctx, data, allow_update): can_add_category = user.is_member_of(ctx, 'priv-category-add') is_admin = user.is_admin(ctx) - for (category, subcategory, groupname, managers, members, viewers) in data: + for (category, subcategory, groupname, managers, members, viewers, _, _) in data: if group.exists(ctx, groupname) and not allow_update: errors.append('Group "{}" already exists'.format(groupname)) @@ -589,13 +595,15 @@ def apply_data(ctx, data, allow_update, delete_users): :returns: Errors if found any """ - for (category, subcategory, group_name, managers, members, viewers) in data: + for (category, subcategory, group_name, managers, members, viewers, schema_id, expiration_date) in data: new_group = False log.write(ctx, 'CSV import - Adding and updating group: {}'.format(group_name)) # First create the group. Note that the actor will become a groupmanager - response = group_create(ctx, group_name, category, subcategory, config.default_yoda_schema, '', '', 'unspecified') + if not len(schema_id): + schema_id = config.default_yoda_schema + response = group_create(ctx, group_name, category, subcategory, schema_id, expiration_date, '', 'unspecified') if response: new_group = True @@ -615,7 +623,7 @@ def apply_data(ctx, data, allow_update, delete_users): log.write(ctx, "CSV import - Notice: added user {} to group {}".format(username, group_name)) else: log.write(ctx, "CSV import - Warning: error occurred while attempting to add user {} to group {}".format(username, group_name)) - log.write(ctx, "CSV import - Status: {} , Message: {}".format(status, message)) + log.write(ctx, "CSV import - Status: {} , Message: {}".format(response.status, response.status_info)) else: log.write(ctx, "CSV import - Notice: user {} is already present in group {}.".format(username, group_name)) @@ -691,12 +699,19 @@ def parse_csv_file(ctx): # Validate header columns (should be first row in file) - # are all all required fields present? - for label in _get_csv_predefined_labels(): + # Are all required fields present? + for label in _get_csv_required_labels(): if label not in reader.fieldnames: _exit_with_error( 'CSV header is missing compulsory field "{}"'.format(label)) + # Check that all header names are valid + possible_labels = _get_csv_possible_labels() + for label in header: + if label not in possible_labels: + _exit_with_error( + 'CSV header contains unknown field "{}"'.format(label)) + # duplicate fieldnames present? 
duplicate_columns = _get_duplicate_columns(reader.fieldnames) if (len(duplicate_columns) > 0): @@ -716,16 +731,25 @@ def parse_csv_file(ctx): return extracted_data -def _get_csv_predefined_labels(): +def _get_csv_possible_labels(): + return ['category', 'subcategory', 'groupname', 'viewer', 'member', 'manager', 'schema_id', 'expiration_date'] + + +def _get_csv_required_labels(): return ['category', 'subcategory', 'groupname'] +def _get_csv_predefined_labels(): + """These labels should not repeat""" + return ['category', 'subcategory', 'groupname', 'schema_id', 'expiration_date'] + + def _get_duplicate_columns(fields_list): fields_seen = set() duplicate_fields = set() for field in fields_list: - if (field in _get_csv_predefined_labels() or field.startswith(("manager:", "viewer:", "member:"))): + if field in _get_csv_predefined_labels(): if field in fields_seen: duplicate_fields.add(field) else: @@ -735,41 +759,51 @@ def _get_duplicate_columns(fields_list): def _process_csv_line(ctx, line): - """Process a line as found in the csv consisting of category, subcategory, groupname, managers, members and viewers.""" - category = line['category'].strip().lower().replace('.', '') - subcategory = line['subcategory'].strip() - groupname = "research-" + line['groupname'].strip().lower() + """Process a line as found in the csv consisting of + category, subcategory, groupname, managers, members and viewers, + and optionally schema id and expiration date. + + :param ctx: Combined type of a ctx and rei struct + :param line: Dictionary of labels and corresponding lists of values + + :returns: Tuple of processed row data (None if error), and error message + """ + + if (not len(line['category']) + or not len(line['subcategory']) + or not len(line['groupname'])): + return None, "Row has a missing group name, category or subcategory" + + category = line['category'][0].strip().lower().replace('.', '') + subcategory = line['subcategory'][0].strip() + groupname = "research-" + line['groupname'][0].strip().lower() + schema_id = line['schema_id'][0] if 'schema_id' in line and len(line['schema_id']) else '' + expiration_date = line['expiration_date'][0] if 'expiration_date' in line and len(line['expiration_date']) else '' managers = [] members = [] viewers = [] - for column_name in line.keys(): + for column_name, item_list in line.items(): if column_name == '': return None, 'Column cannot have an empty label' elif column_name in _get_csv_predefined_labels(): continue - - username = line.get(column_name) - - if isinstance(username, list): - return None, "Data is present in an unlabelled column" - - username = username.strip().lower() - - if username == '': # empty value - continue - elif not yoda_names.is_email_username(username): - return None, 'Username "{}" is not a valid email address.'.format( - username) - - if column_name.lower().startswith('manager:'): - managers.append(username) - elif column_name.lower().startswith('member:'): - members.append(username) - elif column_name.lower().startswith('viewer:'): - viewers.append(username) - else: - return None, "Column label '{}' is neither predefined nor a valid role label.".format(column_name) + elif column_name not in _get_csv_possible_labels(): + return None, "Column label '{}' is not a valid label.".format(column_name) + + for i in range(len(item_list)): + item_list[i] = item_list[i].strip().lower() + username = item_list[i] + if not yoda_names.is_email_username(username): + return None, 'Username "{}" is not a valid email address.'.format( + username) + + if 
column_name.lower() == 'manager': + managers = item_list + elif column_name.lower() == 'member': + members = item_list + elif column_name.lower() == 'viewer': + viewers = item_list if len(managers) == 0: return None, "Group must have a group manager" @@ -783,7 +817,14 @@ def _process_csv_line(ctx, line): if not yoda_names.is_valid_groupname("research-" + groupname): return None, '"{}" is not a valid group name.'.format(groupname) - row_data = (category, subcategory, groupname, managers, members, viewers) + if not yoda_names.is_valid_schema_id(schema_id): + return None, '"{}" is not a valid schema id.'.format(schema_id) + + if not yoda_names.is_valid_expiration_date(expiration_date): + return None, '"{}" is not a valid expiration date.'.format(expiration_date) + + row_data = (category, subcategory, groupname, managers, + members, viewers, schema_id, expiration_date) return row_data, None diff --git a/publication.py b/publication.py index 02fe50bd2..b5b437f40 100644 --- a/publication.py +++ b/publication.py @@ -1510,7 +1510,7 @@ def get_all_versions(ctx, path, doi): org_publ_info, data_packages, grouped_base_dois = vault.get_all_doi_versions(ctx, coll_parent_name) # Sort by publication date - sorted_publ = [sorted(x, key=lambda x:datetime.strptime(x[1], "%Y-%m-%dT%H:%M:%S.%f"), reverse=True) for x in grouped_base_dois] + sorted_publ = [sorted(x, key=lambda x: datetime.strptime(x[1], "%Y-%m-%dT%H:%M:%S.%f"), reverse=True) for x in grouped_base_dois] sorted_publ = [element for innerList in sorted_publ for element in innerList] diff --git a/tests/features/api/api_group.feature b/tests/features/api/api_group.feature index 8711c06d1..202165e66 100644 --- a/tests/features/api/api_group.feature +++ b/tests/features/api/api_group.feature @@ -159,7 +159,7 @@ Feature: Group API Scenario Outline: Group import CSV Given user technicaladmin is authenticated - And the Yoda API for processing csv group data API is queried + And the Yoda API for processing csv group data API is queried for data "csvtestgroup" Then the response status code is "200" And user "functionaladminpriv@yoda.test" is now a member of the group "research-csvtestgroup" And user "datamanager@yoda.test" is now a member of the group "research-csvtestgroup" @@ -168,6 +168,24 @@ Feature: Group API And user "researcher1@example.com" is now a member of the group "research-csvtestgroup" + Scenario Outline: Group import CSV schema id and expiration date + Given user technicaladmin is authenticated + And the Yoda API for processing csv group data API is queried for data "csvtestgroup1" + Then the response status code is "200" + And user "datamanager@yoda.test" is now a member of the group "research-csvtestgroup1" + + + Scenario Outline: Group import CSV errors + Given user technicaladmin is authenticated + And the Yoda API for processing csv group data API is queried for data "" + Then the response status code is "400" + + Examples: + | group_name | + | csv-missing-header | + | csv-missing-entry | + + Scenario Outline: Group delete Given user is authenticated And the group "" exists @@ -182,6 +200,7 @@ Feature: Group API | functionaladminpriv | research-api-test1-group | | technicaladmin | datamanager-api-test1 | | technicaladmin | research-csvtestgroup | + | technicaladmin | research-csvtestgroup1 | | technicaladmin | not-a-yoda-group | diff --git a/tests/features/ui/ui_group.feature b/tests/features/ui/ui_group.feature index 27aa552c3..45bb8bea3 100644 --- a/tests/features/ui/ui_group.feature +++ b/tests/features/ui/ui_group.feature @@ -76,10 
+76,12 @@ Feature: Group UI Given user functionaladminpriv is logged in And module "group_manager" is shown When user opens group import dialog - And user clicks upload button - And user clicks allow updates checkbox + And user clicks upload button and uploads csv "csv-import-test.csv" + Then there are 4 groups presented + When user clicks allow updates checkbox And user clicks allow deletions checkbox - Then process csv and check number of rows + Then process csv + And check number of rows is 4 And click on imported row 0 and check group properties And find group member "groupmanager@yoda.test" And user opens group import dialog @@ -93,6 +95,26 @@ Feature: Group UI And find group member "viewer@yoda.test" + Scenario: Imports group CSV schema id and expiration date + Given user functionaladminpriv is logged in + And module "group_manager" is shown + When user opens group import dialog + And user clicks upload button and uploads csv "csv-import-test-exp-schema.csv" + Then there are 2 groups presented + When user clicks allow updates checkbox + And user clicks allow deletions checkbox + Then process csv + And check number of rows is 2 + And click on imported row 0 and check group properties + And find group member "groupmanager@yoda.test" + And find group member "researcher@yoda.test" + And user opens group import dialog + And click on imported row 1 and check group properties + And schema id is "default-3" + And expiration date is "2027-01-01" + And find group member "groupmanager@yoda.test" + + Scenario Outline: Group research create with default schema id Given user is logged in And module "group_manager" is shown @@ -192,6 +214,8 @@ Feature: Group UI | functionaladminpriv | test-automation | csv-test | research-csv-test-group2 | | functionaladminpriv | test-automation | csv-test | research-csv-test-group3 | | functionaladminpriv | test-automation | csv-test | research-csv-test-group4 | + | functionaladminpriv | test-automation | csv-test | research-csv-test-group5 | + | functionaladminpriv | test-automation | csv-test | research-csv-test-group6 | | functionaladminpriv | test-datamanager | test-datamanager | datamanager-test-datamanager | | functionaladminpriv | test-datamanager | test-datamanager | research-test-datamanager | | technicaladmin | test-datamanager1 | test-datamanager1 | datamanager-test-datamanager1 | diff --git a/tests/files/csv-import-test-exp-schema.csv b/tests/files/csv-import-test-exp-schema.csv new file mode 100644 index 000000000..d9afe20db --- /dev/null +++ b/tests/files/csv-import-test-exp-schema.csv @@ -0,0 +1,3 @@ +category,subcategory,groupname,manager,member,schema_id,expiration_date +test-automation,csv-test,csv-test-group5,groupmanager@yoda.test,researcher@yoda.test,, +test-automation,csv-test,csv-test-group6,groupmanager@yoda.test,,default-3,2027-01-01 diff --git a/tests/files/csv-import-test.csv b/tests/files/csv-import-test.csv index 1c3059db7..a47f3e699 100644 --- a/tests/files/csv-import-test.csv +++ b/tests/files/csv-import-test.csv @@ -1,4 +1,4 @@ -category,subcategory,groupname,manager:manager,member:member1,member:member2,viewer:viewer1 +category,subcategory,groupname,manager,member,member,viewer test-automation,csv-test,csv-test-group1,groupmanager@yoda.test,researcher@yoda.test,datamanager@yoda.test,viewer@yoda.test test-automation,csv-test,csv-test-group2,groupmanager@yoda.test,researcher@yoda.test,datamanager@yoda.test,viewer@yoda.test 
test-automation,csv-test,csv-test-group3,groupmanager@yoda.test,researcher@yoda.test,datamanager@yoda.test,viewer@yoda.test diff --git a/tests/step_defs/api/test_api_group.py b/tests/step_defs/api/test_api_group.py index a0f47d5b9..96e0369f4 100644 --- a/tests/step_defs/api/test_api_group.py +++ b/tests/step_defs/api/test_api_group.py @@ -294,13 +294,18 @@ def then_user_update_persisted(user, new_user, group_name): assert role == "manager" -@given('the Yoda API for processing csv group data API is queried', target_fixture="api_response") -def api_group_import_csv_data(user): - header_and_data = "category,subcategory,groupname,manager:manager,member:member1,member:member2,viewer:viewer1,member:member3\rdefault-2,default-2,csvtestgroup,datamanager@yoda.test,researcher@yoda.test,functionaladminpriv@yoda.test,viewer@yoda.test,researcher1@example.com" +@given(parsers.parse('the Yoda API for processing csv group data API is queried for data "{data_id}"'), target_fixture="api_response") +def api_group_import_csv_data(user, data_id): + headers_and_data = { + "csvtestgroup": "category,subcategory,groupname,manager,member,member,viewer,member\rdefault-2,default-2,csvtestgroup,datamanager@yoda.test,researcher@yoda.test,functionaladminpriv@yoda.test,viewer@yoda.test,researcher1@example.com", + "csvtestgroup1": "category,subcategory,groupname,manager,expiration_date,schema_id\rdefault-2,default-2,csvtestgroup1,datamanager@yoda.test,2030-01-01,default-2", + "csv-missing-header": "category,,groupname,manager,expiration_date,schema_id\rdefault-2,default-2,csvtestgroup1,datamanager@yoda.test", + "csv-missing-entry": "category,subcategory,groupname,manager,expiration_date,schema_id\rdefault-2,,csvtestgroup1,datamanager@yoda.test", + } return api_request( user, "group_process_csv", - {"csv_header_and_data": header_and_data, + {"csv_header_and_data": headers_and_data[data_id], "allow_update": True, "delete_users": True} ) diff --git a/tests/step_defs/ui/test_ui_group.py b/tests/step_defs/ui/test_ui_group.py index cb79538ca..92b21ae88 100644 --- a/tests/step_defs/ui/test_ui_group.py +++ b/tests/step_defs/ui/test_ui_group.py @@ -276,16 +276,19 @@ def ui_group_click_group_import_dlg_button(browser): browser.find_by_css('.import-groups-csv').click() -@when("user clicks upload button") -def ui_group_click_upload_button(browser): +@when(parsers.parse('user clicks upload button and uploads csv "{csv_file}"')) +def ui_group_click_upload_button(browser, csv_file): cwd = os.getcwd() if os.name == 'nt': - browser.find_by_css('.csv-import-file')[0].fill("{}\\files\\csv-import-test.csv".format(cwd)) + browser.find_by_css('.csv-import-file')[0].fill("{}\\files\\{}".format(cwd, csv_file)) else: - browser.find_by_css('.csv-import-file')[0].fill("{}/files/csv-import-test.csv".format(cwd)) + browser.find_by_css('.csv-import-file')[0].fill("{}/files/{}".format(cwd, csv_file)) + +@then(parsers.parse("there are {num_groups} groups presented")) +def ui_group_csv_groups_presented(browser, num_groups): # File contains 4 groups - check the number of rows presented. - assert len(browser.find_by_css('.import-groupname')) == 4 + assert len(browser.find_by_css('.import-groupname')) == int(num_groups) @when("user clicks allow updates checkbox") @@ -308,7 +311,7 @@ def ui_group_confirms_group_removal(browser): browser.find_by_id('f-group-delete').click() -@then("process csv and check number of rows") +@then("process csv") def ui_group_process_csv(browser): # Start processing the uploaded file. 
browser.find_by_css('.process-csv').click() @@ -316,11 +319,14 @@ def ui_group_process_csv(browser): # Take enough time so processing is complete. time.sleep(5) + +@then(parsers.parse("check number of rows is {num_rows}")) +def ui_group_csv_check_rows(browser, num_rows): # Check whether 4 checkmarks are present so each row was processed. - assert len(browser.find_by_css('.import-groupname-done')) == 4 + assert len(browser.find_by_css('.import-groupname-done')) == int(num_rows) # Check whether each row was processed correctly. - assert len(browser.find_by_css('.import-csv-group-ok')) == 4 + assert len(browser.find_by_css('.import-csv-group-ok')) == int(num_rows) @then(parsers.parse("click on imported row {row} and check group properties")) @@ -335,6 +341,16 @@ def ui_group_csv_click_row(browser, row): assert browser.find_by_id('f-group-update-name').value == groupname +@then(parsers.parse('schema id is "{schema}"')) +def ui_group_check_schema_id(browser, schema): + assert browser.find_by_id('f-group-update-schema-id').value == schema + + +@then(parsers.parse('expiration date is "{expir}"')) +def ui_group_check_schema_id_expiration(browser, expir): + assert browser.find_by_id('f-group-update-expiration-date').value == expir + + @then(parsers.parse('find group member "{group_member}"')) def ui_group_csv_find_group_member(browser, group_member): # Find the groupmember in the group member list. diff --git a/util/yoda_names.py b/util/yoda_names.py index 1491314e1..4a91bfbcb 100644 --- a/util/yoda_names.py +++ b/util/yoda_names.py @@ -7,6 +7,7 @@ __license__ = 'GPLv3, see LICENSE' import re +from datetime import datetime from config import config @@ -75,3 +76,33 @@ def _is_internal_user(username, external_domain_filter): return True return False + + +def is_valid_expiration_date(expiration_date): + """Validation of expiration date. + + :param expiration_date: String containing date that has to be validated + + :returns: Indication whether expiration date is an accepted value + """ + # Copied from rule_group_expiration_date_validate + if expiration_date in ["", "."]: + return True + + try: + if expiration_date != datetime.strptime(expiration_date, "%Y-%m-%d").strftime('%Y-%m-%d'): + raise ValueError + + # Expiration date should be in the future + if expiration_date <= datetime.now().strftime('%Y-%m-%d'): + raise ValueError + return True + except ValueError: + return False + + +def is_valid_schema_id(schema_id): + """Is this schema at least a correctly formatted schema-id?""" + if schema_id == "": + return True + return re.search(r"^[a-zA-Z0-9\-]+\-[0-9]+$", schema_id) is not None diff --git a/vault.py b/vault.py index a2df41383..2c3aa63b3 100644 --- a/vault.py +++ b/vault.py @@ -1371,7 +1371,7 @@ def api_vault_get_published_packages(ctx, path): org_publ_info, data_packages, grouped_base_dois = get_all_doi_versions(ctx, path) # Sort by publication date - sorted_publ = [sorted(x, key=lambda x:datetime.strptime(x[1], "%Y-%m-%dT%H:%M:%S.%f")) for x in grouped_base_dois] + sorted_publ = [sorted(x, key=lambda x: datetime.strptime(x[1], "%Y-%m-%dT%H:%M:%S.%f")) for x in grouped_base_dois] latest_publ = map(lambda x: x[-1], sorted_publ)
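

Note on the CSV header change (illustrative sketch, not part of the patch): role columns now use the plain headers "manager", "member" and "viewer", repeated as needed, instead of the suffixed "manager:manager" / "member:member1" style, and parse_data() collects repeated headers into lists. A minimal standalone sketch of that collapsing step, assuming a hypothetical helper name and mirroring the logic added to parse_data():

    def collapse_csv_row(header_cols, data):
        """Collapse one CSV row into a dict of header -> list of values.

        Repeated headers (e.g. two 'member' columns) accumulate their
        non-empty values in a single list, as parse_data() now does.
        """
        line_dict = {}
        for col, value in zip(header_cols, data):
            line_dict.setdefault(col, [])
            if value:
                line_dict[col].append(value)
        return line_dict

    # Example: two 'member' columns collapse into one list.
    row = collapse_csv_row(
        ["category", "subcategory", "groupname", "manager", "member", "member"],
        ["test-automation", "csv-test", "grp1", "m@x.org", "a@x.org", "b@x.org"])
    # row["member"] == ["a@x.org", "b@x.org"]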
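
Note on the new validators in util/yoda_names.py (usage sketch only): both accept the empty string, so rows that omit the optional schema_id / expiration_date columns still pass. Assuming the module is importable as util.yoda_names (as groups.py appears to do), behaviour looks like this; the dates shown are examples and must lie in the future to validate:

    from util import yoda_names

    assert yoda_names.is_valid_schema_id("")                      # optional, empty allowed
    assert yoda_names.is_valid_schema_id("default-3")             # name plus version number
    assert not yoda_names.is_valid_schema_id("default")           # missing trailing version

    assert yoda_names.is_valid_expiration_date("")                # optional, empty allowed
    assert yoda_names.is_valid_expiration_date("2030-01-01")      # ISO date in the future
    assert not yoda_names.is_valid_expiration_date("2000-01-01")  # past dates are rejected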