Skip to content

Commit

Permalink
YDA-5512: Remove suffixes in import csv
Browse files Browse the repository at this point in the history
  • Loading branch information
claravox authored Jan 17, 2024
1 parent 8bd0e8a commit f8ee2dc
Show file tree
Hide file tree
Showing 10 changed files with 201 additions and 62 deletions.
127 changes: 84 additions & 43 deletions groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ def api_group_process_csv(ctx, csv_header_and_data, allow_update, delete_users):
:param ctx: Combined type of a ctx and rei struct
:param csv_header_and_data: CSV data holding a head conform description and the actual row data
:param allow_update: Allow updates in groups
:param delete_users: Allow for deleting of users from groups
:param delete_users: Allow for deleting of users from groups
:returns: Dict containing status, error(s) and the resulting group definitions so the frontend can present the results
Expand Down Expand Up @@ -501,7 +501,7 @@ def api_group_process_csv(ctx, csv_header_and_data, allow_update, delete_users):


def parse_data(ctx, csv_header_and_data):
"""Process contents of csv data consisting of header and 1 row of data.
"""Process contents of csv data consisting of header and rows of data.
:param ctx: Combined type of a ctx and rei struct
:param csv_header_and_data: CSV data holding a head conform description and the actual row data
Expand All @@ -514,24 +514,30 @@ def parse_data(ctx, csv_header_and_data):
header = csv_lines[0]
import_lines = csv_lines[1:]

# List of dicts each containing label / value pairs.
# List of dicts each containing label / list of values pairs.
lines = []
header_cols = header.split(',')
for import_line in import_lines:
data = import_line.split(',')
if len(data) != len(header_cols):
return [], 'Amount of header columns differs from data columns.'
# A kind of MultiDict
# each key is a header column
# each item is a list of items for that header column
line_dict = {}
for x in range(0, len(header_cols)):
if header_cols[x] == '':
if x == len(header_cols) - 1:
return [], "Header row ends with ','"
else:
return [], 'Empty column description found in header row.'
try:
line_dict[header_cols[x]] = data[x]
except (KeyError, IndexError):
line_dict[header_cols[x]] = ''

# EVERY row should have all the headers that were listed at the top of the file
if header_cols[x] not in line_dict:
line_dict[header_cols[x]] = []

if len(data[x]):
line_dict[header_cols[x]].append(data[x])

lines.append(line_dict)

Expand Down Expand Up @@ -561,7 +567,7 @@ def validate_data(ctx, data, allow_update):
can_add_category = user.is_member_of(ctx, 'priv-category-add')
is_admin = user.is_admin(ctx)

for (category, subcategory, groupname, managers, members, viewers) in data:
for (category, subcategory, groupname, managers, members, viewers, _, _) in data:

if group.exists(ctx, groupname) and not allow_update:
errors.append('Group "{}" already exists'.format(groupname))
Expand Down Expand Up @@ -589,13 +595,15 @@ def apply_data(ctx, data, allow_update, delete_users):
:returns: Errors if found any
"""

for (category, subcategory, group_name, managers, members, viewers) in data:
for (category, subcategory, group_name, managers, members, viewers, schema_id, expiration_date) in data:
new_group = False

log.write(ctx, 'CSV import - Adding and updating group: {}'.format(group_name))

# First create the group. Note that the actor will become a groupmanager
response = group_create(ctx, group_name, category, subcategory, config.default_yoda_schema, '', '', 'unspecified')
if not len(schema_id):
schema_id = config.default_yoda_schema
response = group_create(ctx, group_name, category, subcategory, schema_id, expiration_date, '', 'unspecified')

if response:
new_group = True
Expand All @@ -615,7 +623,7 @@ def apply_data(ctx, data, allow_update, delete_users):
log.write(ctx, "CSV import - Notice: added user {} to group {}".format(username, group_name))
else:
log.write(ctx, "CSV import - Warning: error occurred while attempting to add user {} to group {}".format(username, group_name))
log.write(ctx, "CSV import - Status: {} , Message: {}".format(status, message))
log.write(ctx, "CSV import - Status: {} , Message: {}".format(response.status, response.status_info))
else:
log.write(ctx, "CSV import - Notice: user {} is already present in group {}.".format(username, group_name))

Expand Down Expand Up @@ -691,12 +699,19 @@ def parse_csv_file(ctx):

# Validate header columns (should be first row in file)

# are all all required fields present?
for label in _get_csv_predefined_labels():
# Are all required fields present?
for label in _get_csv_required_labels():
if label not in reader.fieldnames:
_exit_with_error(
'CSV header is missing compulsory field "{}"'.format(label))

# Check that all header names are valid
possible_labels = _get_csv_possible_labels()
for label in header:
if label not in possible_labels:
_exit_with_error(
'CSV header contains unknown field "{}"'.format(label))

# duplicate fieldnames present?
duplicate_columns = _get_duplicate_columns(reader.fieldnames)
if (len(duplicate_columns) > 0):
Expand All @@ -716,16 +731,25 @@ def parse_csv_file(ctx):
return extracted_data


def _get_csv_predefined_labels():
def _get_csv_possible_labels():
return ['category', 'subcategory', 'groupname', 'viewer', 'member', 'manager', 'schema_id', 'expiration_date']


def _get_csv_required_labels():
return ['category', 'subcategory', 'groupname']


def _get_csv_predefined_labels():
"""These labels should not repeat"""
return ['category', 'subcategory', 'groupname', 'schema_id', 'expiration_date']


def _get_duplicate_columns(fields_list):
fields_seen = set()
duplicate_fields = set()

for field in fields_list:
if (field in _get_csv_predefined_labels() or field.startswith(("manager:", "viewer:", "member:"))):
if field in _get_csv_predefined_labels():
if field in fields_seen:
duplicate_fields.add(field)
else:
Expand All @@ -735,41 +759,51 @@ def _get_duplicate_columns(fields_list):


def _process_csv_line(ctx, line):
"""Process a line as found in the csv consisting of category, subcategory, groupname, managers, members and viewers."""
category = line['category'].strip().lower().replace('.', '')
subcategory = line['subcategory'].strip()
groupname = "research-" + line['groupname'].strip().lower()
"""Process a line as found in the csv consisting of
category, subcategory, groupname, managers, members and viewers,
and optionally schema id and expiration date.
:param ctx: Combined type of a ctx and rei struct
:param line: Dictionary of labels and corresponding lists of values
:returns: Tuple of processed row data (None if error), and error message
"""

if (not len(line['category'])
or not len(line['subcategory'])
or not len(line['groupname'])):
return None, "Row has a missing group name, category or subcategory"

category = line['category'][0].strip().lower().replace('.', '')
subcategory = line['subcategory'][0].strip()
groupname = "research-" + line['groupname'][0].strip().lower()
schema_id = line['schema_id'][0] if 'schema_id' in line and len(line['schema_id']) else ''
expiration_date = line['expiration_date'][0] if 'expiration_date' in line and len(line['expiration_date']) else ''
managers = []
members = []
viewers = []

for column_name in line.keys():
for column_name, item_list in line.items():
if column_name == '':
return None, 'Column cannot have an empty label'
elif column_name in _get_csv_predefined_labels():
continue

username = line.get(column_name)

if isinstance(username, list):
return None, "Data is present in an unlabelled column"

username = username.strip().lower()

if username == '': # empty value
continue
elif not yoda_names.is_email_username(username):
return None, 'Username "{}" is not a valid email address.'.format(
username)

if column_name.lower().startswith('manager:'):
managers.append(username)
elif column_name.lower().startswith('member:'):
members.append(username)
elif column_name.lower().startswith('viewer:'):
viewers.append(username)
else:
return None, "Column label '{}' is neither predefined nor a valid role label.".format(column_name)
elif column_name not in _get_csv_possible_labels():
return None, "Column label '{}' is not a valid label.".format(column_name)

for i in range(len(item_list)):
item_list[i] = item_list[i].strip().lower()
username = item_list[i]
if not yoda_names.is_email_username(username):
return None, 'Username "{}" is not a valid email address.'.format(
username)

if column_name.lower() == 'manager':
managers = item_list
elif column_name.lower() == 'member':
members = item_list
elif column_name.lower() == 'viewer':
viewers = item_list

if len(managers) == 0:
return None, "Group must have a group manager"
Expand All @@ -783,7 +817,14 @@ def _process_csv_line(ctx, line):
if not yoda_names.is_valid_groupname("research-" + groupname):
return None, '"{}" is not a valid group name.'.format(groupname)

row_data = (category, subcategory, groupname, managers, members, viewers)
if not yoda_names.is_valid_schema_id(schema_id):
return None, '"{}" is not a valid schema id.'.format(schema_id)

if not yoda_names.is_valid_expiration_date(expiration_date):
return None, '"{}" is not a valid expiration date.'.format(expiration_date)

row_data = (category, subcategory, groupname, managers,
members, viewers, schema_id, expiration_date)
return row_data, None


Expand Down
2 changes: 1 addition & 1 deletion publication.py
Original file line number Diff line number Diff line change
Expand Up @@ -1510,7 +1510,7 @@ def get_all_versions(ctx, path, doi):
org_publ_info, data_packages, grouped_base_dois = vault.get_all_doi_versions(ctx, coll_parent_name)

# Sort by publication date
sorted_publ = [sorted(x, key=lambda x:datetime.strptime(x[1], "%Y-%m-%dT%H:%M:%S.%f"), reverse=True) for x in grouped_base_dois]
sorted_publ = [sorted(x, key=lambda x: datetime.strptime(x[1], "%Y-%m-%dT%H:%M:%S.%f"), reverse=True) for x in grouped_base_dois]

sorted_publ = [element for innerList in sorted_publ for element in innerList]

Expand Down
21 changes: 20 additions & 1 deletion tests/features/api/api_group.feature
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ Feature: Group API

Scenario Outline: Group import CSV
Given user technicaladmin is authenticated
And the Yoda API for processing csv group data API is queried
And the Yoda API for processing csv group data API is queried for data "csvtestgroup"
Then the response status code is "200"
And user "[email protected]" is now a member of the group "research-csvtestgroup"
And user "[email protected]" is now a member of the group "research-csvtestgroup"
Expand All @@ -168,6 +168,24 @@ Feature: Group API
And user "[email protected]" is now a member of the group "research-csvtestgroup"


Scenario Outline: Group import CSV schema id and expiration date
Given user technicaladmin is authenticated
And the Yoda API for processing csv group data API is queried for data "csvtestgroup1"
Then the response status code is "200"
And user "[email protected]" is now a member of the group "research-csvtestgroup1"


Scenario Outline: Group import CSV errors
Given user technicaladmin is authenticated
And the Yoda API for processing csv group data API is queried for data "<group_name>"
Then the response status code is "400"

Examples:
| group_name |
| csv-missing-header |
| csv-missing-entry |


Scenario Outline: Group delete
Given user <user> is authenticated
And the group "<group_name>" exists
Expand All @@ -182,6 +200,7 @@ Feature: Group API
| functionaladminpriv | research-api-test1-group |
| technicaladmin | datamanager-api-test1 |
| technicaladmin | research-csvtestgroup |
| technicaladmin | research-csvtestgroup1 |
| technicaladmin | not-a-yoda-group |


Expand Down
30 changes: 27 additions & 3 deletions tests/features/ui/ui_group.feature
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,12 @@ Feature: Group UI
Given user functionaladminpriv is logged in
And module "group_manager" is shown
When user opens group import dialog
And user clicks upload button
And user clicks allow updates checkbox
And user clicks upload button and uploads csv "csv-import-test.csv"
Then there are 4 groups presented
When user clicks allow updates checkbox
And user clicks allow deletions checkbox
Then process csv and check number of rows
Then process csv
And check number of rows is 4
And click on imported row 0 and check group properties
And find group member "[email protected]"
And user opens group import dialog
Expand All @@ -93,6 +95,26 @@ Feature: Group UI
And find group member "[email protected]"


Scenario: Imports group CSV schema id and expiration date
Given user functionaladminpriv is logged in
And module "group_manager" is shown
When user opens group import dialog
And user clicks upload button and uploads csv "csv-import-test-exp-schema.csv"
Then there are 2 groups presented
When user clicks allow updates checkbox
And user clicks allow deletions checkbox
Then process csv
And check number of rows is 2
And click on imported row 0 and check group properties
And find group member "[email protected]"
And find group member "[email protected]"
And user opens group import dialog
And click on imported row 1 and check group properties
And schema id is "default-3"
And expiration date is "2027-01-01"
And find group member "[email protected]"


Scenario Outline: Group research create with default schema id
Given user <user> is logged in
And module "group_manager" is shown
Expand Down Expand Up @@ -192,6 +214,8 @@ Feature: Group UI
| functionaladminpriv | test-automation | csv-test | research-csv-test-group2 |
| functionaladminpriv | test-automation | csv-test | research-csv-test-group3 |
| functionaladminpriv | test-automation | csv-test | research-csv-test-group4 |
| functionaladminpriv | test-automation | csv-test | research-csv-test-group5 |
| functionaladminpriv | test-automation | csv-test | research-csv-test-group6 |
| functionaladminpriv | test-datamanager | test-datamanager | datamanager-test-datamanager |
| functionaladminpriv | test-datamanager | test-datamanager | research-test-datamanager |
| technicaladmin | test-datamanager1 | test-datamanager1 | datamanager-test-datamanager1 |
Expand Down
3 changes: 3 additions & 0 deletions tests/files/csv-import-test-exp-schema.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
category,subcategory,groupname,manager,member,schema_id,expiration_date
test-automation,csv-test,csv-test-group5,[email protected],[email protected],,
test-automation,csv-test,csv-test-group6,[email protected],,default-3,2027-01-01
2 changes: 1 addition & 1 deletion tests/files/csv-import-test.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
category,subcategory,groupname,manager:manager,member:member1,member:member2,viewer:viewer1
category,subcategory,groupname,manager,member,member,viewer
test-automation,csv-test,csv-test-group1,[email protected],[email protected],[email protected],[email protected]
test-automation,csv-test,csv-test-group2,[email protected],[email protected],[email protected],[email protected]
test-automation,csv-test,csv-test-group3,[email protected],[email protected],[email protected],[email protected]
Expand Down
13 changes: 9 additions & 4 deletions tests/step_defs/api/test_api_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,13 +294,18 @@ def then_user_update_persisted(user, new_user, group_name):
assert role == "manager"


@given('the Yoda API for processing csv group data API is queried', target_fixture="api_response")
def api_group_import_csv_data(user):
header_and_data = "category,subcategory,groupname,manager:manager,member:member1,member:member2,viewer:viewer1,member:member3\rdefault-2,default-2,csvtestgroup,[email protected],[email protected],[email protected],[email protected],[email protected]"
@given(parsers.parse('the Yoda API for processing csv group data API is queried for data "{data_id}"'), target_fixture="api_response")
def api_group_import_csv_data(user, data_id):
headers_and_data = {
"csvtestgroup": "category,subcategory,groupname,manager,member,member,viewer,member\rdefault-2,default-2,csvtestgroup,[email protected],[email protected],[email protected],[email protected],[email protected]",
"csvtestgroup1": "category,subcategory,groupname,manager,expiration_date,schema_id\rdefault-2,default-2,csvtestgroup1,[email protected],2030-01-01,default-2",
"csv-missing-header": "category,,groupname,manager,expiration_date,schema_id\rdefault-2,default-2,csvtestgroup1,[email protected]",
"csv-missing-entry": "category,subcategory,groupname,manager,expiration_date,schema_id\rdefault-2,,csvtestgroup1,[email protected]",
}
return api_request(
user,
"group_process_csv",
{"csv_header_and_data": header_and_data,
{"csv_header_and_data": headers_and_data[data_id],
"allow_update": True,
"delete_users": True}
)
Loading

0 comments on commit f8ee2dc

Please sign in to comment.