Skip to content

Commit

Permalink
YDA-5728: reduce number of queries for category statistics from O(n²)…
Browse files Browse the repository at this point in the history
… to O(1)
  • Loading branch information
lwesterhof committed Aug 15, 2024
1 parent defa169 commit 515994f
Showing 1 changed file with 56 additions and 54 deletions.
110 changes: 56 additions & 54 deletions resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,81 +151,83 @@ def api_resource_category_stats(ctx):
if len(categories) == 0:
return {'categories': [], 'external_filter': ''}

# Continue for admins and datamanagers
storage = {}

# Go through current groups of current categories.
# This function has no historic value so it is allowed to do so
for category in categories:
storage[category] = {'total': 0, 'research': 0, 'vault': 0, 'revision': 0, 'internal': 0, 'external': 0}

# for all groups in category
groups = get_groups_on_categories(ctx, [category])
for groupname in groups:
if groupname.startswith(('research', 'deposit', 'intake', 'grp')):
# Only check the most recent storage measurement
iter = list(genquery.Query(ctx,
['META_USER_ATTR_VALUE', 'ORDER_DESC(META_USER_ATTR_NAME)', 'USER_NAME', 'USER_GROUP_NAME'],
"META_USER_ATTR_VALUE like '[\"{}\",%%' AND META_USER_ATTR_NAME like '{}%%' AND USER_NAME = '{}'".format(category, constants.UUMETADATAGROUPSTORAGETOTALS, groupname),
offset=0, limit=1, output=genquery.AS_LIST))

for row in iter:
temp = jsonutil.parse(row[0])
# Retrieve storage statistics of groups.
iter = list(genquery.Query(ctx,
['USER_GROUP_NAME', 'ORDER_DESC(META_USER_ATTR_NAME)', 'META_USER_ATTR_VALUE'],
"META_USER_ATTR_NAME like '{}%%'".format(constants.UUMETADATAGROUPSTORAGETOTALS),
output=genquery.AS_LIST))

storage[category]['total'] += temp[4]
storage[category]['research'] += temp[1]
storage[category]['vault'] += temp[2]
storage[category]['revision'] += temp[3]
# Go through storage statistics of groups.
storage = {}
group_counted = []
for row in iter:
group_name = row[2]

# Now go through all totals
all_storage = []
# Check if group is valid and has not been counted yet.
if group_name.startswith(('research-', 'deposit-', 'intake-', 'grp-')) and group_name not in group_counted:
# Add group to list of groups counted for category statistics.
group_counted.append(group_name)

# Totalization for the entire instance.
instance_totals = {'total': 0, 'research': 0, 'vault': 0, 'revision': 0}
# Add group to category statistics.
category, research, vault, revisions, total = jsonutil.parse(row[0])
storage[category]['research'] += research
storage[category]['vault'] += vault
storage[category]['revision'] += revisions
storage[category]['total'] += total

# Member counts
cat_members = {}
members_total = []
for category in categories:
members = []
# this information is only available for yoda-admins
for groupname in get_groups_on_categories(ctx, [category]):
group_members = list(group.members(ctx, groupname))
for gm in group_members:
members.append(gm[0])
members_total.append(gm[0])
# deduplicate member list
cat_members[category] = list(set(members))
# Retrieve groups and their members.
iter = list(genquery.Query(ctx,
['USER_GROUP_NAME', 'USER_NAME'],
"USER_TYPE != 'rodsgroup'",
output=genquery.AS_LIST))

cat_members['YODA_INSTANCE_TOTAL'] = list(set(members_total))
members = {}

def count_externals(members):
return len([member for member in members if not yoda_names.is_internal_user(member)])
# Calculate number of members per type per group.
for row in iter:
group_name = row[0]
user_name = row[1]

def count_internals(members):
return len([member for member in members if yoda_names.is_internal_user(member)])
members.setdefault(group_name, {'internal': 0, 'external': 0, 'total': 0})
if yoda_names.is_internal_user(user_name):
members[group_name]['internal'] += 1
else:
members[group_name]['external'] += 1
members[group_name]['total'] += 1

# Calculate category members and storage totals.
instance_totals = {'total': 0, 'research': 0, 'vault': 0, 'revision': 0, 'internals': 0, 'externals': 0}
all_storage = []
for category in categories:
storage_humanized = {}
# humanize storage sizes for the frontend
for type in ['total', 'research', 'vault', 'revision']:
# Calculate category members and totals.
internals = 0
externals = 0
for group_name in get_groups_on_categories(ctx, [category]):
internals += members[group_name]['internal']
externals += members[group_name]['external']

users = {'internals': internals, 'externals': externals}
instance_totals['internals'] += internals
instance_totals['externals'] += externals

# Humanize storage sizes for the frontend and calculate instance totals.
for type in ['research', 'vault', 'revision', 'total']:
storage_humanized[type] = misc.human_readable_size(1.0 * storage[category][type])
instance_totals[type] += 1.0 * storage[category][type]

users = {'internals': count_internals(cat_members[category]), 'externals': count_externals(cat_members[category])}
all_storage.append({'category': category,
'storage': storage_humanized,
'users': users})

# Add the yoda instance information as an extra row with category name YODA_INSTANCE_TOTAL
# So the frontend can distinguish instance totals from real category totals
users = {'internals': count_internals(cat_members['YODA_INSTANCE_TOTAL']), 'externals': count_externals(cat_members['YODA_INSTANCE_TOTAL'])}
# Add the Yoda instance information as an extra row with category name YODA_INSTANCE_TOTAL.
# So the frontend can distinguish instance totals from real category totals.
all_storage.append({'category': "YODA_INSTANCE_TOTAL",
'storage': {'total': misc.human_readable_size(instance_totals['total']),
'research': misc.human_readable_size(instance_totals['research']),
'vault': misc.human_readable_size(instance_totals['vault']),
'revision': misc.human_readable_size(instance_totals['revision'])},
'users': users})
'users': {'internals': instance_totals['internals'],
'externals': instance_totals['externals']}})

return {'categories': sorted(all_storage, key=lambda d: d['category']),
'external_filter': ', '.join(config.external_users_domain_filter)}
Expand Down

0 comments on commit 515994f

Please sign in to comment.