Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SAM-173 Add label_data_links method and basic functionality #445

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion SampleService.html

Large diffs are not rendered by default.

26 changes: 26 additions & 0 deletions SampleService.spec
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ module SampleService {
node_id node;
boolean update;
boolean as_admin;
list<string> labels;
user as_user;
} CreateDataLinkParams;

Expand Down Expand Up @@ -487,6 +488,31 @@ module SampleService {
*/
funcdef expire_data_link(ExpireDataLinkParams params) returns() authentication required;

/* label_data_links parameters.

links - the links to be labeled.
add_labels - the labels to be added to the links. Labels are validated against the
service's controlled vocabulary before being applied.
remove_labels - the labels to be removed from the links.
as_admin - run the method as a service administrator. The user must have full
administration permissions.
as_user - label the links as a different user. Ignored if as_admin is not true. Neither
the administrator nor the impersonated user need have permissions to the link if a
new version is saved.
*/
typedef structure {
list<DataLink> links;
list<string> add_labels;
list<string> remove_labels;
boolean as_admin;
user as_user;
} LabelDataLinksParams;

/* Label data links: add labels to and/or remove labels from each of the provided links.

Unless the method is run as an administrator, the user must have write permissions for
the workspace containing each linked Workspace object.
*/
funcdef label_data_links(LabelDataLinksParams params) returns() authentication required;

/* get_data_links_from_sample parameters.

id - the sample ID.
Expand Down
47 changes: 47 additions & 0 deletions lib/SampleService/SampleServiceClient.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,53 @@ def expire_data_link(self, params, context=None):
return self._client.call_method('SampleService.expire_data_link',
[params], self._service_ver, context)

def label_data_links(self, params, context=None):
    """
    Label data links.
    The user must have write permissions for the Workspace object.

    :param params: instance of type "LabelDataLinksParams"
       (label_data_links parameters), a structure with the fields:

       "links" - list of type "DataLink", the links to be labeled. A
       DataLink is a structure with the fields "linkid" (link_id),
       "upa" (ws_upa, e.g. 5/6/7 where 5 is the workspace ID, 6 the
       object ID, and 7 the object version), "dataid" (data_id, the id
       of the linked data within the object, if any), "id" (sample_id),
       "version" (the sample version, always > 0), "node" (node_id),
       "createdby" (user), "created" (timestamp in epoch milliseconds),
       "expiredby" (user, if any) and "expired" (timestamp in epoch
       milliseconds, if at all).

       "add_labels" - list of String, the labels to be added to the
       links.

       "remove_labels" - list of String, the labels to be removed from
       the links.

       "as_admin" - boolean (0 for false, 1 for true); run the method as
       a service administrator. The user must have full administration
       permissions.

       "as_user" - a user's username; label the links as a different
       user. Ignored if as_admin is not true.
    :param context: optional call context, forwarded unchanged to the
       underlying client.
    """
    method_name = 'SampleService.label_data_links'
    call_args = [params]
    return self._client.call_method(
        method_name, call_args, self._service_ver, context)

def get_data_links_from_sample(self, params, context=None):
"""
Get data links to Workspace objects originating from a sample.
Expand Down
66 changes: 65 additions & 1 deletion lib/SampleService/SampleServiceImpl.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from SampleService.impl_methods import (
update_samples_acls as _update_samples_acls
)
from SampleService.core.workspace import DataUnitID

_CTX_USER = 'user_id'
_CTX_TOKEN = 'token'
Expand Down Expand Up @@ -708,7 +709,7 @@ def create_data_link(self, ctx, params):
# ctx is the context object
# return variables are: results
#BEGIN create_data_link
duid, sna, update = _create_data_link_params(params)
duid, sna, update, labels = _create_data_link_params(params)
as_admin, user = _get_admin_request_from_object(params, 'as_admin', 'as_user')
_check_admin(
self._user_lookup, ctx[_CTX_TOKEN], _AdminPermission.FULL,
Expand Down Expand Up @@ -887,6 +888,69 @@ def expire_data_link(self, ctx, params):
#END expire_data_link
pass

def label_data_links(self, ctx, params):
    """
    Label data links.
    The user must have write permissions for the Workspace object.

    :param params: instance of type "LabelDataLinksParams"
       (label_data_links parameters), a structure with the fields:

       "links" - list of type "DataLink", the links to be labeled. A
       DataLink is a structure with the fields "linkid" (link_id),
       "upa" (ws_upa, e.g. 5/6/7 where 5 is the workspace ID, 6 the
       object ID, and 7 the object version), "dataid" (data_id, the id
       of the linked data within the object, if any), "id" (sample_id),
       "version" (the sample version, always > 0), "node" (node_id),
       "createdby" (user), "created" (timestamp in epoch milliseconds),
       "expiredby" (user, if any) and "expired" (timestamp in epoch
       milliseconds, if at all). Only "upa" and "dataid" are read here.

       "add_labels" - list of String, the labels to be added to the
       links.

       "remove_labels" - list of String, the labels to be removed from
       the links.

       "as_admin" - boolean (0 for false, 1 for true); run the method as
       a service administrator. The user must have full administration
       permissions.

       "as_user" - a user's username; label the links as a different
       user. Ignored if as_admin is not true.
    """
    # ctx is the context object
    #BEGIN label_data_links
    as_admin, user = _get_admin_request_from_object(params, 'as_admin', 'as_user')
    _check_admin(
        self._user_lookup, ctx[_CTX_TOKEN], _AdminPermission.FULL,
        # pretty annoying to test ctx.log_info is working, do it manually
        'label_data_links', ctx.log_info, as_user=user, skip_check=not as_admin)

    # The DataLink structure's field names are 'upa' and 'dataid'; 'ws_upa' and
    # 'data_id' are the names of the field *types* and are never present as keys.
    # A missing or null 'links' becomes an empty list so that the core layer's own
    # argument check reports the problem instead of a TypeError here.
    # NOTE(review): the core layer reads duid.upa.wsid, so DataUnitID presumably needs
    # a parsed UPA object rather than the raw UPA string - confirm and convert if so.
    duids = [
        DataUnitID(dl.get('upa'), dl.get('dataid'))
        for dl in (params.get('links') or [])
    ]

    self._samples.label_data_links(
        user if user else _UserID(ctx[_CTX_USER]),
        duids,
        # 'or []' also covers keys that are present but explicitly null
        params.get('add_labels') or [],
        params.get('remove_labels') or [],
        # without this the admin request is validated above but then ignored,
        # and the workspace write-permission check is always enforced
        as_admin=as_admin,
    )
    #END label_data_links
    pass

def get_data_links_from_sample(self, ctx, params):
"""
Get data links to Workspace objects originating from a sample.
Expand Down
2 changes: 2 additions & 0 deletions lib/SampleService/core/api_translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,7 @@ def create_data_link_params(params: Dict[str, Any]) -> Tuple[DataUnitID, SampleN
upa - workspace object UPA
dataid - ID of the data within the workspace object
update - whether the link should be updated
labels - list of labels to add to the link

:param params: the parameters.
:returns: a tuple consisting of:
Expand All @@ -541,6 +542,7 @@ def create_data_link_params(params: Dict[str, Any]) -> Tuple[DataUnitID, SampleN
_cast(str, _check_string_int(params, 'node', True))
)
duid = get_data_unit_id_from_object(params)
labels = params.get('labels', [])
return (duid, sna, bool(params.get('update')))


Expand Down
25 changes: 24 additions & 1 deletion lib/SampleService/core/data_link.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
'''

from __future__ import annotations
from typing import Optional, List

import datetime
import uuid
Expand All @@ -14,6 +15,7 @@
from SampleService.core.user import UserID
from SampleService.core.workspace import DataUnitID

_VALID_CONTROLLED_LABELS = ['canonical']

class DataLink:
'''
Expand All @@ -25,6 +27,7 @@ class DataLink:
:ivar created_by: the user that created the link.
:ivar expired: the expiration time or None if the link is not expired.
:ivar expired_by: the user that expired the link or None if the link is not expired.
:ivar controlled_labels: Controlled vocabulary labels for this link.
'''

def __init__(
Expand All @@ -35,7 +38,8 @@ def __init__(
created: datetime.datetime,
created_by: UserID,
expired: datetime.datetime = None,
expired_by: UserID = None):
expired_by: UserID = None,
controlled_labels: Optional[List[str]] = None):
'''
Create the link. If expired is provided expired_by must also be provided. If expired
is falsy expired_by is ignored.
Expand All @@ -47,6 +51,7 @@ def __init__(
:param created_by: the user that created the link.
:param expired: the expiration time for the link or None if the link is not expired.
:param expired_by: the user that expired the link or None if the link is not expired.
:param controlled_labels: Controlled vocabulary labels for this link.
'''
# may need to make this non ws specific. YAGNI for now.
self.id = _not_falsy(id_, 'id_')
Expand All @@ -56,6 +61,7 @@ def __init__(
self.created_by = _not_falsy(created_by, 'created_by')
self.expired = None
self.expired_by = None
self.controlled_labels = DataLink.validate_controlled_labels(controlled_labels)
if expired:
self.expired = _check_timestamp(expired, 'expired')
if expired < created:
Expand Down Expand Up @@ -93,3 +99,20 @@ def __eq__(self, other):
def __hash__(self):
return hash((self.id, self.duid, self.sample_node_address,
self.created, self.created_by, self.expired, self.expired_by))

@staticmethod
def validate_controlled_labels(labels: Optional[List[str]]) -> List[str]:
    '''
    Validate and normalize controlled vocabulary labels.

    Each label is stripped of surrounding whitespace and lower-cased before being
    checked against the module's list of valid controlled labels.

    :param labels: the labels to validate. None or an empty list is allowed.
    :returns: the normalized labels, in input order, or an empty list if no labels
        were provided.
    :raises ValueError: if any normalized label is not a valid controlled label.
    '''
    if not labels:
        return []
    normalized = [label.strip().lower() for label in labels]
    bad_labels = [label for label in normalized if label not in _VALID_CONTROLLED_LABELS]
    if bad_labels:
        raise ValueError(f'invalid controlled vocabulary labels: {bad_labels}. '
                         f'Valid labels are: {_VALID_CONTROLLED_LABELS}')
    # return the normalized form, not the raw input, so that what is stored and
    # compared later is exactly what was validated
    return normalized
27 changes: 26 additions & 1 deletion lib/SampleService/core/samples.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,7 @@ def create_data_link(
user: UserID,
duid: DataUnitID,
sna: SampleNodeAddress,
labels: List[str],
update: bool = False,
as_admin: bool = False) -> DataLink:
'''
Expand All @@ -367,6 +368,7 @@ def create_data_link(
:param user: the user creating the link.
:param duid: the data unit to link the the sample.
:param sna: the sample node to link to the data unit.
:param labels: the labels to apply to the link.
:param update: True to expire any extant link if it does not link to the provided sample.
If False and a link from the data unit already exists, link creation will fail.
:param as_admin: allow link creation to proceed if user does not have
Expand All @@ -388,14 +390,37 @@ def create_data_link(
_not_falsy(sna, 'sna').sampleid, user, _SampleAccessType.ADMIN, as_admin=as_admin)
wsperm = _WorkspaceAccessType.NONE if as_admin else _WorkspaceAccessType.WRITE
self._ws.has_permission(user, wsperm, upa=duid.upa)
dl = DataLink(self._uuid_gen(), duid, sna, self._now(), user)
dl = DataLink(self._uuid_gen(), duid, sna, self._now(), user, controlled_labels=labels)
expired_id = self._storage.create_data_link(dl, update=update)
if self._kafka:
self._kafka.notify_new_link(dl.id)
if expired_id: # maybe make the notifier accept both notifications & send both?
self._kafka.notify_expired_link(expired_id)
return dl

def label_data_links(
        self,
        user: UserID,
        duids: List[DataUnitID],
        add_labels: List[str],
        remove_labels: List[str],
        as_admin: bool = False) -> None:
    '''
    Add labels to and / or remove labels from the links for a list of data units.

    Permissions are checked once per distinct workspace containing the data units, and
    the label changes are then delegated to the storage layer.

    NOTE(review): no sample ACL check is performed in this method - confirm whether
    one is required.

    :param user: the user labeling the links.
    :param duids: the data units whose links are to be labeled.
    :param add_labels: the labels to add to the links.
    :param remove_labels: the labels to remove from the links.
    :param as_admin: if True, the workspace permission check is made with an access
        type of NONE rather than WRITE.
    '''
    _not_falsy(user, 'user')
    _not_falsy(duids, 'duids')
    if as_admin:
        required_access = _WorkspaceAccessType.NONE
    else:
        required_access = _WorkspaceAccessType.WRITE

    # collapse to unique workspace IDs so each workspace is only checked once
    workspace_ids = {duid.upa.wsid for duid in duids}
    for wsid in workspace_ids:
        self._ws.has_permission(user, required_access, workspace_id=wsid)

    self._storage.label_data_links(duids, add_labels, remove_labels)


def expire_data_link(self, user: UserID, duid: DataUnitID, as_admin: bool = False) -> None:
'''
Expire a data link, ensuring that it will not show up in link queries without an effective
Expand Down
43 changes: 41 additions & 2 deletions lib/SampleService/core/storage/arango_sample_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,7 @@
_FLD_LINK_CREATED_BY = 'createby'
_FLD_LINK_EXPIRED = 'expired'
_FLD_LINK_EXPIRED_BY = 'expireby'
_FLD_LINK_CONTROLLED_LABELS = 'clabels'

# see https://www.arangodb.com/2018/07/time-traveling-with-graph-databases/
_ARANGO_MAX_INTEGER = 2**53 - 1
Expand Down Expand Up @@ -1364,7 +1365,8 @@ def _create_link_doc(self, link: DataLink, samplever: UUID):
# recording the integer version saves looking it up in the version doc and it's
# immutable so denormalization is ok here
_FLD_LINK_SAMPLE_INT_VERSION: sna.version,
_FLD_LINK_SAMPLE_NODE: sna.node
_FLD_LINK_SAMPLE_NODE: sna.node,
_FLD_LINK_CONTROLLED_LABELS: link.controlled_labels,
}

def _get_link_doc_from_link_id(self, id_):
Expand All @@ -1388,6 +1390,42 @@ def _get_link_doc_from_duid(self, duid):
raise _NoSuchLinkError(str(duid))
return linkdoc

def label_data_links(
        self,
        duids: List[DataUnitID],
        add_labels: List[str],
        remove_labels: List[str]) -> List[DataLink]:
    '''
    Add and / or remove controlled labels on the links for a list of data units.

    All links are updated in a single transaction on the data link collection.
    Removals are applied before additions, and a label already on a link is not
    added again.

    :param duids: the data units whose links are to be updated.
    :param add_labels: the labels to add. They are validated as controlled vocabulary
        labels before any link is modified.
    :param remove_labels: the labels to remove. Labels not present on a link are
        ignored. None is treated as no labels.
    :returns: the updated links, in the same order as duids.
    :raises ValueError: if any label to be added is not a valid controlled label.
    '''
    # validate before opening a transaction so bad input never touches the database
    labels_to_add = DataLink.validate_controlled_labels(add_labels)
    labels_to_remove = remove_labels or []
    tdb = self._db.begin_transaction(
        read=self._col_data_link.name,
        write=self._col_data_link.name)
    try:
        tdlc = tdb.collection(self._col_data_link.name)
        linkdocs = []
        for duid in duids:
            linkdoc = self._get_link_doc_from_duid(duid)
            # link documents stored before labels were introduced have no label
            # field, so default to no labels rather than raising KeyError
            labels = [
                label for label in (linkdoc.get(_FLD_LINK_CONTROLLED_LABELS) or [])
                if label not in labels_to_remove
            ]
            for label in labels_to_add:
                if label not in labels:
                    labels.append(label)
            linkdoc[_FLD_LINK_CONTROLLED_LABELS] = labels
            # update the link doc (in transaction)
            tdlc.update(linkdoc)
            linkdocs.append(linkdoc)
        # convert before committing so a malformed document rolls the whole change back
        updated_links = [self._doc_to_link(doc) for doc in linkdocs]
        self._commit_transaction(tdb)
        return updated_links
    finally:
        # NOTE(review): this runs on every exit path, including after a successful
        # commit - presumably _abort_transaction is a no-op for a committed
        # transaction; confirm.
        self._abort_transaction(tdb)


def expire_data_link(
self,
expired: datetime.datetime,
Expand Down Expand Up @@ -1509,7 +1547,8 @@ def _doc_to_link(self, doc) -> DataLink:
self._timestamp_to_datetime(self._timestamp_milliseconds_to_seconds(doc[_FLD_LINK_CREATED])),
UserID(doc[_FLD_LINK_CREATED_BY]),
None if ex == _ARANGO_MAX_INTEGER else self._timestamp_to_datetime(self._timestamp_milliseconds_to_seconds(ex)),
UserID(doc[_FLD_LINK_EXPIRED_BY]) if doc[_FLD_LINK_EXPIRED_BY] else None
UserID(doc[_FLD_LINK_EXPIRED_BY]) if doc[_FLD_LINK_EXPIRED_BY] else None,
doc.get(_FLD_LINK_CONTROLLED_LABELS, [])
)

def _doc_to_dataunit_id(self, doc) -> DataUnitID:
Expand Down
Loading