Skip to content

Commit

Permalink
Multi-Link API (#203)
Browse files Browse the repository at this point in the history
* Delete multiple links

* PUT_Links support for group __setitem__

* Multi-link options for group.get()

* Support retrieving multiple links by name

* Cleanup

---------

Co-authored-by: John Readey <[email protected]>
  • Loading branch information
mattjala and jreadey authored Jun 7, 2024
1 parent c56e40b commit 98c80f4
Show file tree
Hide file tree
Showing 3 changed files with 391 additions and 30 deletions.
166 changes: 143 additions & 23 deletions h5pyd/_hl/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -679,11 +679,23 @@ def __getitem__(self, name, track_order=False):
tgt._name = name
return tgt

def get(self, name, default=None, getclass=False, getlink=False, track_order=False):
def _objectify_link_Json(self, link_json):
    """ Build the link object that corresponds to a link JSON description.

    "id" present:
        Return a HardLink constructed from the "id" value.
    "h5path" present without "h5domain":
        Return a SoftLink constructed from the "h5path" value.
    "h5path" and "h5domain" both present:
        Return an ExternalLink constructed from the "h5domain" and
        "h5path" values.

    Raise ValueError if neither "id" nor "h5path" is present.
    """
    # An "id" key takes precedence over any path information
    if "id" in link_json:
        return HardLink(link_json["id"])

    # Without an id, a path is required for both remaining link kinds
    if "h5path" not in link_json:
        raise ValueError("Invalid link JSON")

    # The presence of a domain distinguishes external from soft links
    if "h5domain" in link_json:
        return ExternalLink(link_json["h5domain"], link_json["h5path"])

    return SoftLink(link_json["h5path"])

def get(self, name, default=None, getclass=False, getlink=False, track_order=False, **kwds):
""" Retrieve an item or other information.
"name" given only:
Return the item, or "default" if it doesn't exist
Return the item with the given name, or "default" if nothing with that name exists
"getclass" is True:
Return the class of object (Group, Dataset, etc.), or "default"
Expand All @@ -697,6 +709,21 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Fal
Return HardLink, SoftLink and ExternalLink classes. Return
"default" if nothing with that name exists.
"limit" is an integer:
If "name" is None, this will return the first "limit" links in the group.
"marker" is a string:
If "name" is None, this will return only the links that come after the marker in the group's link ordering.
"pattern" is a string:
If "name" is None, this will return only the links that match the given pattern
in the target group (and subgroups, if follow_links is provided).
Matching is done according to Unix pathname expansion rules.
"follow_links" is True:
If "name" is None, subgroups of the target group will be recursively searched
for links that match the given names or pattern.
Example:
>>> cls = group.get('foo', getclass=True)
Expand All @@ -709,7 +736,7 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Fal
except KeyError:
return default

if name not in self:
if not isinstance(name, list) and name is not None and name not in self:
return default

elif getclass and not getlink:
Expand All @@ -726,23 +753,80 @@ def get(self, name, default=None, getclass=False, getlink=False, track_order=Fal
raise TypeError("Unknown object type")

elif getlink:
parent_uuid, link_json = self._get_link_json(name)
typecode = link_json['class']
if name is None or isinstance(name, list):
# Get all links in target group(s)
# Retrieve "limit", "marker", and "pattern" from kwds
limit = kwds.get("limit", None)
marker = kwds.get("marker", None)
pattern = kwds.get("pattern", None)
follow_links = kwds.get("follow_links", False)

if name and (limit or marker or pattern or follow_links):
raise ValueError("Cannot specify 'name' along with 'limit', 'marker', 'pattern', or 'follow_links'")

req = "/groups/" + self.id.uuid + "/links"
params = {}

if limit:
params["Limit"] = limit
if marker:
params["Marker"] = marker
if pattern:
params["pattern"] = pattern
if follow_links:
params["follow_links"] = 1
if track_order:
params["CreateOrder"] = 1

if name:
body = {}

titles = [linkname.decode('utf-8') if
isinstance(linkname, bytes) else linkname for linkname in name]
body['titles'] = titles
rsp = self.POST(req, body=body, params=params)
else:
rsp = self.GET(req, params=params)

if "links" in rsp:
# Process list of link objects so they may be accessed by name
links = rsp['links']
links_out = {}
if all([isUUID(k) for k in links]):
# Multiple groups queried, links are returned under group ids
for group_id in links:
group_links = {}

if typecode == 'H5L_TYPE_SOFT':
if getclass:
return SoftLink
for link in links[group_id]:
group_links[link["title"]] = self._objectify_link_Json(link)

return SoftLink(link_json['h5path'])
elif typecode == 'H5L_TYPE_EXTERNAL':
if getclass:
return ExternalLink
links_out[group_id] = group_links

return ExternalLink(link_json['h5domain'], link_json['h5path'])
elif typecode == 'H5L_TYPE_HARD':
return HardLink if getclass else HardLink()
else:
for link in links:
links_out[link["title"]] = self._objectify_link_Json(link)
else:
raise ValueError("Can't parse server response to links query")

return links_out
else:
raise TypeError("Unknown link type")
parent_uuid, link_json = self._get_link_json(name)
typecode = link_json['class']

if typecode == 'H5L_TYPE_SOFT':
if getclass:
return SoftLink

return SoftLink(link_json['h5path'])
elif typecode == 'H5L_TYPE_EXTERNAL':
if getclass:
return ExternalLink

return ExternalLink(link_json['h5domain'], link_json['h5path'])
elif typecode == 'H5L_TYPE_HARD':
return HardLink if getclass else HardLink(link_json['id'])
else:
raise TypeError("Unknown link type")

def __setitem__(self, name, obj):
""" Add an object to the group. The name must not already be in use.
Expand All @@ -768,7 +852,27 @@ def __setitem__(self, name, obj):
values are stored as scalar datasets. Raise ValueError if we
can't understand the resulting array dtype.
"""
if name.find('/') != -1:
if isinstance(name, list) and isinstance(obj, list):
if len(name) != len(obj):
raise ValueError("name and object list lengths do not match")

links = {}

for i in range(len(name)):
if isinstance(obj[i], HLObject):
links[name[i]] = {"id": obj[i].id.uuid}
elif isinstance(obj[i], SoftLink):
links[name[i]] = {"h5path": obj[i].path}
elif isinstance(obj[i], ExternalLink):
links[name[i]] = {"h5path": obj[i].path, "h5domain": obj[i].filename}
else:
raise ValueError("only links are supported for multiple object creation")

body = {"links": links}
req = "/groups/" + self.id.uuid + "/links"
self.PUT(req, body=body)

elif name.find('/') != -1:
parent_path = op.dirname(name)
basename = op.basename(name)
if not basename:
Expand Down Expand Up @@ -855,12 +959,20 @@ def __delitem__(self, name):
raise IOError("Not found")

else:
# delete the link, not an object
req = "/groups/" + self.id.uuid + "/links/" + name
# delete the link(s), not an object
if isinstance(name, list):
# delete multiple links
req = "/groups/" + self.id.uuid + "/links?titles=" + '/'.join(name)
else:
# delete single link
req = "/groups/" + self.id.uuid + "/links/" + name

self.DELETE(req)
if name.find('/') == -1 and name in self._link_db:
# remove from link cache
del self._link_db[name]

for n in name:
if n.find('/') == -1 and n in self._link_db:
# remove from link cache
del self._link_db[name]

def __len__(self):
""" Number of members attached to this group """
Expand Down Expand Up @@ -1186,8 +1298,16 @@ class HardLink(object):
Represents a hard link in an HDF5 file. Provided only so that
Group.get works in a sensible way. Has no other function.
"""
@property
# The uuid of the target object
def id(self):
return self._id

def __init__(self, id=None):
self._id = id

pass
def __repr__(self):
return f'<HardLink to "{self.id}">'


# TODO: implement equality testing for these
Expand Down
10 changes: 7 additions & 3 deletions h5pyd/_hl/httpconn.py
Original file line number Diff line number Diff line change
Expand Up @@ -439,9 +439,9 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True):

check_cache = self._cache is not None and use_cache and format == "json"
check_cache = check_cache and params["domain"] == self._domain

if any(param in params for param in no_cache_params):
check_cache = False
check_cache = check_cache and "select" not in params and "query" not in params
check_cache = check_cache and "follow_links" not in params and "pattern" not in params
check_cache = check_cache and "Limit" not in params and "Marker" not in params

if check_cache:
self.log.debug("httpcon - checking cache")
Expand All @@ -453,6 +453,7 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True):
self.log.info(
f"GET: {self._endpoint + req} [{params['domain']}] timeout: {self._timeout}"
)

for k in params:
if k != "domain":
v = params[k]
Expand All @@ -467,6 +468,7 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True):
stream = False
else:
stream = True

rsp = s.get(
self._endpoint + req,
params=params,
Expand Down Expand Up @@ -502,6 +504,8 @@ def GET(self, req, format="json", params=None, headers=None, use_cache=True):

add_to_cache = content_type and content_type.startswith("application/json")
add_to_cache = add_to_cache and content_length < MAX_CACHE_ITEM_SIZE and not req.endswith("/value")
add_to_cache = add_to_cache and "follow_links" not in params and "pattern" not in params
add_to_cache = add_to_cache and "Limit" not in params and "Marker" not in params

if add_to_cache:
# add to our _cache
Expand Down
Loading

0 comments on commit 98c80f4

Please sign in to comment.