Skip to content

Commit

Permalink
Merge pull request #22 from ICESat2-SlideRule/xarray_groups
Browse files Browse the repository at this point in the history
expand type of groups xarray backend reader can handle
  • Loading branch information
rwegener2 authored Dec 18, 2023
2 parents 81612b3 + 1e0e127 commit 66841c6
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 53 deletions.
113 changes: 63 additions & 50 deletions h5coro/backends/xarray_h5coro.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ def open_dataset(
earthaccess Auth object
log_level: indicates level of debugging output to produce. Passed to h5coro logger.config()
parameter logLevel
col_convs: dictionary mapping a variable name to the conversion function applied to that variable's data
(used for the delta_time conversion). The conversion code for delta_time is in the datasets/icesat2.py module
'''
# set h5coro config to info
logger.config(log_level)
Expand All @@ -49,59 +51,70 @@ def open_dataset(
variables, group_attr, groups = h5obj.list(group, w_attr=True)
var_paths = [os.path.join(group, name) for name in variables.keys()]

# submit data request for variables and attributes and create data view
promise = h5obj.readDatasets(var_paths, block=True)
view = H5View(promise)
for step in group.split('/'):
if step != '': # First group will be '' if there was a leading `/` in the group path
view = view[step]

# Format the data variables (and coordinate variables)
# if there were variables in that group, retrieve them
variable_dicts = {}
coordinate_names = []
for var in view.keys():
# check dimensionality
if variables[var]['__metadata__'].ndims > 1:
# ignore 2d variables
warnings.warn((f'Variable {var} has more than 1 dimension. Reading variables with'
'more than 1 dimension is not currently supported. This variable will be'
'dropped.'))
continue
else:
# check for coordinate variables and add any coordinates to the coordinate_names list
try:
coord = re.split(';|,| |\n', variables[var]['coordinates'])
coord = [c for c in coord if c]
for c in coord:
if c not in coordinate_names:
coordinate_names.append(c)
except KeyError:
# if no coordinates were listed for that variable then set its coordinate to itself
coord = [var]

# add the variable contents as a tuple to the data variables dictionary
# (use only the first coordinate since xarray doesn't accept more coordinates than dimensions)
if var in col_convs:
variable_dicts[var] = (coord[0], col_convs[var](view[var]), variables[var])
else:
variable_dicts[var] = (coord[0], view[var], variables[var])


# separate out the coordinate variables from the data variables
coords = {}
for coord_name in coordinate_names:
# drop the coordinate variable from variable_dicts
coordinate = variable_dicts.pop(coord_name)
# add the coordinate variable to the coords dictionary
coords[coord_name] = coordinate
if var_paths:
# submit data request for variables and attributes and create data view
promise = h5obj.readDatasets(var_paths, block=True)
view = H5View(promise)
for step in group.split('/'):
if step != '': # First group will be '' if there was a leading `/` in the group path
view = view[step]

# Ensure consistency of dimension coordinates
dimension_coordinates = [val[0] for val in variable_dicts.values()]
for coord_name, coordinate in coords.items():
# For any of the coordinates that are dimension coordinates, ensure that their own coordinate
# is set to itself
if coord_name in dimension_coordinates:
coords[coord_name] = (coord_name, coordinate[1], coordinate[2])
# Format the data variables (and coordinate variables)
coordinate_names = []
for var in view.keys():
# check dimensionality
if variables[var]['__metadata__'].ndims > 1:
# ignore 2d variables
warnings.warn(
('Variable {} has more than 1 dimension. Reading variables with'
'more than 1 dimension is not currently supported. This variable will be'
'dropped.'.format(var))
)
continue
else:
# check for coordinate variables and add any coordinates to the coordinate_names list
try:
coord = re.split(';|,| |\n', variables[var]['coordinates'])
coord = [c for c in coord if c]
for c in coord:
if c not in coordinate_names:
coordinate_names.append(c)
except KeyError:
# if no coordinates were listed for that variable then set its coordinate to itself
coord = [var]

# add the variable contents as a tuple to the data variables dictionary
# (use only the first coordinate since xarray doesn't accept more coordinates than dimensions)
if var in col_convs:
# apply the conversion function registered for this variable in col_convs (e.g. delta_time)
variable_dicts[var] = (coord[0], col_convs[var](view[var]), variables[var])
else:
data = view[var]
if isinstance(data, np.ndarray):
variable_dicts[var] = (coord[0], data, variables[var])
else:
warnings.warn(
'Unable to read variable {} from the file. Skipping this variable'.format(var)
)


# separate out the coordinate variables from the data variables
for coord_name in coordinate_names:
# drop the coordinate variable from variable_dicts
coordinate = variable_dicts.pop(coord_name)
# add the coordinate variable to the coords dictionary
coords[coord_name] = coordinate

# Ensure consistency of dimension coordinates
dimension_coordinates = [val[0] for val in variable_dicts.values()]
for coord_name, coordinate in coords.items():
# For any of the coordinates that are dimension coordinates, ensure that their own coordinate
# is set to itself
if coord_name in dimension_coordinates:
coords[coord_name] = (coord_name, coordinate[1], coordinate[2])

return xr.Dataset(
variable_dicts,
Expand Down
8 changes: 5 additions & 3 deletions h5coro/h5coro.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os

from h5coro.h5dataset import H5Dataset
from h5coro.h5promise import H5Promise, massagePath
from h5coro.h5metadata import H5Metadata
Expand Down Expand Up @@ -152,10 +154,10 @@ def inspectPath(self, path, w_attr=True):
metadata = self.metadataTable[path]

# read each attribute
attr_paths = [f'{path}/{attribute}' for attribute in attributes]
attr_paths = [os.path.join(path, attribute) for attribute in attributes]
promise = self.readDatasets(attr_paths, enableAttributes=True)
for attribute in attributes:
attributes[attribute] = promise.datasets[f'{path}/{attribute}'].values
attributes[attribute] = promise.datasets[os.path.join(path, attribute)].values

# return results
return links, attributes, metadata
Expand All @@ -176,7 +178,7 @@ def list(self, path, w_attr=True):
# inspect each link to get metadata, attributes, group info, etc
if len(links) > 0:
executor = concurrent.futures.ThreadPoolExecutor(max_workers=(len(links) + len(attributes)))
futures = [executor.submit(inspectThread, self, f'{path}/{link}', w_attr) for link in links]
futures = [executor.submit(inspectThread, self, os.path.join(path, link), w_attr) for link in links]
for future in concurrent.futures.as_completed(futures):
name, metadata, attrs = future.result() # overwrites attribute set
element = isolateElement(name, path)
Expand Down

0 comments on commit 66841c6

Please sign in to comment.