From c4a5aa2e2fb7e7bebe03c1c4af6b7e0e9e2a9e3d Mon Sep 17 00:00:00 2001 From: Rachel Wegener Date: Mon, 18 Dec 2023 15:21:12 +0000 Subject: [PATCH 1/3] add if statement to ignore groups in formatting --- h5coro/backends/xarray_h5coro.py | 102 ++++++++++++++++--------------- 1 file changed, 52 insertions(+), 50 deletions(-) diff --git a/h5coro/backends/xarray_h5coro.py b/h5coro/backends/xarray_h5coro.py index c6575aa..d5eac64 100644 --- a/h5coro/backends/xarray_h5coro.py +++ b/h5coro/backends/xarray_h5coro.py @@ -49,59 +49,61 @@ def open_dataset( variables, group_attr, groups = h5obj.list(group, w_attr=True) var_paths = [os.path.join(group, name) for name in variables.keys()] - # submit data request for variables and attributes and create data view - promise = h5obj.readDatasets(var_paths, block=True) - view = H5View(promise) - for step in group.split('/'): - if step != '': # First group will be '' if there was a leading `/` in the group path - view = view[step] - - # Format the data variables (and coordinate variables) + # if there were variables in that group, retrieve them variable_dicts = {} - coordinate_names = [] - for var in view.keys(): - # check dimensionality - if variables[var]['__metadata__'].ndims > 1: - # ignore 2d variables - warnings.warn((f'Variable {var} has more than 1 dimension. Reading variables with' - 'more than 1 dimension is not currently supported. This variable will be' - 'dropped.')) - continue - else: - # check for coordinate variables and add any coordinates to the coordinate_names list - try: - coord = re.split(';|,| |\n', variables[var]['coordinates']) - coord = [c for c in coord if c] - for c in coord: - if c not in coordinate_names: - coordinate_names.append(c) - except KeyError: - # if no coordinates were listed for that variable then set it's coordinate as itself - coord = [var] - - # add the variable contents as a tuple to the data variables dictionary - # (use only the first coordinate since xarray doesn't except more coordinates that dimensions) - if var in col_convs: - variable_dicts[var] = (coord[0], col_convs[var](view[var]), variables[var]) - else: - variable_dicts[var] = (coord[0], view[var], variables[var]) - - - # seperate out the coordinate variables from the data variables coords = {} - for coord_name in coordinate_names: - # drop the coordiante variable from variable_dicts - coordinate = variable_dicts.pop(coord_name) - # add the coordiante variable to the coords dictionary - coords[coord_name] = coordinate + if var_paths: + # submit data request for variables and attributes and create data view + promise = h5obj.readDatasets(var_paths, block=True) + view = H5View(promise) + for step in group.split('/'): + if step != '': # First group will be '' if there was a leading `/` in the group path + view = view[step] - # Ensure consistency of dimension coordinates - dimension_coordinates = [val[0] for val in variable_dicts.values()] - for coord_name, coordinate in coords.items(): - # For any of the coordinates that are dimension coordinates, ensure that their own coordinate - # is set to itself - if coord_name in dimension_coordinates: - coords[coord_name] = (coord_name, coordinate[1], coordinate[2]) + # Format the data variables (and coordinate variables) + coordinate_names = [] + for var in view.keys(): + # check dimensionality + if variables[var]['__metadata__'].ndims > 1: + # ignore 2d variables + warnings.warn((f'Variable {var} has more than 1 dimension. Reading variables with' + 'more than 1 dimension is not currently supported. This variable will be' + 'dropped.')) + continue + else: + # check for coordinate variables and add any coordinates to the coordinate_names list + try: + coord = re.split(';|,| |\n', variables[var]['coordinates']) + coord = [c for c in coord if c] + for c in coord: + if c not in coordinate_names: + coordinate_names.append(c) + except KeyError: + # if no coordinates were listed for that variable then set it's coordinate as itself + coord = [var] + + # add the variable contents as a tuple to the data variables dictionary + # (use only the first coordinate since xarray doesn't except more coordinates that dimensions) + if var in col_convs: + variable_dicts[var] = (coord[0], col_convs[var](view[var]), variables[var]) + else: + variable_dicts[var] = (coord[0], view[var], variables[var]) + + + # seperate out the coordinate variables from the data variables + for coord_name in coordinate_names: + # drop the coordiante variable from variable_dicts + coordinate = variable_dicts.pop(coord_name) + # add the coordiante variable to the coords dictionary + coords[coord_name] = coordinate + + # Ensure consistency of dimension coordinates + dimension_coordinates = [val[0] for val in variable_dicts.values()] + for coord_name, coordinate in coords.items(): + # For any of the coordinates that are dimension coordinates, ensure that their own coordinate + # is set to itself + if coord_name in dimension_coordinates: + coords[coord_name] = (coord_name, coordinate[1], coordinate[2]) return xr.Dataset( variable_dicts, From 1579b1d78d6162261ff60253e0a73d0b3cc9476f Mon Sep 17 00:00:00 2001 From: Rachel Wegener Date: Mon, 18 Dec 2023 15:26:57 +0000 Subject: [PATCH 2/3] switch to path joins --- h5coro/h5coro.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/h5coro/h5coro.py b/h5coro/h5coro.py index 5b97a99..bbc9725 100644 --- a/h5coro/h5coro.py +++ b/h5coro/h5coro.py @@ -27,6 +27,8 @@ # OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import os + from h5coro.h5dataset import H5Dataset from h5coro.h5promise import H5Promise, massagePath from h5coro.h5metadata import H5Metadata @@ -152,10 +154,10 @@ def inspectPath(self, path, w_attr=True): metadata = self.metadataTable[path] # read each attribute - attr_paths = [f'{path}/{attribute}' for attribute in attributes] + attr_paths = [os.path.join(path, attribute) for attribute in attributes] promise = self.readDatasets(attr_paths, enableAttributes=True) for attribute in attributes: - attributes[attribute] = promise.datasets[f'{path}/{attribute}'].values + attributes[attribute] = promise.datasets[os.path.join(path, attribute)].values # return results return links, attributes, metadata @@ -176,7 +178,7 @@ def list(self, path, w_attr=True): # inspect each link to get metadata, attributes, group info, etc if len(links) > 0: executor = concurrent.futures.ThreadPoolExecutor(max_workers=(len(links) + len(attributes))) - futures = [executor.submit(inspectThread, self, f'{path}/{link}', w_attr) for link in links] + futures = [executor.submit(inspectThread, self, os.path.join(path, link), w_attr) for link in links] for future in concurrent.futures.as_completed(futures): name, metadata, attrs = future.result() # overwrites attribute set element = isolateElement(name, path) From 1e0e12730abdfc549b15fb87d47556f89a336194 Mon Sep 17 00:00:00 2001 From: Rachel Wegener Date: Mon, 18 Dec 2023 15:57:48 +0000 Subject: [PATCH 3/3] skip variables that don't include any data --- h5coro/backends/xarray_h5coro.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/h5coro/backends/xarray_h5coro.py b/h5coro/backends/xarray_h5coro.py index d5eac64..e1f245b 100644 --- a/h5coro/backends/xarray_h5coro.py +++ b/h5coro/backends/xarray_h5coro.py @@ -34,6 +34,8 @@ def open_dataset( earthaccess Auth object log_level: indicates level of debugging output to produce. Passed to h5coro logger.config() parameter logLevel + col_convs: the conversion dictionary that is used for the delta_time conversion. conversion code + for delta_time is in the datasets/icesat2.py module ''' # set h5coro config to info logger.config(log_level) @@ -66,9 +68,11 @@ def open_dataset( # check dimensionality if variables[var]['__metadata__'].ndims > 1: # ignore 2d variables - warnings.warn((f'Variable {var} has more than 1 dimension. Reading variables with' - 'more than 1 dimension is not currently supported. This variable will be' - 'dropped.')) + warnings.warn( + ('Variable {} has more than 1 dimension. Reading variables with' + 'more than 1 dimension is not currently supported. This variable will be' + 'dropped.'.format(var)) + ) continue else: # check for coordinate variables and add any coordinates to the coordinate_names list @@ -85,9 +89,16 @@ def open_dataset( # add the variable contents as a tuple to the data variables dictionary # (use only the first coordinate since xarray doesn't except more coordinates that dimensions) if var in col_convs: + # convert delta_time column to variable_dicts[var] = (coord[0], col_convs[var](view[var]), variables[var]) else: - variable_dicts[var] = (coord[0], view[var], variables[var]) + data = view[var] + if isinstance(data, np.ndarray): + variable_dicts[var] = (coord[0], data, variables[var]) + else: + warnings.warn( + 'Unable to read variable {} from the file. Skipping this variable'.format(var) + ) # seperate out the coordinate variables from the data variables