diff --git a/hoss/coordinate_utilities.py b/hoss/coordinate_utilities.py index 1550d81..525b038 100644 --- a/hoss/coordinate_utilities.py +++ b/hoss/coordinate_utilities.py @@ -5,14 +5,13 @@ import numpy as np from netCDF4 import Dataset - -# from numpy import ndarray +from pyproj import CRS, Transformer from varinfo import VariableFromDmr, VarInfoFromDmr from hoss.exceptions import ( IncompatibleCoordinateVariables, + InvalidCoordinateData, InvalidCoordinateDataset, - InvalidCoordinateVariable, MissingCoordinateVariable, MissingVariable, ) @@ -97,24 +96,28 @@ def get_coordinate_variables( varinfo: VarInfoFromDmr, requested_variables: list, ) -> tuple[list, list]: - """This function returns latitude and longitude variables listed in the - CF-Convention coordinates metadata attribute. It returns them in a specific - order [latitude, longitude]" + """This function returns latitude and longitude variable names from + latitude and longitude variables listed in the CF-Convention coordinates + metadata attribute. It returns them in a specific + order [latitude_name, longitude_name]" """ - coordinate_variables_list = varinfo.get_references_for_attribute( + coordinate_variables = varinfo.get_references_for_attribute( requested_variables, 'coordinates' ) + latitude_coordinate_variables = [ coordinate - for coordinate in coordinate_variables_list - if varinfo.get_variable(coordinate).is_latitude() + for coordinate in coordinate_variables + if varinfo.get_variable(coordinate) is not None + and varinfo.get_variable(coordinate).is_latitude() ] longitude_coordinate_variables = [ coordinate - for coordinate in coordinate_variables_list - if varinfo.get_variable(coordinate).is_longitude() + for coordinate in coordinate_variables + if varinfo.get_variable(coordinate) is not None + and varinfo.get_variable(coordinate).is_longitude() ] return latitude_coordinate_variables, longitude_coordinate_variables @@ -162,12 +165,17 @@ def get_coordinate_array( return coordinate_array -def get_1D_dim_array_data_from_dimvalues( - dim_values: np.ndarray, dim_indices: np.ndarray, dim_size: int +def get_1d_dim_array_data_from_dimvalues( + dim_values: np.ndarray, + dim_indices: np.ndarray, + dim_size: int, ) -> np.ndarray: """ - return a full dimension data array based on the 2 projected points and - grid size + return a full dimension data array based upon 2 valid projected values + (x and y) given in dim_values which are within the indices given in + dim_indices and the full dimension size provided in dim_size. The + dim_indices need to be between 0 and less than the dim_size. + returns a 1D array of size = dim_size with proper dimension array values """ if (dim_indices[1] != dim_indices[0]) and (dim_values[1] != dim_values[0]): @@ -175,46 +183,216 @@ def get_1D_dim_array_data_from_dimvalues( dim_indices[1] - dim_indices[0] ) else: - raise InvalidCoordinateDataset(dim_values[0], dim_indices[0]) + raise InvalidCoordinateData( + 'No distinct valid coordinate points - ' + f'dim_index={dim_indices[0]}, dim_value={dim_values[0]}' + ) dim_min = dim_values[0] - (dim_resolution * dim_indices[0]) dim_max = dim_values[1] + (dim_resolution * (dim_size - 1 - dim_indices[1])) + return np.linspace(dim_min, dim_max, dim_size) def get_valid_indices( - coordinate_row_col: np.ndarray, coordinate: VariableFromDmr + lat_lon_array: np.ndarray, coordinate: VariableFromDmr ) -> np.ndarray: """ - Returns indices of a valid array without fill values if the fill - value is provided. If it is not provided, we check for valid values - for latitude and longitude + Returns an array of boolean values + - true, false - indicating a valid value (non-fill, within range) + for a given coordinate variable. A value of True means the + value is valid + - latitude or longitude - or + returns an empty ndarray of size (0,0) for any other variable. + """ + # get_attribute_value returns a value of type `str` coordinate_fill = coordinate.get_attribute_value('_FillValue') if coordinate_fill is not None: - is_not_fill = ~np.isclose(coordinate_row_col, float(coordinate_fill)) + is_not_fill = ~np.isclose(lat_lon_array, float(coordinate_fill)) else: # Creates an entire array of `True` values. - is_not_fill = np.ones_like(coordinate_row_col, dtype=bool) + is_not_fill = np.ones_like(lat_lon_array, dtype=bool) if coordinate.is_longitude(): - valid_indices = np.where( - np.logical_and( - is_not_fill, - np.logical_and( - coordinate_row_col >= -180.0, coordinate_row_col <= 360.0 - ), - ) - )[0] + valid_indices = np.logical_and( + is_not_fill, + np.logical_and(lat_lon_array >= -180.0, lat_lon_array <= 360.0), + ) elif coordinate.is_latitude(): - valid_indices = np.where( - np.logical_and( - is_not_fill, - np.logical_and(coordinate_row_col >= -90.0, coordinate_row_col <= 90.0), - ) - )[0] + valid_indices = np.logical_and( + is_not_fill, + np.logical_and(lat_lon_array >= -90.0, lat_lon_array <= 90.0), + ) else: - valid_indices = np.empty((0, 0)) + raise InvalidCoordinateDataset(coordinate.full_name_path) return valid_indices + + +def get_x_y_values_from_geographic_points( + points: list[tuple], + crs: CRS, +) -> list[tuple]: + """Takes an input list of (longitude, latitude) coordinates and CRS object + from PyProj and projects those points to the target grid. Then returns + the list of x-y points as tuple in (x,y) order + + """ + point_longitudes, point_latitudes = zip(*list(points)) + + from_geo_transformer = Transformer.from_crs(4326, crs) + points_x, points_y = ( # pylint: disable=unpacking-non-sequence + from_geo_transformer.transform(point_latitudes, point_longitudes) + ) + + return list(zip(points_x, points_y)) + + +def get_valid_row_col_pairs( + lat_arr: np.ndarray, + lon_arr: np.ndarray, + lat_coordinate: VariableFromDmr, + lon_coordinate: VariableFromDmr, +) -> tuple[list, list]: + """ + This function finds a set of indices maximally spread across + a row, and the set maximally spread across a column, with the + indices being valid in both the latitude and longitude datasets. + When interpolating between these points, the maximal spread + ensures the greatest interpolation accuracy. + """ + valid_lat_lon_mask = np.logical_and( + get_valid_indices(lat_arr, lat_coordinate), + get_valid_indices(lon_arr, lon_coordinate), + ) + + # get maximally spread points within rows + max_x_spread_pts = get_max_x_spread_pts(~valid_lat_lon_mask) + + # Doing the same for the columns is done by transposing the valid_mask + # and then fixing the results from [x, y] to [y, x]. + max_y_spread_trsp = get_max_x_spread_pts(np.transpose(~valid_lat_lon_mask)) + max_y_spread_pts = [ + list(np.flip(max_y_spread_trsp[0])), + list(np.flip(max_y_spread_trsp[1])), + ] + + return max_y_spread_pts, max_x_spread_pts + + +def get_max_x_spread_pts( + valid_geospatial_mask: np.ndarray, +) -> list[list]: + """ + This function returns two data points by x, y indices that are spread farthest + from each other in the same row, i.e., have the greatest delta-x value - and + are valid data points from the valid_geospatial_mask array passed in. The input array + must be a 2D Numpy mask array providing the valid data points, e.g., filtering + out fill values and out-of-range values. + - input is Numpy Mask Array, e.g., invalid latitudes & longitudes + - returns 2 points by indices, [[y_ind, x_ind], [y_ind, x_ind] + """ + # fill a sample array with x-index values, x_ind[i, j] = j + x_ind = np.indices( + (valid_geospatial_mask.shape[0], valid_geospatial_mask.shape[1]) + )[1] + + # mask x_ind to hide the invalid data points + valid_x_ind = np.ma.array(x_ind, mask=valid_geospatial_mask) + + if valid_x_ind.count() == 0: + raise InvalidCoordinateData("No valid coordinate data") + + # ptp (peak-to-peak) finds the greatest delta-x value amongst valid points + # for each row. Result is 1D + x_ind_spread = valid_x_ind.ptp(axis=1) + + # This finds which row has the greatest spread (delta-x) + max_x_spread_row = np.argmax(x_ind_spread) + + # Using the row reference, find the min-x and max-x + min_x_ind = np.min(valid_x_ind[max_x_spread_row]) + max_x_ind = np.max(valid_x_ind[max_x_spread_row]) + + # There is just one valid point + if min_x_ind == max_x_ind: + raise InvalidCoordinateData("Only one valid point in coordinate data") + + return [[max_x_spread_row, min_x_ind], [max_x_spread_row, max_x_ind]] + + +def create_dimension_arrays_from_coordinates( + prefetch_dataset: Dataset, + latitude_coordinate: VariableFromDmr, + longitude_coordinate: VariableFromDmr, + crs: CRS, + projected_dimension_names: list, +) -> dict[str, np.ndarray]: + """Generate artificial 1D dimensions scales for each + 2D dimension or coordinate variable. + 1) Get 2 valid geo grid points + 2) convert them to a projected x-y extent + 3) Generate the x-y dimscale array and return to the calling method + + """ + lat_arr = get_coordinate_array( + prefetch_dataset, + latitude_coordinate.full_name_path, + ) + lon_arr = get_coordinate_array( + prefetch_dataset, + longitude_coordinate.full_name_path, + ) + row_size, col_size = get_row_col_sizes_from_coordinate_datasets( + lat_arr, + lon_arr, + ) + + row_indices, col_indices = get_valid_row_col_pairs( + lat_arr, lon_arr, latitude_coordinate, longitude_coordinate + ) + + y_dim = get_dimension_array_from_geo_points( + lat_arr, lon_arr, crs, row_indices, row_size, True + ) + + x_dim = get_dimension_array_from_geo_points( + lat_arr, lon_arr, crs, col_indices, col_size, False + ) + + projected_y, projected_x = tuple(projected_dimension_names) + return {projected_y: y_dim, projected_x: x_dim} + + +def get_dimension_array_from_geo_points( + lat_arr: np.ndarray, + lon_arr: np.ndarray, + crs: CRS, + dimension_indices: list, + dimension_size: int, + is_row=True, +) -> np.ndarray: + """This function uses the list of lat/lon points corresponding + to a list of array indices and reprojects it with the CRS + provided and scales the x/y values to a dimension array with the dimension + size provided + + """ + if is_row: + index_for_dimension = 0 + x_or_y_index = 1 + else: + index_for_dimension = 1 + x_or_y_index = 0 + + geo_grid_points = [ + (lon_arr[row, col], lat_arr[row, col]) for row, col in dimension_indices + ] + x_y_values = get_x_y_values_from_geographic_points(geo_grid_points, crs) + indices_for_dimension = np.transpose(dimension_indices)[index_for_dimension] + dimension_values = np.transpose(x_y_values)[x_or_y_index] + return get_1d_dim_array_data_from_dimvalues( + dimension_values, indices_for_dimension, dimension_size + ) diff --git a/hoss/dimension_utilities.py b/hoss/dimension_utilities.py index 9ee8d02..61c634f 100644 --- a/hoss/dimension_utilities.py +++ b/hoss/dimension_utilities.py @@ -22,8 +22,14 @@ from numpy.ma.core import MaskedArray from varinfo import VariableFromDmr, VarInfoFromDmr -from hoss.bbox_utilities import flatten_list -from hoss.exceptions import InvalidNamedDimension, InvalidRequestedRange +from hoss.coordinate_utilities import ( + get_coordinate_variables, + get_projected_dimension_names_from_coordinate_variables, +) +from hoss.exceptions import ( + InvalidNamedDimension, + InvalidRequestedRange, +) from hoss.utilities import ( format_variable_set_string, get_opendap_nc4, @@ -55,7 +61,7 @@ def is_index_subset(message: Message) -> bool: ) -def prefetch_dimension_variables( +def get_prefetch_variables( opendap_url: str, varinfo: VarInfoFromDmr, required_variables: Set[str], @@ -64,48 +70,45 @@ def prefetch_dimension_variables( access_token: str, config: Config, ) -> str: - """Determine the dimensions that need to be "pre-fetched" from OPeNDAP in + """Determine the variables that need to be "pre-fetched" from OPeNDAP in order to derive index ranges upon them. Initially, this was just spatial and temporal dimensions, but to support generic dimension subsets, all required dimensions must be prefetched, along with any associated bounds variables referred to via the "bounds" metadata - attribute. + attribute. In cases where dimension variables do not exist, coordinate + variables will be prefetched and used to calculate dimension-scale values """ - required_dimensions = varinfo.get_required_dimensions(required_variables) - - # Iterate through all requested dimensions and extract a list of bounds - # references for each that has any. This will produce a list of lists, - # which should be flattened into a single list and then combined into a set - # to remove duplicates. - bounds = set( - flatten_list( - [ - list(varinfo.get_variable(dimension).references.get('bounds')) - for dimension in required_dimensions - if varinfo.get_variable(dimension).references.get('bounds') is not None - ] + prefetch_variables = varinfo.get_required_dimensions(required_variables) + if prefetch_variables: + prefetch_variables.update( + varinfo.get_references_for_attribute(prefetch_variables, 'bounds') + ) + else: + latitude_coordinates, longitude_coordinates = get_coordinate_variables( + varinfo, required_variables ) - ) - required_dimensions.update(bounds) + if latitude_coordinates and longitude_coordinates: + prefetch_variables = set(latitude_coordinates + longitude_coordinates) logger.info( 'Variables being retrieved in prefetch request: ' - f'{format_variable_set_string(required_dimensions)}' + f'{format_variable_set_string(prefetch_variables)}' ) - required_dimensions_nc4 = get_opendap_nc4( - opendap_url, required_dimensions, output_dir, logger, access_token, config + prefetch_variables_nc4 = get_opendap_nc4( + opendap_url, prefetch_variables, output_dir, logger, access_token, config ) # Create bounds variables if necessary. - add_bounds_variables(required_dimensions_nc4, required_dimensions, varinfo, logger) - - return required_dimensions_nc4 + check_add_artificial_bounds( + prefetch_variables_nc4, prefetch_variables, varinfo, logger + ) + return prefetch_variables_nc4 -def add_bounds_variables( +def check_add_artificial_bounds( dimensions_nc4: str, required_dimensions: Set[str], varinfo: VarInfoFromDmr, @@ -409,7 +412,9 @@ def get_dimension_indices_from_bounds( def add_index_range( - variable_name: str, varinfo: VarInfoFromDmr, index_ranges: IndexRanges + variable_name: str, + varinfo: VarInfoFromDmr, + index_ranges: IndexRanges, ) -> str: """Append the index ranges of each dimension for the specified variable. If there are no dimensions with listed index ranges, then the full @@ -418,29 +423,54 @@ def add_index_range( the antimeridian or Prime Meridian) will have a minimum index greater than the maximum index. In this case the full dimension range should be requested, as the related values will be masked before returning the - output to the user. + output to the user. When a variable does not have named dimensions, + the index_ranges cache is checked for dimensions derived from the + coordinates CF-Conventions metadata attribute. """ variable = varinfo.get_variable(variable_name) - range_strings = [] - for dimension in variable.dimensions: - dimension_range = index_ranges.get(dimension) + if variable.dimensions: + variable_dimensions = variable.dimensions + else: + # Anonymous dimensions, so check for dimension derived from coordinates: + variable_dimensions = get_projected_dimension_names_from_coordinate_variables( + varinfo, variable_name + ) - if dimension_range is not None and dimension_range[0] <= dimension_range[1]: - range_strings.append(f'[{dimension_range[0]}:{dimension_range[1]}]') - else: - range_strings.append('[]') + range_strings = get_range_strings(variable_dimensions, index_ranges) if all(range_string == '[]' for range_string in range_strings): indices_string = '' else: indices_string = ''.join(range_strings) - return f'{variable_name}{indices_string}' +def get_range_strings( + variable_dimensions: list, + index_ranges: IndexRanges, +) -> list: + """Retrieves index ranges which is a list of string elements + [min:max] from cache. If there is not an index range in the + cache for a dimension, the returned string is []. A bounding box + can cross the longitudinal edge of the grid. In those cases the + minimum dimension index is greater than the maximum dimension + index and this function will return []. HOSS will request the + full dimension range from OPeNDAP when the index range is []. + """ + range_strings = [] + for dimension in variable_dimensions: + dimension_range = index_ranges.get(dimension) + if dimension_range is not None and dimension_range[0] <= dimension_range[1]: + range_strings.append(f'[{dimension_range[0]}:{dimension_range[1]}]') + else: + range_strings.append('[]') + + return range_strings + + def get_fill_slice(dimension: str, fill_ranges: IndexRanges) -> slice: """Check the dictionary of dimensions that need to be filled for the given dimension. If present, the minimum index will be greater than the @@ -549,7 +579,12 @@ def get_dimension_bounds( be returned. """ - bounds = varinfo.get_variable(dimension_name).references.get('bounds') + try: + # For pseudo-variables, `varinfo.get_variable` returns `None` and + # therefore has no `references` attribute. + bounds = varinfo.get_variable(dimension_name).references.get('bounds') + except AttributeError: + bounds = None if bounds is not None: try: diff --git a/hoss/exceptions.py b/hoss/exceptions.py index 26ba3f8..18ab700 100644 --- a/hoss/exceptions.py +++ b/hoss/exceptions.py @@ -165,19 +165,31 @@ def __init__(self, longitude_shape, latitude_shape): ) -class InvalidCoordinateDataset(CustomError): - """This exception is raised when the two values passed to - the function computing the resolution are equal. This could - occur when there are too many fill values and distinct valid +class InvalidCoordinateData(CustomError): + """This exception is raised when the data does not contain at least. + two valid points. This could occur when there are too many fill values and distinct valid indices could not be obtained """ - def __init__(self, dim_value, dim_index): + def __init__(self, custom_msg): + super().__init__( + 'InvalidCoordinateData', + f'{custom_msg}', + ) + + +class InvalidCoordinateDataset(CustomError): + """This exception is raised when there are too + many fill values and two distinct valid indices + could not be obtained + + """ + + def __init__(self, coordinate_name): super().__init__( 'InvalidCoordinateDataset', - 'Cannot compute the dimension resolution for ' - f'dim_value: "{dim_value}" dim_index: "{dim_index}"', + f'Cannot get valid indices for {coordinate_name}', ) diff --git a/hoss/projection_utilities.py b/hoss/projection_utilities.py index bdacdc0..cc21f9a 100644 --- a/hoss/projection_utilities.py +++ b/hoss/projection_utilities.py @@ -49,6 +49,11 @@ def get_variable_crs(variable: str, varinfo: VarInfoFromDmr) -> CRS: another are stored in the `Variable.references` dictionary attribute as sets. There should only be one reference in the `grid_mapping` attribute value, so the first element of the set is retrieved. + If the grid mapping variable, as referred to in the grid_mapping + CF-Convention metadata attribute, does not exist in the file then + the earthdata-varinfo configuration file is checked, as it may + contain metadata overrides specified for that non-existent variable + name. """ grid_mapping = next( @@ -57,9 +62,21 @@ def get_variable_crs(variable: str, varinfo: VarInfoFromDmr) -> CRS: if grid_mapping is not None: try: - crs = CRS.from_cf(varinfo.get_variable(grid_mapping).attributes) + grid_mapping_variable = varinfo.get_variable(grid_mapping) + if grid_mapping_variable is not None: + cf_attributes = grid_mapping_variable.attributes + else: + # check for any overrides + cf_attributes = varinfo.get_missing_variable_attributes(grid_mapping) + + if cf_attributes: + crs = CRS.from_cf(cf_attributes) + else: + raise MissingGridMappingVariable(grid_mapping, variable) + except AttributeError as exception: raise MissingGridMappingVariable(grid_mapping, variable) from exception + else: raise MissingGridMappingMetadata(variable) diff --git a/hoss/spatial.py b/hoss/spatial.py index 79eb5c2..917c526 100644 --- a/hoss/spatial.py +++ b/hoss/spatial.py @@ -27,7 +27,7 @@ from harmony.message import Message from netCDF4 import Dataset from numpy.ma.core import MaskedArray -from varinfo import VarInfoFromDmr +from varinfo import VariableFromDmr, VarInfoFromDmr from hoss.bbox_utilities import ( BBox, @@ -35,6 +35,12 @@ get_harmony_message_bbox, get_shape_file_geojson, ) +from hoss.coordinate_utilities import ( + create_dimension_arrays_from_coordinates, + get_coordinate_variables, + get_projected_dimension_names_from_coordinate_variables, + get_variables_with_anonymous_dims, +) from hoss.dimension_utilities import ( IndexRange, IndexRanges, @@ -78,6 +84,10 @@ def get_spatial_index_ranges( around the exterior of the user-defined GeoJSON shape, to ensure the correct extents are derived. + If geographic and projected dimensions are not specified in the granule, + the coordinate datasets are used to calculate the x-y dimensions and the index ranges + are calculated similar to a projected grid. + """ bounding_box = get_harmony_message_bbox(harmony_message) index_ranges = {} @@ -91,7 +101,7 @@ def get_spatial_index_ranges( ) with Dataset(dimensions_path, 'r') as dimensions_file: - if len(geographic_dimensions) > 0: + if geographic_dimensions: # If there is no bounding box, but there is a shape file, calculate # a bounding box to encapsulate the GeoJSON shape: if bounding_box is None and shape_file_path is not None: @@ -103,7 +113,7 @@ def get_spatial_index_ranges( dimension, varinfo, dimensions_file, bounding_box ) - if len(projected_dimensions) > 0: + if projected_dimensions: for non_spatial_variable in non_spatial_variables: index_ranges.update( get_projected_x_y_index_ranges( @@ -115,6 +125,26 @@ def get_spatial_index_ranges( shape_file_path=shape_file_path, ) ) + variables_with_anonymous_dims = get_variables_with_anonymous_dims( + varinfo, required_variables + ) + for variable_with_anonymous_dims in variables_with_anonymous_dims: + latitude_coordinates, longitude_coordinates = get_coordinate_variables( + varinfo, [variable_with_anonymous_dims] + ) + if latitude_coordinates and longitude_coordinates: + index_ranges.update( + get_x_y_index_ranges_from_coordinates( + variable_with_anonymous_dims, + varinfo, + dimensions_file, + varinfo.get_variable(latitude_coordinates[0]), + varinfo.get_variable(longitude_coordinates[0]), + index_ranges, + bounding_box=bounding_box, + shape_file_path=shape_file_path, + ) + ) return index_ranges @@ -187,6 +217,79 @@ def get_projected_x_y_index_ranges( return x_y_index_ranges +def get_x_y_index_ranges_from_coordinates( + non_spatial_variable: str, + varinfo: VarInfoFromDmr, + prefetch_coordinate_datasets: Dataset, + latitude_coordinate: VariableFromDmr, + longitude_coordinate: VariableFromDmr, + index_ranges: IndexRanges, + bounding_box: BBox = None, + shape_file_path: str = None, +) -> IndexRanges: + """This function returns a dictionary containing the minimum and maximum + index ranges for the projected_x and projected_y recalculated dimension scales + + index_ranges = {'projected_x': (20, 42), 'projected_y': (31, 53)} + + This method is called when the CF standards are not followed + in the source granule and only coordinate datasets are provided. + The coordinate datasets along with the crs is used to calculate + the x-y projected dimension scales. The dimensions of the input, + non-spatial variable are checked for associated coordinates. If + these are present, and they have not already been added to the + `index_ranges` cache, the extents of the input spatial subset + are determined in these projected coordinates. The minimum and + maximum values are then derived from these projected coordinate + points. + + """ + + crs = get_variable_crs(non_spatial_variable, varinfo) + + projected_dimension_names = get_projected_dimension_names_from_coordinate_variables( + varinfo, non_spatial_variable + ) + + dimension_arrays = create_dimension_arrays_from_coordinates( + prefetch_coordinate_datasets, + latitude_coordinate, + longitude_coordinate, + crs, + projected_dimension_names, + ) + + projected_y, projected_x = dimension_arrays.keys() + + if not set((projected_x, projected_y)).issubset(set(index_ranges.keys())): + + x_y_extents = get_projected_x_y_extents( + dimension_arrays[projected_x][:], + dimension_arrays[projected_y][:], + crs, + shape_file=shape_file_path, + bounding_box=bounding_box, + ) + + x_index_ranges = get_dimension_index_range( + dimension_arrays[projected_x][:], + x_y_extents['x_min'], + x_y_extents['x_max'], + bounds_values=None, + ) + y_index_ranges = get_dimension_index_range( + dimension_arrays[projected_y][:], + x_y_extents['y_min'], + x_y_extents['y_max'], + bounds_values=None, + ) + x_y_index_ranges = {projected_x: x_index_ranges, projected_y: y_index_ranges} + else: + x_y_index_ranges = {} + + return x_y_index_ranges + + def get_geographic_index_range( dimension: str, varinfo: VarInfoFromDmr, diff --git a/hoss/subset.py b/hoss/subset.py index 8a01321..a08e590 100644 --- a/hoss/subset.py +++ b/hoss/subset.py @@ -21,13 +21,17 @@ IndexRanges, add_index_range, get_fill_slice, + get_prefetch_variables, get_requested_index_ranges, is_index_subset, - prefetch_dimension_variables, ) from hoss.spatial import get_spatial_index_ranges from hoss.temporal import get_temporal_index_ranges -from hoss.utilities import download_url, format_variable_set_string, get_opendap_nc4 +from hoss.utilities import ( + download_url, + format_variable_set_string, + get_opendap_nc4, +) def subset_granule( @@ -87,7 +91,7 @@ def subset_granule( if request_is_index_subset: # Prefetch all dimension variables in full: - dimensions_path = prefetch_dimension_variables( + dimensions_path = get_prefetch_variables( opendap_url, varinfo, required_variables, @@ -122,6 +126,7 @@ def subset_granule( shape_file_path = get_request_shape_file( harmony_message, output_dir, logger, config ) + index_ranges.update( get_spatial_index_ranges( required_variables, diff --git a/tests/unit/test_coordinate_utilities.py b/tests/unit/test_coordinate_utilities.py index 1c7aa04..dc0b2a1 100644 --- a/tests/unit/test_coordinate_utilities.py +++ b/tests/unit/test_coordinate_utilities.py @@ -7,22 +7,27 @@ from harmony.util import config from netCDF4 import Dataset from numpy.testing import assert_array_equal +from pyproj import CRS from varinfo import VarInfoFromDmr from hoss.coordinate_utilities import ( any_absent_dimension_variables, - get_1D_dim_array_data_from_dimvalues, + get_1d_dim_array_data_from_dimvalues, get_coordinate_array, get_coordinate_variables, + get_dimension_array_from_geo_points, + get_max_x_spread_pts, get_projected_dimension_names, get_projected_dimension_names_from_coordinate_variables, get_row_col_sizes_from_coordinate_datasets, get_valid_indices, + get_valid_row_col_pairs, get_variables_with_anonymous_dims, + get_x_y_values_from_geographic_points, ) from hoss.exceptions import ( IncompatibleCoordinateVariables, - InvalidCoordinateDataset, + InvalidCoordinateData, InvalidCoordinateVariable, MissingCoordinateVariable, MissingVariable, @@ -74,48 +79,6 @@ def setUpClass(cls): ] ) - cls.lon_arr_reversed = np.array( - [ - [ - -179.3, - -179.3, - -179.3, - -179.3, - -9999, - -9999, - -179.3, - -179.3, - -179.3, - -179.3, - ], - [ - -120.2, - -120.2, - -120.2, - -9999, - -9999, - -120.2, - -120.2, - -120.2, - -120.2, - -120.2, - ], - [20.6, 20.6, 20.6, 20.6, 20.6, 20.6, 20.6, 20.6, -9999, -9999], - [150.5, 150.5, 150.5, 150.5, 150.5, 150.5, -9999, -9999, 150.5, 150.5], - [178.4, 178.4, 178.4, 178.4, 178.4, 178.4, 178.4, -9999, 178.4, 178.4], - ] - ) - - cls.lat_arr_reversed = np.array( - [ - [89.3, 79.3, -9999, 59.3, 29.3, 2.1, -9999, -59.3, -79.3, -89.3], - [89.3, 79.3, 60.3, 59.3, 29.3, 2.1, -9999, -59.3, -79.3, -89.3], - [89.3, -9999, 60.3, 59.3, 29.3, 2.1, -9999, -9999, -9999, -89.3], - [-9999, 79.3, -60.3, -9999, -9999, -9999, -60.2, -59.3, -79.3, -89.3], - [-89.3, 79.3, -60.3, -9999, -9999, -9999, -60.2, -59.3, -79.3, -9999], - ] - ) - def setUp(self): """Create fixtures that should be unique per test.""" @@ -162,7 +125,7 @@ def test_get_coordinate_variables(self): for expected_variable in expected_coordinate_variables[1]: self.assertIn(expected_variable, actual_coordinate_variables[1]) - def test_get_1D_dim_array_data_from_dimvalues(self): + def test_get_1d_dim_array_data_from_dimvalues(self): """Ensure that the dimension scale generated from the provided dimension values are accurate for ascending and descending scales @@ -173,7 +136,7 @@ def test_get_1D_dim_array_data_from_dimvalues(self): dim_indices_asc = np.array([0, 1]) dim_size_asc = 12 expected_dim_asc = np.array([2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24]) - dim_array_values = get_1D_dim_array_data_from_dimvalues( + dim_array_values = get_1d_dim_array_data_from_dimvalues( dim_values_asc, dim_indices_asc, dim_size_asc ) self.assertTrue(np.array_equal(dim_array_values, expected_dim_asc)) @@ -184,7 +147,7 @@ def test_get_1D_dim_array_data_from_dimvalues(self): dim_size_desc = 10 expected_dim_desc = np.array([120, 110, 100, 90, 80, 70, 60, 50, 40, 30]) - dim_array_values = get_1D_dim_array_data_from_dimvalues( + dim_array_values = get_1d_dim_array_data_from_dimvalues( dim_values_desc, dim_indices_desc, dim_size_desc ) self.assertTrue(np.array_equal(dim_array_values, expected_dim_desc)) @@ -193,28 +156,26 @@ def test_get_1D_dim_array_data_from_dimvalues(self): dim_values_invalid = np.array([2, 2]) dim_indices_asc = np.array([0, 1]) dim_size_asc = 12 - with self.assertRaises(InvalidCoordinateDataset) as context: - get_1D_dim_array_data_from_dimvalues( + with self.assertRaises(InvalidCoordinateData) as context: + get_1d_dim_array_data_from_dimvalues( dim_values_invalid, dim_indices_asc, dim_size_asc ) self.assertEqual( context.exception.message, - 'Cannot compute the dimension resolution for ' - 'dim_value: "2" dim_index: "0"', + 'No distinct valid coordinate points - ' 'dim_index=0, dim_value=2', ) with self.subTest('invalid dimension indices'): dim_values_desc = np.array([100, 70]) dim_indices_invalid = np.array([5, 5]) dim_size_desc = 10 - with self.assertRaises(InvalidCoordinateDataset) as context: - get_1D_dim_array_data_from_dimvalues( + with self.assertRaises(InvalidCoordinateData) as context: + get_1d_dim_array_data_from_dimvalues( dim_values_desc, dim_indices_invalid, dim_size_desc ) self.assertEqual( context.exception.message, - 'Cannot compute the dimension resolution for ' - 'dim_value: "100" dim_index: "5"', + 'No distinct valid coordinate points - ' 'dim_index=5, dim_value=100', ) def test_get_coordinate_array(self): @@ -367,7 +328,7 @@ def test_get_row_col_sizes_from_coordinate_datasets(self): get_row_col_sizes_from_coordinate_datasets(self.lat_arr, self.lon_arr), expected_row_col_sizes, ) - with self.subTest('Retrieves the expected row col sizes for the 1d array'): + with self.subTest('Retrieves the expected row col sizes for the dim array'): self.assertEqual( get_row_col_sizes_from_coordinate_datasets( np.array([1, 2, 3, 4]), np.array([5, 6, 7, 8, 9]) @@ -433,15 +394,14 @@ def test_get_valid_indices(self): ascending or descending. """ - expected_valid_indices_lat_arr_with_fill = np.array([1, 2, 3, 4]) - expected_valid_indices_lon_arr_with_fill = np.array([0, 1, 2, 6, 7, 8, 9]) - - expected_valid_indices_lat_arr_over_range = np.array([0, 1, 2]) expected_valid_indices_lon_arr_over_range = np.array([0, 1, 2, 6, 7, 8, 9]) fill_array = np.array([-9999.0, -9999.0, -9999.0, -9999.0]) with self.subTest('valid indices for latitude with fill values'): + expected_valid_indices_lat_arr_with_fill = np.array( + [False, True, True, True, True] + ) valid_indices_lat_arr = get_valid_indices( self.lat_arr[:, 2], self.varinfo.get_variable(self.latitude) ) @@ -449,6 +409,9 @@ def test_get_valid_indices(self): valid_indices_lat_arr, expected_valid_indices_lat_arr_with_fill ) with self.subTest('valid indices for longitude with fill values'): + expected_valid_indices_lon_arr_with_fill = np.array( + [True, True, True, False, False, False, True, True, True, True] + ) valid_indices_lon_arr = get_valid_indices( self.lon_arr[0, :], self.varinfo.get_variable(self.longitude) ) @@ -456,6 +419,9 @@ def test_get_valid_indices(self): valid_indices_lon_arr, expected_valid_indices_lon_arr_with_fill ) with self.subTest('latitude values beyond valid range'): + expected_valid_indices_lat_arr_over_range = np.array( + [True, True, True, False, False] + ) valid_indices_lat_arr = get_valid_indices( self.lat_arr[:, 3], self.varinfo.get_variable(self.latitude) ) @@ -463,6 +429,9 @@ def test_get_valid_indices(self): valid_indices_lat_arr, expected_valid_indices_lat_arr_over_range ) with self.subTest('longitude values beyond valid range'): + expected_valid_indices_lon_arr_over_range = np.array( + [True, True, True, False, False, False, True, True, True, True] + ) valid_indices_lon_arr = get_valid_indices( self.lon_arr[1, :], self.varinfo.get_variable(self.longitude) ) @@ -470,10 +439,13 @@ def test_get_valid_indices(self): valid_indices_lon_arr, expected_valid_indices_lon_arr_over_range ) with self.subTest('all fill values - no valid indices'): - valid_indices = get_valid_indices( + expected_valid_indices_fill_values = np.array([False, False, False, False]) + valid_indices_all_fill = get_valid_indices( fill_array, self.varinfo.get_variable(self.longitude) ) - self.assertEqual(valid_indices.size, 0) + np.testing.assert_array_equal( + valid_indices_all_fill, expected_valid_indices_fill_values + ) def test_get_variables_with_anonymous_dims(self): """Ensure that variables with no dimensions are @@ -543,3 +515,246 @@ def test_any_absent_dimension_variables(self): self.test_varinfo, '/Soil_Moisture_Retrieval_Data_AM/variable_has_dim' ) self.assertFalse(variable_has_fake_dims) + + def test_get_max_x_spread_pts(self): + """Ensure that two valid sets of indices are returned by the function + with a masked dataset as input + + """ + + with self.subTest('Get two sets of valid indices for points from coordinates'): + valid_values = np.array( + [ + [True, True, True, True, False, False, True, True, True, True], + [True, True, True, False, False, False, True, True, True, True], + [True, True, True, False, True, False, True, True, True, True], + [True, True, True, False, False, False, True, True, True, True], + [True, True, False, False, False, False, True, True, True, True], + ] + ) + expected_indices = [[0, 0], [0, 9]] + actual_indices = get_max_x_spread_pts(~valid_values) + self.assertTrue(actual_indices[0] == expected_indices[0]) + self.assertTrue(actual_indices[1] == expected_indices[1]) + + with self.subTest('With just one valid index in the coordinates'): + valid_values = np.array( + [ + [False, False, False], + [False, True, False], + [False, False, False], + ] + ) + with self.assertRaises(InvalidCoordinateData) as context: + get_max_x_spread_pts(~valid_values) + self.assertEqual( + context.exception.message, + 'Only one valid point in coordinate data', + ) + + with self.subTest('No valid points from coordinates'): + valid_values = np.array( + [ + [False, False, False], + [False, False, False], + [False, False, False], + ] + ) + with self.assertRaises(InvalidCoordinateData) as context: + get_max_x_spread_pts(~valid_values) + self.assertEqual( + context.exception.message, + 'No valid coordinate data', + ) + + def test_get_valid_row_col_pairs(self): + """Ensure that two sets of valid indices are + returned by the method with a set of lat/lon coordinates as input + + """ + with self.subTest('Get two sets of valid indices from coordinates dataset'): + expected_grid_indices = ( + [(0, 0), (0, 9)], + [(0, 0), (4, 0)], + ) + actual_grid_indices = get_valid_row_col_pairs( + self.lat_arr, + self.lon_arr, + self.varinfo.get_variable(self.latitude), + self.varinfo.get_variable(self.longitude), + ) + + self.assertTrue(actual_grid_indices[0], expected_grid_indices[0]) + self.assertTrue(actual_grid_indices[1], expected_grid_indices[1]) + + with self.subTest('Only a single valid point in coordinates dataset'): + lat_arr = np.array( + [ + [-9999.0, -9999.0, 40.1, -9999.0, -9999.0], + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + ] + ) + lon_arr = np.array( + [ + [-9999.0, -9999.0, 100.1, -9999.0, -9999.0], + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + ] + ) + with self.assertRaises(InvalidCoordinateData) as context: + get_valid_row_col_pairs( + lat_arr, + lon_arr, + self.varinfo.get_variable(self.latitude), + self.varinfo.get_variable(self.longitude), + ) + self.assertEqual( + context.exception.message, + 'No valid coordinate data', + ) + with self.subTest('valid points in one row in coordinates dataset'): + lat_arr = np.array( + [ + [40.1, 40.1, 40.1, 40.1, 40.1], + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + ] + ) + lon_arr = np.array( + [ + [-179.0, -10.0, 100.1, 130.0, 179.0], + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + ] + ) + with self.assertRaises(InvalidCoordinateData) as context: + get_valid_row_col_pairs( + lat_arr, + lon_arr, + self.varinfo.get_variable(self.latitude), + self.varinfo.get_variable(self.longitude), + ) + self.assertEqual( + context.exception.message, + 'No valid coordinate data', + ) + with self.subTest('valid points in one column in coordinates dataset'): + lat_arr = np.array( + [ + [-9999.0, -9999.0, 40.1, -9999.0, -9999.0], + [-9999.0, -9999.0, -50.0, -9999.0, -9999.0], + ] + ) + lon_arr = np.array( + [ + [-9999.0, -9999.0, 100.1, -9999.0, -9999.0], + [-9999.0, -9999.0, 100.1, -9999.0, -9999.0], + ] + ) + with self.assertRaises(InvalidCoordinateData) as context: + get_valid_row_col_pairs( + lat_arr, + lon_arr, + self.varinfo.get_variable(self.latitude), + self.varinfo.get_variable(self.longitude), + ) + self.assertEqual( + context.exception.message, + 'No valid coordinate data', + ) + with self.subTest('no valid points in coordinates dataset'): + lat_arr = np.array( + [ + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + ] + ) + lon_arr = np.array( + [ + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + ] + ) + with self.assertRaises(InvalidCoordinateData) as context: + get_valid_row_col_pairs( + lat_arr, + lon_arr, + self.varinfo.get_variable(self.latitude), + self.varinfo.get_variable(self.longitude), + ) + self.assertEqual( + context.exception.message, + 'No valid coordinate data', + ) + + def test_get_x_y_values_from_geographic_points(self): + """Ensure that the correct x-values are returned by the function + with a set of geographic coordinates as input. + + """ + crs = CRS.from_cf( + { + 'false_easting': 0.0, + 'false_northing': 0.0, + 'longitude_of_central_meridian': 0.0, + 'standard_parallel': 30.0, + 'grid_mapping_name': 'lambert_cylindrical_equal_area', + } + ) + + with self.subTest('Get valid x-y points from coordinates in a row'): + two_col_points_in_a_row = [(-179.3, 89.3), (178.4, 89.3)] + + expected_x_y_points = [ + (-17299990.048985746, 7341677.255608977), + (17213152.396759935, 7341677.255608977), + ] + actual_x_y_points = get_x_y_values_from_geographic_points( + two_col_points_in_a_row, crs + ) + self.assertListEqual(actual_x_y_points, expected_x_y_points) + + with self.subTest('Get valid x-y points from coordinates in a col'): + two_row_points_in_a_col = (-179.3, 89.3), (-179.3, -88.1) + expected_x_y_points = [ + (-17299990.048985746, 7341677.255608977), + (-17299990.048985746, -7338157.219843731), + ] + actual_x_y_points = get_x_y_values_from_geographic_points( + two_row_points_in_a_col, crs + ) + self.assertListEqual(actual_x_y_points, expected_x_y_points) + + def test_get_dimension_array_from_geo_points(self): + """Ensure that a valid x/y dimension array is returned + with a geo array of lat/lon values + + """ + crs = CRS.from_cf( + { + 'false_easting': 0.0, + 'false_northing': 0.0, + 'longitude_of_central_meridian': 0.0, + 'standard_parallel': 30.0, + 'grid_mapping_name': 'lambert_cylindrical_equal_area', + } + ) + with self.subTest('Get y dimension array from geo coordinates'): + row_indices = [[0, 0], [4, 0]] + ymax = 7341677.255608977 + ymin = -25687950.314159617 + + dim_array = get_dimension_array_from_geo_points( + self.lat_arr, self.lon_arr, crs, row_indices, 10, True + ) + self.assertEqual(dim_array.size, 10) + self.assertEqual(dim_array[0], ymax) + self.assertEqual(dim_array[-1], ymin) + + with self.subTest('Get x dimension array from geo coordinates'): + col_indices = [[0, 0], [0, 9]] + xmin = -17299990.048985746 + xmax = -1960815.628654331 + dim_array = get_dimension_array_from_geo_points( + self.lat_arr, self.lon_arr, crs, col_indices, 5, False + ) + self.assertEqual(dim_array.size, 5) + self.assertEqual(dim_array[0], xmin) + self.assertEqual(dim_array[-1], xmax) diff --git a/tests/unit/test_dimension_utilities.py b/tests/unit/test_dimension_utilities.py index 5ad7a21..e280914 100644 --- a/tests/unit/test_dimension_utilities.py +++ b/tests/unit/test_dimension_utilities.py @@ -14,8 +14,8 @@ from varinfo import VarInfoFromDmr from hoss.dimension_utilities import ( - add_bounds_variables, add_index_range, + check_add_artificial_bounds, get_bounds_array, get_dimension_bounds, get_dimension_extents, @@ -23,12 +23,13 @@ get_dimension_indices_from_bounds, get_dimension_indices_from_values, get_fill_slice, + get_prefetch_variables, + get_range_strings, get_requested_index_ranges, is_almost_in, is_dimension_ascending, is_index_subset, needs_bounds, - prefetch_dimension_variables, write_bounds, ) from hoss.exceptions import InvalidNamedDimension, InvalidRequestedRange @@ -313,6 +314,45 @@ def test_add_index_range(self): '/sst_dtime[][12:34][]', ) + def test_get_range_strings(self): + """Ensure the correct combinations of range_strings are added as + suffixes to the input variable based upon that variable's dimensions. + If a dimension range has the lower index > upper index, that + indicates the bounding box crosses the edge of the grid. In this + instance, the full range of the variable should be retrieved. + + """ + with self.subTest('all dimensions found in index_ranges'): + variable_list = ['/Grid/lon', '/Grid/lat'] + index_ranges = {'/Grid/lat': (600, 699), '/Grid/lon': (2200, 2299)} + expected_range_strings = range_strings = ['[2200:2299]', '[600:699]'] + self.assertEqual( + get_range_strings(variable_list, index_ranges), expected_range_strings + ) + + with self.subTest('only some dimensions found in index range'): + variable_list = ['/Grid/time', '/Grid/lon', '/Grid/lat'] + index_ranges = {'/Grid/lat': (600, 699), '/Grid/lon': (2200, 2299)} + expected_range_strings = ['[]', '[2200:2299]', '[600:699]'] + self.assertEqual( + get_range_strings(variable_list, index_ranges), + expected_range_strings, + ) + + with self.subTest('No variables found in index ranges'): + variable_list = ['/Grid/time', '/Grid/lon', '/Grid/lat'] + self.assertEqual(get_range_strings(variable_list, {}), ['[]', '[]', '[]']) + with self.subTest( + 'when dimension range lower index is greater than upper index' + ): + variable_list = ['/Grid/time', '/Grid/lon', '/Grid/lat'] + index_ranges = {'/Grid/lat': (699, 600), '/Grid/lon': (2200, 2299)} + expected_range_strings = ['[]', '[2200:2299]', '[]'] + self.assertEqual( + get_range_strings(variable_list, index_ranges), + expected_range_strings, + ) + def test_get_fill_slice(self): """Ensure that a slice object is correctly formed for a requested dimension. @@ -326,10 +366,10 @@ def test_get_fill_slice(self): with self.subTest('A filled dimension returns slice(start, stop).'): self.assertEqual(get_fill_slice('/longitude', fill_ranges), slice(16, 200)) - @patch('hoss.dimension_utilities.add_bounds_variables') + @patch('hoss.dimension_utilities.check_add_artificial_bounds') @patch('hoss.dimension_utilities.get_opendap_nc4') - def test_prefetch_dimension_variables( - self, mock_get_opendap_nc4, mock_add_bounds_variables + def test_get_prefetch_variables( + self, mock_get_opendap_nc4, mock_check_add_artificial_bounds ): """Ensure that when a list of required variables is specified, a request to OPeNDAP will be sent requesting only those that are @@ -349,7 +389,7 @@ def test_prefetch_dimension_variables( required_dimensions = {'/latitude', '/longitude', '/time'} self.assertEqual( - prefetch_dimension_variables( + get_prefetch_variables( url, self.varinfo, required_variables, @@ -364,14 +404,148 @@ def test_prefetch_dimension_variables( mock_get_opendap_nc4.assert_called_once_with( url, required_dimensions, output_dir, self.logger, access_token, self.config ) - - mock_add_bounds_variables.assert_called_once_with( + mock_check_add_artificial_bounds.assert_called_once_with( prefetch_path, required_dimensions, self.varinfo, self.logger ) + @patch('hoss.dimension_utilities.check_add_artificial_bounds') + @patch('hoss.dimension_utilities.get_opendap_nc4') + def test_get_prefetch_variables_with_anonymous_dimensions( + self, + mock_get_opendap_nc4, + mock_check_add_artificial_bounds, + ): + """Ensure that when a list of required variables is specified, + and the required dimension variables are not present, + checks and retrieves coordinate variables which is used + in the opendap prefetch request. + + """ + prefetch_path = 'tests/data/SC_SPL3SMP_008_prefetch.nc4' + mock_get_opendap_nc4.return_value = prefetch_path + access_token = 'access' + output_dir = 'tests/output' + url = 'https://url_to_opendap_granule' + prefetch_variables = { + '/Soil_Moisture_Retrieval_Data_AM/latitude', + '/Soil_Moisture_Retrieval_Data_AM/longitude', + } + requested_variables = { + '/Soil_Moisture_Retrieval_Data_AM/albedo', + '/Soil_Moisture_Retrieval_Data_AM/surface_flag', + } + varinfo = VarInfoFromDmr( + 'tests/data/SC_SPL3SMP_008.dmr', + 'SPL3SMP', + config_file='hoss/hoss_config.json', + ) + + self.assertEqual( + get_prefetch_variables( + url, + varinfo, + requested_variables, + output_dir, + self.logger, + access_token, + self.config, + ), + prefetch_path, + ) + mock_get_opendap_nc4.assert_called_once_with( + url, prefetch_variables, output_dir, self.logger, access_token, self.config + ) + mock_check_add_artificial_bounds.assert_called_once_with( + prefetch_path, prefetch_variables, varinfo, self.logger + ) + + @patch('hoss.dimension_utilities.get_coordinate_variables') + @patch('hoss.dimension_utilities.check_add_artificial_bounds') + @patch('hoss.dimension_utilities.get_opendap_nc4') + def test_get_prefetch_variables_with_no_anonymous_dimensions( + self, + mock_get_opendap_nc4, + mock_check_add_artificial_bounds, + mock_get_coordinate_variables, + ): + """Ensure that when a list of required variables is specified, a + If two coordinate variables are also not present, the opendap prefetch request will not include any + dimension variables. + """ + prefetch_path = 'tests/data/SC_SPL3SMP_008_prefetch.nc4' + mock_get_opendap_nc4.return_value = prefetch_path + access_token = 'access' + output_dir = 'tests/output' + url = 'https://url_to_opendap_granule' + + requested_variables = { + '/Soil_Moisture_Retrieval_Data_AM/albedo', + '/Soil_Moisture_Retrieval_Data_AM/surface_flag', + } + varinfo = VarInfoFromDmr( + 'tests/data/SC_SPL3SMP_008.dmr', + 'SPL3SMP', + config_file='hoss/hoss_config.json', + ) + with self.subTest('No coordinate variables'): + mock_get_coordinate_variables.return_value = ([], []) + self.assertEqual( + get_prefetch_variables( + url, + varinfo, + requested_variables, + output_dir, + self.logger, + access_token, + self.config, + ), + prefetch_path, + ) + mock_get_coordinate_variables.assert_called_once_with( + varinfo, + requested_variables, + ) + mock_get_opendap_nc4.assert_called_once_with( + url, set(), output_dir, self.logger, access_token, self.config + ) + mock_check_add_artificial_bounds.assert_called_once_with( + prefetch_path, set(), varinfo, self.logger + ) + mock_get_coordinate_variables.reset_mock() + mock_get_opendap_nc4.reset_mock() + mock_check_add_artificial_bounds.reset_mock() + + with self.subTest('Only one coordinate variable'): + mock_get_coordinate_variables.return_value = ( + ['/Soil_Moisture_Retrieval_Data_AM/latitude'], + [], + ) + self.assertEqual( + get_prefetch_variables( + url, + varinfo, + requested_variables, + output_dir, + self.logger, + access_token, + self.config, + ), + prefetch_path, + ) + mock_get_coordinate_variables.assert_called_once_with( + varinfo, + requested_variables, + ) + mock_get_opendap_nc4.assert_called_once_with( + url, set(), output_dir, self.logger, access_token, self.config + ) + mock_check_add_artificial_bounds.assert_called_once_with( + prefetch_path, set(), varinfo, self.logger + ) + @patch('hoss.dimension_utilities.needs_bounds') @patch('hoss.dimension_utilities.write_bounds') - def test_add_bounds_variables(self, mock_write_bounds, mock_needs_bounds): + def test_check_add_artificial_bounds(self, mock_write_bounds, mock_needs_bounds): """Ensure that `write_bounds` is called when it's needed, and that it's not called when it's not needed. @@ -389,7 +563,7 @@ def test_add_bounds_variables(self, mock_write_bounds, mock_needs_bounds): with self.subTest('Bounds need to be written'): mock_needs_bounds.return_value = True - add_bounds_variables( + check_add_artificial_bounds( prefetch_dataset_name, required_dimensions, varinfo_prefetch, @@ -402,7 +576,7 @@ def test_add_bounds_variables(self, mock_write_bounds, mock_needs_bounds): with self.subTest('Bounds should not be written'): mock_needs_bounds.return_value = False - add_bounds_variables( + check_add_artificial_bounds( prefetch_dataset_name, required_dimensions, varinfo_prefetch, @@ -569,7 +743,7 @@ def test_prefetch_dimensions_with_bounds(self, mock_get_opendap_nc4): } self.assertEqual( - prefetch_dimension_variables( + get_prefetch_variables( url, self.varinfo_with_bounds, required_variables, diff --git a/tests/unit/test_projection_utilities.py b/tests/unit/test_projection_utilities.py index 0da8f66..164eb9e 100644 --- a/tests/unit/test_projection_utilities.py +++ b/tests/unit/test_projection_utilities.py @@ -194,6 +194,28 @@ def test_get_variable_crs(self): 'present in granule .dmr file.', ) + with self.subTest( + 'attributes for missing grid_mapping retrieved from earthdata-varinfo configuration file' + ): + smap_varinfo = VarInfoFromDmr( + 'tests/data/SC_SPL3SMP_008.dmr', + 'SPL3SMP', + 'hoss/hoss_config.json', + ) + expected_crs = CRS.from_cf( + { + 'false_easting': 0.0, + 'false_northing': 0.0, + 'longitude_of_central_meridian': 0.0, + 'standard_parallel': 30.0, + 'grid_mapping_name': 'lambert_cylindrical_equal_area', + } + ) + actual_crs = get_variable_crs( + '/Soil_Moisture_Retrieval_Data_AM/surface_flag', smap_varinfo + ) + self.assertEqual(actual_crs, expected_crs) + def test_get_projected_x_y_extents(self): """Ensure that the expected values for the x and y dimension extents are recovered for a known projected grid and requested input. diff --git a/tests/unit/test_spatial.py b/tests/unit/test_spatial.py index e5ee6d3..b10e925 100644 --- a/tests/unit/test_spatial.py +++ b/tests/unit/test_spatial.py @@ -17,6 +17,7 @@ get_longitude_in_grid, get_projected_x_y_index_ranges, get_spatial_index_ranges, + get_x_y_index_ranges_from_coordinates, ) @@ -182,6 +183,126 @@ def test_get_spatial_index_ranges_geographic(self): {'/latitude': (5, 44), '/longitude': (160, 199)}, ) + @patch('hoss.spatial.get_dimension_index_range') + @patch('hoss.spatial.get_projected_x_y_extents') + def test_get_x_y_index_ranges_from_coordinates( + self, + mock_get_x_y_extents, + mock_get_dimension_index_range, + ): + """Ensure that x and y index ranges are only requested only when there are + no projected dimensions and when there are coordinate datasets, + and the values have not already been calculated. + + The example used in this test is for the SMAP SPL3SMP collection, + (SMAP L3 Radiometer Global Daily 36 km EASE-Grid Soil Moisture) + which has a Equal-Area Scalable Earth Grid (EASE-Grid 2.0) CRS for + a projected grid which is lambert_cylindrical_equal_area projection + + """ + smap_varinfo = VarInfoFromDmr( + 'tests/data/SC_SPL3SMP_008.dmr', + 'SPL3SMP', + 'hoss/hoss_config.json', + ) + smap_file_path = 'tests/data/SC_SPL3SMP_008_prefetch.nc4' + expected_index_ranges = { + '/Soil_Moisture_Retrieval_Data_AM/projected_x': (487, 595), + '/Soil_Moisture_Retrieval_Data_AM/projected_y': (9, 38), + } + bbox = BBox(2, 54, 42, 72) + + latitude_coordinate = smap_varinfo.get_variable( + '/Soil_Moisture_Retrieval_Data_AM/latitude' + ) + longitude_coordinate = smap_varinfo.get_variable( + '/Soil_Moisture_Retrieval_Data_AM/longitude' + ) + + crs = CRS.from_cf( + { + 'false_easting': 0.0, + 'false_northing': 0.0, + 'longitude_of_central_meridian': 0.0, + 'standard_parallel': 30.0, + 'grid_mapping_name': 'lambert_cylindrical_equal_area', + } + ) + + x_y_extents = { + 'x_min': 192972.56050179302, + 'x_max': 4052423.7705376535, + 'y_min': 5930779.396449475, + 'y_max': 6979878.9118312765, + } + + mock_get_x_y_extents.return_value = x_y_extents + + # When ranges are derived, they are first calculated for x, then y: + mock_get_dimension_index_range.side_effect = [(487, 595), (9, 38)] + + with self.subTest( + 'Projected grid from coordinates gets expected dimension ranges' + ): + with Dataset(smap_file_path, 'r') as smap_prefetch: + self.assertDictEqual( + get_x_y_index_ranges_from_coordinates( + '/Soil_Moisture_Retrieval_Data_AM/surface_flag', + smap_varinfo, + smap_prefetch, + latitude_coordinate, + longitude_coordinate, + {}, + bounding_box=bbox, + shape_file_path=None, + ), + expected_index_ranges, + ) + + mock_get_x_y_extents.assert_called_once_with( + ANY, ANY, crs, shape_file=None, bounding_box=bbox + ) + + self.assertEqual(mock_get_dimension_index_range.call_count, 2) + mock_get_dimension_index_range.assert_has_calls( + [ + call( + ANY, + x_y_extents['x_min'], + x_y_extents['x_max'], + bounds_values=None, + ), + call( + ANY, + x_y_extents['y_min'], + x_y_extents['y_max'], + bounds_values=None, + ), + ] + ) + + mock_get_x_y_extents.reset_mock() + mock_get_dimension_index_range.reset_mock() + + with self.subTest('Function does not rederive known index ranges'): + with Dataset(smap_file_path, 'r') as smap_prefetch: + self.assertDictEqual( + get_x_y_index_ranges_from_coordinates( + '/Soil_Moisture_Retrieval_Data_AM/surface_flag', + smap_varinfo, + smap_prefetch, + latitude_coordinate, + longitude_coordinate, + expected_index_ranges, + bounding_box=bbox, + shape_file_path=None, + ), + {}, + ) + + mock_get_x_y_extents.assert_not_called() + mock_get_dimension_index_range.assert_not_called() + @patch('hoss.spatial.get_dimension_index_range') @patch('hoss.spatial.get_projected_x_y_extents') def test_get_projected_x_y_index_ranges( diff --git a/tests/unit/test_subset.py b/tests/unit/test_subset.py index abbb5f1..7c6e979 100644 --- a/tests/unit/test_subset.py +++ b/tests/unit/test_subset.py @@ -62,7 +62,7 @@ def tearDown(self): @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_granule_not_geo( self, @@ -126,7 +126,7 @@ def test_subset_granule_not_geo( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_granule_geo( self, @@ -216,7 +216,7 @@ def test_subset_granule_geo( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_non_geo_no_variables( self, @@ -290,7 +290,7 @@ def test_subset_non_geo_no_variables( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_geo_no_variables( self, @@ -404,7 +404,7 @@ def test_subset_geo_no_variables( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_non_variable_dimensions( self, @@ -537,7 +537,7 @@ def test_subset_non_variable_dimensions( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_bounds_reference( self, @@ -638,7 +638,7 @@ def test_subset_bounds_reference( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_temporal( self, @@ -742,7 +742,7 @@ def test_subset_temporal( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_geo_temporal( self, @@ -860,7 +860,7 @@ def test_subset_geo_temporal( @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') @patch('hoss.subset.get_request_shape_file') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_granule_shape( self, @@ -971,7 +971,7 @@ def test_subset_granule_shape( @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') @patch('hoss.subset.get_request_shape_file') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_granule_shape_and_bbox( self, @@ -1083,7 +1083,7 @@ def test_subset_granule_shape_and_bbox( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_granule_geo_named( self,