From 1ad0173ecef996e17b021fcb0e22af914262ca5f Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Wed, 6 Nov 2024 14:47:04 -0500 Subject: [PATCH 01/16] DAS-2236 - added functions to get end to end test working --- hoss/coordinate_utilities.py | 222 +++++++++++++++++++++++- hoss/dimension_utilities.py | 100 +++++++---- hoss/projection_utilities.py | 14 +- hoss/spatial.py | 110 +++++++++++- hoss/subset.py | 11 +- tests/unit/test_coordinate_utilities.py | 166 +++++++++++++----- tests/unit/test_dimension_utilities.py | 8 +- tests/unit/test_projection_utilities.py | 20 +++ tests/unit/test_spatial.py | 121 +++++++++++++ tests/unit/test_subset.py | 22 +-- 10 files changed, 683 insertions(+), 111 deletions(-) diff --git a/hoss/coordinate_utilities.py b/hoss/coordinate_utilities.py index 1550d81..7760a7c 100644 --- a/hoss/coordinate_utilities.py +++ b/hoss/coordinate_utilities.py @@ -3,10 +3,11 @@ coordinate variable data to projected x/y dimension values """ +from typing import Dict + import numpy as np from netCDF4 import Dataset - -# from numpy import ndarray +from pyproj import CRS, Transformer from varinfo import VariableFromDmr, VarInfoFromDmr from hoss.exceptions import ( @@ -101,20 +102,23 @@ def get_coordinate_variables( CF-Convention coordinates metadata attribute. It returns them in a specific order [latitude, longitude]" """ - - coordinate_variables_list = varinfo.get_references_for_attribute( + # varinfo returns a set and not a list + coordinate_variables = varinfo.get_references_for_attribute( requested_variables, 'coordinates' ) + latitude_coordinate_variables = [ coordinate - for coordinate in coordinate_variables_list - if varinfo.get_variable(coordinate).is_latitude() + for coordinate in coordinate_variables + if varinfo.get_variable(coordinate) is not None + and varinfo.get_variable(coordinate).is_latitude() ] longitude_coordinate_variables = [ coordinate - for coordinate in coordinate_variables_list - if varinfo.get_variable(coordinate).is_longitude() + for coordinate in coordinate_variables + if varinfo.get_variable(coordinate) is not None + and varinfo.get_variable(coordinate).is_longitude() ] return latitude_coordinate_variables, longitude_coordinate_variables @@ -190,6 +194,7 @@ def get_valid_indices( value is provided. If it is not provided, we check for valid values for latitude and longitude """ + # print(f'coordinate_row_col={coordinate_row_col}') # get_attribute_value returns a value of type `str` coordinate_fill = coordinate.get_attribute_value('_FillValue') if coordinate_fill is not None: @@ -216,5 +221,204 @@ def get_valid_indices( )[0] else: valid_indices = np.empty((0, 0)) - + print(f'valid_indices={valid_indices}') return valid_indices + + +def get_two_valid_geo_grid_points( + lat_arr: np.ndarray, + lon_arr: np.ndarray, + latitude_coordinate: VariableFromDmr, + longitude_coordinate: VariableFromDmr, + row_size: float, + col_size: float, +) -> dict[int, tuple]: + """ + This method is used to return two valid lat lon points from a 2D + coordinate dataset. It gets the row and column of the latitude and longitude + arrays to get two valid points. This does a check for fill values and + This method does not go down to the next row and col. if the selected row and + column all have fills, it will raise an exception in those cases. + """ + first_row_col_index = -1 + first_row_row_index = 0 + last_col_row_index = -1 + last_col_col_index = col_size - 1 + lat_row_valid_indices = lon_row_valid_indices = np.empty((0, 0)) + + # get the first row with points that are valid in the lat and lon rows + first_row_row_index, lat_row_valid_indices = get_valid_indices_in_dataset( + lat_arr, row_size, latitude_coordinate, 'row', first_row_row_index + ) + first_row_row_index1, lon_row_valid_indices = get_valid_indices_in_dataset( + lon_arr, row_size, longitude_coordinate, 'row', first_row_row_index + ) + # get a point that is common on both row datasets + if ( + (first_row_row_index == first_row_row_index1) + and (lat_row_valid_indices.size > 0) + and (lon_row_valid_indices.size > 0) + ): + first_row_col_index = np.intersect1d( + lat_row_valid_indices, lon_row_valid_indices + )[0] + + print(f'first_row_row_index={first_row_row_index}') + print(f'first_row_col_index={first_row_col_index}') + + # get a valid column from the latitude and longitude datasets + last_col_col_index, lon_col_valid_indices = get_valid_indices_in_dataset( + lon_arr, col_size, longitude_coordinate, 'col', last_col_col_index + ) + last_col_col_index1, lat_col_valid_indices = get_valid_indices_in_dataset( + lat_arr, col_size, latitude_coordinate, 'col', last_col_col_index + ) + + # get a point that is common to both column datasets + if ( + (last_col_col_index == last_col_col_index1) + and (lat_col_valid_indices.size > 0) + and (lon_col_valid_indices.size > 0) + ): + last_col_row_index = np.intersect1d( + lat_col_valid_indices, lon_col_valid_indices + )[-1] + + print(f'last_col_col_index={last_col_col_index}') + print(f'last_col_row_index={last_col_row_index}') + + # if the whole row and whole column has no valid indices + # we throw an exception now. This can be extended to move + # to the next row/col + if first_row_col_index == -1: + raise InvalidCoordinateVariable('latitude/longitude') + if last_col_row_index == -1: + raise InvalidCoordinateVariable('latitude/longitude') + + geo_grid_indexes = [ + (first_row_row_index, first_row_col_index), + (last_col_row_index, last_col_col_index), + ] + + geo_grid_points = [ + ( + lon_arr[first_row_row_index][first_row_col_index], + lat_arr[first_row_row_index][first_row_col_index], + ), + ( + lon_arr[last_col_row_index][last_col_col_index], + lat_arr[last_col_row_index][last_col_col_index], + ), + ] + + return { + geo_grid_indexes[0]: geo_grid_points[0], + geo_grid_indexes[1]: geo_grid_points[1], + } + + +def get_x_y_values_from_geographic_points(points: Dict, crs: CRS) -> Dict[tuple, tuple]: + """Take an input list of (longitude, latitude) coordinates and project + those points to the target grid. Then return the x-y dimscales + + """ + point_longitudes, point_latitudes = zip(*list(points.values())) + + from_geo_transformer = Transformer.from_crs(4326, crs) + points_x, points_y = ( # pylint: disable=unpacking-non-sequence + from_geo_transformer.transform(point_latitudes, point_longitudes) + ) + + x_y_points = {} + for index, point_x, point_y in zip(list(points.keys()), points_x, points_y): + x_y_points.update({index: (point_x, point_y)}) + + return x_y_points + + +def get_valid_indices_in_dataset( + coordinate_arr: np.ndarray, + dim_size: int, + coordinate: VariableFromDmr, + span_type: str, + start_index: int, +) -> tuple[int, np.ndarray]: + """ + This method gets valid indices in a row or column of a + coordinate dataset + """ + coordinate_index = start_index + valid_indices = [] + if span_type == 'row': + valid_indices = get_valid_indices( + coordinate_arr[coordinate_index, :], coordinate + ) + else: + valid_indices = get_valid_indices( + coordinate_arr[:, coordinate_index], coordinate + ) + + while valid_indices.size == 0: + if span_type == 'row': + if coordinate_index < dim_size: + coordinate_index = coordinate_index + 1 + valid_indices = get_valid_indices( + coordinate_arr[coordinate_index, :], + coordinate, + ) + else: + raise InvalidCoordinateVariable(coordinate.full_name_path) + else: + if coordinate_index > 0: + coordinate_index = coordinate_index - 1 + valid_indices = get_valid_indices( + coordinate_arr[:, coordinate_index], coordinate + ) + else: + raise InvalidCoordinateVariable(coordinate.full_name_path) + return coordinate_index, valid_indices + + +def create_dimension_array_from_coordinates( + prefetch_dataset: Dataset, + latitude_coordinate: VariableFromDmr, + longitude_coordinate: VariableFromDmr, + crs: CRS, + projected_dimension_names: list, +) -> Dict[str, np.ndarray]: + """Generate artificial 1D dimensions scales for each + 2D dimension or coordinate variable. + 1) Get 2 valid geo grid points + 2) convert them to a projected x-y extent + 3) Generate the x-y dimscale array and return to the calling method + + """ + lat_arr = get_coordinate_array( + prefetch_dataset, + latitude_coordinate.full_name_path, + ) + lon_arr = get_coordinate_array( + prefetch_dataset, + longitude_coordinate.full_name_path, + ) + + row_size, col_size = get_row_col_sizes_from_coordinate_datasets( + lat_arr, + lon_arr, + ) + geo_grid_points = get_two_valid_geo_grid_points( + lat_arr, lon_arr, latitude_coordinate, longitude_coordinate, row_size, col_size + ) + + x_y_values = get_x_y_values_from_geographic_points(geo_grid_points, crs) + + row_indices, col_indices = zip(*list(x_y_values.keys())) + + x_values, y_values = zip(*list(x_y_values.values())) + + y_dim = get_1D_dim_array_data_from_dimvalues(y_values, row_indices, row_size) + + x_dim = get_1D_dim_array_data_from_dimvalues(x_values, col_indices, col_size) + + projected_y, projected_x = tuple(projected_dimension_names) + return {projected_y: y_dim, projected_x: x_dim} diff --git a/hoss/dimension_utilities.py b/hoss/dimension_utilities.py index 9ee8d02..432941a 100644 --- a/hoss/dimension_utilities.py +++ b/hoss/dimension_utilities.py @@ -22,8 +22,14 @@ from numpy.ma.core import MaskedArray from varinfo import VariableFromDmr, VarInfoFromDmr -from hoss.bbox_utilities import flatten_list -from hoss.exceptions import InvalidNamedDimension, InvalidRequestedRange +from hoss.coordinate_utilities import ( + get_coordinate_variables, + get_projected_dimension_names_from_coordinate_variables, +) +from hoss.exceptions import ( + InvalidNamedDimension, + InvalidRequestedRange, +) from hoss.utilities import ( format_variable_set_string, get_opendap_nc4, @@ -55,7 +61,7 @@ def is_index_subset(message: Message) -> bool: ) -def prefetch_dimension_variables( +def get_prefetch_variables( opendap_url: str, varinfo: VarInfoFromDmr, required_variables: Set[str], @@ -64,45 +70,39 @@ def prefetch_dimension_variables( access_token: str, config: Config, ) -> str: - """Determine the dimensions that need to be "pre-fetched" from OPeNDAP in + """Determine the variables that need to be "pre-fetched" from OPeNDAP in order to derive index ranges upon them. Initially, this was just spatial and temporal dimensions, but to support generic dimension subsets, all required dimensions must be prefetched, along with any associated bounds variables referred to via the "bounds" metadata - attribute. + attribute. In cases where dimension variables do not exist, coordinate + variables will be prefetched and used to calculate dimension-scale values """ - required_dimensions = varinfo.get_required_dimensions(required_variables) - - # Iterate through all requested dimensions and extract a list of bounds - # references for each that has any. This will produce a list of lists, - # which should be flattened into a single list and then combined into a set - # to remove duplicates. - bounds = set( - flatten_list( - [ - list(varinfo.get_variable(dimension).references.get('bounds')) - for dimension in required_dimensions - if varinfo.get_variable(dimension).references.get('bounds') is not None - ] + prefetch_variables = varinfo.get_required_dimensions(required_variables) + if prefetch_variables: + bounds = varinfo.get_references_for_attribute(prefetch_variables, 'bounds') + prefetch_variables.update(bounds) + else: + latitude_coordinates, longitude_coordinates = get_coordinate_variables( + varinfo, required_variables ) - ) - required_dimensions.update(bounds) + if latitude_coordinates and longitude_coordinates: + prefetch_variables = set(latitude_coordinates + longitude_coordinates) logger.info( 'Variables being retrieved in prefetch request: ' - f'{format_variable_set_string(required_dimensions)}' + f'{format_variable_set_string(prefetch_variables)}' ) - required_dimensions_nc4 = get_opendap_nc4( - opendap_url, required_dimensions, output_dir, logger, access_token, config + prefetch_variables_nc4 = get_opendap_nc4( + opendap_url, prefetch_variables, output_dir, logger, access_token, config ) # Create bounds variables if necessary. - add_bounds_variables(required_dimensions_nc4, required_dimensions, varinfo, logger) - - return required_dimensions_nc4 + add_bounds_variables(prefetch_variables_nc4, prefetch_variables, varinfo, logger) + return prefetch_variables_nc4 def add_bounds_variables( @@ -409,7 +409,9 @@ def get_dimension_indices_from_bounds( def add_index_range( - variable_name: str, varinfo: VarInfoFromDmr, index_ranges: IndexRanges + variable_name: str, + varinfo: VarInfoFromDmr, + index_ranges: IndexRanges, ) -> str: """Append the index ranges of each dimension for the specified variable. If there are no dimensions with listed index ranges, then the full @@ -418,27 +420,44 @@ def add_index_range( the antimeridian or Prime Meridian) will have a minimum index greater than the maximum index. In this case the full dimension range should be requested, as the related values will be masked before returning the - output to the user. + output to the user. When the dimensions are not available, the coordinate + variables are used to calculate the projected dimensions. """ variable = varinfo.get_variable(variable_name) - range_strings = [] + if variable.dimensions: + range_strings = get_range_strings(variable.dimensions, index_ranges) + else: + override_dimensions = get_projected_dimension_names_from_coordinate_variables( + varinfo, variable_name + ) + if override_dimensions: + range_strings = get_range_strings(override_dimensions, index_ranges) + + if all(range_string == '[]' for range_string in range_strings): + indices_string = '' + else: + indices_string = ''.join(range_strings) + return f'{variable_name}{indices_string}' - for dimension in variable.dimensions: - dimension_range = index_ranges.get(dimension) +def get_range_strings( + variable_dimensions: list, + index_ranges: IndexRanges, +) -> list: + """Calculates the index ranges for each dimension of the variable + and returns the list of index ranges + """ + range_strings = [] + for dimension in variable_dimensions: + dimension_range = index_ranges.get(dimension) if dimension_range is not None and dimension_range[0] <= dimension_range[1]: range_strings.append(f'[{dimension_range[0]}:{dimension_range[1]}]') else: range_strings.append('[]') - if all(range_string == '[]' for range_string in range_strings): - indices_string = '' - else: - indices_string = ''.join(range_strings) - - return f'{variable_name}{indices_string}' + return range_strings def get_fill_slice(dimension: str, fill_ranges: IndexRanges) -> slice: @@ -549,7 +568,12 @@ def get_dimension_bounds( be returned. """ - bounds = varinfo.get_variable(dimension_name).references.get('bounds') + try: + # For pseudo-variables, `varinfo.get_variable` returns `None` and + # therefore has no `references` attribute. + bounds = varinfo.get_variable(dimension_name).references.get('bounds') + except AttributeError: + bounds = None if bounds is not None: try: diff --git a/hoss/projection_utilities.py b/hoss/projection_utilities.py index bdacdc0..d84c64d 100644 --- a/hoss/projection_utilities.py +++ b/hoss/projection_utilities.py @@ -57,9 +57,21 @@ def get_variable_crs(variable: str, varinfo: VarInfoFromDmr) -> CRS: if grid_mapping is not None: try: - crs = CRS.from_cf(varinfo.get_variable(grid_mapping).attributes) + grid_mapping_variable = varinfo.get_variable(grid_mapping) + if grid_mapping_variable is not None: + cf_attributes = grid_mapping_variable.attributes + else: + # check for any overrides + cf_attributes = varinfo.get_missing_variable_attributes(grid_mapping) + + if cf_attributes: + crs = CRS.from_cf(cf_attributes) + else: + raise MissingGridMappingVariable(grid_mapping, variable) + except AttributeError as exception: raise MissingGridMappingVariable(grid_mapping, variable) from exception + else: raise MissingGridMappingMetadata(variable) diff --git a/hoss/spatial.py b/hoss/spatial.py index 79eb5c2..8652d3c 100644 --- a/hoss/spatial.py +++ b/hoss/spatial.py @@ -27,7 +27,7 @@ from harmony.message import Message from netCDF4 import Dataset from numpy.ma.core import MaskedArray -from varinfo import VarInfoFromDmr +from varinfo import VariableFromDmr, VarInfoFromDmr from hoss.bbox_utilities import ( BBox, @@ -35,6 +35,12 @@ get_harmony_message_bbox, get_shape_file_geojson, ) +from hoss.coordinate_utilities import ( + create_dimension_array_from_coordinates, + get_coordinate_variables, + get_projected_dimension_names_from_coordinate_variables, + get_variables_with_anonymous_dims, +) from hoss.dimension_utilities import ( IndexRange, IndexRanges, @@ -78,6 +84,10 @@ def get_spatial_index_ranges( around the exterior of the user-defined GeoJSON shape, to ensure the correct extents are derived. + if geographic and projected dimensions are not specified in the granule, + the coordinate datasets are used to calculate the x-y dimensions and the index ranges + are calculated similar to a projected grid. + """ bounding_box = get_harmony_message_bbox(harmony_message) index_ranges = {} @@ -91,7 +101,7 @@ def get_spatial_index_ranges( ) with Dataset(dimensions_path, 'r') as dimensions_file: - if len(geographic_dimensions) > 0: + if geographic_dimensions: # If there is no bounding box, but there is a shape file, calculate # a bounding box to encapsulate the GeoJSON shape: if bounding_box is None and shape_file_path is not None: @@ -103,7 +113,7 @@ def get_spatial_index_ranges( dimension, varinfo, dimensions_file, bounding_box ) - if len(projected_dimensions) > 0: + if projected_dimensions: for non_spatial_variable in non_spatial_variables: index_ranges.update( get_projected_x_y_index_ranges( @@ -115,6 +125,27 @@ def get_spatial_index_ranges( shape_file_path=shape_file_path, ) ) + variables_with_anonymous_dims = get_variables_with_anonymous_dims( + varinfo, required_variables + ) + for variable_with_anonymous_dims in variables_with_anonymous_dims: + print(f'variable_with_anonymous_dims={variable_with_anonymous_dims}') + latitude_coordinates, longitude_coordinates = get_coordinate_variables( + varinfo, [variable_with_anonymous_dims] + ) + if latitude_coordinates and longitude_coordinates: + index_ranges.update( + get_x_y_index_ranges_from_coordinates( + variable_with_anonymous_dims, + varinfo, + dimensions_file, + varinfo.get_variable(latitude_coordinates[0]), + varinfo.get_variable(longitude_coordinates[0]), + index_ranges, + bounding_box=bounding_box, + shape_file_path=shape_file_path, + ) + ) return index_ranges @@ -187,6 +218,79 @@ def get_projected_x_y_index_ranges( return x_y_index_ranges +def get_x_y_index_ranges_from_coordinates( + non_spatial_variable: str, + varinfo: VarInfoFromDmr, + prefetch_coordinate_datasets: Dataset, + latitude_coordinate: VariableFromDmr, + longitude_coordinate: VariableFromDmr, + index_ranges: IndexRanges, + bounding_box: BBox = None, + shape_file_path: str = None, +) -> IndexRanges: + """This function returns a dictionary containing the minimum and maximum + index ranges for the projected_x and projected_y recalculated dimension scales + + index_ranges = {'projected_x': (20, 42), 'projected_y': (31, 53)} + + This method is called when the CF standards are not followed + in the source granule and only coordinate datasets are provided. + The coordinate datasets along with the crs is used to calculate + the x-y projected dimension scales. The dimensions of the input, + non-spatial variable are checked for associated coordinates. If + these are present, and they have not already been added to the + `index_ranges` cache, the extents of the input spatial subset + are determined in these projected coordinates. The minimum and + maximum values are then derived from these projected coordinate + points. + + """ + + crs = get_variable_crs(non_spatial_variable, varinfo) + + projected_dimension_names = get_projected_dimension_names_from_coordinate_variables( + varinfo, non_spatial_variable + ) + + dimension_arrays = create_dimension_array_from_coordinates( + prefetch_coordinate_datasets, + latitude_coordinate, + longitude_coordinate, + crs, + projected_dimension_names, + ) + + projected_y, projected_x = dimension_arrays.keys() + + if not set((projected_x, projected_y)).issubset(set(index_ranges.keys())): + + x_y_extents = get_projected_x_y_extents( + dimension_arrays[projected_x][:], + dimension_arrays[projected_y][:], + crs, + shape_file=shape_file_path, + bounding_box=bounding_box, + ) + + x_index_ranges = get_dimension_index_range( + dimension_arrays[projected_x][:], + x_y_extents['x_min'], + x_y_extents['x_max'], + bounds_values=None, + ) + y_index_ranges = get_dimension_index_range( + dimension_arrays[projected_y][:], + x_y_extents['y_min'], + x_y_extents['y_max'], + bounds_values=None, + ) + x_y_index_ranges = {projected_x: x_index_ranges, projected_y: y_index_ranges} + else: + x_y_index_ranges = {} + + return x_y_index_ranges + + def get_geographic_index_range( dimension: str, varinfo: VarInfoFromDmr, diff --git a/hoss/subset.py b/hoss/subset.py index 8a01321..a08e590 100644 --- a/hoss/subset.py +++ b/hoss/subset.py @@ -21,13 +21,17 @@ IndexRanges, add_index_range, get_fill_slice, + get_prefetch_variables, get_requested_index_ranges, is_index_subset, - prefetch_dimension_variables, ) from hoss.spatial import get_spatial_index_ranges from hoss.temporal import get_temporal_index_ranges -from hoss.utilities import download_url, format_variable_set_string, get_opendap_nc4 +from hoss.utilities import ( + download_url, + format_variable_set_string, + get_opendap_nc4, +) def subset_granule( @@ -87,7 +91,7 @@ def subset_granule( if request_is_index_subset: # Prefetch all dimension variables in full: - dimensions_path = prefetch_dimension_variables( + dimensions_path = get_prefetch_variables( opendap_url, varinfo, required_variables, @@ -122,6 +126,7 @@ def subset_granule( shape_file_path = get_request_shape_file( harmony_message, output_dir, logger, config ) + index_ranges.update( get_spatial_index_ranges( required_variables, diff --git a/tests/unit/test_coordinate_utilities.py b/tests/unit/test_coordinate_utilities.py index 1c7aa04..620ca24 100644 --- a/tests/unit/test_coordinate_utilities.py +++ b/tests/unit/test_coordinate_utilities.py @@ -17,8 +17,11 @@ get_projected_dimension_names, get_projected_dimension_names_from_coordinate_variables, get_row_col_sizes_from_coordinate_datasets, + get_two_valid_geo_grid_points, get_valid_indices, + get_valid_indices_in_dataset, get_variables_with_anonymous_dims, + get_x_y_values_from_geographic_points, ) from hoss.exceptions import ( IncompatibleCoordinateVariables, @@ -74,48 +77,6 @@ def setUpClass(cls): ] ) - cls.lon_arr_reversed = np.array( - [ - [ - -179.3, - -179.3, - -179.3, - -179.3, - -9999, - -9999, - -179.3, - -179.3, - -179.3, - -179.3, - ], - [ - -120.2, - -120.2, - -120.2, - -9999, - -9999, - -120.2, - -120.2, - -120.2, - -120.2, - -120.2, - ], - [20.6, 20.6, 20.6, 20.6, 20.6, 20.6, 20.6, 20.6, -9999, -9999], - [150.5, 150.5, 150.5, 150.5, 150.5, 150.5, -9999, -9999, 150.5, 150.5], - [178.4, 178.4, 178.4, 178.4, 178.4, 178.4, 178.4, -9999, 178.4, 178.4], - ] - ) - - cls.lat_arr_reversed = np.array( - [ - [89.3, 79.3, -9999, 59.3, 29.3, 2.1, -9999, -59.3, -79.3, -89.3], - [89.3, 79.3, 60.3, 59.3, 29.3, 2.1, -9999, -59.3, -79.3, -89.3], - [89.3, -9999, 60.3, 59.3, 29.3, 2.1, -9999, -9999, -9999, -89.3], - [-9999, 79.3, -60.3, -9999, -9999, -9999, -60.2, -59.3, -79.3, -89.3], - [-89.3, 79.3, -60.3, -9999, -9999, -9999, -60.2, -59.3, -79.3, -9999], - ] - ) - def setUp(self): """Create fixtures that should be unique per test.""" @@ -543,3 +504,124 @@ def test_any_absent_dimension_variables(self): self.test_varinfo, '/Soil_Moisture_Retrieval_Data_AM/variable_has_dim' ) self.assertFalse(variable_has_fake_dims) + + def test_get_two_valid_geo_grid_points(self): + """Ensure that two valid lat/lon points returned by the method + with a set of lat/lon coordinates as input + + """ + + with self.subTest('Get two valid geo grid points from coordinates'): + expected_geo_grid_points = [(-179.3, 89.3), (178.4, -88.1)] + actual_geo_grid_points = get_two_valid_geo_grid_points( + self.lat_arr, + self.lon_arr, + self.varinfo.get_variable(self.latitude), + self.varinfo.get_variable(self.longitude), + 5, + 10, + ) + for actual_geo_grid_point, expected_geo_grid_point in zip( + actual_geo_grid_points.values(), expected_geo_grid_points + ): + self.assertEqual(actual_geo_grid_point, expected_geo_grid_point) + + with self.subTest('Get two valid geo grid points from reversed coordinates'): + lon_arr_reversed = np.array( + [ + [ + -179.3, + -179.3, + -179.3, + -179.3, + -9999, + -9999, + -179.3, + -179.3, + -179.3, + -179.3, + ], + [ + -120.2, + -120.2, + -120.2, + -9999, + -9999, + -120.2, + -120.2, + -120.2, + -120.2, + -120.2, + ], + [20.6, 20.6, 20.6, 20.6, 20.6, 20.6, 20.6, 20.6, -9999, -9999], + [ + 150.5, + 150.5, + 150.5, + 150.5, + 150.5, + 150.5, + -9999, + -9999, + 150.5, + 150.5, + ], + [ + 178.4, + 178.4, + 178.4, + 178.4, + 178.4, + 178.4, + 178.4, + -9999, + 178.4, + 178.4, + ], + ] + ) + + lat_arr_reversed = np.array( + [ + [89.3, 79.3, -9999, 59.3, 29.3, 2.1, -9999, -59.3, -79.3, -89.3], + [89.3, 79.3, 60.3, 59.3, 29.3, 2.1, -9999, -59.3, -79.3, -89.3], + [89.3, -9999, 60.3, 59.3, 29.3, 2.1, -9999, -9999, -9999, -89.3], + [ + -9999, + 79.3, + -60.3, + -9999, + -9999, + -9999, + -60.2, + -59.3, + -79.3, + -89.3, + ], + [ + -89.3, + 79.3, + -60.3, + -9999, + -9999, + -9999, + -60.2, + -59.3, + -79.3, + -9999, + ], + ] + ) + expected_geo_grid_points_reversed = [(-179.3, 89.3), (150.5, -89.3)] + actual_geo_grid_points = get_two_valid_geo_grid_points( + lat_arr_reversed, + lon_arr_reversed, + self.varinfo.get_variable(self.latitude), + self.varinfo.get_variable(self.longitude), + 5, + 10, + ) + for actual_geo_grid_point, expected_geo_grid_point in zip( + actual_geo_grid_points.values(), expected_geo_grid_points_reversed + ): + self.assertEqual(actual_geo_grid_point, expected_geo_grid_point) diff --git a/tests/unit/test_dimension_utilities.py b/tests/unit/test_dimension_utilities.py index 5ad7a21..9be9f07 100644 --- a/tests/unit/test_dimension_utilities.py +++ b/tests/unit/test_dimension_utilities.py @@ -23,12 +23,12 @@ get_dimension_indices_from_bounds, get_dimension_indices_from_values, get_fill_slice, + get_prefetch_variables, get_requested_index_ranges, is_almost_in, is_dimension_ascending, is_index_subset, needs_bounds, - prefetch_dimension_variables, write_bounds, ) from hoss.exceptions import InvalidNamedDimension, InvalidRequestedRange @@ -328,7 +328,7 @@ def test_get_fill_slice(self): @patch('hoss.dimension_utilities.add_bounds_variables') @patch('hoss.dimension_utilities.get_opendap_nc4') - def test_prefetch_dimension_variables( + def test_get_prefetch_variables( self, mock_get_opendap_nc4, mock_add_bounds_variables ): """Ensure that when a list of required variables is specified, a @@ -349,7 +349,7 @@ def test_prefetch_dimension_variables( required_dimensions = {'/latitude', '/longitude', '/time'} self.assertEqual( - prefetch_dimension_variables( + get_prefetch_variables( url, self.varinfo, required_variables, @@ -569,7 +569,7 @@ def test_prefetch_dimensions_with_bounds(self, mock_get_opendap_nc4): } self.assertEqual( - prefetch_dimension_variables( + get_prefetch_variables( url, self.varinfo_with_bounds, required_variables, diff --git a/tests/unit/test_projection_utilities.py b/tests/unit/test_projection_utilities.py index 0da8f66..7a08135 100644 --- a/tests/unit/test_projection_utilities.py +++ b/tests/unit/test_projection_utilities.py @@ -194,6 +194,26 @@ def test_get_variable_crs(self): 'present in granule .dmr file.', ) + with self.subTest('grid_mapping override with json configuration'): + smap_varinfo = VarInfoFromDmr( + 'tests/data/SC_SPL3SMP_008.dmr', + 'SPL3SMP', + 'hoss/hoss_config.json', + ) + expected_crs = CRS.from_cf( + { + 'false_easting': 0.0, + 'false_northing': 0.0, + 'longitude_of_central_meridian': 0.0, + 'standard_parallel': 30.0, + 'grid_mapping_name': 'lambert_cylindrical_equal_area', + } + ) + actual_crs = get_variable_crs( + '/Soil_Moisture_Retrieval_Data_AM/surface_flag', smap_varinfo + ) + self.assertEqual(actual_crs, expected_crs) + def test_get_projected_x_y_extents(self): """Ensure that the expected values for the x and y dimension extents are recovered for a known projected grid and requested input. diff --git a/tests/unit/test_spatial.py b/tests/unit/test_spatial.py index e5ee6d3..b10e925 100644 --- a/tests/unit/test_spatial.py +++ b/tests/unit/test_spatial.py @@ -17,6 +17,7 @@ get_longitude_in_grid, get_projected_x_y_index_ranges, get_spatial_index_ranges, + get_x_y_index_ranges_from_coordinates, ) @@ -182,6 +183,126 @@ def test_get_spatial_index_ranges_geographic(self): {'/latitude': (5, 44), '/longitude': (160, 199)}, ) + @patch('hoss.spatial.get_dimension_index_range') + @patch('hoss.spatial.get_projected_x_y_extents') + def test_get_x_y_index_ranges_from_coordinates( + self, + mock_get_x_y_extents, + mock_get_dimension_index_range, + ): + """Ensure that x and y index ranges are only requested only when there are + no projected dimensions and when there are coordinate datasets, + and the values have not already been calculated. + + The example used in this test is for the SMAP SPL3SMP collection, + (SMAP L3 Radiometer Global Daily 36 km EASE-Grid Soil Moisture) + which has a Equal-Area Scalable Earth Grid (EASE-Grid 2.0) CRS for + a projected grid which is lambert_cylindrical_equal_area projection + + """ + smap_varinfo = VarInfoFromDmr( + 'tests/data/SC_SPL3SMP_008.dmr', + 'SPL3SMP', + 'hoss/hoss_config.json', + ) + smap_file_path = 'tests/data/SC_SPL3SMP_008_prefetch.nc4' + expected_index_ranges = { + '/Soil_Moisture_Retrieval_Data_AM/projected_x': (487, 595), + '/Soil_Moisture_Retrieval_Data_AM/projected_y': (9, 38), + } + bbox = BBox(2, 54, 42, 72) + + latitude_coordinate = smap_varinfo.get_variable( + '/Soil_Moisture_Retrieval_Data_AM/latitude' + ) + longitude_coordinate = smap_varinfo.get_variable( + '/Soil_Moisture_Retrieval_Data_AM/longitude' + ) + + crs = CRS.from_cf( + { + 'false_easting': 0.0, + 'false_northing': 0.0, + 'longitude_of_central_meridian': 0.0, + 'standard_parallel': 30.0, + 'grid_mapping_name': 'lambert_cylindrical_equal_area', + } + ) + + x_y_extents = { + 'x_min': 192972.56050179302, + 'x_max': 4052423.7705376535, + 'y_min': 5930779.396449475, + 'y_max': 6979878.9118312765, + } + + mock_get_x_y_extents.return_value = x_y_extents + + # When ranges are derived, they are first calculated for x, then y: + mock_get_dimension_index_range.side_effect = [(487, 595), (9, 38)] + + with self.subTest( + 'Projected grid from coordinates gets expected dimension ranges' + ): + with Dataset(smap_file_path, 'r') as smap_prefetch: + self.assertDictEqual( + get_x_y_index_ranges_from_coordinates( + '/Soil_Moisture_Retrieval_Data_AM/surface_flag', + smap_varinfo, + smap_prefetch, + latitude_coordinate, + longitude_coordinate, + {}, + bounding_box=bbox, + shape_file_path=None, + ), + expected_index_ranges, + ) + + mock_get_x_y_extents.assert_called_once_with( + ANY, ANY, crs, shape_file=None, bounding_box=bbox + ) + + self.assertEqual(mock_get_dimension_index_range.call_count, 2) + mock_get_dimension_index_range.assert_has_calls( + [ + call( + ANY, + x_y_extents['x_min'], + x_y_extents['x_max'], + bounds_values=None, + ), + call( + ANY, + x_y_extents['y_min'], + x_y_extents['y_max'], + bounds_values=None, + ), + ] + ) + + mock_get_x_y_extents.reset_mock() + mock_get_dimension_index_range.reset_mock() + + with self.subTest('Function does not rederive known index ranges'): + with Dataset(smap_file_path, 'r') as smap_prefetch: + self.assertDictEqual( + get_x_y_index_ranges_from_coordinates( + '/Soil_Moisture_Retrieval_Data_AM/surface_flag', + smap_varinfo, + smap_prefetch, + latitude_coordinate, + longitude_coordinate, + expected_index_ranges, + bounding_box=bbox, + shape_file_path=None, + ), + {}, + ) + + mock_get_x_y_extents.assert_not_called() + mock_get_dimension_index_range.assert_not_called() + @patch('hoss.spatial.get_dimension_index_range') @patch('hoss.spatial.get_projected_x_y_extents') def test_get_projected_x_y_index_ranges( diff --git a/tests/unit/test_subset.py b/tests/unit/test_subset.py index abbb5f1..7c6e979 100644 --- a/tests/unit/test_subset.py +++ b/tests/unit/test_subset.py @@ -62,7 +62,7 @@ def tearDown(self): @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_granule_not_geo( self, @@ -126,7 +126,7 @@ def test_subset_granule_not_geo( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_granule_geo( self, @@ -216,7 +216,7 @@ def test_subset_granule_geo( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_non_geo_no_variables( self, @@ -290,7 +290,7 @@ def test_subset_non_geo_no_variables( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_geo_no_variables( self, @@ -404,7 +404,7 @@ def test_subset_geo_no_variables( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_non_variable_dimensions( self, @@ -537,7 +537,7 @@ def test_subset_non_variable_dimensions( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_bounds_reference( self, @@ -638,7 +638,7 @@ def test_subset_bounds_reference( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_temporal( self, @@ -742,7 +742,7 @@ def test_subset_temporal( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_geo_temporal( self, @@ -860,7 +860,7 @@ def test_subset_geo_temporal( @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') @patch('hoss.subset.get_request_shape_file') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_granule_shape( self, @@ -971,7 +971,7 @@ def test_subset_granule_shape( @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') @patch('hoss.subset.get_request_shape_file') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_granule_shape_and_bbox( self, @@ -1083,7 +1083,7 @@ def test_subset_granule_shape_and_bbox( @patch('hoss.subset.get_requested_index_ranges') @patch('hoss.subset.get_temporal_index_ranges') @patch('hoss.subset.get_spatial_index_ranges') - @patch('hoss.subset.prefetch_dimension_variables') + @patch('hoss.subset.get_prefetch_variables') @patch('hoss.subset.get_varinfo') def test_subset_granule_geo_named( self, From 2fb332ddc7ea9f7d12743795c390d33de480da20 Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Thu, 7 Nov 2024 21:49:17 -0500 Subject: [PATCH 02/16] DAS-2232 - updated the get_valid_geo_grid_points function to get maximum distances --- hoss/coordinate_utilities.py | 227 ++++++++++++------------ hoss/spatial.py | 1 - tests/unit/test_coordinate_utilities.py | 40 +++-- 3 files changed, 142 insertions(+), 126 deletions(-) diff --git a/hoss/coordinate_utilities.py b/hoss/coordinate_utilities.py index 7760a7c..7044ee3 100644 --- a/hoss/coordinate_utilities.py +++ b/hoss/coordinate_utilities.py @@ -183,6 +183,7 @@ def get_1D_dim_array_data_from_dimvalues( dim_min = dim_values[0] - (dim_resolution * dim_indices[0]) dim_max = dim_values[1] + (dim_resolution * (dim_size - 1 - dim_indices[1])) + return np.linspace(dim_min, dim_max, dim_size) @@ -194,7 +195,7 @@ def get_valid_indices( value is provided. If it is not provided, we check for valid values for latitude and longitude """ - # print(f'coordinate_row_col={coordinate_row_col}') + # get_attribute_value returns a value of type `str` coordinate_fill = coordinate.get_attribute_value('_FillValue') if coordinate_fill is not None: @@ -221,18 +222,18 @@ def get_valid_indices( )[0] else: valid_indices = np.empty((0, 0)) - print(f'valid_indices={valid_indices}') + return valid_indices -def get_two_valid_geo_grid_points( +def get_row_col_geo_grid_points( lat_arr: np.ndarray, lon_arr: np.ndarray, latitude_coordinate: VariableFromDmr, longitude_coordinate: VariableFromDmr, row_size: float, col_size: float, -) -> dict[int, tuple]: +) -> tuple[dict, dict]: """ This method is used to return two valid lat lon points from a 2D coordinate dataset. It gets the row and column of the latitude and longitude @@ -240,81 +241,53 @@ def get_two_valid_geo_grid_points( This method does not go down to the next row and col. if the selected row and column all have fills, it will raise an exception in those cases. """ - first_row_col_index = -1 - first_row_row_index = 0 - last_col_row_index = -1 - last_col_col_index = col_size - 1 - lat_row_valid_indices = lon_row_valid_indices = np.empty((0, 0)) - - # get the first row with points that are valid in the lat and lon rows - first_row_row_index, lat_row_valid_indices = get_valid_indices_in_dataset( - lat_arr, row_size, latitude_coordinate, 'row', first_row_row_index - ) - first_row_row_index1, lon_row_valid_indices = get_valid_indices_in_dataset( - lon_arr, row_size, longitude_coordinate, 'row', first_row_row_index - ) - # get a point that is common on both row datasets - if ( - (first_row_row_index == first_row_row_index1) - and (lat_row_valid_indices.size > 0) - and (lon_row_valid_indices.size > 0) - ): - first_row_col_index = np.intersect1d( - lat_row_valid_indices, lon_row_valid_indices - )[0] - print(f'first_row_row_index={first_row_row_index}') - print(f'first_row_col_index={first_row_col_index}') - - # get a valid column from the latitude and longitude datasets - last_col_col_index, lon_col_valid_indices = get_valid_indices_in_dataset( - lon_arr, col_size, longitude_coordinate, 'col', last_col_col_index - ) - last_col_col_index1, lat_col_valid_indices = get_valid_indices_in_dataset( - lat_arr, col_size, latitude_coordinate, 'col', last_col_col_index + geo_grid_indexes_row, geo_grid_indexes_col = get_row_col_valid_indices_in_dataset( + lat_arr, lon_arr, row_size, col_size, latitude_coordinate, longitude_coordinate ) + # 2 points in same row - column indices are changing + # point1_row_index, point1_col_index = geo_grid_indexes_row[0] + # point2_row_index, point2_col_index = geo_grid_indexes_row[1] - # get a point that is common to both column datasets - if ( - (last_col_col_index == last_col_col_index1) - and (lat_col_valid_indices.size > 0) - and (lon_col_valid_indices.size > 0) - ): - last_col_row_index = np.intersect1d( - lat_col_valid_indices, lon_col_valid_indices - )[-1] - - print(f'last_col_col_index={last_col_col_index}') - print(f'last_col_row_index={last_col_row_index}') - - # if the whole row and whole column has no valid indices - # we throw an exception now. This can be extended to move - # to the next row/col - if first_row_col_index == -1: - raise InvalidCoordinateVariable('latitude/longitude') - if last_col_row_index == -1: - raise InvalidCoordinateVariable('latitude/longitude') - - geo_grid_indexes = [ - (first_row_row_index, first_row_col_index), - (last_col_row_index, last_col_col_index), - ] + # 2 points in same column - row indices are changing + # point3_row_index, point3_col_index = geo_grid_indexes_col[0] + # point4_row_index, point4_col_index = geo_grid_indexes_col[1] - geo_grid_points = [ + geo_grid_col_points = [ ( - lon_arr[first_row_row_index][first_row_col_index], - lat_arr[first_row_row_index][first_row_col_index], + lon_arr[geo_grid_indexes_row[0][0]][geo_grid_indexes_row[0][1]], + lat_arr[geo_grid_indexes_row[0][0]][geo_grid_indexes_row[0][1]], ), ( - lon_arr[last_col_row_index][last_col_col_index], - lat_arr[last_col_row_index][last_col_col_index], + lon_arr[geo_grid_indexes_row[1][0]][geo_grid_indexes_row[1][1]], + lat_arr[geo_grid_indexes_row[1][0]][geo_grid_indexes_row[1][1]], ), ] - return { - geo_grid_indexes[0]: geo_grid_points[0], - geo_grid_indexes[1]: geo_grid_points[1], - } + geo_grid_row_points = [ + ( + # lon_arr[point3_row_index][point3_col_index], + # lat_arr[point3_row_index][point3_col_index], + lon_arr[geo_grid_indexes_col[0][0]][geo_grid_indexes_col[0][1]], + lat_arr[geo_grid_indexes_col[0][0]][geo_grid_indexes_col[0][1]], + ), + ( + # lon_arr[point4_row_index][point4_col_index], + # lat_arr[point4_row_index][point4_col_index], + lon_arr[geo_grid_indexes_col[1][0]][geo_grid_indexes_col[1][1]], + lat_arr[geo_grid_indexes_col[1][0]][geo_grid_indexes_col[1][1]], + ), + ] + return ( + { + geo_grid_indexes_col[0]: geo_grid_row_points[0], + geo_grid_indexes_col[1]: geo_grid_row_points[1], + }, + { + geo_grid_indexes_row[0]: geo_grid_col_points[0], + geo_grid_indexes_row[1]: geo_grid_col_points[1], + }, + ) def get_x_y_values_from_geographic_points(points: Dict, crs: CRS) -> Dict[tuple, tuple]: @@ -336,47 +309,79 @@ def get_x_y_values_from_geographic_points(points: Dict, crs: CRS) -> Dict[tuple, return x_y_points -def get_valid_indices_in_dataset( - coordinate_arr: np.ndarray, - dim_size: int, - coordinate: VariableFromDmr, - span_type: str, - start_index: int, -) -> tuple[int, np.ndarray]: +def get_row_col_valid_indices_in_dataset( + lat_arr: np.ndarray, + lon_arr: np.ndarray, + row_size: int, + col_size: int, + lat_coordinate: VariableFromDmr, + lon_coordinate: VariableFromDmr, +) -> tuple[list, list]: """ This method gets valid indices in a row or column of a coordinate dataset """ - coordinate_index = start_index - valid_indices = [] - if span_type == 'row': - valid_indices = get_valid_indices( - coordinate_arr[coordinate_index, :], coordinate + # coordinate_index = start_index + valid_row_indices = valid_col_indices = np.empty((0, 0)) + # if span_type == 'row': + # get 2 points in the row + # while valid_col_indices.size < 2: + # if span_type == 'row': + max_col_distance = 0 + valid_row_index = row_coordinate_index = -1 + max_range_valid_col_indices = np.empty(0) + while row_coordinate_index < row_size - 1: + row_coordinate_index = row_coordinate_index + 1 + valid_col_indices = np.intersect1d( + get_valid_indices(lat_arr[row_coordinate_index, :], lat_coordinate), + get_valid_indices(lon_arr[row_coordinate_index, :], lon_coordinate), ) - else: - valid_indices = get_valid_indices( - coordinate_arr[:, coordinate_index], coordinate + + if valid_col_indices.size >= 2 and ( + max_col_distance < (valid_col_indices[-1] - valid_col_indices[0]) + ): + max_col_distance = valid_col_indices[-1] - valid_col_indices[0] + max_range_valid_col_indices = valid_col_indices + valid_row_index = row_coordinate_index + + if valid_row_index < 0 or max_range_valid_col_indices.size < 2: + raise InvalidCoordinateVariable(lat_coordinate.full_name_path) + + # span type is col + # get 2 points in a column + # while valid_row_indices.size < 2: + max_row_distance = 0 + valid_col_index = col_coordinate_index = col_size + max_range_valid_row_indices = np.empty(0) + while col_coordinate_index > 0: + col_coordinate_index = col_coordinate_index - 1 + valid_row_indices = np.intersect1d( + get_valid_indices(lat_arr[:, col_coordinate_index], lat_coordinate), + get_valid_indices(lon_arr[:, col_coordinate_index], lon_coordinate), ) + if valid_row_indices.size >= 2 and ( + max_row_distance < (valid_row_indices[-1] - valid_row_indices[0]) + ): + max_row_distance = valid_row_indices[-1] - valid_row_indices[0] + max_range_valid_row_indices = valid_row_indices + valid_col_index = col_coordinate_index + + if col_coordinate_index >= col_size or max_range_valid_row_indices.size < 2: + raise InvalidCoordinateVariable(lon_coordinate.full_name_path) + + # same row . 2 column points + geo_grid_indexes_row = [ + (valid_row_index, max_range_valid_col_indices[0]), + (valid_row_index, max_range_valid_col_indices[-1]), + ] + + # same column, 2 row points + geo_grid_indexes_col = [ + (max_range_valid_row_indices[0], valid_col_index), + (max_range_valid_row_indices[-1], valid_col_index), + ] - while valid_indices.size == 0: - if span_type == 'row': - if coordinate_index < dim_size: - coordinate_index = coordinate_index + 1 - valid_indices = get_valid_indices( - coordinate_arr[coordinate_index, :], - coordinate, - ) - else: - raise InvalidCoordinateVariable(coordinate.full_name_path) - else: - if coordinate_index > 0: - coordinate_index = coordinate_index - 1 - valid_indices = get_valid_indices( - coordinate_arr[:, coordinate_index], coordinate - ) - else: - raise InvalidCoordinateVariable(coordinate.full_name_path) - return coordinate_index, valid_indices + return geo_grid_indexes_row, geo_grid_indexes_col def create_dimension_array_from_coordinates( @@ -406,18 +411,22 @@ def create_dimension_array_from_coordinates( lat_arr, lon_arr, ) - geo_grid_points = get_two_valid_geo_grid_points( + + geo_grid_row_points, geo_grid_col_points = get_row_col_geo_grid_points( lat_arr, lon_arr, latitude_coordinate, longitude_coordinate, row_size, col_size ) - x_y_values = get_x_y_values_from_geographic_points(geo_grid_points, crs) - - row_indices, col_indices = zip(*list(x_y_values.keys())) + x_y_values1 = get_x_y_values_from_geographic_points(geo_grid_row_points, crs) + # y value changes across the row indices + row_indices = [list(x_y_values1.keys())[0][0], list(x_y_values1.keys())[1][0]] + y_values = [list(x_y_values1.values())[0][1], list(x_y_values1.values())[1][1]] - x_values, y_values = zip(*list(x_y_values.values())) + x_y_values2 = get_x_y_values_from_geographic_points(geo_grid_col_points, crs) + # x value changes across the col indices + col_indices = [list(x_y_values2.keys())[0][1], list(x_y_values2.keys())[1][1]] + x_values = [list(x_y_values2.values())[0][0], list(x_y_values2.values())[1][0]] y_dim = get_1D_dim_array_data_from_dimvalues(y_values, row_indices, row_size) - x_dim = get_1D_dim_array_data_from_dimvalues(x_values, col_indices, col_size) projected_y, projected_x = tuple(projected_dimension_names) diff --git a/hoss/spatial.py b/hoss/spatial.py index 8652d3c..ad72f42 100644 --- a/hoss/spatial.py +++ b/hoss/spatial.py @@ -129,7 +129,6 @@ def get_spatial_index_ranges( varinfo, required_variables ) for variable_with_anonymous_dims in variables_with_anonymous_dims: - print(f'variable_with_anonymous_dims={variable_with_anonymous_dims}') latitude_coordinates, longitude_coordinates = get_coordinate_variables( varinfo, [variable_with_anonymous_dims] ) diff --git a/tests/unit/test_coordinate_utilities.py b/tests/unit/test_coordinate_utilities.py index 620ca24..82e0b3c 100644 --- a/tests/unit/test_coordinate_utilities.py +++ b/tests/unit/test_coordinate_utilities.py @@ -16,10 +16,10 @@ get_coordinate_variables, get_projected_dimension_names, get_projected_dimension_names_from_coordinate_variables, + get_row_col_geo_grid_points, get_row_col_sizes_from_coordinate_datasets, - get_two_valid_geo_grid_points, + get_row_col_valid_indices_in_dataset, get_valid_indices, - get_valid_indices_in_dataset, get_variables_with_anonymous_dims, get_x_y_values_from_geographic_points, ) @@ -505,15 +505,18 @@ def test_any_absent_dimension_variables(self): ) self.assertFalse(variable_has_fake_dims) - def test_get_two_valid_geo_grid_points(self): + def test_get_row_col_geo_grid_points(self): """Ensure that two valid lat/lon points returned by the method with a set of lat/lon coordinates as input """ - with self.subTest('Get two valid geo grid points from coordinates'): - expected_geo_grid_points = [(-179.3, 89.3), (178.4, -88.1)] - actual_geo_grid_points = get_two_valid_geo_grid_points( + with self.subTest('Get two sets of valid geo grid points from coordinates'): + expected_geo_grid_points = ( + {(0, 9): (178.4, 89.3), (4, 9): (178.4, -88.1)}, + {(0, 0): (-179.3, 89.3), (0, 9): (178.4, 89.3)}, + ) + actual_geo_grid_points = get_row_col_geo_grid_points( self.lat_arr, self.lon_arr, self.varinfo.get_variable(self.latitude), @@ -521,10 +524,9 @@ def test_get_two_valid_geo_grid_points(self): 5, 10, ) - for actual_geo_grid_point, expected_geo_grid_point in zip( - actual_geo_grid_points.values(), expected_geo_grid_points - ): - self.assertEqual(actual_geo_grid_point, expected_geo_grid_point) + + self.assertDictEqual(actual_geo_grid_points[0], expected_geo_grid_points[0]) + self.assertDictEqual(actual_geo_grid_points[1], expected_geo_grid_points[1]) with self.subTest('Get two valid geo grid points from reversed coordinates'): lon_arr_reversed = np.array( @@ -612,8 +614,11 @@ def test_get_two_valid_geo_grid_points(self): ], ] ) - expected_geo_grid_points_reversed = [(-179.3, 89.3), (150.5, -89.3)] - actual_geo_grid_points = get_two_valid_geo_grid_points( + expected_geo_grid_points_reversed = ( + {(0, 8): (-179.3, -79.3), (4, 8): (178.4, -79.3)}, + {(0, 0): (-179.3, 89.3), (0, 9): (-179.3, -89.3)}, + ) + actual_geo_grid_points_reversed = get_row_col_geo_grid_points( lat_arr_reversed, lon_arr_reversed, self.varinfo.get_variable(self.latitude), @@ -621,7 +626,10 @@ def test_get_two_valid_geo_grid_points(self): 5, 10, ) - for actual_geo_grid_point, expected_geo_grid_point in zip( - actual_geo_grid_points.values(), expected_geo_grid_points_reversed - ): - self.assertEqual(actual_geo_grid_point, expected_geo_grid_point) + + self.assertDictEqual( + actual_geo_grid_points_reversed[0], expected_geo_grid_points_reversed[0] + ) + self.assertDictEqual( + actual_geo_grid_points_reversed[1], expected_geo_grid_points_reversed[1] + ) From 51c392814b1aab734e641f781bf412ad06d7bd55 Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Thu, 14 Nov 2024 10:04:57 -0500 Subject: [PATCH 03/16] DAS-2236 - updated the method to get the points --- hoss/coordinate_utilities.py | 204 +++++++++++------------- tests/unit/test_coordinate_utilities.py | 54 ++++--- 2 files changed, 128 insertions(+), 130 deletions(-) diff --git a/hoss/coordinate_utilities.py b/hoss/coordinate_utilities.py index 7044ee3..8e62b77 100644 --- a/hoss/coordinate_utilities.py +++ b/hoss/coordinate_utilities.py @@ -13,7 +13,6 @@ from hoss.exceptions import ( IncompatibleCoordinateVariables, InvalidCoordinateDataset, - InvalidCoordinateVariable, MissingCoordinateVariable, MissingVariable, ) @@ -166,7 +165,7 @@ def get_coordinate_array( return coordinate_array -def get_1D_dim_array_data_from_dimvalues( +def get_dim_array_data_from_dimvalues( dim_values: np.ndarray, dim_indices: np.ndarray, dim_size: int ) -> np.ndarray: """ @@ -188,7 +187,7 @@ def get_1D_dim_array_data_from_dimvalues( def get_valid_indices( - coordinate_row_col: np.ndarray, coordinate: VariableFromDmr + lat_lon_array: np.ndarray, coordinate: VariableFromDmr ) -> np.ndarray: """ Returns indices of a valid array without fill values if the fill @@ -199,31 +198,25 @@ def get_valid_indices( # get_attribute_value returns a value of type `str` coordinate_fill = coordinate.get_attribute_value('_FillValue') if coordinate_fill is not None: - is_not_fill = ~np.isclose(coordinate_row_col, float(coordinate_fill)) + is_not_fill = ~np.isclose(lat_lon_array, float(coordinate_fill)) else: # Creates an entire array of `True` values. - is_not_fill = np.ones_like(coordinate_row_col, dtype=bool) + is_not_fill = np.ones_like(lat_lon_array, dtype=bool) if coordinate.is_longitude(): - valid_indices = np.where( - np.logical_and( - is_not_fill, - np.logical_and( - coordinate_row_col >= -180.0, coordinate_row_col <= 360.0 - ), - ) - )[0] + valid_indices = np.logical_and( + is_not_fill, + np.logical_and(lat_lon_array >= -180.0, lat_lon_array <= 360.0), + ) elif coordinate.is_latitude(): - valid_indices = np.where( - np.logical_and( - is_not_fill, - np.logical_and(coordinate_row_col >= -90.0, coordinate_row_col <= 90.0), - ) - )[0] + valid_indices = np.logical_and( + is_not_fill, + np.logical_and(lat_lon_array >= -90.0, lat_lon_array <= 90.0), + ) else: valid_indices = np.empty((0, 0)) - return valid_indices + return valid_indices # throw an exception def get_row_col_geo_grid_points( @@ -231,8 +224,6 @@ def get_row_col_geo_grid_points( lon_arr: np.ndarray, latitude_coordinate: VariableFromDmr, longitude_coordinate: VariableFromDmr, - row_size: float, - col_size: float, ) -> tuple[dict, dict]: """ This method is used to return two valid lat lon points from a 2D @@ -242,50 +233,52 @@ def get_row_col_geo_grid_points( column all have fills, it will raise an exception in those cases. """ - geo_grid_indexes_row, geo_grid_indexes_col = get_row_col_valid_indices_in_dataset( - lat_arr, lon_arr, row_size, col_size, latitude_coordinate, longitude_coordinate + geo_grid_indices_row, geo_grid_indices_col = get_row_col_valid_indices_in_dataset( + lat_arr, lon_arr, latitude_coordinate, longitude_coordinate ) - # 2 points in same row - column indices are changing - # point1_row_index, point1_col_index = geo_grid_indexes_row[0] - # point2_row_index, point2_col_index = geo_grid_indexes_row[1] - - # 2 points in same column - row indices are changing - # point3_row_index, point3_col_index = geo_grid_indexes_col[0] - # point4_row_index, point4_col_index = geo_grid_indexes_col[1] geo_grid_col_points = [ ( - lon_arr[geo_grid_indexes_row[0][0]][geo_grid_indexes_row[0][1]], - lat_arr[geo_grid_indexes_row[0][0]][geo_grid_indexes_row[0][1]], + lon_arr[geo_grid_indices_row[0][0]][geo_grid_indices_row[0][1]], + lat_arr[geo_grid_indices_row[0][0]][geo_grid_indices_row[0][1]], ), ( - lon_arr[geo_grid_indexes_row[1][0]][geo_grid_indexes_row[1][1]], - lat_arr[geo_grid_indexes_row[1][0]][geo_grid_indexes_row[1][1]], + lon_arr[geo_grid_indices_row[1][0]][geo_grid_indices_row[1][1]], + lat_arr[geo_grid_indices_row[1][0]][geo_grid_indices_row[1][1]], ), ] geo_grid_row_points = [ ( - # lon_arr[point3_row_index][point3_col_index], - # lat_arr[point3_row_index][point3_col_index], - lon_arr[geo_grid_indexes_col[0][0]][geo_grid_indexes_col[0][1]], - lat_arr[geo_grid_indexes_col[0][0]][geo_grid_indexes_col[0][1]], + lon_arr[geo_grid_indices_col[0][0]][geo_grid_indices_col[0][1]], + lat_arr[geo_grid_indices_col[0][0]][geo_grid_indices_col[0][1]], ), ( - # lon_arr[point4_row_index][point4_col_index], - # lat_arr[point4_row_index][point4_col_index], - lon_arr[geo_grid_indexes_col[1][0]][geo_grid_indexes_col[1][1]], - lat_arr[geo_grid_indexes_col[1][0]][geo_grid_indexes_col[1][1]], + lon_arr[geo_grid_indices_col[1][0]][geo_grid_indices_col[1][1]], + lat_arr[geo_grid_indices_col[1][0]][geo_grid_indices_col[1][1]], ), ] + return ( { - geo_grid_indexes_col[0]: geo_grid_row_points[0], - geo_grid_indexes_col[1]: geo_grid_row_points[1], + ( + geo_grid_indices_col[0][0], + geo_grid_indices_col[0][1], + ): geo_grid_row_points[0], + ( + geo_grid_indices_col[1][0], + geo_grid_indices_col[1][1], + ): geo_grid_row_points[1], }, { - geo_grid_indexes_row[0]: geo_grid_col_points[0], - geo_grid_indexes_row[1]: geo_grid_col_points[1], + ( + geo_grid_indices_row[0][0], + geo_grid_indices_row[0][1], + ): geo_grid_col_points[0], + ( + geo_grid_indices_row[1][0], + geo_grid_indices_row[1][1], + ): geo_grid_col_points[1], }, ) @@ -295,6 +288,7 @@ def get_x_y_values_from_geographic_points(points: Dict, crs: CRS) -> Dict[tuple, those points to the target grid. Then return the x-y dimscales """ + point_longitudes, point_latitudes = zip(*list(points.values())) from_geo_transformer = Transformer.from_crs(4326, crs) @@ -312,8 +306,6 @@ def get_x_y_values_from_geographic_points(points: Dict, crs: CRS) -> Dict[tuple, def get_row_col_valid_indices_in_dataset( lat_arr: np.ndarray, lon_arr: np.ndarray, - row_size: int, - col_size: int, lat_coordinate: VariableFromDmr, lon_coordinate: VariableFromDmr, ) -> tuple[list, list]: @@ -321,67 +313,58 @@ def get_row_col_valid_indices_in_dataset( This method gets valid indices in a row or column of a coordinate dataset """ - # coordinate_index = start_index - valid_row_indices = valid_col_indices = np.empty((0, 0)) - # if span_type == 'row': - # get 2 points in the row - # while valid_col_indices.size < 2: - # if span_type == 'row': - max_col_distance = 0 - valid_row_index = row_coordinate_index = -1 - max_range_valid_col_indices = np.empty(0) - while row_coordinate_index < row_size - 1: - row_coordinate_index = row_coordinate_index + 1 - valid_col_indices = np.intersect1d( - get_valid_indices(lat_arr[row_coordinate_index, :], lat_coordinate), - get_valid_indices(lon_arr[row_coordinate_index, :], lon_coordinate), - ) + valid_lat_lon_mask = np.logical_and( + get_valid_indices(lat_arr, lat_coordinate), + get_valid_indices(lon_arr, lon_coordinate), + ) - if valid_col_indices.size >= 2 and ( - max_col_distance < (valid_col_indices[-1] - valid_col_indices[0]) - ): - max_col_distance = valid_col_indices[-1] - valid_col_indices[0] - max_range_valid_col_indices = valid_col_indices - valid_row_index = row_coordinate_index - - if valid_row_index < 0 or max_range_valid_col_indices.size < 2: - raise InvalidCoordinateVariable(lat_coordinate.full_name_path) - - # span type is col - # get 2 points in a column - # while valid_row_indices.size < 2: - max_row_distance = 0 - valid_col_index = col_coordinate_index = col_size - max_range_valid_row_indices = np.empty(0) - while col_coordinate_index > 0: - col_coordinate_index = col_coordinate_index - 1 - valid_row_indices = np.intersect1d( - get_valid_indices(lat_arr[:, col_coordinate_index], lat_coordinate), - get_valid_indices(lon_arr[:, col_coordinate_index], lon_coordinate), - ) - if valid_row_indices.size >= 2 and ( - max_row_distance < (valid_row_indices[-1] - valid_row_indices[0]) - ): - max_row_distance = valid_row_indices[-1] - valid_row_indices[0] - max_range_valid_row_indices = valid_row_indices - valid_col_index = col_coordinate_index - - if col_coordinate_index >= col_size or max_range_valid_row_indices.size < 2: - raise InvalidCoordinateVariable(lon_coordinate.full_name_path) - - # same row . 2 column points - geo_grid_indexes_row = [ - (valid_row_index, max_range_valid_col_indices[0]), - (valid_row_index, max_range_valid_col_indices[-1]), - ] + # get maximally spread points within rows + max_x_spread_pts = get_max_x_spread_pts(~valid_lat_lon_mask) - # same column, 2 row points - geo_grid_indexes_col = [ - (max_range_valid_row_indices[0], valid_col_index), - (max_range_valid_row_indices[-1], valid_col_index), + # Doing the same for the columns is done by transposing the valid_mask + # and then fixing the results from [x, y] to [y, x]. + max_y_spread_trsp = get_max_x_spread_pts(np.transpose(~valid_lat_lon_mask)) + max_y_spread_pts = [ + np.flip(max_y_spread_trsp[0]), + np.flip(max_y_spread_trsp[1]), ] - return geo_grid_indexes_row, geo_grid_indexes_col + return max_y_spread_pts, max_x_spread_pts + + +def get_max_x_spread_pts( + valid_mask: np.ndarray, # Numpy Mask Array, e.g., invalid latitudes & longitudes +) -> tuple: # 2 points by indices, [[y_ind, x_ind], [y_ind, x_ind] + """ + # This function returns two data points by x, y indices that are spread farthest + # from each other in the same row, i.e., have the greatest delta-x value - and + # are valid data points from the valid_mask array passed in. The input array + # must be a 2D Numpy mask array providing the valid data points, e.g., filtering + # out fill values and out-of-range values. + """ + # fill a sample array with x-index values, x_ind[i, j] = j + x_ind = np.array( + [ + # [j for j in range(valid_mask.shape[1])] + list(range(valid_mask.shape[1])) + for i in range(valid_mask.shape[0]) + ] + ) + # mask x_ind to hide the invalid data points + valid_x_ind = np.ma.array(x_ind, mask=valid_mask) + + # ptp (peak-to-peak) finds the greatest delta-x value amongst valid points + # for each row. Result is 1D + x_ind_spread = valid_x_ind.ptp(axis=1) + + # This finds which row has the greatest spread (delta-x) + max_x_spread_row = np.argmax(x_ind_spread) + + # Using the row reference, find the min-x and max-x + min_x_ind = np.min(valid_x_ind[max_x_spread_row]) + max_x_ind = np.max(valid_x_ind[max_x_spread_row]) + + return [[max_x_spread_row, min_x_ind], [max_x_spread_row, max_x_ind]] def create_dimension_array_from_coordinates( @@ -412,10 +395,9 @@ def create_dimension_array_from_coordinates( lon_arr, ) - geo_grid_row_points, geo_grid_col_points = get_row_col_geo_grid_points( - lat_arr, lon_arr, latitude_coordinate, longitude_coordinate, row_size, col_size + geo_grid_col_points, geo_grid_row_points = get_row_col_geo_grid_points( + lat_arr, lon_arr, latitude_coordinate, longitude_coordinate ) - x_y_values1 = get_x_y_values_from_geographic_points(geo_grid_row_points, crs) # y value changes across the row indices row_indices = [list(x_y_values1.keys())[0][0], list(x_y_values1.keys())[1][0]] @@ -426,8 +408,8 @@ def create_dimension_array_from_coordinates( col_indices = [list(x_y_values2.keys())[0][1], list(x_y_values2.keys())[1][1]] x_values = [list(x_y_values2.values())[0][0], list(x_y_values2.values())[1][0]] - y_dim = get_1D_dim_array_data_from_dimvalues(y_values, row_indices, row_size) - x_dim = get_1D_dim_array_data_from_dimvalues(x_values, col_indices, col_size) + y_dim = get_dim_array_data_from_dimvalues(y_values, row_indices, row_size) + x_dim = get_dim_array_data_from_dimvalues(x_values, col_indices, col_size) projected_y, projected_x = tuple(projected_dimension_names) return {projected_y: y_dim, projected_x: x_dim} diff --git a/tests/unit/test_coordinate_utilities.py b/tests/unit/test_coordinate_utilities.py index 82e0b3c..612ac7f 100644 --- a/tests/unit/test_coordinate_utilities.py +++ b/tests/unit/test_coordinate_utilities.py @@ -11,9 +11,9 @@ from hoss.coordinate_utilities import ( any_absent_dimension_variables, - get_1D_dim_array_data_from_dimvalues, get_coordinate_array, get_coordinate_variables, + get_dim_array_data_from_dimvalues, get_projected_dimension_names, get_projected_dimension_names_from_coordinate_variables, get_row_col_geo_grid_points, @@ -123,7 +123,7 @@ def test_get_coordinate_variables(self): for expected_variable in expected_coordinate_variables[1]: self.assertIn(expected_variable, actual_coordinate_variables[1]) - def test_get_1D_dim_array_data_from_dimvalues(self): + def test_get_dim_array_data_from_dimvalues(self): """Ensure that the dimension scale generated from the provided dimension values are accurate for ascending and descending scales @@ -134,7 +134,7 @@ def test_get_1D_dim_array_data_from_dimvalues(self): dim_indices_asc = np.array([0, 1]) dim_size_asc = 12 expected_dim_asc = np.array([2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24]) - dim_array_values = get_1D_dim_array_data_from_dimvalues( + dim_array_values = get_dim_array_data_from_dimvalues( dim_values_asc, dim_indices_asc, dim_size_asc ) self.assertTrue(np.array_equal(dim_array_values, expected_dim_asc)) @@ -145,7 +145,7 @@ def test_get_1D_dim_array_data_from_dimvalues(self): dim_size_desc = 10 expected_dim_desc = np.array([120, 110, 100, 90, 80, 70, 60, 50, 40, 30]) - dim_array_values = get_1D_dim_array_data_from_dimvalues( + dim_array_values = get_dim_array_data_from_dimvalues( dim_values_desc, dim_indices_desc, dim_size_desc ) self.assertTrue(np.array_equal(dim_array_values, expected_dim_desc)) @@ -155,7 +155,7 @@ def test_get_1D_dim_array_data_from_dimvalues(self): dim_indices_asc = np.array([0, 1]) dim_size_asc = 12 with self.assertRaises(InvalidCoordinateDataset) as context: - get_1D_dim_array_data_from_dimvalues( + get_dim_array_data_from_dimvalues( dim_values_invalid, dim_indices_asc, dim_size_asc ) self.assertEqual( @@ -169,7 +169,7 @@ def test_get_1D_dim_array_data_from_dimvalues(self): dim_indices_invalid = np.array([5, 5]) dim_size_desc = 10 with self.assertRaises(InvalidCoordinateDataset) as context: - get_1D_dim_array_data_from_dimvalues( + get_dim_array_data_from_dimvalues( dim_values_desc, dim_indices_invalid, dim_size_desc ) self.assertEqual( @@ -328,7 +328,7 @@ def test_get_row_col_sizes_from_coordinate_datasets(self): get_row_col_sizes_from_coordinate_datasets(self.lat_arr, self.lon_arr), expected_row_col_sizes, ) - with self.subTest('Retrieves the expected row col sizes for the 1d array'): + with self.subTest('Retrieves the expected row col sizes for the dim array'): self.assertEqual( get_row_col_sizes_from_coordinate_datasets( np.array([1, 2, 3, 4]), np.array([5, 6, 7, 8, 9]) @@ -394,15 +394,17 @@ def test_get_valid_indices(self): ascending or descending. """ - expected_valid_indices_lat_arr_with_fill = np.array([1, 2, 3, 4]) - expected_valid_indices_lon_arr_with_fill = np.array([0, 1, 2, 6, 7, 8, 9]) - - expected_valid_indices_lat_arr_over_range = np.array([0, 1, 2]) + # expected_valid_indices_lat_arr_with_fill = np.array([1, 2, 3, 4]) + # expected_valid_indices_lon_arr_with_fill = np.array([0, 1, 2, 6, 7, 8, 9]) + # expected_valid_indices_lat_arr_over_range = np.array([0, 1, 2]) expected_valid_indices_lon_arr_over_range = np.array([0, 1, 2, 6, 7, 8, 9]) fill_array = np.array([-9999.0, -9999.0, -9999.0, -9999.0]) with self.subTest('valid indices for latitude with fill values'): + expected_valid_indices_lat_arr_with_fill = np.array( + [False, True, True, True, True] + ) valid_indices_lat_arr = get_valid_indices( self.lat_arr[:, 2], self.varinfo.get_variable(self.latitude) ) @@ -410,6 +412,9 @@ def test_get_valid_indices(self): valid_indices_lat_arr, expected_valid_indices_lat_arr_with_fill ) with self.subTest('valid indices for longitude with fill values'): + expected_valid_indices_lon_arr_with_fill = np.array( + [True, True, True, False, False, False, True, True, True, True] + ) valid_indices_lon_arr = get_valid_indices( self.lon_arr[0, :], self.varinfo.get_variable(self.longitude) ) @@ -417,6 +422,9 @@ def test_get_valid_indices(self): valid_indices_lon_arr, expected_valid_indices_lon_arr_with_fill ) with self.subTest('latitude values beyond valid range'): + expected_valid_indices_lat_arr_over_range = np.array( + [True, True, True, False, False] + ) valid_indices_lat_arr = get_valid_indices( self.lat_arr[:, 3], self.varinfo.get_variable(self.latitude) ) @@ -424,6 +432,9 @@ def test_get_valid_indices(self): valid_indices_lat_arr, expected_valid_indices_lat_arr_over_range ) with self.subTest('longitude values beyond valid range'): + expected_valid_indices_lon_arr_over_range = np.array( + [True, True, True, False, False, False, True, True, True, True] + ) valid_indices_lon_arr = get_valid_indices( self.lon_arr[1, :], self.varinfo.get_variable(self.longitude) ) @@ -431,10 +442,13 @@ def test_get_valid_indices(self): valid_indices_lon_arr, expected_valid_indices_lon_arr_over_range ) with self.subTest('all fill values - no valid indices'): - valid_indices = get_valid_indices( + expected_valid_indices_fill_values = np.array([False, False, False, False]) + valid_indices_all_fill = get_valid_indices( fill_array, self.varinfo.get_variable(self.longitude) ) - self.assertEqual(valid_indices.size, 0) + np.testing.assert_array_equal( + valid_indices_all_fill, expected_valid_indices_fill_values + ) def test_get_variables_with_anonymous_dims(self): """Ensure that variables with no dimensions are @@ -513,16 +527,17 @@ def test_get_row_col_geo_grid_points(self): with self.subTest('Get two sets of valid geo grid points from coordinates'): expected_geo_grid_points = ( - {(0, 9): (178.4, 89.3), (4, 9): (178.4, -88.1)}, + # {(0, 9): (178.4, 89.3), (4, 9): (178.4, -88.1)}, + # {(0, 0): (-179.3, 89.3), (0, 9): (178.4, 89.3)}, {(0, 0): (-179.3, 89.3), (0, 9): (178.4, 89.3)}, + {(0, 0): (-179.3, 89.3), (4, 0): (-179.3, -88.1)}, + # {(0, 0): (-179.3, 89.3), (0, 9): (178.4, 89.3)}, ) actual_geo_grid_points = get_row_col_geo_grid_points( self.lat_arr, self.lon_arr, self.varinfo.get_variable(self.latitude), self.varinfo.get_variable(self.longitude), - 5, - 10, ) self.assertDictEqual(actual_geo_grid_points[0], expected_geo_grid_points[0]) @@ -615,16 +630,17 @@ def test_get_row_col_geo_grid_points(self): ] ) expected_geo_grid_points_reversed = ( - {(0, 8): (-179.3, -79.3), (4, 8): (178.4, -79.3)}, + # {(0, 8): (-179.3, -79.3), (4, 8): (178.4, -79.3)}, + # {(0, 0): (-179.3, 89.3), (0, 9): (-179.3, -89.3)}, {(0, 0): (-179.3, 89.3), (0, 9): (-179.3, -89.3)}, + {(0, 0): (-179.3, 89.3), (4, 0): (178.4, -89.3)}, + # {(0, 0): (-179.3, 89.3), (0, 9): (-179.3, -89.3)}, ) actual_geo_grid_points_reversed = get_row_col_geo_grid_points( lat_arr_reversed, lon_arr_reversed, self.varinfo.get_variable(self.latitude), self.varinfo.get_variable(self.longitude), - 5, - 10, ) self.assertDictEqual( From 089f6a5b8e562aa87d46c4eb3ed581007cf6d378 Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Thu, 14 Nov 2024 15:34:05 -0500 Subject: [PATCH 04/16] DAS-2236 - Added missing unit tests --- hoss/coordinate_utilities.py | 6 +- tests/unit/test_coordinate_utilities.py | 88 +++++++++++++++++++++++-- 2 files changed, 85 insertions(+), 9 deletions(-) diff --git a/hoss/coordinate_utilities.py b/hoss/coordinate_utilities.py index 8e62b77..2dd5adf 100644 --- a/hoss/coordinate_utilities.py +++ b/hoss/coordinate_utilities.py @@ -310,8 +310,8 @@ def get_row_col_valid_indices_in_dataset( lon_coordinate: VariableFromDmr, ) -> tuple[list, list]: """ - This method gets valid indices in a row or column of a - coordinate dataset + This function gets valid indices across rows and columns of + both the latitude and longitude datasets """ valid_lat_lon_mask = np.logical_and( get_valid_indices(lat_arr, lat_coordinate), @@ -334,7 +334,7 @@ def get_row_col_valid_indices_in_dataset( def get_max_x_spread_pts( valid_mask: np.ndarray, # Numpy Mask Array, e.g., invalid latitudes & longitudes -) -> tuple: # 2 points by indices, [[y_ind, x_ind], [y_ind, x_ind] +) -> list[list]: # 2 points by indices, [[y_ind, x_ind], [y_ind, x_ind] """ # This function returns two data points by x, y indices that are spread farthest # from each other in the same row, i.e., have the greatest delta-x value - and diff --git a/tests/unit/test_coordinate_utilities.py b/tests/unit/test_coordinate_utilities.py index 612ac7f..595918e 100644 --- a/tests/unit/test_coordinate_utilities.py +++ b/tests/unit/test_coordinate_utilities.py @@ -7,6 +7,7 @@ from harmony.util import config from netCDF4 import Dataset from numpy.testing import assert_array_equal +from pyproj import CRS from varinfo import VarInfoFromDmr from hoss.coordinate_utilities import ( @@ -14,6 +15,7 @@ get_coordinate_array, get_coordinate_variables, get_dim_array_data_from_dimvalues, + get_max_x_spread_pts, get_projected_dimension_names, get_projected_dimension_names_from_coordinate_variables, get_row_col_geo_grid_points, @@ -527,11 +529,8 @@ def test_get_row_col_geo_grid_points(self): with self.subTest('Get two sets of valid geo grid points from coordinates'): expected_geo_grid_points = ( - # {(0, 9): (178.4, 89.3), (4, 9): (178.4, -88.1)}, - # {(0, 0): (-179.3, 89.3), (0, 9): (178.4, 89.3)}, {(0, 0): (-179.3, 89.3), (0, 9): (178.4, 89.3)}, {(0, 0): (-179.3, 89.3), (4, 0): (-179.3, -88.1)}, - # {(0, 0): (-179.3, 89.3), (0, 9): (178.4, 89.3)}, ) actual_geo_grid_points = get_row_col_geo_grid_points( self.lat_arr, @@ -630,11 +629,8 @@ def test_get_row_col_geo_grid_points(self): ] ) expected_geo_grid_points_reversed = ( - # {(0, 8): (-179.3, -79.3), (4, 8): (178.4, -79.3)}, - # {(0, 0): (-179.3, 89.3), (0, 9): (-179.3, -89.3)}, {(0, 0): (-179.3, 89.3), (0, 9): (-179.3, -89.3)}, {(0, 0): (-179.3, 89.3), (4, 0): (178.4, -89.3)}, - # {(0, 0): (-179.3, 89.3), (0, 9): (-179.3, -89.3)}, ) actual_geo_grid_points_reversed = get_row_col_geo_grid_points( lat_arr_reversed, @@ -649,3 +645,83 @@ def test_get_row_col_geo_grid_points(self): self.assertDictEqual( actual_geo_grid_points_reversed[1], expected_geo_grid_points_reversed[1] ) + + def test_get_max_x_spread_pts(self): + """Ensure that two valid sets of indices are returned by the function + with a masked dataset as input + + """ + + with self.subTest('Get two sets of valid indices for points from coordinates'): + valid_values = np.array( + [ + [True, True, True, True, False, False, True, True, True, True], + [True, True, True, False, False, False, True, True, True, True], + [True, True, True, False, True, False, True, True, True, True], + [True, True, True, False, False, False, True, True, True, True], + [True, True, False, False, False, False, True, True, True, True], + ] + ) + expected_indices = [[0, 0], [0, 9]] + + actual_indices = get_max_x_spread_pts(~valid_values) + + self.assertTrue(actual_indices[0] == expected_indices[0]) + self.assertTrue(actual_indices[1] == expected_indices[1]) + + def test_get_row_col_valid_indices_in_dataset(self): + """Ensure that two sets of valid indices are + returned by the method with a set of lat/lon coordinates as input + + """ + with self.subTest('Get two sets of valid indices from coordinates dataset'): + expected_grid_indices = ( + [(0, 0), (0, 9)], + [(0, 0), (4, 0)], + ) + actual_grid_indices = get_row_col_valid_indices_in_dataset( + self.lat_arr, + self.lon_arr, + self.varinfo.get_variable(self.latitude), + self.varinfo.get_variable(self.longitude), + ) + + self.assertTrue(actual_grid_indices[0], expected_grid_indices[0]) + self.assertTrue(actual_grid_indices[1], expected_grid_indices[1]) + + def test_get_x_y_values_from_geographic_points(self): + """Ensure that the correct x-values are returned by the function + with a set of geographic coordinates as input. + + """ + crs = CRS.from_cf( + { + 'false_easting': 0.0, + 'false_northing': 0.0, + 'longitude_of_central_meridian': 0.0, + 'standard_parallel': 30.0, + 'grid_mapping_name': 'lambert_cylindrical_equal_area', + } + ) + + with self.subTest('Get valid x-y points from coordinates in a row'): + two_col_points_in_a_row = {(0, 0): (-179.3, 89.3), (0, 9): (178.4, 89.3)} + expected_x_y_points = { + (0, 0): (-17299990.048985746, 7341677.255608977), + (0, 9): (17213152.396759935, 7341677.255608977), + } + actual_x_y_points = get_x_y_values_from_geographic_points( + two_col_points_in_a_row, crs + ) + self.assertDictEqual(actual_x_y_points, expected_x_y_points) + + with self.subTest('Get valid x-y points from coordinates in a col'): + two_row_points_in_a_col = {(0, 0): (-179.3, 89.3), (4, 0): (-179.3, -88.1)} + expected_x_y_points = { + (0, 0): (-17299990.048985746, 7341677.255608977), + (4, 0): (-17299990.048985746, -7338157.219843731), + } + actual_x_y_points = get_x_y_values_from_geographic_points( + two_row_points_in_a_col, crs + ) + self.assertDictEqual(actual_x_y_points, expected_x_y_points) From 2a7e61ea2b4d9b46311d2de62d8fa821acb70882 Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Fri, 15 Nov 2024 10:47:17 -0500 Subject: [PATCH 05/16] tests/unit/test_coordinate_utilities.py --- hoss/coordinate_utilities.py | 90 +++++++++++++++--------------------- 1 file changed, 38 insertions(+), 52 deletions(-) diff --git a/hoss/coordinate_utilities.py b/hoss/coordinate_utilities.py index 2dd5adf..ea32977 100644 --- a/hoss/coordinate_utilities.py +++ b/hoss/coordinate_utilities.py @@ -224,7 +224,8 @@ def get_row_col_geo_grid_points( lon_arr: np.ndarray, latitude_coordinate: VariableFromDmr, longitude_coordinate: VariableFromDmr, -) -> tuple[dict, dict]: + # ) -> tuple[dict, dict]: +) -> tuple[list, list]: """ This method is used to return two valid lat lon points from a 2D coordinate dataset. It gets the row and column of the latitude and longitude @@ -238,52 +239,35 @@ def get_row_col_geo_grid_points( ) geo_grid_col_points = [ - ( - lon_arr[geo_grid_indices_row[0][0]][geo_grid_indices_row[0][1]], - lat_arr[geo_grid_indices_row[0][0]][geo_grid_indices_row[0][1]], - ), - ( - lon_arr[geo_grid_indices_row[1][0]][geo_grid_indices_row[1][1]], - lat_arr[geo_grid_indices_row[1][0]][geo_grid_indices_row[1][1]], - ), + (lon_arr[row, col], lat_arr[row, col]) for row, col in geo_grid_indices_row ] - geo_grid_row_points = [ - ( - lon_arr[geo_grid_indices_col[0][0]][geo_grid_indices_col[0][1]], - lat_arr[geo_grid_indices_col[0][0]][geo_grid_indices_col[0][1]], - ), - ( - lon_arr[geo_grid_indices_col[1][0]][geo_grid_indices_col[1][1]], - lat_arr[geo_grid_indices_col[1][0]][geo_grid_indices_col[1][1]], - ), + (lon_arr[row, col], lat_arr[row, col]) for row, col in geo_grid_indices_col ] - return ( - { - ( - geo_grid_indices_col[0][0], - geo_grid_indices_col[0][1], - ): geo_grid_row_points[0], - ( - geo_grid_indices_col[1][0], - geo_grid_indices_col[1][1], - ): geo_grid_row_points[1], - }, - { - ( - geo_grid_indices_row[0][0], - geo_grid_indices_row[0][1], - ): geo_grid_col_points[0], - ( - geo_grid_indices_row[1][0], - geo_grid_indices_row[1][1], - ): geo_grid_col_points[1], - }, + return geo_grid_row_points, geo_grid_col_points + + +def get_x_y_values_from_geographic_points(points: list, crs: CRS) -> list[tuple]: + """Take an input list of (longitude, latitude) coordinates and project + those points to the target grid. Then return the x-y dimscales + + """ + + point_longitudes, point_latitudes = zip(*list(points)) + + from_geo_transformer = Transformer.from_crs(4326, crs) + points_x, points_y = ( # pylint: disable=unpacking-non-sequence + from_geo_transformer.transform(point_latitudes, point_longitudes) ) + x_y_points = list(zip(points_x, points_y)) + + return x_y_points -def get_x_y_values_from_geographic_points(points: Dict, crs: CRS) -> Dict[tuple, tuple]: +def get_x_y_values_from_geographic_points1( + points: Dict, crs: CRS +) -> Dict[tuple, tuple]: """Take an input list of (longitude, latitude) coordinates and project those points to the target grid. Then return the x-y dimscales @@ -389,27 +373,29 @@ def create_dimension_array_from_coordinates( prefetch_dataset, longitude_coordinate.full_name_path, ) - row_size, col_size = get_row_col_sizes_from_coordinate_datasets( lat_arr, lon_arr, ) - - geo_grid_col_points, geo_grid_row_points = get_row_col_geo_grid_points( + row_indices, col_indices = get_row_col_valid_indices_in_dataset( lat_arr, lon_arr, latitude_coordinate, longitude_coordinate ) + + geo_grid_row_points = [ + (lon_arr[row, col], lat_arr[row, col]) for row, col in col_indices + ] x_y_values1 = get_x_y_values_from_geographic_points(geo_grid_row_points, crs) - # y value changes across the row indices - row_indices = [list(x_y_values1.keys())[0][0], list(x_y_values1.keys())[1][0]] - y_values = [list(x_y_values1.values())[0][1], list(x_y_values1.values())[1][1]] + col_indices_for_x = [col_index[1] for col_index in col_indices] + x_values = [x_y_value[0] for x_y_value in list(x_y_values1)] + x_dim = get_dim_array_data_from_dimvalues(x_values, col_indices_for_x, col_size) + geo_grid_col_points = [ + (lon_arr[row, col], lat_arr[row, col]) for row, col in row_indices + ] x_y_values2 = get_x_y_values_from_geographic_points(geo_grid_col_points, crs) - # x value changes across the col indices - col_indices = [list(x_y_values2.keys())[0][1], list(x_y_values2.keys())[1][1]] - x_values = [list(x_y_values2.values())[0][0], list(x_y_values2.values())[1][0]] - - y_dim = get_dim_array_data_from_dimvalues(y_values, row_indices, row_size) - x_dim = get_dim_array_data_from_dimvalues(x_values, col_indices, col_size) + row_indices_for_y = [row_index[0] for row_index in row_indices] + y_values = [x_y_value[1] for x_y_value in list(x_y_values2)] + y_dim = get_dim_array_data_from_dimvalues(y_values, row_indices_for_y, row_size) projected_y, projected_x = tuple(projected_dimension_names) return {projected_y: y_dim, projected_x: x_dim} From 5b9105c762484dedbfd6288aec4c52ecf55cefa6 Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Fri, 15 Nov 2024 10:49:17 -0500 Subject: [PATCH 06/16] DAS-2236 - refactoring and cleanup --- tests/unit/test_coordinate_utilities.py | 41 +++++++++++++------------ 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/tests/unit/test_coordinate_utilities.py b/tests/unit/test_coordinate_utilities.py index 595918e..494b4ab 100644 --- a/tests/unit/test_coordinate_utilities.py +++ b/tests/unit/test_coordinate_utilities.py @@ -529,8 +529,8 @@ def test_get_row_col_geo_grid_points(self): with self.subTest('Get two sets of valid geo grid points from coordinates'): expected_geo_grid_points = ( - {(0, 0): (-179.3, 89.3), (0, 9): (178.4, 89.3)}, - {(0, 0): (-179.3, 89.3), (4, 0): (-179.3, -88.1)}, + [(-179.3, 89.3), (178.4, 89.3)], + [(-179.3, 89.3), (-179.3, -88.1)], ) actual_geo_grid_points = get_row_col_geo_grid_points( self.lat_arr, @@ -539,8 +539,8 @@ def test_get_row_col_geo_grid_points(self): self.varinfo.get_variable(self.longitude), ) - self.assertDictEqual(actual_geo_grid_points[0], expected_geo_grid_points[0]) - self.assertDictEqual(actual_geo_grid_points[1], expected_geo_grid_points[1]) + self.assertListEqual(actual_geo_grid_points[0], expected_geo_grid_points[0]) + self.assertListEqual(actual_geo_grid_points[1], expected_geo_grid_points[1]) with self.subTest('Get two valid geo grid points from reversed coordinates'): lon_arr_reversed = np.array( @@ -629,8 +629,8 @@ def test_get_row_col_geo_grid_points(self): ] ) expected_geo_grid_points_reversed = ( - {(0, 0): (-179.3, 89.3), (0, 9): (-179.3, -89.3)}, - {(0, 0): (-179.3, 89.3), (4, 0): (178.4, -89.3)}, + [(-179.3, 89.3), (-179.3, -89.3)], + [(-179.3, 89.3), (178.4, -89.3)], ) actual_geo_grid_points_reversed = get_row_col_geo_grid_points( lat_arr_reversed, @@ -639,10 +639,10 @@ def test_get_row_col_geo_grid_points(self): self.varinfo.get_variable(self.longitude), ) - self.assertDictEqual( + self.assertListEqual( actual_geo_grid_points_reversed[0], expected_geo_grid_points_reversed[0] ) - self.assertDictEqual( + self.assertListEqual( actual_geo_grid_points_reversed[1], expected_geo_grid_points_reversed[1] ) @@ -705,23 +705,24 @@ def test_get_x_y_values_from_geographic_points(self): ) with self.subTest('Get valid x-y points from coordinates in a row'): - two_col_points_in_a_row = {(0, 0): (-179.3, 89.3), (0, 9): (178.4, 89.3)} - expected_x_y_points = { - (0, 0): (-17299990.048985746, 7341677.255608977), - (0, 9): (17213152.396759935, 7341677.255608977), - } + two_col_points_in_a_row = [(-179.3, 89.3), (178.4, 89.3)] + + expected_x_y_points = [ + (-17299990.048985746, 7341677.255608977), + (17213152.396759935, 7341677.255608977), + ] actual_x_y_points = get_x_y_values_from_geographic_points( two_col_points_in_a_row, crs ) - self.assertDictEqual(actual_x_y_points, expected_x_y_points) + self.assertListEqual(actual_x_y_points, expected_x_y_points) with self.subTest('Get valid x-y points from coordinates in a col'): - two_row_points_in_a_col = {(0, 0): (-179.3, 89.3), (4, 0): (-179.3, -88.1)} - expected_x_y_points = { - (0, 0): (-17299990.048985746, 7341677.255608977), - (4, 0): (-17299990.048985746, -7338157.219843731), - } + two_row_points_in_a_col = (-179.3, 89.3), (-179.3, -88.1) + expected_x_y_points = [ + (-17299990.048985746, 7341677.255608977), + (-17299990.048985746, -7338157.219843731), + ] actual_x_y_points = get_x_y_values_from_geographic_points( two_row_points_in_a_col, crs ) - self.assertDictEqual(actual_x_y_points, expected_x_y_points) + self.assertListEqual(actual_x_y_points, expected_x_y_points) From f0e283a0211293a5f2f891db23af9bbd2ec6c4e1 Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Fri, 15 Nov 2024 12:04:01 -0500 Subject: [PATCH 07/16] DAS-2236 - updates based on PR feedback --- hoss/coordinate_utilities.py | 25 +++++++++++++++---------- tests/unit/test_coordinate_utilities.py | 12 ++++++------ 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/hoss/coordinate_utilities.py b/hoss/coordinate_utilities.py index ea32977..cfd29df 100644 --- a/hoss/coordinate_utilities.py +++ b/hoss/coordinate_utilities.py @@ -165,12 +165,14 @@ def get_coordinate_array( return coordinate_array -def get_dim_array_data_from_dimvalues( - dim_values: np.ndarray, dim_indices: np.ndarray, dim_size: int -) -> np.ndarray: +def get_1d_dim_array_data_from_dimvalues( + dim_values: np.ndarray, # 2 element 1D array [ , ] + dim_indices: np.ndarray, # 2 element 1D array [ , ] + dim_size: int, # all dim_indices values => 0, <= dim_size +) -> np.ndarray: # 1D of size = dim_size, with proper dimension array values """ - return a full dimension data array based on the 2 projected points and - grid size + return a full dimension data array based upon 2 valid projected values + within - indices given - and the full dimension size """ if (dim_indices[1] != dim_indices[0]) and (dim_values[1] != dim_values[0]): @@ -190,9 +192,12 @@ def get_valid_indices( lat_lon_array: np.ndarray, coordinate: VariableFromDmr ) -> np.ndarray: """ - Returns indices of a valid array without fill values if the fill - value is provided. If it is not provided, we check for valid values - for latitude and longitude + Returns an array of boolean values + - true, false - indicating a valid value (non-fill, within range) + for a given coordinate variable + - latitude or longitude - or + returns an empty ndarray of size (0,0) for any other variable. + Note a numpy mask is the opposite of a valids array. """ # get_attribute_value returns a value of type `str` @@ -387,7 +392,7 @@ def create_dimension_array_from_coordinates( x_y_values1 = get_x_y_values_from_geographic_points(geo_grid_row_points, crs) col_indices_for_x = [col_index[1] for col_index in col_indices] x_values = [x_y_value[0] for x_y_value in list(x_y_values1)] - x_dim = get_dim_array_data_from_dimvalues(x_values, col_indices_for_x, col_size) + x_dim = get_1d_dim_array_data_from_dimvalues(x_values, col_indices_for_x, col_size) geo_grid_col_points = [ (lon_arr[row, col], lat_arr[row, col]) for row, col in row_indices @@ -395,7 +400,7 @@ def create_dimension_array_from_coordinates( x_y_values2 = get_x_y_values_from_geographic_points(geo_grid_col_points, crs) row_indices_for_y = [row_index[0] for row_index in row_indices] y_values = [x_y_value[1] for x_y_value in list(x_y_values2)] - y_dim = get_dim_array_data_from_dimvalues(y_values, row_indices_for_y, row_size) + y_dim = get_1d_dim_array_data_from_dimvalues(y_values, row_indices_for_y, row_size) projected_y, projected_x = tuple(projected_dimension_names) return {projected_y: y_dim, projected_x: x_dim} diff --git a/tests/unit/test_coordinate_utilities.py b/tests/unit/test_coordinate_utilities.py index 494b4ab..37fca8a 100644 --- a/tests/unit/test_coordinate_utilities.py +++ b/tests/unit/test_coordinate_utilities.py @@ -12,9 +12,9 @@ from hoss.coordinate_utilities import ( any_absent_dimension_variables, + get_1d_dim_array_data_from_dimvalues, get_coordinate_array, get_coordinate_variables, - get_dim_array_data_from_dimvalues, get_max_x_spread_pts, get_projected_dimension_names, get_projected_dimension_names_from_coordinate_variables, @@ -125,7 +125,7 @@ def test_get_coordinate_variables(self): for expected_variable in expected_coordinate_variables[1]: self.assertIn(expected_variable, actual_coordinate_variables[1]) - def test_get_dim_array_data_from_dimvalues(self): + def test_get_1d_dim_array_data_from_dimvalues(self): """Ensure that the dimension scale generated from the provided dimension values are accurate for ascending and descending scales @@ -136,7 +136,7 @@ def test_get_dim_array_data_from_dimvalues(self): dim_indices_asc = np.array([0, 1]) dim_size_asc = 12 expected_dim_asc = np.array([2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24]) - dim_array_values = get_dim_array_data_from_dimvalues( + dim_array_values = get_1d_dim_array_data_from_dimvalues( dim_values_asc, dim_indices_asc, dim_size_asc ) self.assertTrue(np.array_equal(dim_array_values, expected_dim_asc)) @@ -147,7 +147,7 @@ def test_get_dim_array_data_from_dimvalues(self): dim_size_desc = 10 expected_dim_desc = np.array([120, 110, 100, 90, 80, 70, 60, 50, 40, 30]) - dim_array_values = get_dim_array_data_from_dimvalues( + dim_array_values = get_1d_dim_array_data_from_dimvalues( dim_values_desc, dim_indices_desc, dim_size_desc ) self.assertTrue(np.array_equal(dim_array_values, expected_dim_desc)) @@ -157,7 +157,7 @@ def test_get_dim_array_data_from_dimvalues(self): dim_indices_asc = np.array([0, 1]) dim_size_asc = 12 with self.assertRaises(InvalidCoordinateDataset) as context: - get_dim_array_data_from_dimvalues( + get_1d_dim_array_data_from_dimvalues( dim_values_invalid, dim_indices_asc, dim_size_asc ) self.assertEqual( @@ -171,7 +171,7 @@ def test_get_dim_array_data_from_dimvalues(self): dim_indices_invalid = np.array([5, 5]) dim_size_desc = 10 with self.assertRaises(InvalidCoordinateDataset) as context: - get_dim_array_data_from_dimvalues( + get_1d_dim_array_data_from_dimvalues( dim_values_desc, dim_indices_invalid, dim_size_desc ) self.assertEqual( From 0d6c01b70791b9f4eff1d79fd069ecbcd03d6144 Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Fri, 15 Nov 2024 21:13:00 -0500 Subject: [PATCH 08/16] DAS-2236 - comment update based on PR feedback --- hoss/coordinate_utilities.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hoss/coordinate_utilities.py b/hoss/coordinate_utilities.py index cfd29df..74142d9 100644 --- a/hoss/coordinate_utilities.py +++ b/hoss/coordinate_utilities.py @@ -253,12 +253,14 @@ def get_row_col_geo_grid_points( return geo_grid_row_points, geo_grid_col_points -def get_x_y_values_from_geographic_points(points: list, crs: CRS) -> list[tuple]: +def get_x_y_values_from_geographic_points( + points: list[tuple], # list of data points as tuple in (lon, lat) order + crs: CRS, # CRS object from PyProj +) -> list[tuple]: # list of X-Y points as tuple in (X, Y) order """Take an input list of (longitude, latitude) coordinates and project those points to the target grid. Then return the x-y dimscales """ - point_longitudes, point_latitudes = zip(*list(points)) from_geo_transformer = Transformer.from_crs(4326, crs) From 1c8751f9df41672b1414d98f8d0ae3af56f2a362 Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Mon, 18 Nov 2024 16:32:59 -0500 Subject: [PATCH 09/16] DAS-2236 - updates based on David's PR feedback --- hoss/coordinate_utilities.py | 85 ++++++++++--------------- tests/unit/test_coordinate_utilities.py | 52 ++++++++------- 2 files changed, 64 insertions(+), 73 deletions(-) diff --git a/hoss/coordinate_utilities.py b/hoss/coordinate_utilities.py index 74142d9..e496b37 100644 --- a/hoss/coordinate_utilities.py +++ b/hoss/coordinate_utilities.py @@ -229,28 +229,31 @@ def get_row_col_geo_grid_points( lon_arr: np.ndarray, latitude_coordinate: VariableFromDmr, longitude_coordinate: VariableFromDmr, - # ) -> tuple[dict, dict]: -) -> tuple[list, list]: +) -> tuple[list, list, list, list]: """ - This method is used to return two valid lat lon points from a 2D - coordinate dataset. It gets the row and column of the latitude and longitude - arrays to get two valid points. This does a check for fill values and - This method does not go down to the next row and col. if the selected row and - column all have fills, it will raise an exception in those cases. + This method is used to return two sets of valid indices and the + corresponding two sets of row and col lat lon points from 2D + coordinate datasets. """ - geo_grid_indices_row, geo_grid_indices_col = get_row_col_valid_indices_in_dataset( + row_indices, col_indices = get_valid_row_col_pairs( lat_arr, lon_arr, latitude_coordinate, longitude_coordinate ) geo_grid_col_points = [ - (lon_arr[row, col], lat_arr[row, col]) for row, col in geo_grid_indices_row + (lon_arr[row, col], lat_arr[row, col]) for row, col in row_indices ] + geo_grid_row_points = [ - (lon_arr[row, col], lat_arr[row, col]) for row, col in geo_grid_indices_col + (lon_arr[row, col], lat_arr[row, col]) for row, col in col_indices ] - return geo_grid_row_points, geo_grid_col_points + return ( + row_indices, + col_indices, + geo_grid_row_points, + geo_grid_col_points, + ) def get_x_y_values_from_geographic_points( @@ -272,37 +275,18 @@ def get_x_y_values_from_geographic_points( return x_y_points -def get_x_y_values_from_geographic_points1( - points: Dict, crs: CRS -) -> Dict[tuple, tuple]: - """Take an input list of (longitude, latitude) coordinates and project - those points to the target grid. Then return the x-y dimscales - - """ - - point_longitudes, point_latitudes = zip(*list(points.values())) - - from_geo_transformer = Transformer.from_crs(4326, crs) - points_x, points_y = ( # pylint: disable=unpacking-non-sequence - from_geo_transformer.transform(point_latitudes, point_longitudes) - ) - - x_y_points = {} - for index, point_x, point_y in zip(list(points.keys()), points_x, points_y): - x_y_points.update({index: (point_x, point_y)}) - - return x_y_points - - -def get_row_col_valid_indices_in_dataset( +def get_valid_row_col_pairs( lat_arr: np.ndarray, lon_arr: np.ndarray, lat_coordinate: VariableFromDmr, lon_coordinate: VariableFromDmr, ) -> tuple[list, list]: """ - This function gets valid indices across rows and columns of - both the latitude and longitude datasets + This function finds a set of indices maximally spread across + a row, and the set maximally spread across a column, with the + indices being valid in both the latitude and longitude datasets. + When interpolating between these points, the maximal spread + ensures the greatest interpolation accuracy. """ valid_lat_lon_mask = np.logical_and( get_valid_indices(lat_arr, lat_coordinate), @@ -316,8 +300,8 @@ def get_row_col_valid_indices_in_dataset( # and then fixing the results from [x, y] to [y, x]. max_y_spread_trsp = get_max_x_spread_pts(np.transpose(~valid_lat_lon_mask)) max_y_spread_pts = [ - np.flip(max_y_spread_trsp[0]), - np.flip(max_y_spread_trsp[1]), + list(np.flip(max_y_spread_trsp[0])), + list(np.flip(max_y_spread_trsp[1])), ] return max_y_spread_pts, max_x_spread_pts @@ -384,24 +368,23 @@ def create_dimension_array_from_coordinates( lat_arr, lon_arr, ) - row_indices, col_indices = get_row_col_valid_indices_in_dataset( - lat_arr, lon_arr, latitude_coordinate, longitude_coordinate - ) - geo_grid_row_points = [ - (lon_arr[row, col], lat_arr[row, col]) for row, col in col_indices - ] + row_indices, col_indices, geo_grid_row_points, geo_grid_col_points = ( + get_row_col_geo_grid_points( + lat_arr, + lon_arr, + latitude_coordinate, + longitude_coordinate, + ) + ) x_y_values1 = get_x_y_values_from_geographic_points(geo_grid_row_points, crs) - col_indices_for_x = [col_index[1] for col_index in col_indices] - x_values = [x_y_value[0] for x_y_value in list(x_y_values1)] + col_indices_for_x = np.transpose(col_indices)[1] + x_values = np.transpose(x_y_values1)[0] x_dim = get_1d_dim_array_data_from_dimvalues(x_values, col_indices_for_x, col_size) - geo_grid_col_points = [ - (lon_arr[row, col], lat_arr[row, col]) for row, col in row_indices - ] x_y_values2 = get_x_y_values_from_geographic_points(geo_grid_col_points, crs) - row_indices_for_y = [row_index[0] for row_index in row_indices] - y_values = [x_y_value[1] for x_y_value in list(x_y_values2)] + row_indices_for_y = np.transpose(row_indices)[0] + y_values = np.transpose(x_y_values2)[1] y_dim = get_1d_dim_array_data_from_dimvalues(y_values, row_indices_for_y, row_size) projected_y, projected_x = tuple(projected_dimension_names) diff --git a/tests/unit/test_coordinate_utilities.py b/tests/unit/test_coordinate_utilities.py index 37fca8a..99b8bd1 100644 --- a/tests/unit/test_coordinate_utilities.py +++ b/tests/unit/test_coordinate_utilities.py @@ -20,8 +20,8 @@ get_projected_dimension_names_from_coordinate_variables, get_row_col_geo_grid_points, get_row_col_sizes_from_coordinate_datasets, - get_row_col_valid_indices_in_dataset, get_valid_indices, + get_valid_row_col_pairs, get_variables_with_anonymous_dims, get_x_y_values_from_geographic_points, ) @@ -528,19 +528,24 @@ def test_get_row_col_geo_grid_points(self): """ with self.subTest('Get two sets of valid geo grid points from coordinates'): + expected_row_indices = [[0, 0], [4, 0]] + expected_col_indices = [[0, 0], [0, 9]] expected_geo_grid_points = ( [(-179.3, 89.3), (178.4, 89.3)], [(-179.3, 89.3), (-179.3, -88.1)], ) - actual_geo_grid_points = get_row_col_geo_grid_points( - self.lat_arr, - self.lon_arr, - self.varinfo.get_variable(self.latitude), - self.varinfo.get_variable(self.longitude), + row_indices, col_indices, row_points, col_points = ( + get_row_col_geo_grid_points( + self.lat_arr, + self.lon_arr, + self.varinfo.get_variable(self.latitude), + self.varinfo.get_variable(self.longitude), + ) ) - - self.assertListEqual(actual_geo_grid_points[0], expected_geo_grid_points[0]) - self.assertListEqual(actual_geo_grid_points[1], expected_geo_grid_points[1]) + self.assertListEqual(row_indices, expected_row_indices) + self.assertListEqual(col_indices, expected_col_indices) + self.assertListEqual(row_points, expected_geo_grid_points[0]) + self.assertListEqual(col_points, expected_geo_grid_points[1]) with self.subTest('Get two valid geo grid points from reversed coordinates'): lon_arr_reversed = np.array( @@ -628,24 +633,27 @@ def test_get_row_col_geo_grid_points(self): ], ] ) + expected_row_indices = [[0, 0], [4, 0]] + expected_col_indices = [[0, 0], [0, 9]] expected_geo_grid_points_reversed = ( [(-179.3, 89.3), (-179.3, -89.3)], [(-179.3, 89.3), (178.4, -89.3)], ) - actual_geo_grid_points_reversed = get_row_col_geo_grid_points( - lat_arr_reversed, - lon_arr_reversed, - self.varinfo.get_variable(self.latitude), - self.varinfo.get_variable(self.longitude), - ) - self.assertListEqual( - actual_geo_grid_points_reversed[0], expected_geo_grid_points_reversed[0] - ) - self.assertListEqual( - actual_geo_grid_points_reversed[1], expected_geo_grid_points_reversed[1] + row_indices, col_indices, row_points, col_points = ( + get_row_col_geo_grid_points( + lat_arr_reversed, + lon_arr_reversed, + self.varinfo.get_variable(self.latitude), + self.varinfo.get_variable(self.longitude), + ) ) + self.assertListEqual(row_indices, expected_row_indices) + self.assertListEqual(col_indices, expected_col_indices) + self.assertListEqual(row_points, expected_geo_grid_points_reversed[0]) + self.assertListEqual(col_points, expected_geo_grid_points_reversed[1]) + def test_get_max_x_spread_pts(self): """Ensure that two valid sets of indices are returned by the function with a masked dataset as input @@ -669,7 +677,7 @@ def test_get_max_x_spread_pts(self): self.assertTrue(actual_indices[0] == expected_indices[0]) self.assertTrue(actual_indices[1] == expected_indices[1]) - def test_get_row_col_valid_indices_in_dataset(self): + def test_get_valid_row_col_pairs(self): """Ensure that two sets of valid indices are returned by the method with a set of lat/lon coordinates as input @@ -679,7 +687,7 @@ def test_get_row_col_valid_indices_in_dataset(self): [(0, 0), (0, 9)], [(0, 0), (4, 0)], ) - actual_grid_indices = get_row_col_valid_indices_in_dataset( + actual_grid_indices = get_valid_row_col_pairs( self.lat_arr, self.lon_arr, self.varinfo.get_variable(self.latitude), From 1c2922126c8ebe8472e8e1bcad97a058d69100e2 Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Tue, 19 Nov 2024 08:26:40 -0500 Subject: [PATCH 10/16] DAS-2236 - updates based on PR feedback --- hoss/coordinate_utilities.py | 67 ++++++++++++------------- hoss/dimension_utilities.py | 29 +++++++---- hoss/exceptions.py | 18 ++++++- hoss/projection_utilities.py | 5 ++ hoss/spatial.py | 6 +-- tests/unit/test_coordinate_utilities.py | 9 ++-- tests/unit/test_projection_utilities.py | 4 +- 7 files changed, 82 insertions(+), 56 deletions(-) diff --git a/hoss/coordinate_utilities.py b/hoss/coordinate_utilities.py index e496b37..ab2fb77 100644 --- a/hoss/coordinate_utilities.py +++ b/hoss/coordinate_utilities.py @@ -3,8 +3,6 @@ coordinate variable data to projected x/y dimension values """ -from typing import Dict - import numpy as np from netCDF4 import Dataset from pyproj import CRS, Transformer @@ -12,6 +10,7 @@ from hoss.exceptions import ( IncompatibleCoordinateVariables, + InvalidCoordinateData, InvalidCoordinateDataset, MissingCoordinateVariable, MissingVariable, @@ -97,11 +96,12 @@ def get_coordinate_variables( varinfo: VarInfoFromDmr, requested_variables: list, ) -> tuple[list, list]: - """This function returns latitude and longitude variables listed in the - CF-Convention coordinates metadata attribute. It returns them in a specific - order [latitude, longitude]" + """This function returns latitude and longitude variable names from + latitude and longitude variables listed in the CF-Convention coordinates + metadata attribute. It returns them in a specific + order [latitude_name, longitude_name]" """ - # varinfo returns a set and not a list + coordinate_variables = varinfo.get_references_for_attribute( requested_variables, 'coordinates' ) @@ -166,13 +166,16 @@ def get_coordinate_array( def get_1d_dim_array_data_from_dimvalues( - dim_values: np.ndarray, # 2 element 1D array [ , ] - dim_indices: np.ndarray, # 2 element 1D array [ , ] - dim_size: int, # all dim_indices values => 0, <= dim_size -) -> np.ndarray: # 1D of size = dim_size, with proper dimension array values + dim_values: np.ndarray, + dim_indices: np.ndarray, + dim_size: int, +) -> np.ndarray: """ return a full dimension data array based upon 2 valid projected values - within - indices given - and the full dimension size + (x and y) given in dim_values which are within the indices given in + dim_indices and the full dimension size provided in dim_size. The + dim_indices need to be between 0 and less than the dim_size. + returns a 1D array of size = dim_size with proper dimension array values """ if (dim_indices[1] != dim_indices[0]) and (dim_values[1] != dim_values[0]): @@ -180,7 +183,7 @@ def get_1d_dim_array_data_from_dimvalues( dim_indices[1] - dim_indices[0] ) else: - raise InvalidCoordinateDataset(dim_values[0], dim_indices[0]) + raise InvalidCoordinateData(dim_values[0], dim_indices[0]) dim_min = dim_values[0] - (dim_resolution * dim_indices[0]) dim_max = dim_values[1] + (dim_resolution * (dim_size - 1 - dim_indices[1])) @@ -194,10 +197,11 @@ def get_valid_indices( """ Returns an array of boolean values - true, false - indicating a valid value (non-fill, within range) - for a given coordinate variable + for a given coordinate variable. A value of True means the + value is valid - latitude or longitude - or returns an empty ndarray of size (0,0) for any other variable. - Note a numpy mask is the opposite of a valids array. + """ # get_attribute_value returns a value of type `str` @@ -219,9 +223,9 @@ def get_valid_indices( np.logical_and(lat_lon_array >= -90.0, lat_lon_array <= 90.0), ) else: - valid_indices = np.empty((0, 0)) + raise InvalidCoordinateDataset(coordinate.full_name_path) - return valid_indices # throw an exception + return valid_indices def get_row_col_geo_grid_points( @@ -270,9 +274,8 @@ def get_x_y_values_from_geographic_points( points_x, points_y = ( # pylint: disable=unpacking-non-sequence from_geo_transformer.transform(point_latitudes, point_longitudes) ) - x_y_points = list(zip(points_x, points_y)) - return x_y_points + return list(zip(points_x, points_y)) def get_valid_row_col_pairs( @@ -308,23 +311,19 @@ def get_valid_row_col_pairs( def get_max_x_spread_pts( - valid_mask: np.ndarray, # Numpy Mask Array, e.g., invalid latitudes & longitudes -) -> list[list]: # 2 points by indices, [[y_ind, x_ind], [y_ind, x_ind] + valid_mask: np.ndarray, +) -> list[list]: """ - # This function returns two data points by x, y indices that are spread farthest - # from each other in the same row, i.e., have the greatest delta-x value - and - # are valid data points from the valid_mask array passed in. The input array - # must be a 2D Numpy mask array providing the valid data points, e.g., filtering - # out fill values and out-of-range values. + This function returns two data points by x, y indices that are spread farthest + from each other in the same row, i.e., have the greatest delta-x value - and + are valid data points from the valid_mask array passed in. The input array + must be a 2D Numpy mask array providing the valid data points, e.g., filtering + out fill values and out-of-range values. + - input is Numpy Mask Array, e.g., invalid latitudes & longitudes + - returns 2 points by indices, [[y_ind, x_ind], [y_ind, x_ind] """ # fill a sample array with x-index values, x_ind[i, j] = j - x_ind = np.array( - [ - # [j for j in range(valid_mask.shape[1])] - list(range(valid_mask.shape[1])) - for i in range(valid_mask.shape[0]) - ] - ) + x_ind = np.indices((valid_mask.shape[0], valid_mask.shape[1]))[1] # mask x_ind to hide the invalid data points valid_x_ind = np.ma.array(x_ind, mask=valid_mask) @@ -342,13 +341,13 @@ def get_max_x_spread_pts( return [[max_x_spread_row, min_x_ind], [max_x_spread_row, max_x_ind]] -def create_dimension_array_from_coordinates( +def create_dimension_arrays_from_coordinates( prefetch_dataset: Dataset, latitude_coordinate: VariableFromDmr, longitude_coordinate: VariableFromDmr, crs: CRS, projected_dimension_names: list, -) -> Dict[str, np.ndarray]: +) -> dict[str, np.ndarray]: """Generate artificial 1D dimensions scales for each 2D dimension or coordinate variable. 1) Get 2 valid geo grid points diff --git a/hoss/dimension_utilities.py b/hoss/dimension_utilities.py index 432941a..8e759bf 100644 --- a/hoss/dimension_utilities.py +++ b/hoss/dimension_utilities.py @@ -81,8 +81,9 @@ def get_prefetch_variables( """ prefetch_variables = varinfo.get_required_dimensions(required_variables) if prefetch_variables: - bounds = varinfo.get_references_for_attribute(prefetch_variables, 'bounds') - prefetch_variables.update(bounds) + prefetch_variables.update( + varinfo.get_references_for_attribute(prefetch_variables, 'bounds') + ) else: latitude_coordinates, longitude_coordinates = get_coordinate_variables( varinfo, required_variables @@ -420,20 +421,23 @@ def add_index_range( the antimeridian or Prime Meridian) will have a minimum index greater than the maximum index. In this case the full dimension range should be requested, as the related values will be masked before returning the - output to the user. When the dimensions are not available, the coordinate - variables are used to calculate the projected dimensions. + output to the user. When a variable does not have named dimensions, + the index_ranges cache is checked for dimensions derived from the + coordinates CF-Conventions metadata attribute. """ variable = varinfo.get_variable(variable_name) range_strings = [] + if variable.dimensions: - range_strings = get_range_strings(variable.dimensions, index_ranges) + variable_dimensions = variable.dimensions else: - override_dimensions = get_projected_dimension_names_from_coordinate_variables( + # Anonymous dimensions, so check for dimension derived from coordinates: + variable_dimensions = get_projected_dimension_names_from_coordinate_variables( varinfo, variable_name ) - if override_dimensions: - range_strings = get_range_strings(override_dimensions, index_ranges) + + range_strings = get_range_strings(variable_dimensions, index_ranges) if all(range_string == '[]' for range_string in range_strings): indices_string = '' @@ -446,8 +450,13 @@ def get_range_strings( variable_dimensions: list, index_ranges: IndexRanges, ) -> list: - """Calculates the index ranges for each dimension of the variable - and returns the list of index ranges + """Retrieves index ranges which is a list of string elements + [min:max] from cache. If there is not an index range in the + cache for a dimension, the returned string is []. A bounding box + can cross the longitudinal edge of the grid. In those cases the + minimum dimension index is greater than the maximum dimension + index and this function will return []. HOSS will request the + full dimension range from OPeNDAP when the index range is []. """ range_strings = [] for dimension in variable_dimensions: diff --git a/hoss/exceptions.py b/hoss/exceptions.py index 26ba3f8..6cd5002 100644 --- a/hoss/exceptions.py +++ b/hoss/exceptions.py @@ -165,7 +165,7 @@ def __init__(self, longitude_shape, latitude_shape): ) -class InvalidCoordinateDataset(CustomError): +class InvalidCoordinateData(CustomError): """This exception is raised when the two values passed to the function computing the resolution are equal. This could occur when there are too many fill values and distinct valid @@ -175,12 +175,26 @@ class InvalidCoordinateDataset(CustomError): def __init__(self, dim_value, dim_index): super().__init__( - 'InvalidCoordinateDataset', + 'InvalidCoordinateData', 'Cannot compute the dimension resolution for ' f'dim_value: "{dim_value}" dim_index: "{dim_index}"', ) +class InvalidCoordinateDataset(CustomError): + """This exception is raised when there are too + many fill values and two distinct valid indices + could not be obtained + + """ + + def __init__(self, coordinate_name): + super().__init__( + 'InvalidCoordinateDataset', + f'Cannot get valid indices for {coordinate_name}', + ) + + class UnsupportedShapeFileFormat(CustomError): """This exception is raised when the shape file included in the input Harmony message is not GeoJSON. diff --git a/hoss/projection_utilities.py b/hoss/projection_utilities.py index d84c64d..cc21f9a 100644 --- a/hoss/projection_utilities.py +++ b/hoss/projection_utilities.py @@ -49,6 +49,11 @@ def get_variable_crs(variable: str, varinfo: VarInfoFromDmr) -> CRS: another are stored in the `Variable.references` dictionary attribute as sets. There should only be one reference in the `grid_mapping` attribute value, so the first element of the set is retrieved. + If the grid mapping variable, as referred to in the grid_mapping + CF-Convention metadata attribute, does not exist in the file then + the earthdata-varinfo configuration file is checked, as it may + contain metadata overrides specified for that non-existent variable + name. """ grid_mapping = next( diff --git a/hoss/spatial.py b/hoss/spatial.py index ad72f42..917c526 100644 --- a/hoss/spatial.py +++ b/hoss/spatial.py @@ -36,7 +36,7 @@ get_shape_file_geojson, ) from hoss.coordinate_utilities import ( - create_dimension_array_from_coordinates, + create_dimension_arrays_from_coordinates, get_coordinate_variables, get_projected_dimension_names_from_coordinate_variables, get_variables_with_anonymous_dims, @@ -84,7 +84,7 @@ def get_spatial_index_ranges( around the exterior of the user-defined GeoJSON shape, to ensure the correct extents are derived. - if geographic and projected dimensions are not specified in the granule, + If geographic and projected dimensions are not specified in the granule, the coordinate datasets are used to calculate the x-y dimensions and the index ranges are calculated similar to a projected grid. @@ -251,7 +251,7 @@ def get_x_y_index_ranges_from_coordinates( varinfo, non_spatial_variable ) - dimension_arrays = create_dimension_array_from_coordinates( + dimension_arrays = create_dimension_arrays_from_coordinates( prefetch_coordinate_datasets, latitude_coordinate, longitude_coordinate, diff --git a/tests/unit/test_coordinate_utilities.py b/tests/unit/test_coordinate_utilities.py index 99b8bd1..e3a574f 100644 --- a/tests/unit/test_coordinate_utilities.py +++ b/tests/unit/test_coordinate_utilities.py @@ -27,7 +27,7 @@ ) from hoss.exceptions import ( IncompatibleCoordinateVariables, - InvalidCoordinateDataset, + InvalidCoordinateData, InvalidCoordinateVariable, MissingCoordinateVariable, MissingVariable, @@ -156,7 +156,7 @@ def test_get_1d_dim_array_data_from_dimvalues(self): dim_values_invalid = np.array([2, 2]) dim_indices_asc = np.array([0, 1]) dim_size_asc = 12 - with self.assertRaises(InvalidCoordinateDataset) as context: + with self.assertRaises(InvalidCoordinateData) as context: get_1d_dim_array_data_from_dimvalues( dim_values_invalid, dim_indices_asc, dim_size_asc ) @@ -170,7 +170,7 @@ def test_get_1d_dim_array_data_from_dimvalues(self): dim_values_desc = np.array([100, 70]) dim_indices_invalid = np.array([5, 5]) dim_size_desc = 10 - with self.assertRaises(InvalidCoordinateDataset) as context: + with self.assertRaises(InvalidCoordinateData) as context: get_1d_dim_array_data_from_dimvalues( dim_values_desc, dim_indices_invalid, dim_size_desc ) @@ -396,9 +396,6 @@ def test_get_valid_indices(self): ascending or descending. """ - # expected_valid_indices_lat_arr_with_fill = np.array([1, 2, 3, 4]) - # expected_valid_indices_lon_arr_with_fill = np.array([0, 1, 2, 6, 7, 8, 9]) - # expected_valid_indices_lat_arr_over_range = np.array([0, 1, 2]) expected_valid_indices_lon_arr_over_range = np.array([0, 1, 2, 6, 7, 8, 9]) fill_array = np.array([-9999.0, -9999.0, -9999.0, -9999.0]) diff --git a/tests/unit/test_projection_utilities.py b/tests/unit/test_projection_utilities.py index 7a08135..164eb9e 100644 --- a/tests/unit/test_projection_utilities.py +++ b/tests/unit/test_projection_utilities.py @@ -194,7 +194,9 @@ def test_get_variable_crs(self): 'present in granule .dmr file.', ) - with self.subTest('grid_mapping override with json configuration'): + with self.subTest( + 'attributes for missing grid_mapping retrieved from earthdata-varinfo configuration file' + ): smap_varinfo = VarInfoFromDmr( 'tests/data/SC_SPL3SMP_008.dmr', 'SPL3SMP', From 2f07d1a475b2033b4ee78de17d353d22729addc1 Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Tue, 19 Nov 2024 18:11:08 -0500 Subject: [PATCH 11/16] DAS-2236 - updated functions and tests based on PR feedback --- hoss/coordinate_utilities.py | 113 +++++++-------- hoss/exceptions.py | 7 +- tests/unit/test_coordinate_utilities.py | 176 ++++++------------------ 3 files changed, 100 insertions(+), 196 deletions(-) diff --git a/hoss/coordinate_utilities.py b/hoss/coordinate_utilities.py index ab2fb77..35b2ce6 100644 --- a/hoss/coordinate_utilities.py +++ b/hoss/coordinate_utilities.py @@ -228,44 +228,13 @@ def get_valid_indices( return valid_indices -def get_row_col_geo_grid_points( - lat_arr: np.ndarray, - lon_arr: np.ndarray, - latitude_coordinate: VariableFromDmr, - longitude_coordinate: VariableFromDmr, -) -> tuple[list, list, list, list]: - """ - This method is used to return two sets of valid indices and the - corresponding two sets of row and col lat lon points from 2D - coordinate datasets. - """ - - row_indices, col_indices = get_valid_row_col_pairs( - lat_arr, lon_arr, latitude_coordinate, longitude_coordinate - ) - - geo_grid_col_points = [ - (lon_arr[row, col], lat_arr[row, col]) for row, col in row_indices - ] - - geo_grid_row_points = [ - (lon_arr[row, col], lat_arr[row, col]) for row, col in col_indices - ] - - return ( - row_indices, - col_indices, - geo_grid_row_points, - geo_grid_col_points, - ) - - def get_x_y_values_from_geographic_points( - points: list[tuple], # list of data points as tuple in (lon, lat) order - crs: CRS, # CRS object from PyProj -) -> list[tuple]: # list of X-Y points as tuple in (X, Y) order - """Take an input list of (longitude, latitude) coordinates and project - those points to the target grid. Then return the x-y dimscales + points: list[tuple], + crs: CRS, +) -> list[tuple]: + """Takes an input list of (longitude, latitude) coordinates and CRS object + from PyProj and projects those points to the target grid. Then returns + the list of x-y points as tuple in (x,y) order """ point_longitudes, point_latitudes = zip(*list(points)) @@ -311,21 +280,23 @@ def get_valid_row_col_pairs( def get_max_x_spread_pts( - valid_mask: np.ndarray, + valid_geospatial_mask: np.ndarray, ) -> list[list]: """ This function returns two data points by x, y indices that are spread farthest from each other in the same row, i.e., have the greatest delta-x value - and - are valid data points from the valid_mask array passed in. The input array + are valid data points from the valid_geospatial_mask array passed in. The input array must be a 2D Numpy mask array providing the valid data points, e.g., filtering out fill values and out-of-range values. - input is Numpy Mask Array, e.g., invalid latitudes & longitudes - returns 2 points by indices, [[y_ind, x_ind], [y_ind, x_ind] """ # fill a sample array with x-index values, x_ind[i, j] = j - x_ind = np.indices((valid_mask.shape[0], valid_mask.shape[1]))[1] + x_ind = np.indices( + (valid_geospatial_mask.shape[0], valid_geospatial_mask.shape[1]) + )[1] # mask x_ind to hide the invalid data points - valid_x_ind = np.ma.array(x_ind, mask=valid_mask) + valid_x_ind = np.ma.array(x_ind, mask=valid_geospatial_mask) # ptp (peak-to-peak) finds the greatest delta-x value amongst valid points # for each row. Result is 1D @@ -338,6 +309,10 @@ def get_max_x_spread_pts( min_x_ind = np.min(valid_x_ind[max_x_spread_row]) max_x_ind = np.max(valid_x_ind[max_x_spread_row]) + # There is just one valid point + if min_x_ind == max_x_ind: + raise InvalidCoordinateData(x_ind_spread, min_x_ind) + return [[max_x_spread_row, min_x_ind], [max_x_spread_row, max_x_ind]] @@ -368,23 +343,49 @@ def create_dimension_arrays_from_coordinates( lon_arr, ) - row_indices, col_indices, geo_grid_row_points, geo_grid_col_points = ( - get_row_col_geo_grid_points( - lat_arr, - lon_arr, - latitude_coordinate, - longitude_coordinate, - ) + row_indices, col_indices = get_valid_row_col_pairs( + lat_arr, lon_arr, latitude_coordinate, longitude_coordinate + ) + + y_dim = get_dimension_array_from_geo_points( + lat_arr, lon_arr, crs, row_indices, row_size, True ) - x_y_values1 = get_x_y_values_from_geographic_points(geo_grid_row_points, crs) - col_indices_for_x = np.transpose(col_indices)[1] - x_values = np.transpose(x_y_values1)[0] - x_dim = get_1d_dim_array_data_from_dimvalues(x_values, col_indices_for_x, col_size) - x_y_values2 = get_x_y_values_from_geographic_points(geo_grid_col_points, crs) - row_indices_for_y = np.transpose(row_indices)[0] - y_values = np.transpose(x_y_values2)[1] - y_dim = get_1d_dim_array_data_from_dimvalues(y_values, row_indices_for_y, row_size) + x_dim = get_dimension_array_from_geo_points( + lat_arr, lon_arr, crs, col_indices, col_size, False + ) projected_y, projected_x = tuple(projected_dimension_names) return {projected_y: y_dim, projected_x: x_dim} + + +def get_dimension_array_from_geo_points( + lat_arr: np.ndarray, + lon_arr: np.ndarray, + crs: CRS, + dimension_indices: list, + dimension_size: int, + is_row=True, +) -> np.ndarray: + """This function uses the list of lat/lon points corresponding + to a list of array indices and reprojects it with the CRS + provided and scales the x/y values to a dimension array with the dimension + size provided + + """ + if is_row: + index_for_dimension = 0 + x_or_y_index = 1 + else: + index_for_dimension = 1 + x_or_y_index = 0 + + geo_grid_points = [ + (lon_arr[row, col], lat_arr[row, col]) for row, col in dimension_indices + ] + x_y_values = get_x_y_values_from_geographic_points(geo_grid_points, crs) + indices_for_dimension = np.transpose(dimension_indices)[index_for_dimension] + dimension_values = np.transpose(x_y_values)[x_or_y_index] + return get_1d_dim_array_data_from_dimvalues( + dimension_values, indices_for_dimension, dimension_size + ) diff --git a/hoss/exceptions.py b/hoss/exceptions.py index 6cd5002..a7b3fab 100644 --- a/hoss/exceptions.py +++ b/hoss/exceptions.py @@ -166,9 +166,8 @@ def __init__(self, longitude_shape, latitude_shape): class InvalidCoordinateData(CustomError): - """This exception is raised when the two values passed to - the function computing the resolution are equal. This could - occur when there are too many fill values and distinct valid + """This exception is raised when the data does not contain at least. + two valid points. This could occur when there are too many fill values and distinct valid indices could not be obtained """ @@ -176,7 +175,7 @@ class InvalidCoordinateData(CustomError): def __init__(self, dim_value, dim_index): super().__init__( 'InvalidCoordinateData', - 'Cannot compute the dimension resolution for ' + 'The data does not have at least two valid values ' f'dim_value: "{dim_value}" dim_index: "{dim_index}"', ) diff --git a/tests/unit/test_coordinate_utilities.py b/tests/unit/test_coordinate_utilities.py index e3a574f..c739b17 100644 --- a/tests/unit/test_coordinate_utilities.py +++ b/tests/unit/test_coordinate_utilities.py @@ -15,10 +15,10 @@ get_1d_dim_array_data_from_dimvalues, get_coordinate_array, get_coordinate_variables, + get_dimension_array_from_geo_points, get_max_x_spread_pts, get_projected_dimension_names, get_projected_dimension_names_from_coordinate_variables, - get_row_col_geo_grid_points, get_row_col_sizes_from_coordinate_datasets, get_valid_indices, get_valid_row_col_pairs, @@ -162,7 +162,7 @@ def test_get_1d_dim_array_data_from_dimvalues(self): ) self.assertEqual( context.exception.message, - 'Cannot compute the dimension resolution for ' + 'The data does not have at least two valid values ' 'dim_value: "2" dim_index: "0"', ) @@ -176,7 +176,7 @@ def test_get_1d_dim_array_data_from_dimvalues(self): ) self.assertEqual( context.exception.message, - 'Cannot compute the dimension resolution for ' + 'The data does not have at least two valid values ' 'dim_value: "100" dim_index: "5"', ) @@ -518,139 +518,6 @@ def test_any_absent_dimension_variables(self): ) self.assertFalse(variable_has_fake_dims) - def test_get_row_col_geo_grid_points(self): - """Ensure that two valid lat/lon points returned by the method - with a set of lat/lon coordinates as input - - """ - - with self.subTest('Get two sets of valid geo grid points from coordinates'): - expected_row_indices = [[0, 0], [4, 0]] - expected_col_indices = [[0, 0], [0, 9]] - expected_geo_grid_points = ( - [(-179.3, 89.3), (178.4, 89.3)], - [(-179.3, 89.3), (-179.3, -88.1)], - ) - row_indices, col_indices, row_points, col_points = ( - get_row_col_geo_grid_points( - self.lat_arr, - self.lon_arr, - self.varinfo.get_variable(self.latitude), - self.varinfo.get_variable(self.longitude), - ) - ) - self.assertListEqual(row_indices, expected_row_indices) - self.assertListEqual(col_indices, expected_col_indices) - self.assertListEqual(row_points, expected_geo_grid_points[0]) - self.assertListEqual(col_points, expected_geo_grid_points[1]) - - with self.subTest('Get two valid geo grid points from reversed coordinates'): - lon_arr_reversed = np.array( - [ - [ - -179.3, - -179.3, - -179.3, - -179.3, - -9999, - -9999, - -179.3, - -179.3, - -179.3, - -179.3, - ], - [ - -120.2, - -120.2, - -120.2, - -9999, - -9999, - -120.2, - -120.2, - -120.2, - -120.2, - -120.2, - ], - [20.6, 20.6, 20.6, 20.6, 20.6, 20.6, 20.6, 20.6, -9999, -9999], - [ - 150.5, - 150.5, - 150.5, - 150.5, - 150.5, - 150.5, - -9999, - -9999, - 150.5, - 150.5, - ], - [ - 178.4, - 178.4, - 178.4, - 178.4, - 178.4, - 178.4, - 178.4, - -9999, - 178.4, - 178.4, - ], - ] - ) - - lat_arr_reversed = np.array( - [ - [89.3, 79.3, -9999, 59.3, 29.3, 2.1, -9999, -59.3, -79.3, -89.3], - [89.3, 79.3, 60.3, 59.3, 29.3, 2.1, -9999, -59.3, -79.3, -89.3], - [89.3, -9999, 60.3, 59.3, 29.3, 2.1, -9999, -9999, -9999, -89.3], - [ - -9999, - 79.3, - -60.3, - -9999, - -9999, - -9999, - -60.2, - -59.3, - -79.3, - -89.3, - ], - [ - -89.3, - 79.3, - -60.3, - -9999, - -9999, - -9999, - -60.2, - -59.3, - -79.3, - -9999, - ], - ] - ) - expected_row_indices = [[0, 0], [4, 0]] - expected_col_indices = [[0, 0], [0, 9]] - expected_geo_grid_points_reversed = ( - [(-179.3, 89.3), (-179.3, -89.3)], - [(-179.3, 89.3), (178.4, -89.3)], - ) - - row_indices, col_indices, row_points, col_points = ( - get_row_col_geo_grid_points( - lat_arr_reversed, - lon_arr_reversed, - self.varinfo.get_variable(self.latitude), - self.varinfo.get_variable(self.longitude), - ) - ) - - self.assertListEqual(row_indices, expected_row_indices) - self.assertListEqual(col_indices, expected_col_indices) - self.assertListEqual(row_points, expected_geo_grid_points_reversed[0]) - self.assertListEqual(col_points, expected_geo_grid_points_reversed[1]) - def test_get_max_x_spread_pts(self): """Ensure that two valid sets of indices are returned by the function with a masked dataset as input @@ -731,3 +598,40 @@ def test_get_x_y_values_from_geographic_points(self): two_row_points_in_a_col, crs ) self.assertListEqual(actual_x_y_points, expected_x_y_points) + + def test_get_dimension_array_from_geo_points(self): + """Ensure that a valid x/y dimension array is returned + with a geo array of lat/lon values + + """ + crs = CRS.from_cf( + { + 'false_easting': 0.0, + 'false_northing': 0.0, + 'longitude_of_central_meridian': 0.0, + 'standard_parallel': 30.0, + 'grid_mapping_name': 'lambert_cylindrical_equal_area', + } + ) + with self.subTest('Get y dimension array from geo coordinates'): + row_indices = [[0, 0], [4, 0]] + ymax = 7341677.255608977 + ymin = -25687950.314159617 + + dim_array = get_dimension_array_from_geo_points( + self.lat_arr, self.lon_arr, crs, row_indices, 10, True + ) + self.assertEqual(dim_array.size, 10) + self.assertEqual(dim_array[0], ymax) + self.assertEqual(dim_array[-1], ymin) + + with self.subTest('Get x dimension array from geo coordinates'): + col_indices = [[0, 0], [0, 9]] + xmin = -17299990.048985746 + xmax = -1960815.628654331 + dim_array = get_dimension_array_from_geo_points( + self.lat_arr, self.lon_arr, crs, col_indices, 5, False + ) + self.assertEqual(dim_array.size, 5) + self.assertEqual(dim_array[0], xmin) + self.assertEqual(dim_array[-1], xmax) From afe85d8aa170f9cb932f93c4d33fd1d20823210d Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Tue, 19 Nov 2024 18:44:12 -0500 Subject: [PATCH 12/16] DAS-2236 - renamed add_bounds_variables --- hoss/dimension_utilities.py | 6 ++++-- tests/unit/test_dimension_utilities.py | 14 +++++++------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/hoss/dimension_utilities.py b/hoss/dimension_utilities.py index 8e759bf..61c634f 100644 --- a/hoss/dimension_utilities.py +++ b/hoss/dimension_utilities.py @@ -102,11 +102,13 @@ def get_prefetch_variables( ) # Create bounds variables if necessary. - add_bounds_variables(prefetch_variables_nc4, prefetch_variables, varinfo, logger) + check_add_artificial_bounds( + prefetch_variables_nc4, prefetch_variables, varinfo, logger + ) return prefetch_variables_nc4 -def add_bounds_variables( +def check_add_artificial_bounds( dimensions_nc4: str, required_dimensions: Set[str], varinfo: VarInfoFromDmr, diff --git a/tests/unit/test_dimension_utilities.py b/tests/unit/test_dimension_utilities.py index 9be9f07..80f89d4 100644 --- a/tests/unit/test_dimension_utilities.py +++ b/tests/unit/test_dimension_utilities.py @@ -14,8 +14,8 @@ from varinfo import VarInfoFromDmr from hoss.dimension_utilities import ( - add_bounds_variables, add_index_range, + check_add_artificial_bounds, get_bounds_array, get_dimension_bounds, get_dimension_extents, @@ -326,10 +326,10 @@ def test_get_fill_slice(self): with self.subTest('A filled dimension returns slice(start, stop).'): self.assertEqual(get_fill_slice('/longitude', fill_ranges), slice(16, 200)) - @patch('hoss.dimension_utilities.add_bounds_variables') + @patch('hoss.dimension_utilities.check_add_artificial_bounds') @patch('hoss.dimension_utilities.get_opendap_nc4') def test_get_prefetch_variables( - self, mock_get_opendap_nc4, mock_add_bounds_variables + self, mock_get_opendap_nc4, mock_check_add_artificial_bounds ): """Ensure that when a list of required variables is specified, a request to OPeNDAP will be sent requesting only those that are @@ -365,13 +365,13 @@ def test_get_prefetch_variables( url, required_dimensions, output_dir, self.logger, access_token, self.config ) - mock_add_bounds_variables.assert_called_once_with( + mock_check_add_artificial_bounds.assert_called_once_with( prefetch_path, required_dimensions, self.varinfo, self.logger ) @patch('hoss.dimension_utilities.needs_bounds') @patch('hoss.dimension_utilities.write_bounds') - def test_add_bounds_variables(self, mock_write_bounds, mock_needs_bounds): + def test_check_add_artificial_bounds(self, mock_write_bounds, mock_needs_bounds): """Ensure that `write_bounds` is called when it's needed, and that it's not called when it's not needed. @@ -389,7 +389,7 @@ def test_add_bounds_variables(self, mock_write_bounds, mock_needs_bounds): with self.subTest('Bounds need to be written'): mock_needs_bounds.return_value = True - add_bounds_variables( + check_add_artificial_bounds( prefetch_dataset_name, required_dimensions, varinfo_prefetch, @@ -402,7 +402,7 @@ def test_add_bounds_variables(self, mock_write_bounds, mock_needs_bounds): with self.subTest('Bounds should not be written'): mock_needs_bounds.return_value = False - add_bounds_variables( + check_add_artificial_bounds( prefetch_dataset_name, required_dimensions, varinfo_prefetch, From a3680c5f5a89f3443016c9bd0203b1e07d88e11b Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Wed, 20 Nov 2024 01:04:39 -0500 Subject: [PATCH 13/16] DAS-2236 - updated unit tests --- hoss/coordinate_utilities.py | 10 +++++-- hoss/exceptions.py | 5 ++-- tests/unit/test_coordinate_utilities.py | 38 +++++++++++++++++++++---- 3 files changed, 42 insertions(+), 11 deletions(-) diff --git a/hoss/coordinate_utilities.py b/hoss/coordinate_utilities.py index 35b2ce6..d88c73b 100644 --- a/hoss/coordinate_utilities.py +++ b/hoss/coordinate_utilities.py @@ -183,7 +183,9 @@ def get_1d_dim_array_data_from_dimvalues( dim_indices[1] - dim_indices[0] ) else: - raise InvalidCoordinateData(dim_values[0], dim_indices[0]) + raise InvalidCoordinateData( + f'No distinct valid coordinate points - dim_index={dim_indices[0]}, dim_value={dim_values[0]}' + ) dim_min = dim_values[0] - (dim_resolution * dim_indices[0]) dim_max = dim_values[1] + (dim_resolution * (dim_size - 1 - dim_indices[1])) @@ -295,9 +297,13 @@ def get_max_x_spread_pts( x_ind = np.indices( (valid_geospatial_mask.shape[0], valid_geospatial_mask.shape[1]) )[1] + # mask x_ind to hide the invalid data points valid_x_ind = np.ma.array(x_ind, mask=valid_geospatial_mask) + if valid_x_ind.count() == 0: + raise InvalidCoordinateData("No valid coordinate data") + # ptp (peak-to-peak) finds the greatest delta-x value amongst valid points # for each row. Result is 1D x_ind_spread = valid_x_ind.ptp(axis=1) @@ -311,7 +317,7 @@ def get_max_x_spread_pts( # There is just one valid point if min_x_ind == max_x_ind: - raise InvalidCoordinateData(x_ind_spread, min_x_ind) + raise InvalidCoordinateData("Only one valid point in coordinate data") return [[max_x_spread_row, min_x_ind], [max_x_spread_row, max_x_ind]] diff --git a/hoss/exceptions.py b/hoss/exceptions.py index a7b3fab..18ab700 100644 --- a/hoss/exceptions.py +++ b/hoss/exceptions.py @@ -172,11 +172,10 @@ class InvalidCoordinateData(CustomError): """ - def __init__(self, dim_value, dim_index): + def __init__(self, custom_msg): super().__init__( 'InvalidCoordinateData', - 'The data does not have at least two valid values ' - f'dim_value: "{dim_value}" dim_index: "{dim_index}"', + f'{custom_msg}', ) diff --git a/tests/unit/test_coordinate_utilities.py b/tests/unit/test_coordinate_utilities.py index c739b17..1a837e7 100644 --- a/tests/unit/test_coordinate_utilities.py +++ b/tests/unit/test_coordinate_utilities.py @@ -162,8 +162,7 @@ def test_get_1d_dim_array_data_from_dimvalues(self): ) self.assertEqual( context.exception.message, - 'The data does not have at least two valid values ' - 'dim_value: "2" dim_index: "0"', + 'No distinct valid coordinate points - ' 'dim_index=0, dim_value=2', ) with self.subTest('invalid dimension indices'): @@ -176,8 +175,7 @@ def test_get_1d_dim_array_data_from_dimvalues(self): ) self.assertEqual( context.exception.message, - 'The data does not have at least two valid values ' - 'dim_value: "100" dim_index: "5"', + 'No distinct valid coordinate points - ' 'dim_index=5, dim_value=100', ) def test_get_coordinate_array(self): @@ -535,12 +533,40 @@ def test_get_max_x_spread_pts(self): ] ) expected_indices = [[0, 0], [0, 9]] - actual_indices = get_max_x_spread_pts(~valid_values) - self.assertTrue(actual_indices[0] == expected_indices[0]) self.assertTrue(actual_indices[1] == expected_indices[1]) + with self.subTest('With just one valid index in the coordinates'): + valid_values = np.array( + [ + [False, False, False], + [False, True, False], + [False, False, False], + ] + ) + with self.assertRaises(InvalidCoordinateData) as context: + get_max_x_spread_pts(~valid_values) + self.assertEqual( + context.exception.message, + 'Only one valid point in coordinate data', + ) + + with self.subTest('No valid points from coordinates'): + valid_values = np.array( + [ + [False, False, False], + [False, False, False], + [False, False, False], + ] + ) + with self.assertRaises(InvalidCoordinateData) as context: + get_max_x_spread_pts(~valid_values) + self.assertEqual( + context.exception.message, + 'No valid coordinate data', + ) + def test_get_valid_row_col_pairs(self): """Ensure that two sets of valid indices are returned by the method with a set of lat/lon coordinates as input From 57744a6298adf081d0bf06c9a6acd0b98cccf99c Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Wed, 20 Nov 2024 01:42:35 -0500 Subject: [PATCH 14/16] DAS-2236 - updates to unit tests --- tests/unit/test_coordinate_utilities.py | 97 +++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/tests/unit/test_coordinate_utilities.py b/tests/unit/test_coordinate_utilities.py index 1a837e7..dc0b2a1 100644 --- a/tests/unit/test_coordinate_utilities.py +++ b/tests/unit/test_coordinate_utilities.py @@ -587,6 +587,103 @@ def test_get_valid_row_col_pairs(self): self.assertTrue(actual_grid_indices[0], expected_grid_indices[0]) self.assertTrue(actual_grid_indices[1], expected_grid_indices[1]) + with self.subTest('Only a single valid point in coordinates dataset'): + lat_arr = np.array( + [ + [-9999.0, -9999.0, 40.1, -9999.0, -9999.0], + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + ] + ) + lon_arr = np.array( + [ + [-9999.0, -9999.0, 100.1, -9999.0, -9999.0], + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + ] + ) + with self.assertRaises(InvalidCoordinateData) as context: + get_valid_row_col_pairs( + lat_arr, + lon_arr, + self.varinfo.get_variable(self.latitude), + self.varinfo.get_variable(self.longitude), + ) + self.assertEqual( + context.exception.message, + 'No valid coordinate data', + ) + with self.subTest('valid points in one row in coordinates dataset'): + lat_arr = np.array( + [ + [40.1, 40.1, 40.1, 40.1, 40.1], + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + ] + ) + lon_arr = np.array( + [ + [-179.0, -10.0, 100.1, 130.0, 179.0], + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + ] + ) + with self.assertRaises(InvalidCoordinateData) as context: + get_valid_row_col_pairs( + lat_arr, + lon_arr, + self.varinfo.get_variable(self.latitude), + self.varinfo.get_variable(self.longitude), + ) + self.assertEqual( + context.exception.message, + 'No valid coordinate data', + ) + with self.subTest('valid points in one column in coordinates dataset'): + lat_arr = np.array( + [ + [-9999.0, -9999.0, 40.1, -9999.0, -9999.0], + [-9999.0, -9999.0, -50.0, -9999.0, -9999.0], + ] + ) + lon_arr = np.array( + [ + [-9999.0, -9999.0, 100.1, -9999.0, -9999.0], + [-9999.0, -9999.0, 100.1, -9999.0, -9999.0], + ] + ) + with self.assertRaises(InvalidCoordinateData) as context: + get_valid_row_col_pairs( + lat_arr, + lon_arr, + self.varinfo.get_variable(self.latitude), + self.varinfo.get_variable(self.longitude), + ) + self.assertEqual( + context.exception.message, + 'No valid coordinate data', + ) + with self.subTest('no valid points in coordinates dataset'): + lat_arr = np.array( + [ + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + ] + ) + lon_arr = np.array( + [ + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + [-9999.0, -9999.0, -9999.0, -9999.0, -9999.0], + ] + ) + with self.assertRaises(InvalidCoordinateData) as context: + get_valid_row_col_pairs( + lat_arr, + lon_arr, + self.varinfo.get_variable(self.latitude), + self.varinfo.get_variable(self.longitude), + ) + self.assertEqual( + context.exception.message, + 'No valid coordinate data', + ) + def test_get_x_y_values_from_geographic_points(self): """Ensure that the correct x-values are returned by the function with a set of geographic coordinates as input. From 8590760d7571b770ca2f2505b636330d49801b0d Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Wed, 20 Nov 2024 10:33:11 -0500 Subject: [PATCH 15/16] DAS-2236 - added unit tests for get_range_strings --- hoss/coordinate_utilities.py | 3 +- tests/unit/test_dimension_utilities.py | 40 ++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/hoss/coordinate_utilities.py b/hoss/coordinate_utilities.py index d88c73b..525b038 100644 --- a/hoss/coordinate_utilities.py +++ b/hoss/coordinate_utilities.py @@ -184,7 +184,8 @@ def get_1d_dim_array_data_from_dimvalues( ) else: raise InvalidCoordinateData( - f'No distinct valid coordinate points - dim_index={dim_indices[0]}, dim_value={dim_values[0]}' + 'No distinct valid coordinate points - ' + f'dim_index={dim_indices[0]}, dim_value={dim_values[0]}' ) dim_min = dim_values[0] - (dim_resolution * dim_indices[0]) diff --git a/tests/unit/test_dimension_utilities.py b/tests/unit/test_dimension_utilities.py index 80f89d4..9952665 100644 --- a/tests/unit/test_dimension_utilities.py +++ b/tests/unit/test_dimension_utilities.py @@ -24,6 +24,7 @@ get_dimension_indices_from_values, get_fill_slice, get_prefetch_variables, + get_range_strings, get_requested_index_ranges, is_almost_in, is_dimension_ascending, @@ -313,6 +314,45 @@ def test_add_index_range(self): '/sst_dtime[][12:34][]', ) + def test_get_range_strings(self): + """Ensure the correct combinations of range_strings are added as + suffixes to the input variable based upon that variable's dimensions. + If a dimension range has the lower index > upper index, that + indicates the bounding box crosses the edge of the grid. In this + instance, the full range of the variable should be retrieved. + + """ + with self.subTest('all dimensions found in index_ranges'): + variable_list = ['/Grid/lon', '/Grid/lat'] + index_ranges = {'/Grid/lat': (600, 699), '/Grid/lon': (2200, 2299)} + expected_range_strings = range_strings = ['[2200:2299]', '[600:699]'] + self.assertEqual( + get_range_strings(variable_list, index_ranges), expected_range_strings + ) + + with self.subTest('only some dimensions found in index range'): + variable_list = ['/Grid/time', '/Grid/lon', '/Grid/lat'] + index_ranges = {'/Grid/lat': (600, 699), '/Grid/lon': (2200, 2299)} + expected_range_strings = ['[]', '[2200:2299]', '[600:699]'] + self.assertEqual( + get_range_strings(variable_list, index_ranges), + expected_range_strings, + ) + + with self.subTest('No variables found in index ranges'): + variable_list = ['/Grid/time', '/Grid/lon', '/Grid/lat'] + self.assertEqual(get_range_strings(variable_list, {}), ['[]', '[]', '[]']) + with self.subTest( + 'when dimension range lower index is greater than upper index' + ): + variable_list = ['/Grid/time', '/Grid/lon', '/Grid/lat'] + index_ranges = {'/Grid/lat': (699, 600), '/Grid/lon': (2200, 2299)} + expected_range_strings = ['[]', '[2200:2299]', '[]'] + self.assertEqual( + get_range_strings(variable_list, index_ranges), + expected_range_strings, + ) + def test_get_fill_slice(self): """Ensure that a slice object is correctly formed for a requested dimension. From 3b0a149e16dedd289e89d981e5dcf9a4064a1903 Mon Sep 17 00:00:00 2001 From: sudhamurthy Date: Wed, 20 Nov 2024 17:17:02 -0500 Subject: [PATCH 16/16] DAS-2236 - added unit tests for get_prefetch_variables --- tests/unit/test_dimension_utilities.py | 136 ++++++++++++++++++++++++- 1 file changed, 135 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_dimension_utilities.py b/tests/unit/test_dimension_utilities.py index 9952665..e280914 100644 --- a/tests/unit/test_dimension_utilities.py +++ b/tests/unit/test_dimension_utilities.py @@ -404,11 +404,145 @@ def test_get_prefetch_variables( mock_get_opendap_nc4.assert_called_once_with( url, required_dimensions, output_dir, self.logger, access_token, self.config ) - mock_check_add_artificial_bounds.assert_called_once_with( prefetch_path, required_dimensions, self.varinfo, self.logger ) + @patch('hoss.dimension_utilities.check_add_artificial_bounds') + @patch('hoss.dimension_utilities.get_opendap_nc4') + def test_get_prefetch_variables_with_anonymous_dimensions( + self, + mock_get_opendap_nc4, + mock_check_add_artificial_bounds, + ): + """Ensure that when a list of required variables is specified, + and the required dimension variables are not present, + checks and retrieves coordinate variables which is used + in the opendap prefetch request. + + """ + prefetch_path = 'tests/data/SC_SPL3SMP_008_prefetch.nc4' + mock_get_opendap_nc4.return_value = prefetch_path + access_token = 'access' + output_dir = 'tests/output' + url = 'https://url_to_opendap_granule' + prefetch_variables = { + '/Soil_Moisture_Retrieval_Data_AM/latitude', + '/Soil_Moisture_Retrieval_Data_AM/longitude', + } + requested_variables = { + '/Soil_Moisture_Retrieval_Data_AM/albedo', + '/Soil_Moisture_Retrieval_Data_AM/surface_flag', + } + varinfo = VarInfoFromDmr( + 'tests/data/SC_SPL3SMP_008.dmr', + 'SPL3SMP', + config_file='hoss/hoss_config.json', + ) + + self.assertEqual( + get_prefetch_variables( + url, + varinfo, + requested_variables, + output_dir, + self.logger, + access_token, + self.config, + ), + prefetch_path, + ) + mock_get_opendap_nc4.assert_called_once_with( + url, prefetch_variables, output_dir, self.logger, access_token, self.config + ) + mock_check_add_artificial_bounds.assert_called_once_with( + prefetch_path, prefetch_variables, varinfo, self.logger + ) + + @patch('hoss.dimension_utilities.get_coordinate_variables') + @patch('hoss.dimension_utilities.check_add_artificial_bounds') + @patch('hoss.dimension_utilities.get_opendap_nc4') + def test_get_prefetch_variables_with_no_anonymous_dimensions( + self, + mock_get_opendap_nc4, + mock_check_add_artificial_bounds, + mock_get_coordinate_variables, + ): + """Ensure that when a list of required variables is specified, a + If two coordinate variables are also not present, the opendap prefetch request will not include any + dimension variables. + """ + prefetch_path = 'tests/data/SC_SPL3SMP_008_prefetch.nc4' + mock_get_opendap_nc4.return_value = prefetch_path + access_token = 'access' + output_dir = 'tests/output' + url = 'https://url_to_opendap_granule' + + requested_variables = { + '/Soil_Moisture_Retrieval_Data_AM/albedo', + '/Soil_Moisture_Retrieval_Data_AM/surface_flag', + } + varinfo = VarInfoFromDmr( + 'tests/data/SC_SPL3SMP_008.dmr', + 'SPL3SMP', + config_file='hoss/hoss_config.json', + ) + with self.subTest('No coordinate variables'): + mock_get_coordinate_variables.return_value = ([], []) + self.assertEqual( + get_prefetch_variables( + url, + varinfo, + requested_variables, + output_dir, + self.logger, + access_token, + self.config, + ), + prefetch_path, + ) + mock_get_coordinate_variables.assert_called_once_with( + varinfo, + requested_variables, + ) + mock_get_opendap_nc4.assert_called_once_with( + url, set(), output_dir, self.logger, access_token, self.config + ) + mock_check_add_artificial_bounds.assert_called_once_with( + prefetch_path, set(), varinfo, self.logger + ) + mock_get_coordinate_variables.reset_mock() + mock_get_opendap_nc4.reset_mock() + mock_check_add_artificial_bounds.reset_mock() + + with self.subTest('Only one coordinate variable'): + mock_get_coordinate_variables.return_value = ( + ['/Soil_Moisture_Retrieval_Data_AM/latitude'], + [], + ) + self.assertEqual( + get_prefetch_variables( + url, + varinfo, + requested_variables, + output_dir, + self.logger, + access_token, + self.config, + ), + prefetch_path, + ) + mock_get_coordinate_variables.assert_called_once_with( + varinfo, + requested_variables, + ) + mock_get_opendap_nc4.assert_called_once_with( + url, set(), output_dir, self.logger, access_token, self.config + ) + mock_check_add_artificial_bounds.assert_called_once_with( + prefetch_path, set(), varinfo, self.logger + ) + @patch('hoss.dimension_utilities.needs_bounds') @patch('hoss.dimension_utilities.write_bounds') def test_check_add_artificial_bounds(self, mock_write_bounds, mock_needs_bounds):