DAS-2236 - Updates to methods that calculate the points used to create the dimensions #18
base: DAS-2208-SMAP-L3
@@ -5,14 +5,13 @@
import numpy as np
from netCDF4 import Dataset

# from numpy import ndarray
from pyproj import CRS, Transformer
from varinfo import VariableFromDmr, VarInfoFromDmr

from hoss.exceptions import (
    IncompatibleCoordinateVariables,
    InvalidCoordinateData,
    InvalidCoordinateDataset,
    InvalidCoordinateVariable,
    MissingCoordinateVariable,
    MissingVariable,
)
@@ -97,24 +96,28 @@ def get_coordinate_variables(
    varinfo: VarInfoFromDmr,
    requested_variables: list,
) -> tuple[list, list]:
-    """This function returns latitude and longitude variables listed in the
-    CF-Convention coordinates metadata attribute. It returns them in a specific
-    order [latitude, longitude]"
+    """This function returns latitude and longitude variable names from
+    latitude and longitude variables listed in the CF-Convention coordinates
+    metadata attribute. It returns them in a specific
+    order [latitude_name, longitude_name]"
    """

-    coordinate_variables_list = varinfo.get_references_for_attribute(
+    coordinate_variables = varinfo.get_references_for_attribute(
        requested_variables, 'coordinates'
    )

    latitude_coordinate_variables = [
        coordinate
-        for coordinate in coordinate_variables_list
-        if varinfo.get_variable(coordinate).is_latitude()
+        for coordinate in coordinate_variables
+        if varinfo.get_variable(coordinate) is not None
+        and varinfo.get_variable(coordinate).is_latitude()
    ]

    longitude_coordinate_variables = [
        coordinate
-        for coordinate in coordinate_variables_list
-        if varinfo.get_variable(coordinate).is_longitude()
+        for coordinate in coordinate_variables
+        if varinfo.get_variable(coordinate) is not None
+        and varinfo.get_variable(coordinate).is_longitude()
    ]

    return latitude_coordinate_variables, longitude_coordinate_variables
@@ -162,59 +165,234 @@ def get_coordinate_array(
    return coordinate_array


-def get_1D_dim_array_data_from_dimvalues(
-    dim_values: np.ndarray, dim_indices: np.ndarray, dim_size: int
+def get_1d_dim_array_data_from_dimvalues(
+    dim_values: np.ndarray,
+    dim_indices: np.ndarray,
+    dim_size: int,
) -> np.ndarray:
    """
-    return a full dimension data array based on the 2 projected points and
-    grid size
+    return a full dimension data array based upon 2 valid projected values
+    (x and y) given in dim_values which are within the indices given in
+    dim_indices and the full dimension size provided in dim_size. The
+    dim_indices need to be between 0 and less than the dim_size.
+    returns a 1D array of size = dim_size with proper dimension array values
    """

    if (dim_indices[1] != dim_indices[0]) and (dim_values[1] != dim_values[0]):
        dim_resolution = (dim_values[1] - dim_values[0]) / (
            dim_indices[1] - dim_indices[0]
        )
    else:
-        raise InvalidCoordinateDataset(dim_values[0], dim_indices[0])
+        raise InvalidCoordinateData(
+            'No distinct valid coordinate points - '
+            f'dim_index={dim_indices[0]}, dim_value={dim_values[0]}'
+        )

    dim_min = dim_values[0] - (dim_resolution * dim_indices[0])
    dim_max = dim_values[1] + (dim_resolution * (dim_size - 1 - dim_indices[1]))

    return np.linspace(dim_min, dim_max, dim_size)
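As a quick illustration of the linear extrapolation above (a standalone sketch with made-up numbers, not part of the diff): the resolution comes from the separation of the two known points, and the full axis is then filled with np.linspace.

```python
import numpy as np

# Made-up inputs: projected values 10.0 and 30.0 found at indices 1 and 3 of a
# five-element dimension.
dim_values = np.array([10.0, 30.0])
dim_indices = np.array([1, 3])
dim_size = 5

# Resolution between the two known points: (30 - 10) / (3 - 1) = 10.0
dim_resolution = (dim_values[1] - dim_values[0]) / (dim_indices[1] - dim_indices[0])

# Extrapolate outwards to index 0 and index dim_size - 1.
dim_min = dim_values[0] - dim_resolution * dim_indices[0]                   # 0.0
dim_max = dim_values[1] + dim_resolution * (dim_size - 1 - dim_indices[1])  # 40.0

print(np.linspace(dim_min, dim_max, dim_size))  # [ 0. 10. 20. 30. 40.]
```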
def get_valid_indices(
-    coordinate_row_col: np.ndarray, coordinate: VariableFromDmr
+    lat_lon_array: np.ndarray, coordinate: VariableFromDmr
) -> np.ndarray:
    """
-    Returns indices of a valid array without fill values if the fill
-    value is provided. If it is not provided, we check for valid values
-    for latitude and longitude
+    Returns an array of boolean values
+    - true, false - indicating a valid value (non-fill, within range)
+    for a given coordinate variable. A value of True means the
+    value is valid
+    - latitude or longitude - or
+    returns an empty ndarray of size (0,0) for any other variable.

    """

    # get_attribute_value returns a value of type `str`
    coordinate_fill = coordinate.get_attribute_value('_FillValue')
    if coordinate_fill is not None:
-        is_not_fill = ~np.isclose(coordinate_row_col, float(coordinate_fill))
+        is_not_fill = ~np.isclose(lat_lon_array, float(coordinate_fill))
    else:
        # Creates an entire array of `True` values.
-        is_not_fill = np.ones_like(coordinate_row_col, dtype=bool)
+        is_not_fill = np.ones_like(lat_lon_array, dtype=bool)

    if coordinate.is_longitude():
-        valid_indices = np.where(
-            np.logical_and(
-                is_not_fill,
-                np.logical_and(
-                    coordinate_row_col >= -180.0, coordinate_row_col <= 360.0
-                ),
-            )
-        )[0]
+        valid_indices = np.logical_and(
+            is_not_fill,
+            np.logical_and(lat_lon_array >= -180.0, lat_lon_array <= 360.0),
+        )
    elif coordinate.is_latitude():
-        valid_indices = np.where(
-            np.logical_and(
-                is_not_fill,
-                np.logical_and(coordinate_row_col >= -90.0, coordinate_row_col <= 90.0),
-            )
-        )[0]
+        valid_indices = np.logical_and(
+            is_not_fill,
+            np.logical_and(lat_lon_array >= -90.0, lat_lon_array <= 90.0),
+        )
    else:
-        valid_indices = np.empty((0, 0))
+        raise InvalidCoordinateDataset(coordinate.full_name_path)

    return valid_indices
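To make the boolean mask concrete, here is a standalone sketch of the latitude branch using plain NumPy (the fill value and data are invented; the real function reads _FillValue from a VariableFromDmr object):

```python
import numpy as np

# Invented latitude values containing a fill value and an out-of-range value.
lat_lon_array = np.array([-9999.0, -45.0, 0.0, 120.0, 60.0])
coordinate_fill = -9999.0

is_not_fill = ~np.isclose(lat_lon_array, coordinate_fill)

# Latitude range check: valid values lie within [-90.0, 90.0].
valid_indices = np.logical_and(
    is_not_fill,
    np.logical_and(lat_lon_array >= -90.0, lat_lon_array <= 90.0),
)
print(valid_indices)  # [False  True  True False  True]
```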
def get_x_y_values_from_geographic_points(
    points: list[tuple],
    crs: CRS,
) -> list[tuple]:
    """Takes an input list of (longitude, latitude) coordinates and CRS object
    from PyProj and projects those points to the target grid. Then returns
    the list of x-y points as tuple in (x,y) order

    """
    point_longitudes, point_latitudes = zip(*list(points))

    from_geo_transformer = Transformer.from_crs(4326, crs)
    points_x, points_y = (  # pylint: disable=unpacking-non-sequence
        from_geo_transformer.transform(point_latitudes, point_longitudes)
    )

    return list(zip(points_x, points_y))
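One point worth calling out (explanatory note, not part of the diff): with the default axis order, Transformer.from_crs(4326, crs) expects EPSG:4326 input as (latitude, longitude), which is why the transform call above passes latitudes first. A minimal standalone check, assuming EASE-Grid 2.0 Global (EPSG:6933) purely as an illustrative target grid:

```python
from pyproj import CRS, Transformer

# Illustrative target CRS only: EASE-Grid 2.0 Global (EPSG:6933).
crs = CRS.from_epsg(6933)
transformer = Transformer.from_crs(4326, crs)

# EPSG:4326 authority axis order is (latitude, longitude), so latitudes first.
x, y = transformer.transform([0.0, 45.0], [10.0, 20.0])
print(x, y)  # projected metres for (lat=0, lon=10) and (lat=45, lon=20)
```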
def get_valid_row_col_pairs(
    lat_arr: np.ndarray,
    lon_arr: np.ndarray,
    lat_coordinate: VariableFromDmr,
    lon_coordinate: VariableFromDmr,
) -> tuple[list, list]:
    """
    This function finds a set of indices maximally spread across
    a row, and the set maximally spread across a column, with the
    indices being valid in both the latitude and longitude datasets.
    When interpolating between these points, the maximal spread
    ensures the greatest interpolation accuracy.
    """
    valid_lat_lon_mask = np.logical_and(
        get_valid_indices(lat_arr, lat_coordinate),
        get_valid_indices(lon_arr, lon_coordinate),
    )

    # get maximally spread points within rows
    max_x_spread_pts = get_max_x_spread_pts(~valid_lat_lon_mask)

Review comment: If this is a regular grid, which HOSS requires to work correctly, you don't need to pick the two furthest points from one another. You just need any two points that don't have the same row or column. But, on the other hand, picking the furthest points means you get that. (TL;DR - no action needed)

Reply: I did have it as just any 2 points before. But @autydp said that the best values would be if they were furthest apart.

    # Doing the same for the columns is done by transposing the valid_mask
    # and then fixing the results from [x, y] to [y, x].
    max_y_spread_trsp = get_max_x_spread_pts(np.transpose(~valid_lat_lon_mask))
    max_y_spread_pts = [
        list(np.flip(max_y_spread_trsp[0])),
        list(np.flip(max_y_spread_trsp[1])),
    ]

    return max_y_spread_pts, max_x_spread_pts
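The [x, y] to [y, x] fix-up is easiest to see with invented index pairs (a sketch only): on the transposed mask, get_max_x_spread_pts reports each point as [column, row], so flipping each pair restores [row, column] order.

```python
import numpy as np

# Invented output of get_max_x_spread_pts on the transposed mask: both points
# sit in original column 2, at original rows 0 and 5.
max_y_spread_trsp = [[2, 0], [2, 5]]

max_y_spread_pts = [
    list(np.flip(max_y_spread_trsp[0])),
    list(np.flip(max_y_spread_trsp[1])),
]
# max_y_spread_pts is now [[0, 2], [5, 2]] in [row, column] order.
```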
Review comment: If this function is now being used for both x and y, then it (and the local variables within it) should be renamed to ensure it is clear it isn't just for x.

Reply: the x_spread is basically what returns the y_indices.

def get_max_x_spread_pts(
    valid_geospatial_mask: np.ndarray,
) -> list[list]:
    """
    This function returns two data points by x, y indices that are spread farthest
    from each other in the same row, i.e., have the greatest delta-x value - and
    are valid data points from the valid_geospatial_mask array passed in. The input array
    must be a 2D Numpy mask array providing the valid data points, e.g., filtering
    out fill values and out-of-range values.
    - input is Numpy Mask Array, e.g., invalid latitudes & longitudes
    - returns 2 points by indices, [[y_ind, x_ind], [y_ind, x_ind]
    """
    # fill a sample array with x-index values, x_ind[i, j] = j
    x_ind = np.indices(
        (valid_geospatial_mask.shape[0], valid_geospatial_mask.shape[1])
    )[1]

    # mask x_ind to hide the invalid data points
    valid_x_ind = np.ma.array(x_ind, mask=valid_geospatial_mask)

    if valid_x_ind.count() == 0:
        raise InvalidCoordinateData("No valid coordinate data")

    # ptp (peak-to-peak) finds the greatest delta-x value amongst valid points
    # for each row. Result is 1D
    x_ind_spread = valid_x_ind.ptp(axis=1)

Review comment: If there is only one valid value, I think this gives you a single result (of value 0). It feels like that needs to be caught here before proceeding. (I don't think right now you are capturing the case anywhere that there is only a single valid point, apologies if I missed that, though)

Reply: yeah. Good catch. I think I had that check in the previous version of the method...

Reply: updated - 2f07d1a

    # This finds which row has the greatest spread (delta-x)
    max_x_spread_row = np.argmax(x_ind_spread)

    # Using the row reference, find the min-x and max-x
    min_x_ind = np.min(valid_x_ind[max_x_spread_row])
    max_x_ind = np.max(valid_x_ind[max_x_spread_row])

    # There is just one valid point
    if min_x_ind == max_x_ind:
        raise InvalidCoordinateData("Only one valid point in coordinate data")

    return [[max_x_spread_row, min_x_ind], [max_x_spread_row, max_x_ind]]
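A standalone toy run of the masked-index / peak-to-peak idea above (the validity mask is invented, and results are cast to int for readability):

```python
import numpy as np

# Toy validity mask: True marks a valid cell. Row 1 has the widest column
# spread of valid cells (columns 0 and 3).
valid = np.array([
    [False, True,  True,  False],
    [True,  False, False, True],
    [False, False, True,  False],
])

# x_ind[i, j] = j, then hide the invalid cells (masked-array convention:
# a mask value of True means hidden).
x_ind = np.indices(valid.shape)[1]
valid_x_ind = np.ma.array(x_ind, mask=~valid)

# Peak-to-peak per row gives each row's column spread: [1, 3, 0] -> row 1 wins.
x_ind_spread = valid_x_ind.ptp(axis=1)
max_x_spread_row = int(np.argmax(x_ind_spread))

min_x_ind = int(np.min(valid_x_ind[max_x_spread_row]))
max_x_ind = int(np.max(valid_x_ind[max_x_spread_row]))

print([[max_x_spread_row, min_x_ind], [max_x_spread_row, max_x_ind]])  # [[1, 0], [1, 3]]
```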
def create_dimension_arrays_from_coordinates(
    prefetch_dataset: Dataset,
    latitude_coordinate: VariableFromDmr,
    longitude_coordinate: VariableFromDmr,
    crs: CRS,
    projected_dimension_names: list,
) -> dict[str, np.ndarray]:
    """Generate artificial 1D dimensions scales for each
    2D dimension or coordinate variable.
    1) Get 2 valid geo grid points
    2) convert them to a projected x-y extent
    3) Generate the x-y dimscale array and return to the calling method

    """
    lat_arr = get_coordinate_array(
        prefetch_dataset,
        latitude_coordinate.full_name_path,
    )
    lon_arr = get_coordinate_array(
        prefetch_dataset,
        longitude_coordinate.full_name_path,
    )
    row_size, col_size = get_row_col_sizes_from_coordinate_datasets(
        lat_arr,
        lon_arr,
    )

    row_indices, col_indices = get_valid_row_col_pairs(
        lat_arr, lon_arr, latitude_coordinate, longitude_coordinate
    )

    y_dim = get_dimension_array_from_geo_points(
        lat_arr, lon_arr, crs, row_indices, row_size, True
    )

    x_dim = get_dimension_array_from_geo_points(
        lat_arr, lon_arr, crs, col_indices, col_size, False
    )

    projected_y, projected_x = tuple(projected_dimension_names)
    return {projected_y: y_dim, projected_x: x_dim}
def get_dimension_array_from_geo_points(
    lat_arr: np.ndarray,
    lon_arr: np.ndarray,
    crs: CRS,
    dimension_indices: list,
    dimension_size: int,
    is_row=True,
) -> np.ndarray:
    """This function uses the list of lat/lon points corresponding
    to a list of array indices and reprojects it with the CRS
    provided and scales the x/y values to a dimension array with the dimension
    size provided

    """
    if is_row:
        index_for_dimension = 0
        x_or_y_index = 1
    else:
        index_for_dimension = 1
        x_or_y_index = 0

    geo_grid_points = [
        (lon_arr[row, col], lat_arr[row, col]) for row, col in dimension_indices
    ]
    x_y_values = get_x_y_values_from_geographic_points(geo_grid_points, crs)
    indices_for_dimension = np.transpose(dimension_indices)[index_for_dimension]
    dimension_values = np.transpose(x_y_values)[x_or_y_index]
    return get_1d_dim_array_data_from_dimvalues(
        dimension_values, indices_for_dimension, dimension_size
    )
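The is_row switch above only selects which half of each (row, column) index pairs with which half of each projected (x, y) value; a tiny sketch with invented numbers for the column (x) dimension:

```python
import numpy as np

# Invented points: same row (4), columns 0 and 9, with their projected (x, y)
# values; the y values match because both points lie on the same row.
dimension_indices = [[4, 0], [4, 9]]
x_y_values = [(-1000.0, 250.0), (800.0, 250.0)]

# is_row=False -> pick column indices (position 1) and projected x (position 0).
indices_for_dimension = np.transpose(dimension_indices)[1]  # -> [0, 9]
dimension_values = np.transpose(x_y_values)[0]              # -> [-1000.0, 800.0]
```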
Review comment: (Same comment for the similar change in the list comprehension for longitude_coordinate_variables.) Where do you catch any issues if there is a reference to a variable in a coordinates metadata attribute that isn't present in the file? If it's pointing to a latitude or longitude variable that isn't present in the file, at some point that's going to cause a failure when you try to access that variable. Is that gracefully handled somewhere? (It's likely this is already nicely supported, but it's been a while since the previous PR and I just can't remember.)

Review comment: Separate comment - you have not added unit tests to prove this case is being handled as expected. (Needed both for latitude or longitude.)

Reply: There are unit tests to check for the above cases in line 205 of test_coordinate_utilities.py:
https://github.com/nasa/harmony-opendap-subsetter/blob/57744a6298adf081d0bf06c9a6acd0b98cccf99c/tests/unit/test_coordinate_utilities.py#L205C1-L219C1
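For reference, a minimal sketch of the kind of test being discussed (hypothetical, not the test at the link above), mocking VarInfoFromDmr so that one `coordinates` reference resolves to None and exercises the `is not None` guard:

```python
from unittest.mock import Mock

# Assumed module path for the function shown in this diff.
from hoss.coordinate_utilities import get_coordinate_variables

varinfo = Mock()
varinfo.get_references_for_attribute.return_value = [
    '/latitude', '/longitude', '/missing_variable'
]

known_variables = {
    '/latitude': Mock(**{'is_latitude.return_value': True,
                         'is_longitude.return_value': False}),
    '/longitude': Mock(**{'is_latitude.return_value': False,
                          'is_longitude.return_value': True}),
}
# '/missing_variable' is absent from the mapping, so get_variable returns None
# and the reference is skipped instead of raising AttributeError.
varinfo.get_variable.side_effect = known_variables.get

latitudes, longitudes = get_coordinate_variables(varinfo, ['/science_variable'])
assert latitudes == ['/latitude']
assert longitudes == ['/longitude']
```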