From c29945a322590285354a10e5a4e824ca3b67cf63 Mon Sep 17 00:00:00 2001 From: Romain Hugonnet Date: Fri, 31 May 2024 21:36:13 -0800 Subject: [PATCH] Add point cloud gridding (#553) --- geoutils/pointcloud.py | 77 +++++++++++++++++++++++++++ geoutils/raster/raster.py | 4 +- tests/test_pointcloud.py | 107 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 geoutils/pointcloud.py create mode 100644 tests/test_pointcloud.py diff --git a/geoutils/pointcloud.py b/geoutils/pointcloud.py new file mode 100644 index 00000000..f80b084e --- /dev/null +++ b/geoutils/pointcloud.py @@ -0,0 +1,77 @@ +"""Module for point cloud manipulation.""" + +import warnings +from typing import Literal + +import geopandas as gpd +import numpy as np +from scipy.interpolate import griddata + +from geoutils._typing import NDArrayNum + + +def _grid_pointcloud( + pc: gpd.GeoDataFrame, + grid_coords: tuple[NDArrayNum, NDArrayNum], + data_column_name: str = "b1", + resampling: Literal["nearest", "linear", "cubic"] = "linear", + dist_nodata_pixel: float = 1.0, +) -> NDArrayNum: + """ + Grid point cloud (possibly irregular coordinates) to raster (regular grid) using delaunay triangles interpolation. + + Based on scipy.interpolate.griddata combined to a nearest point search to replace values of grid cells further than + a certain distance (in number of pixels) by nodata values (as griddata interpolates all values in convex hull, no + matter the distance). + + :param pc: Point cloud. + :param grid_coords: Grid coordinates for X and Y. + :param data_column_name: Name of data column for point cloud (only if passed as a geodataframe). + :param resampling: Resampling method within delauney triangles (defaults to linear). + :param dist_nodata_pixel: Distance from the point cloud after which grid cells are filled by nodata values, + expressed in number of pixels. + """ + + # 1/ Interpolate irregular point cloud on a regular grid + + # Get meshgrid coordinates + xx, yy = np.meshgrid(grid_coords[0], grid_coords[1]) + + # Use griddata on all points + aligned_dem = griddata( + points=(pc.geometry.x.values, pc.geometry.y.values), + values=pc[data_column_name].values, + xi=(xx, yy), + method=resampling, + rescale=True, # Rescale inputs to unit cube to avoid precision issues + ) + + # 2/ Identify which grid points are more than X pixels away from the point cloud, and convert to NaNs + # (otherwise all grid points in the convex hull of the irregular triangulation are filled, no matter the distance) + + # Get the nearest point for each grid point + grid_pc = gpd.GeoDataFrame( + data={"placeholder": np.ones(len(xx.ravel()))}, geometry=gpd.points_from_xy(x=xx.ravel(), y=yy.ravel()) + ) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=UserWarning, message="Geometry is in a geographic CRS.*") + near = gpd.sjoin_nearest(grid_pc, pc) + # In case there are several points at the same distance, it doesn't matter which one is used to compute the + # distance, so we keep the first index of closest point + index_right = near.groupby(by=near.index)["index_right"].min() + + # Compute distance between points as a function of the pixel sizes in X and Y + res_x = np.abs(grid_coords[0][1] - grid_coords[0][0]) + res_y = np.abs(grid_coords[1][1] - grid_coords[1][0]) + dist = np.sqrt( + ((pc.geometry.x.values[index_right] - grid_pc.geometry.x.values) / res_x) ** 2 + + ((pc.geometry.y.values[index_right] - grid_pc.geometry.y.values) / res_y) ** 2 + ) + + # Replace all points further away than the distance of nodata by NaNs + aligned_dem[dist.reshape(aligned_dem.shape) > dist_nodata_pixel] = np.nan + + # Flip Y axis of grid + aligned_dem = np.flip(aligned_dem, axis=0) + + return aligned_dem diff --git a/geoutils/raster/raster.py b/geoutils/raster/raster.py index b3661ea4..b521abce 100644 --- a/geoutils/raster/raster.py +++ b/geoutils/raster/raster.py @@ -3603,7 +3603,7 @@ def ij2xy( def coords( self, grid: bool = True, shift_area_or_point: bool | None = None, force_offset: str | None = None - ) -> tuple[NDArrayNum, ...]: + ) -> tuple[NDArrayNum, NDArrayNum]: """ Get coordinates (x,y) of all pixels in the raster. @@ -3631,7 +3631,7 @@ def coords( # If grid is True, return coordinate grids if grid: meshgrid = tuple(np.meshgrid(xx, np.flip(yy))) - return meshgrid + return meshgrid # type: ignore else: return np.asarray(xx), np.asarray(yy) diff --git a/tests/test_pointcloud.py b/tests/test_pointcloud.py new file mode 100644 index 00000000..df6d5261 --- /dev/null +++ b/tests/test_pointcloud.py @@ -0,0 +1,107 @@ +"""Test module for point cloud functionalities.""" + +import geopandas as gpd +import numpy as np +import rasterio as rio +from shapely import geometry + +from geoutils import Raster +from geoutils.pointcloud import _grid_pointcloud + + +class TestPointCloud: + def test_grid_pc__chull(self) -> None: + """Test point cloud gridding.""" + + # 1/ Check gridding interpolation falls back exactly on original raster + + # Create a point cloud from interpolating a grid, so we can compare back after to check consistency + rng = np.random.default_rng(42) + shape = (10, 12) + rst_arr = np.linspace(0, 10, int(np.prod(shape))).reshape(*shape) + transform = rio.transform.from_origin(0, shape[0] - 1, 1, 1) + rst = Raster.from_array(rst_arr, transform=transform, crs=4326, nodata=100) + + # Generate random coordinates to interpolate, to create an irregular point cloud + points = rng.integers(low=1, high=shape[0] - 1, size=(100, 2)) + rng.normal(0, 0.15, size=(100, 2)) + b1_value = rst.interp_points((points[:, 0], points[:, 1])) + pc = gpd.GeoDataFrame(data={"b1": b1_value}, geometry=gpd.points_from_xy(x=points[:, 0], y=points[:, 1])) + grid_coords = rst.coords(grid=False) + + # Grid the point cloud + gridded_pc = _grid_pointcloud(pc, grid_coords=grid_coords) + + # Compare back to raster, all should be very close (but not exact, some info is lost due to interpolations) + valids = np.isfinite(gridded_pc) + assert np.allclose(gridded_pc[valids], rst.data.data[valids], rtol=10e-5) + + # 2/ Check the propagation of nodata values + + # 2.1/ Grid points outside the convex hull of all points should always be nodata + + # We convert the full raster to a point cloud, keeping all cells even nodata + rst_pc = rst.to_pointcloud(skip_nodata=False).ds + + # We define a multi-point geometry from the individual points, and compute its convex hull + poly = geometry.MultiPoint([[p.x, p.y] for p in pc.geometry]) + chull = poly.convex_hull + + # We compute the index of grid cells interesting the convex hull + ind_inters_convhull = rst_pc.intersects(chull) + + # We get corresponding 1D indexes for gridded output + i, j = rst.xy2ij(x=rst_pc.geometry.x.values, y=rst_pc.geometry.y.values) + + # Check all values outside convex hull are NaNs + assert all(~np.isfinite(gridded_pc[i[~ind_inters_convhull], j[~ind_inters_convhull]])) + + # 2.2/ For the rest of the points, data should be valid only if a point exists within 1 pixel of their + # coordinate, that is the closest rounded number + # TODO: Replace by check with distance, because some pixel not rounded can also be at less than 1 from a point + + # Compute min distance to irregular point cloud for each grid point + list_min_dist = [] + for p in rst_pc.geometry: + min_dist = np.min(np.sqrt((p.x - pc.geometry.x.values) ** 2 + (p.y - pc.geometry.y.values) ** 2)) + list_min_dist.append(min_dist) + + ind_close = np.array(list_min_dist) <= 1 + # We get the indexes for these coordinates + iround, jround = rst.xy2ij(x=rst_pc.geometry.x.values[ind_close], y=rst_pc.geometry.y.values[ind_close]) + + # Keep only indexes in the convex hull + indexes_close = [(iround[k], jround[k]) for k in range(len(iround))] + indexes_chull = [(i[k], j[k]) for k in range(len(i)) if ind_inters_convhull[k]] + close_in_chull = [tup for tup in indexes_close if tup in indexes_chull] + iclosechull, jclosehull = list(zip(*close_in_chull)) + + # All values close pixel in the convex hull should be valid + assert all(np.isfinite(gridded_pc[iclosechull, jclosehull])) + + # Other values in the convex hull should not be + far_in_chull = [tup for tup in indexes_chull if tup not in indexes_close] + ifarchull, jfarchull = list(zip(*far_in_chull)) + + assert all(~np.isfinite(gridded_pc[ifarchull, jfarchull])) + + # Check for a different distance value + gridded_pc = _grid_pointcloud(pc, grid_coords=grid_coords, dist_nodata_pixel=0.5) + ind_close = np.array(list_min_dist) <= 0.5 + + # We get the indexes for these coordinates + iround, jround = rst.xy2ij(x=rst_pc.geometry.x.values[ind_close], y=rst_pc.geometry.y.values[ind_close]) + + # Keep only indexes in the convex hull + indexes_close = [(iround[k], jround[k]) for k in range(len(iround))] + indexes_chull = [(i[k], j[k]) for k in range(len(i)) if ind_inters_convhull[k]] + close_in_chull = [tup for tup in indexes_close if tup in indexes_chull] + iclosechull, jclosehull = list(zip(*close_in_chull)) + + # All values close pixel in the convex hull should be valid + assert all(np.isfinite(gridded_pc[iclosechull, jclosehull])) + + # Other values in the convex hull should not be + far_in_chull = [tup for tup in indexes_chull if tup not in indexes_close] + ifarchull, jfarchull = list(zip(*far_in_chull)) + + assert all(~np.isfinite(gridded_pc[ifarchull, jfarchull]))