Skip to content

Commit

Permalink
Add proximity and buffer_metric functionalities with metric opt…
Browse files Browse the repository at this point in the history
…ion based on finding local UTM (#336)

* Add latlon_to_utm and utm_to_epsg functions

* Add raster proximity function

* Add vector proximity, buffer_metric functions, and add metric argument to buffer_without_overlap

* Add test for utm_to_epsg

* Fix circular import

* Add tests for latlon_to_utm

* Add test for buffer_metric

* Update tests for buffer_without_overlap

* Linting

* Make proximity function more modular

* Add more parameters to proximity

* Incremental commit on proximity tests

* Finalize proximity tests

* Linting

* Fix description of buffer metric

* Incremental commit on eriks comments

* Finalize accounting for comments

* Fix code broken when accounting for comments

* Linting

* Linting
  • Loading branch information
rhugonnet authored Jan 20, 2023
1 parent 7e7ad86 commit 931c9dc
Show file tree
Hide file tree
Showing 6 changed files with 661 additions and 9 deletions.
127 changes: 126 additions & 1 deletion geoutils/georaster/raster.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from rasterio.features import shapes
from rasterio.plot import show as rshow
from rasterio.warp import Resampling
from scipy.ndimage import map_coordinates
from scipy.ndimage import distance_transform_edt, map_coordinates

import geoutils.geovector as gv
from geoutils._typing import AnyNumber, ArrayLike, DTypeLike
Expand Down Expand Up @@ -1146,6 +1146,17 @@ def copy(self: RasterType, new_array: np.ndarray | None = None) -> RasterType:

return cp

def equal_georeferenced_grid(self: RasterType, raster: RasterType) -> bool:
"""
Check that grid shape, geotransform and CRS are equal.
:param raster: Another Raster object
:return: Whether the two objects have the same georeferenced grid
"""

return all([self.shape == raster.shape, self.transform == raster.transform, self.crs == raster.crs])

@overload
def get_nanarray(self, return_mask: Literal[False] = False) -> np.ndarray:
...
Expand Down Expand Up @@ -2510,3 +2521,117 @@ def polygonize(
gdf.set_crs(self.crs, inplace=True)

return gv.Vector(gdf)

def proximity(
self,
vector: Vector | None = None,
target_values: list[float] | None = None,
geometry_type: str = "boundary",
in_or_out: Literal["in"] | Literal["out"] | Literal["both"] = "both",
distance_unit: Literal["pixel"] | Literal["georeferenced"] = "georeferenced",
) -> Raster:
"""
Proximity to this Raster's target pixels, or to a Vector's geometry, computed for each cell of this Raster's
grid.
When passing a Vector, by default, the boundary of the geometry will be used. The full geometry can be used by
passing "geometry", or any lower dimensional geometry attribute such as "centroid", "envelope" or "convex_hull".
See all geometry attributes in the Shapely documentation at https://shapely.readthedocs.io/.
:param vector: Vector for which to compute the proximity to geometry,
if not provided computed on this Raster target pixels.
:param target_values: (Only with Raster) List of target values to use for the proximity,
defaults to all non-zero values.
:param geometry_type: (Only with a Vector) Type of geometry to use for the proximity, defaults to 'boundary'.
:param in_or_out: (Only with a Vector) Compute proximity only 'in' or 'out'-side the geometry, or 'both'.
:param distance_unit: Distance unit, either 'georeferenced' or 'pixel'.
:return: Proximity raster.
"""

proximity = proximity_from_vector_or_raster(
raster=self,
vector=vector,
target_values=target_values,
geometry_type=geometry_type,
in_or_out=in_or_out,
distance_unit=distance_unit,
)

return self.copy(new_array=proximity)


# -----------------------------------------
# Additional stand-alone utility functions
# -----------------------------------------


def proximity_from_vector_or_raster(
raster: Raster,
vector: Vector | None = None,
target_values: list[float] | None = None,
geometry_type: str = "boundary",
in_or_out: Literal["in"] | Literal["out"] | Literal["both"] = "both",
distance_unit: Literal["pixel"] | Literal["georeferenced"] = "georeferenced",
) -> np.ndarray:
"""
(This function is defined here as mostly raster-based, but used in a class method for both Raster and Vector)
Proximity to a Raster's target values if no Vector is provided, otherwise to a Vector's geometry type
rasterized on the Raster.
:param raster: Raster to burn the proximity grid on.
:param vector: Vector for which to compute the proximity to geometry,
if not provided computed on the Raster target pixels.
:param target_values: (Only with a Raster) List of target values to use for the proximity,
defaults to all non-zero values.
:param geometry_type: (Only with a Vector) Type of geometry to use for the proximity, defaults to 'boundary'.
:param in_or_out: (Only with a Vector) Compute proximity only 'in' or 'out'-side the geometry, or 'both'.
:param distance_unit: Distance unit, either 'georeferenced' or 'pixel'.
"""

# 1/ First, if there is a vector input, we rasterize the geometry type
# (works with .boundary that is a LineString (.exterior exists, but is a LinearRing)
if vector is not None:
# We create a geodataframe with the geometry type
boundary_shp = gpd.GeoDataFrame(geometry=vector.ds.__getattr__(geometry_type), crs=vector.crs)
# We mask the pixels that make up the geometry type
mask_boundary = Vector(boundary_shp).create_mask(raster).squeeze()

else:
# We mask target pixels
if target_values is not None:
mask_boundary = np.logical_or.reduce([raster.get_nanarray() == target_val for target_val in target_values])
# Otherwise, all non-zero values are considered targets
else:
mask_boundary = raster.get_nanarray().astype(bool)

# 2/ Now, we compute the distance matrix relative to the masked geometry type
if distance_unit.lower() == "georeferenced":
sampling: int | tuple[float | int, float | int] = raster.res
elif distance_unit.lower() == "pixel":
sampling = 1
else:
raise ValueError('Distance unit must be either "georeferenced" or "pixel".')

# If not all pixels are targets, then we compute the distance
non_targets = np.count_nonzero(mask_boundary)
if non_targets > 0:
proximity = distance_transform_edt(~mask_boundary, sampling=sampling)
# Otherwise, pass an array full of nodata
else:
proximity = np.ones(np.shape(mask_boundary)) * np.nan

# 3/ If there was a vector input, apply the in_and_out argument to optionally mask inside/outside
if vector is not None:
if in_or_out == "both":
pass
elif in_or_out in ["in", "out"]:
mask_polygon = Vector(vector.ds).create_mask(raster).squeeze()
if in_or_out == "in":
proximity[~mask_polygon] = 0
else:
proximity[mask_polygon] = 0
else:
raise ValueError('The type of proximity must be one of "in", "out" or "both".')

return proximity
129 changes: 123 additions & 6 deletions geoutils/geovector.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ def query(self: VectorType, expression: str, inplace: bool = False) -> VectorTyp
"""
Query the Vector dataset with a valid Pandas expression.
:param expression: A python-like expression to evaluate. Example: "col1 > col2"
:param expression: A python-like expression to evaluate. Example: "col1 > col2".
:param inplace: Whether the query should modify the data in place or return a modified copy.
:returns: Vector resulting from the provided query expression or itself if inplace=True.
Expand All @@ -379,12 +379,105 @@ def query(self: VectorType, expression: str, inplace: bool = False) -> VectorTyp
new_vector.__init__(self.ds.query(expression)) # type: ignore
return new_vector # type: ignore

def proximity(
self,
raster: gu.Raster | None = None,
grid_size: tuple[int, int] = (1000, 1000),
geometry_type: str = "boundary",
in_or_out: Literal["in"] | Literal["out"] | Literal["both"] = "both",
distance_unit: Literal["pixel"] | Literal["georeferenced"] = "georeferenced",
) -> gu.Raster:
"""
Proximity to this Vector's geometry computed for each cell of a Raster grid, or for a grid size with
this Vector's extent.
If passed, the Raster georeferenced grid will be used to burn the proximity. If not, a grid size can be
passed to create a georeferenced grid with the bounds and CRS of this Vector.
By default, the boundary of the Vector's geometry will be used. The full geometry can be used by
passing "geometry",
or any lower dimensional geometry attribute such as "centroid", "envelope" or "convex_hull".
See all geometry attributes in the Shapely documentation at https://shapely.readthedocs.io/.
:param raster: Raster to burn the proximity grid on.
:param grid_size: If no Raster is provided, grid size to use with this Vector's extent and CRS
(defaults to 1000 x 1000).
:param geometry_type: Type of geometry to use for the proximity, defaults to 'boundary'.
:param in_or_out: Compute proximity only 'in' or 'out'-side the polygon, or 'both'.
:param distance_unit: Distance unit, either 'georeferenced' or 'pixel'.
:return: Proximity raster.
"""

# 0/ If no Raster is passed, create one on the Vector bounds of size 1000 x 1000
if raster is None:

# TODO: this bit of code is common in several vector functions (rasterize, etc): move out as common code?
# By default, use self's bounds
if self.bounds is None:
raise ValueError("To automatically rasterize on the vector, bounds need to be defined.")

# Calculate raster shape
left, bottom, right, top = self.bounds

# Calculate raster transform
transform = rio.transform.from_bounds(left, bottom, right, top, grid_size[0], grid_size[1])

raster = gu.Raster.from_array(data=np.zeros((1000, 1000)), transform=transform, crs=self.crs)

proximity = gu.georaster.raster.proximity_from_vector_or_raster(
raster=raster, vector=self, geometry_type=geometry_type, in_or_out=in_or_out, distance_unit=distance_unit
)

return raster.copy(new_array=proximity)

def buffer_metric(self, buffer_size: float) -> Vector:
"""
Buffer the vector in a metric system (UTM only).
The outlines are projected to a local UTM, then reverted to the original projection after buffering.
:param buffer_size: Buffering distance in meters
:return: Buffered shapefile
"""

from geoutils.projtools import latlon_to_utm, utm_to_epsg

# Get a rough centroid in geographic coordinates (ignore the warning that it is not the most precise):
with warnings.catch_warnings():
warnings.simplefilter(action="ignore", category=UserWarning)
shp_wgs84 = self.ds.to_crs(epsg=4326)
lat, lon = shp_wgs84.centroid.y.values[0], shp_wgs84.centroid.x.values[0]
del shp_wgs84

# Get the EPSG code of the local UTM
utm = latlon_to_utm(lat, lon)
epsg = utm_to_epsg(utm)

# Reproject the shapefile in the local UTM
ds_utm = self.ds.to_crs(epsg=epsg)

# Buffer the shapefile
ds_buffered = ds_utm.buffer(distance=buffer_size)
del ds_utm

# Revert-project the shapefile in the original CRS
ds_buffered_origproj = ds_buffered.to_crs(crs=self.ds.crs)
del ds_buffered

# Return a Vector object of the buffered GeoDataFrame
# TODO: Clarify what is conserved in the GeoSeries and what to pass the GeoDataFrame to not lose any attributes
vector_buffered = Vector(gpd.GeoDataFrame(geometry=ds_buffered_origproj.geometry, crs=self.ds.crs))

return vector_buffered

def get_bounds_projected(self, out_crs: CRS, densify_pts: int = 5000) -> rio.coords.BoundingBox:
"""
Return self's bounds in the given CRS.
:param out_crs: Output CRS
:param densify_pts: Maximum points to be added between image corners to account for non linear edges.
:param densify_pts: Maximum points to be added between image corners to account for nonlinear edges.
Reduce if time computation is really critical (ms) or increase if extent is not accurate enough.
"""

Expand All @@ -393,14 +486,14 @@ def get_bounds_projected(self, out_crs: CRS, densify_pts: int = 5000) -> rio.coo

return new_bounds

def buffer_without_overlap(self, buffer_size: int | float, plot: bool = False) -> Vector:
def buffer_without_overlap(self, buffer_size: int | float, metric: bool = True, plot: bool = False) -> Vector:
"""
Returns a Vector object containing self's geometries extended by a buffer, without overlapping each other.
The algorithm is based upon this tutorial: https://statnmap.com/2020-07-31-buffer-area-for-nearest-neighbour/.
The buffered polygons are created using Voronoi polygons in order to delineate the "area of influence" \
The buffered polygons are created using Voronoi polygons in order to delineate the "area of influence"
of each geometry.
The buffer is slightly inaccurate where two geometries touch, due to the nature of the Voronoi polygons,\
The buffer is slightly inaccurate where two geometries touch, due to the nature of the Voronoi polygons,
hence one geometry "steps" slightly on the neighbor buffer in some cases.
The algorithm may also yield unexpected results on very simple geometries.
Expand All @@ -416,12 +509,32 @@ def buffer_without_overlap(self, buffer_size: int | float, plot: bool = False) -
>>> plt.show() # doctest: +SKIP
:param buffer_size: Buffer size in self's coordinate system units.
:param metric: Whether to perform the buffering in a local metric system (default: True).
:param plot: Set to True to show intermediate plots, useful for understanding or debugging.
:returns: A Vector containing the buffered geometries.
"""

from geoutils.projtools import latlon_to_utm, utm_to_epsg

# Project in local UTM if metric is True
if metric:
# Get a rough centroid in geographic coordinates (ignore the warning that it is not the most precise):
with warnings.catch_warnings():
warnings.simplefilter(action="ignore", category=UserWarning)
shp_wgs84 = self.ds.to_crs(epsg=4326)
lat, lon = shp_wgs84.centroid.y.values[0], shp_wgs84.centroid.x.values[0]
del shp_wgs84

# Get the EPSG code of the local UTM
utm = latlon_to_utm(lat, lon)
epsg = utm_to_epsg(utm)

gdf = self.ds.to_crs(epsg=epsg)
else:
gdf = self.ds

# Dissolve all geometries into one
gdf = self.ds
merged = gdf.dissolve()

# Add buffer around geometries
Expand Down Expand Up @@ -478,6 +591,10 @@ def buffer_without_overlap(self, buffer_size: int | float, plot: bool = False) -
ax4.set_title("Final buffer")
plt.show()

# Reverse-project to the original CRS if metric is True
if metric:
merged_voronoi = merged_voronoi.to_crs(crs=self.crs)

return Vector(merged_voronoi)


Expand Down
47 changes: 47 additions & 0 deletions geoutils/projtools.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,53 @@
from shapely.geometry.polygon import Polygon


def latlon_to_utm(lat: float, lon: float) -> str:
"""
Get UTM zone for a given latitude and longitude coordinates.
:param lat: Latitude coordinate.
:param lon: Longitude coordinate.
:returns: UTM zone.
"""

if not (
isinstance(lat, (float, np.floating, int, np.integer))
and isinstance(lon, (float, np.floating, int, np.integer))
):
raise TypeError("Latitude and longitude must be floats or integers.")

if not -180 <= lon < 180:
raise ValueError("Longitude value is out of range [-180, 180[.")
if not -90 <= lat < 90:
raise ValueError("Latitude value is out of range [-90, 90[.")

# Get UTM zone from name string of crs info
utm_zone = pyproj.database.query_utm_crs_info(
"WGS 84", area_of_interest=pyproj.aoi.AreaOfInterest(lon, lat, lon, lat)
)[0].name.split(" ")[-1]

return str(utm_zone)


def utm_to_epsg(utm: str) -> int:
"""
Get EPSG code of UTM zone.
:param utm: UTM zone.
:return: EPSG of UTM zone.
"""

# Whether UTM is passed as single or double digits, homogenize to single-digit
utm = str(int(utm[:-1])) + utm[-1].upper()

# Get corresponding EPSG
epsg = pyproj.CRS(f"WGS 84 / UTM Zone {utm}").to_epsg()

return int(epsg)


def bounds2poly(
boundsGeom: list[float] | rio.io.DatasetReader,
in_crs: CRS | None = None,
Expand Down
Loading

0 comments on commit 931c9dc

Please sign in to comment.