From c440d6336e0d7be498ca34f7b054f41e00b44df3 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 4 Nov 2024 07:42:13 +0100 Subject: [PATCH 1/6] fix error where guess_geometry fails on multipolygons --- mapchete/config/parse.py | 19 +++++++++++++++++-- mapchete/geometry/filter.py | 2 +- test/test_config.py | 28 +++++++++++++++++++++++++++- 3 files changed, 45 insertions(+), 4 deletions(-) diff --git a/mapchete/config/parse.py b/mapchete/config/parse.py index 0562d19b..3212b657 100644 --- a/mapchete/config/parse.py +++ b/mapchete/config/parse.py @@ -11,6 +11,7 @@ from mapchete.config.models import ProcessConfig, ZoomParameters from mapchete.errors import GeometryTypeError from mapchete.geometry import is_type, reproject_geometry +from mapchete.geometry.types import Polygon, MultiPolygon from mapchete.io.vector import fiona_open from mapchete.path import MPath from mapchete.tile import BufferedTilePyramid @@ -205,7 +206,21 @@ def guess_geometry( crs = None # WKT or path: if isinstance(some_input, (str, MPath)): - if str(some_input).upper().startswith(("POLYGON ", "MULTIPOLYGON ")): + if ( + str(some_input) + .upper() + .startswith( + ( + "POINT ", + "MULTIPOINT ", + "LINESTRING ", + "MULTILINESTRING ", + "POLYGON ", + "MULTIPOLYGON ", + "GEOMETRYCOLLECTION ", + ) + ) + ): geom = wkt.loads(some_input) else: path = MPath.from_inp(some_input) @@ -226,7 +241,7 @@ def guess_geometry( ) if not geom.is_valid: # pragma: no cover raise TypeError("area is not a valid geometry") - if not is_type(geom, "Polygon", singlepart_equivalent_matches=True): + if not is_type(geom, target_type=(Polygon, MultiPolygon)): raise GeometryTypeError( f"area must either be a Polygon or a MultiPolygon, not {geom.geom_type}" ) diff --git a/mapchete/geometry/filter.py b/mapchete/geometry/filter.py index c8b036e2..d8495a75 100644 --- a/mapchete/geometry/filter.py +++ b/mapchete/geometry/filter.py @@ -35,7 +35,7 @@ def omit_empty_geometries(geometry: Geometry) -> Generator[Geometry, None, None] def is_type( geometry: Geometry, - target_type: Union[GeometryTypeLike, Tuple[GeometryTypeLike]], + target_type: Union[GeometryTypeLike, Tuple[GeometryTypeLike, ...]], singlepart_equivalent_matches: bool = True, multipart_equivalent_matches: bool = True, ) -> bool: diff --git a/test/test_config.py b/test/test_config.py index cf80a22d..dfc688a4 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -16,7 +16,7 @@ from mapchete.config.models import DaskAdaptOptions, DaskSpecs from mapchete.config.parse import bounds_from_opts, guess_geometry from mapchete.config.process_func import ProcessFunc -from mapchete.errors import MapcheteConfigError +from mapchete.errors import GeometryTypeError, MapcheteConfigError from mapchete.io import fiona_open, rasterio_open from mapchete.path import MPath from mapchete.bounds import Bounds @@ -412,6 +412,32 @@ def test_guess_geometry(aoi_br_geojson): guess_geometry(area.centroid) +@pytest.mark.parametrize( + "geometry", + [ + lazy_fixture("polygon"), + lazy_fixture("multipolygon"), + ], +) +def test_guess_geometry_types(geometry): + assert guess_geometry(geometry.wkt)[0].is_valid + + +@pytest.mark.parametrize( + "geometry", + [ + lazy_fixture("point"), + lazy_fixture("multipoint"), + lazy_fixture("linestring"), + lazy_fixture("multilinestring"), + lazy_fixture("geometrycollection"), + ], +) +def test_guess_geometry_types_errors(geometry): + with pytest.raises(GeometryTypeError): + guess_geometry(geometry.wkt) + + def test_bounds_from_opts_wkt(wkt_geom): # WKT assert isinstance(bounds_from_opts(wkt_geometry=wkt_geom), Bounds) From e8c711be0b0e4df60473b0dda081dc1d9bbf65ad Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 4 Nov 2024 07:54:18 +0100 Subject: [PATCH 2/6] fix some typing smells --- mapchete/config/parse.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/mapchete/config/parse.py b/mapchete/config/parse.py index 3212b657..d8dfc238 100644 --- a/mapchete/config/parse.py +++ b/mapchete/config/parse.py @@ -149,8 +149,8 @@ def bounds_from_opts( reproj = reproject_geometry( Point(x, y), src_crs=point_crs, dst_crs=tp.crs ) - x = reproj.x - y = reproj.y + x = reproj.x # type: ignore + y = reproj.y # type: ignore zoom_levels = get_zoom_levels( process_zoom_levels=raw_conf["zoom_levels"], init_zoom_levels=zoom ) @@ -221,13 +221,13 @@ def guess_geometry( ) ) ): - geom = wkt.loads(some_input) + geom = wkt.loads(str(some_input)) else: - path = MPath.from_inp(some_input) - with path.fio_env(): - with fiona_open(str(path.absolute_path(base_dir))) as src: - geom = unary_union([shape(f["geometry"]) for f in src]) - crs = src.crs + with fiona_open( + str(MPath.from_inp(some_input).absolute_path(base_dir)) + ) as src: + geom = unary_union([shape(f["geometry"]) for f in src]) + crs = src.crs # GeoJSON mapping elif isinstance(some_input, dict): geom = shape(some_input) From fc04cedb96363bf066e8afea809d64b53d4a6323 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 4 Nov 2024 07:57:56 +0100 Subject: [PATCH 3/6] use IndexedFeatures to get union geometry --- mapchete/config/parse.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/mapchete/config/parse.py b/mapchete/config/parse.py index d8dfc238..5f5dbacb 100644 --- a/mapchete/config/parse.py +++ b/mapchete/config/parse.py @@ -5,14 +5,13 @@ from shapely import wkt from shapely.geometry import Point, shape from shapely.geometry.base import BaseGeometry -from shapely.ops import unary_union from mapchete.bounds import Bounds from mapchete.config.models import ProcessConfig, ZoomParameters from mapchete.errors import GeometryTypeError from mapchete.geometry import is_type, reproject_geometry from mapchete.geometry.types import Polygon, MultiPolygon -from mapchete.io.vector import fiona_open +from mapchete.io.vector.indexed_features import IndexedFeatures from mapchete.path import MPath from mapchete.tile import BufferedTilePyramid from mapchete.types import BoundsLike, MPathLike, ZoomLevelsLike @@ -223,11 +222,11 @@ def guess_geometry( ): geom = wkt.loads(str(some_input)) else: - with fiona_open( - str(MPath.from_inp(some_input).absolute_path(base_dir)) - ) as src: - geom = unary_union([shape(f["geometry"]) for f in src]) - crs = src.crs + features = IndexedFeatures.from_file( + MPath.from_inp(some_input).absolute_path(base_dir) + ) + geom = features.read_union_geometry() + crs = features.crs # GeoJSON mapping elif isinstance(some_input, dict): geom = shape(some_input) From 837de4c956465b78207994fb71339dd50d85c0a4 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 4 Nov 2024 09:20:21 +0100 Subject: [PATCH 4/6] make check for known CRSes to determine their bounds more solid; fix code smells --- mapchete/geometry/reproject.py | 78 +++++++++++++++++---------------- mapchete/geometry/segmentize.py | 2 +- 2 files changed, 42 insertions(+), 38 deletions(-) diff --git a/mapchete/geometry/reproject.py b/mapchete/geometry/reproject.py index cb830e18..6665564f 100644 --- a/mapchete/geometry/reproject.py +++ b/mapchete/geometry/reproject.py @@ -8,12 +8,19 @@ from shapely.geometry import mapping, shape from mapchete.bounds import Bounds -from mapchete.errors import GeometryTypeError, ReprojectionFailed +from mapchete.errors import ReprojectionFailed from mapchete.geometry.latlon import LATLON_CRS from mapchete.geometry.repair import repair from mapchete.geometry.segmentize import get_segmentize_value, segmentize_geometry from mapchete.geometry.shape import to_shape -from mapchete.types import Geometry, GeometryLike +from mapchete.types import ( + Geometry, + GeometryLike, + Polygon, + LinearRing, + LineString, + MultiPolygon, +) from mapchete.types import CRSLike from mapchete.validate import validate_crs @@ -22,24 +29,23 @@ CRS_BOUNDS = { # http://spatialreference.org/ref/epsg/wgs-84/ - "epsg:4326": Bounds(-180.0, -90.0, 180.0, 90.0), + CRS.from_epsg(4326): Bounds(-180.0, -90.0, 180.0, 90.0), # unknown source - "epsg:3857": Bounds(-180.0, -85.0511, 180.0, 85.0511), + CRS.from_epsg(3857): Bounds(-180.0, -85.0511, 180.0, 85.0511), # http://spatialreference.org/ref/epsg/3035/ - "epsg:3035": Bounds(-10.6700, 34.5000, 31.5500, 71.0500), + CRS.from_epsg(3035): Bounds(-10.6700, 34.5000, 31.5500, 71.0500), } def get_crs_bounds(crs: CRS) -> Bounds: - if crs.is_epsg_code: - try: - # get bounds from known CRSes - return CRS_BOUNDS[crs.get("init")] - except KeyError: - # try to get bounds using pyproj - area_of_use = pyproj.CRS(crs.to_epsg()).area_of_use - if area_of_use: - return Bounds.from_inp(area_of_use.bounds) + try: + # get bounds from known CRSes + return CRS_BOUNDS[crs] + except KeyError: + # try to get bounds using pyproj + area_of_use = pyproj.CRS(crs.to_epsg()).area_of_use + if area_of_use: + return Bounds.from_inp(area_of_use.bounds) raise ValueError(f"bounds of CRS {crs} could not be determined") @@ -127,14 +133,13 @@ def reproject_geometry( # segmentize clipped geometry using one 100th of with or height depending on # which is shorter - if segmentize_on_clip or segmentize: - try: - clipped_latlon = segmentize_geometry( - clipped_latlon, - get_segmentize_value(clipped_latlon, segmentize_fraction), - ) - except GeometryTypeError: - pass + if (segmentize_on_clip or segmentize) and isinstance( + clipped_latlon, (Polygon, LinearRing, LineString, MultiPolygon) + ): + clipped_latlon = segmentize_geometry( + clipped_latlon, + get_segmentize_value(clipped_latlon, segmentize_fraction), + ) # clip geometry dst_crs boundaries and return return _reproject_geom( @@ -148,21 +153,20 @@ def reproject_geometry( # return without clipping if destination CRS does not have defined bounds try: - if segmentize: - try: - return _reproject_geom( - segmentize_geometry( - geometry, - get_segmentize_value(geometry, segmentize_fraction), - ), - src_crs, - dst_crs, - validity_check, - antimeridian_cutting, - fiona_env, - ) - except GeometryTypeError: - pass + if segmentize and isinstance( + geometry, (Polygon, LinearRing, LineString, MultiPolygon) + ): + return _reproject_geom( + segmentize_geometry( + geometry, + get_segmentize_value(geometry, segmentize_fraction), + ), + src_crs, + dst_crs, + validity_check, + antimeridian_cutting, + fiona_env, + ) return _reproject_geom( geometry, src_crs, diff --git a/mapchete/geometry/segmentize.py b/mapchete/geometry/segmentize.py index 334c303d..13ae7c0d 100644 --- a/mapchete/geometry/segmentize.py +++ b/mapchete/geometry/segmentize.py @@ -61,5 +61,5 @@ def coords_segmentize(coords: CoordArrays, segmentize_value: float) -> CoordArra def get_segmentize_value(geometry: Geometry, segmentize_fraction: float) -> float: """Divide the smaller one of geometry height or width by segmentize fraction.""" - bounds = Bounds.from_inp(geometry, strict=False) + bounds = Bounds.from_inp(geometry.bounds, strict=False) return min([bounds.height, bounds.width]) / segmentize_fraction From 86b524a5f4063f2f35db714d75b93ca1b89b0f19 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 4 Nov 2024 10:22:33 +0100 Subject: [PATCH 5/6] make sure CRS bounds have latlon CRS set; fix pyproj.CRS parsing issue --- mapchete/geometry/repair.py | 22 ++++++++++++---------- mapchete/geometry/reproject.py | 26 +++++++++++++++++++------- 2 files changed, 31 insertions(+), 17 deletions(-) diff --git a/mapchete/geometry/repair.py b/mapchete/geometry/repair.py index 7d1291aa..3e8ba48e 100644 --- a/mapchete/geometry/repair.py +++ b/mapchete/geometry/repair.py @@ -4,16 +4,18 @@ from mapchete.geometry.types import Geometry, MultiPolygon, Polygon -def repair(geometry: Geometry) -> Geometry: - repaired = ( - geometry.buffer(0) - if isinstance(geometry, (Polygon, MultiPolygon)) - else geometry - ) - if repaired.is_valid: - return repaired +def repair(geometry: Geometry, normalize: bool = True) -> Geometry: + if isinstance(geometry, (Polygon, MultiPolygon)): + out = geometry.buffer(0) + else: + out = geometry + + if normalize: + out = out.normalize() + + if out.is_valid: + return out else: raise TopologicalError( - "geometry is invalid (%s) and cannot be repaired" - % explain_validity(repaired) + f"geometry is invalid ({explain_validity(out)}) and cannot be repaired" ) diff --git a/mapchete/geometry/reproject.py b/mapchete/geometry/reproject.py index 6665564f..9dba36a2 100644 --- a/mapchete/geometry/reproject.py +++ b/mapchete/geometry/reproject.py @@ -3,6 +3,7 @@ import fiona import pyproj +from pyproj.exceptions import CRSError from fiona.transform import transform_geom from rasterio.crs import CRS from shapely.geometry import mapping, shape @@ -29,11 +30,11 @@ CRS_BOUNDS = { # http://spatialreference.org/ref/epsg/wgs-84/ - CRS.from_epsg(4326): Bounds(-180.0, -90.0, 180.0, 90.0), + CRS.from_epsg(4326): Bounds(-180.0, -90.0, 180.0, 90.0, crs=LATLON_CRS), # unknown source - CRS.from_epsg(3857): Bounds(-180.0, -85.0511, 180.0, 85.0511), + CRS.from_epsg(3857): Bounds(-180.0, -85.0511, 180.0, 85.0511, crs=LATLON_CRS), # http://spatialreference.org/ref/epsg/3035/ - CRS.from_epsg(3035): Bounds(-10.6700, 34.5000, 31.5500, 71.0500), + CRS.from_epsg(3035): Bounds(-10.6700, 34.5000, 31.5500, 71.0500, crs=LATLON_CRS), } @@ -42,15 +43,26 @@ def get_crs_bounds(crs: CRS) -> Bounds: # get bounds from known CRSes return CRS_BOUNDS[crs] except KeyError: + logger.debug("try to determine CRS bounds using pyproj ...") # try to get bounds using pyproj - area_of_use = pyproj.CRS(crs.to_epsg()).area_of_use - if area_of_use: - return Bounds.from_inp(area_of_use.bounds) + try: + # on UTM CRS, the area_of_use is None if pyproj.CRS is initialized with CRS.to_proj4(), thus + # prefer using CRS.to_epsg() and only use CRS.to_proj4() as backup + pyproj_crs = ( + pyproj.CRS(crs.to_epsg()) + if crs.is_epsg_code + else pyproj.CRS(crs.to_proj4()) + ) + if pyproj_crs.area_of_use: + return Bounds.from_inp(pyproj_crs.area_of_use.bounds, crs=LATLON_CRS) + except CRSError as exc: + logger.debug(exc) + pass raise ValueError(f"bounds of CRS {crs} could not be determined") def crs_is_epsg_4326(crs: CRS) -> bool: - return crs.is_epsg_code and crs.get("init") == "epsg:4326" + return crs == LATLON_CRS def reproject_geometry( From 8c4081729ffec704bc58bcc89aa4783b44e42a37 Mon Sep 17 00:00:00 2001 From: Joachim Ungar Date: Mon, 4 Nov 2024 10:38:21 +0100 Subject: [PATCH 6/6] skip exception catch in test coverage --- mapchete/geometry/reproject.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mapchete/geometry/reproject.py b/mapchete/geometry/reproject.py index 9dba36a2..d6148fcf 100644 --- a/mapchete/geometry/reproject.py +++ b/mapchete/geometry/reproject.py @@ -55,7 +55,7 @@ def get_crs_bounds(crs: CRS) -> Bounds: ) if pyproj_crs.area_of_use: return Bounds.from_inp(pyproj_crs.area_of_use.bounds, crs=LATLON_CRS) - except CRSError as exc: + except CRSError as exc: # pragma: no cover logger.debug(exc) pass raise ValueError(f"bounds of CRS {crs} could not be determined")