diff --git a/Cargo.toml b/Cargo.toml index 51d1e41..793f557 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,17 +1,14 @@ [workspace] resolver = "2" -members = [ - "h3ronpy", - "crates/h3arrow" -] +members = ["h3ronpy", "crates/h3arrow"] [workspace.dependencies] geo = "0.28" geo-types = "0.7" h3o = { version = "0.6" } rayon = "^1" -arrow = { version = "52" } +arrow = { version = "53" } [profile.release] lto = "thin" diff --git a/crates/h3arrow/Cargo.toml b/crates/h3arrow/Cargo.toml index dee729d..d57730b 100644 --- a/crates/h3arrow/Cargo.toml +++ b/crates/h3arrow/Cargo.toml @@ -21,10 +21,13 @@ spatial_index = ["dep:rstar"] [dependencies] ahash = "0.8" arrow = { workspace = true } -geoarrow = { version = "0.3", optional = true, features = ["geozero"] } +geoarrow = { git = "https://github.com/geoarrow/geoarrow-rs", rev = "49fd4cbdc4bc08a2f1e0341ec7df700df18d2bdb", optional = true } geo-types = { workspace = true } geo = { workspace = true } -geozero = { version = "^0.13", default-features = false, features = ["with-geo", "with-wkb"], optional = true } +geozero = { version = "^0.14", default-features = false, features = [ + "with-geo", + "with-wkb", +], optional = true } h3o = { workspace = true, features = ["geo"] } nom = "7" rayon = { workspace = true, optional = true } diff --git a/crates/h3arrow/src/algorithm/string.rs b/crates/h3arrow/src/algorithm/string.rs index 6b2ed40..a065d7b 100644 --- a/crates/h3arrow/src/algorithm/string.rs +++ b/crates/h3arrow/src/algorithm/string.rs @@ -246,15 +246,13 @@ mod test { #[test] fn parse_utf8_array_cells_invalid_fail() { - let stringarray = - GenericStringArray::::from_iter(vec![Some("invalid".to_string())].into_iter()); + let stringarray = GenericStringArray::::from_iter(vec![Some("invalid".to_string())]); assert!(CellIndexArray::parse_genericstringarray(&stringarray, false).is_err()); } #[test] fn parse_utf8_array_cells_invalid_to_invalid() { - let utf8_array = - GenericStringArray::::from_iter(vec![Some("invalid".to_string())].into_iter()); + let utf8_array = GenericStringArray::::from_iter(vec![Some("invalid".to_string())]); let cell_array = CellIndexArray::parse_genericstringarray(&utf8_array, true).unwrap(); assert_eq!(1, cell_array.len()); assert!(cell_array.iter().all(|v| v.is_none())) @@ -268,9 +266,9 @@ mod test { let stringarray: GenericStringArray = cellindexarray.to_genericstringarray().unwrap(); assert_eq!(cellindexarray.len(), stringarray.len()); - assert_eq!(stringarray.is_valid(0), true); + assert!(stringarray.is_valid(0)); assert_eq!(stringarray.value(0), "89283080ddbffff"); - assert_eq!(stringarray.is_valid(1), false); + assert!(!stringarray.is_valid(1)); } #[test] diff --git a/crates/h3arrow/src/array/from_geoarrow.rs b/crates/h3arrow/src/array/from_geoarrow.rs index ee1e150..feacc74 100644 --- a/crates/h3arrow/src/array/from_geoarrow.rs +++ b/crates/h3arrow/src/array/from_geoarrow.rs @@ -9,33 +9,13 @@ use crate::error::Error; use arrow::array::OffsetSizeTrait; use geo_types::Geometry; use geoarrow::array::WKBArray; -use geoarrow::trait_::GeometryArrayAccessor; -use geoarrow::GeometryArrayTrait; +use geoarrow::trait_::ArrayAccessor; +use geoarrow::ArrayBase; use h3o::CellIndex; #[cfg(feature = "rayon")] use rayon::prelude::{IntoParallelIterator, ParallelIterator}; macro_rules! impl_to_cells { - ($array_type:ty, $offset:tt) => { - impl<$offset: OffsetSizeTrait> ToCellListArray<$offset> for $array_type { - fn to_celllistarray( - &self, - options: &ToCellsOptions, - ) -> Result, Error> { - self.iter_geo() - .map(|v| v.map(Geometry::from)) - .to_celllistarray(options) - } - } - - impl<$offset: OffsetSizeTrait> ToCellIndexArray for $array_type { - fn to_cellindexarray(&self, options: &ToCellsOptions) -> Result { - self.iter_geo() - .map(|v| v.map(Geometry::from)) - .to_cellindexarray(options) - } - } - }; ($array_type:ty) => { impl ToCellListArray for $array_type { fn to_celllistarray( @@ -58,12 +38,12 @@ macro_rules! impl_to_cells { }; } -impl_to_cells!(geoarrow::array::LineStringArray, O); -impl_to_cells!(geoarrow::array::MultiLineStringArray, O); -impl_to_cells!(geoarrow::array::MultiPointArray, O); -impl_to_cells!(geoarrow::array::MultiPolygonArray, O); +impl_to_cells!(geoarrow::array::LineStringArray<2>); +impl_to_cells!(geoarrow::array::MultiLineStringArray<2>); +impl_to_cells!(geoarrow::array::MultiPointArray<2>); +impl_to_cells!(geoarrow::array::MultiPolygonArray<2>); impl_to_cells!(geoarrow::array::PointArray<2>); -impl_to_cells!(geoarrow::array::PolygonArray, O); +impl_to_cells!(geoarrow::array::PolygonArray<2>); impl ToCellListArray for WKBArray { fn to_celllistarray( diff --git a/crates/h3arrow/src/array/to_geoarrow.rs b/crates/h3arrow/src/array/to_geoarrow.rs index 5c56c57..4d25a3f 100644 --- a/crates/h3arrow/src/array/to_geoarrow.rs +++ b/crates/h3arrow/src/array/to_geoarrow.rs @@ -14,7 +14,7 @@ pub trait ToGeoArrowPolygons { fn to_geoarrow_polygons( &self, use_degrees: bool, - ) -> Result, Self::Error>; + ) -> Result, Self::Error>; } impl ToGeoArrowPolygons for T @@ -26,7 +26,7 @@ where fn to_geoarrow_polygons( &self, use_degrees: bool, - ) -> Result, Self::Error> { + ) -> Result, Self::Error> { Ok(self.to_polygons(use_degrees)?.into()) } } @@ -51,7 +51,7 @@ pub trait ToGeoArrowLineStrings { fn to_geoarrow_lines( &self, use_degrees: bool, - ) -> Result, Self::Error>; + ) -> Result, Self::Error>; } impl ToGeoArrowLineStrings for T @@ -62,7 +62,7 @@ where fn to_geoarrow_lines( &self, use_degrees: bool, - ) -> Result, Self::Error> { + ) -> Result, Self::Error> { Ok(self.to_linestrings(use_degrees)?.into()) } } diff --git a/crates/h3arrow/src/array/validity.rs b/crates/h3arrow/src/array/validity.rs index 8031f48..1d1b511 100644 --- a/crates/h3arrow/src/array/validity.rs +++ b/crates/h3arrow/src/array/validity.rs @@ -3,6 +3,7 @@ use arrow::array::UInt64Array; /// Conversion corresponding to `From` with the difference that the validity mask /// is set accordingly to the validity to the contained values. pub trait FromWithValidity { + #[allow(dead_code)] fn from_with_validity(value: T) -> Self; } diff --git a/crates/h3arrow/src/spatial_index.rs b/crates/h3arrow/src/spatial_index.rs index 1b323f6..73a3fea 100644 --- a/crates/h3arrow/src/spatial_index.rs +++ b/crates/h3arrow/src/spatial_index.rs @@ -212,13 +212,13 @@ mod tests { assert_eq!(mask.len(), 4); assert!(mask.is_valid(0)); - assert_eq!(mask.value(0), false); + assert!(!mask.value(0)); assert!(mask.is_valid(1)); - assert_eq!(mask.value(1), true); + assert!(mask.value(1)); assert!(mask.is_valid(2)); - assert_eq!(mask.value(2), false); + assert!(!mask.value(2)); assert!(!mask.is_valid(3)); } @@ -237,13 +237,13 @@ mod tests { assert_eq!(mask.len(), 4); assert!(mask.is_valid(0)); - assert_eq!(mask.value(0), true); + assert!(mask.value(0)); assert!(mask.is_valid(1)); - assert_eq!(mask.value(1), false); + assert!(!mask.value(1)); assert!(mask.is_valid(2)); - assert_eq!(mask.value(2), false); + assert!(!mask.value(2)); assert!(!mask.is_valid(3)); } diff --git a/h3ronpy/Cargo.toml b/h3ronpy/Cargo.toml index 4a39a44..14c34bc 100644 --- a/h3ronpy/Cargo.toml +++ b/h3ronpy/Cargo.toml @@ -14,17 +14,27 @@ name = "h3ronpy" crate-type = ["cdylib"] [dependencies] -arrow = { workspace = true, features = ["pyarrow"] } +arrow = { workspace = true } env_logger = "^0.11" geo-types = { workspace = true } geo = { workspace = true } h3arrow = { path = "../crates/h3arrow", features = ["geoarrow", "rayon"] } hashbrown = "0.14" itertools = "0.13" -ndarray = { version = "0.15", features = ["rayon"] } -numpy = "0.21" +ndarray = { version = "0.16", features = ["rayon"] } +numpy = "0.22" ordered-float = ">=2.0.1" -py_geo_interface = { version = "0.8", features = ["f64", "wkb"] } -pyo3 = { version = "^0.21", features = ["extension-module", "abi3", "abi3-py39"] } -rasterh3 = { version = "^0.8", features = ["rayon"] } +py_geo_interface = { git = "https://github.com/nmandery/py_geo_interface", rev = "36723cdbabc2a7aad1746a8c06db17b4e39ce3b9", features = [ + "f64", + "wkb", +] } +pyo3 = { version = "^0.22", features = [ + "extension-module", + "abi3", + "abi3-py39", +] } +pyo3-arrow = { version = "0.5.1", default-features = false } +rasterh3 = { git = "https://github.com/kylebarron/rasterh3", branch = "kyle/bump-ndarray", features = [ + "rayon", +] } rayon = { workspace = true } diff --git a/h3ronpy/pyproject.toml b/h3ronpy/pyproject.toml index 412f92e..4f406d9 100644 --- a/h3ronpy/pyproject.toml +++ b/h3ronpy/pyproject.toml @@ -1,28 +1,30 @@ [build-system] -requires = [ - "maturin>=1.7", -] +requires = ["maturin>=1.7"] build-backend = "maturin" [tool.pytest.ini_options] minversion = "6.0" addopts = "--doctest-modules -v -s" -testpaths = [ - "tests" -] +testpaths = ["tests"] [tool.ruff] # Never enforce `E501` (line length violations). ignore = ["E501"] +select = [ + # Pyflakes + "F", + # Pycodestyle + # "E", + "W", + # isort + "I", +] [project] name = "h3ronpy" readme = "../README.rst" -dependencies = [ - "numpy<2", - "pyarrow>=17.0" -] +dependencies = ["numpy", "arro3-core>=0.4"] classifiers = [ "Programming Language :: Python :: 3", "Topic :: Scientific/Engineering :: GIS", @@ -31,20 +33,17 @@ classifiers = [ [project.optional-dependencies] -polars = [ - "polars>=1" -] -pandas = [ - "geopandas>=1" -] +polars = ["polars>=1"] +pandas = ["geopandas>=1"] test = [ - "rasterio", + "rasterio>=1.4", "Shapely>=1.7", "pytest>=6", - "h3>=3.7", - "pytest-benchmark" + "h3<4", + "pytest-benchmark", + "pyarrow>=15", ] [tool.maturin] python-source = "python" -module-name = "h3ronpy.h3ronpyrs" \ No newline at end of file +module-name = "h3ronpy.h3ronpyrs" diff --git a/h3ronpy/python/h3ronpy/__init__.py b/h3ronpy/python/h3ronpy/__init__.py index 5af66a1..e161dc8 100644 --- a/h3ronpy/python/h3ronpy/__init__.py +++ b/h3ronpy/python/h3ronpy/__init__.py @@ -8,7 +8,10 @@ if not _native.is_release_build(): import warnings - warnings.warn("h3ronpy has not been compiled in release mode. Performance will be degraded.", RuntimeWarning) + warnings.warn( + "h3ronpy has not been compiled in release mode. Performance will be degraded.", + RuntimeWarning, + ) __all__ = [ diff --git a/h3ronpy/python/h3ronpy/arrow/__init__.py b/h3ronpy/python/h3ronpy/arrow.py similarity index 71% rename from h3ronpy/python/h3ronpy/arrow/__init__.py rename to h3ronpy/python/h3ronpy/arrow.py index c99f61a..5edaed4 100644 --- a/h3ronpy/python/h3ronpy/arrow/__init__.py +++ b/h3ronpy/python/h3ronpy/arrow.py @@ -1,35 +1,50 @@ -from typing import Union +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast + +from arro3.core import Array, ChunkedArray, DataType, RecordBatch +from arro3.core.types import ( + ArrowArrayExportable, + ArrowSchemaExportable, + ArrowStreamExportable, +) -import pyarrow as pa from h3ronpy.h3ronpyrs import op -try: +if TYPE_CHECKING: import polars as pl - _HAS_POLARS = True -except ImportError: - _HAS_POLARS = False +def _to_arrow_array( + arr: Union[ArrowArrayExportable, ArrowStreamExportable, pl.Series, Sequence[Any]], + dtype: Optional[ArrowSchemaExportable] = None, +) -> Array: + if hasattr(arr, "__arrow_c_array__"): + array = Array.from_arrow(cast(ArrowArrayExportable, arr)) + elif hasattr(arr, "__arrow_c_stream__"): + ca = ChunkedArray.from_arrow(cast(ArrowStreamExportable, arr)) + array = ca.combine_chunks() + elif hasattr(arr, "to_arrow"): + ca = ChunkedArray.from_arrow(arr.to_arrow()) # type: ignore + array = ca.combine_chunks() + elif dtype is not None: + # From arbitrary non-arrow input + array = Array(cast(Sequence[Any], arr), type=dtype) + else: + raise ValueError("Unsupported input to _to_arrow_array. Expected array-like or series-like.") -def _to_arrow_array(arr, dtype) -> pa.Array: - converted = None - if _HAS_POLARS: - if isinstance(arr, pl.Series): - converted = arr.to_arrow() + # Cast if dtype was provided + if dtype is not None: + array = array.cast(dtype) - if converted is None: - converted = pa.array(arr, type=dtype) + return array - if isinstance(arr, pa.ChunkedArray): - converted = converted.combine_chunks() - return converted +def _to_uint64_array(arr) -> Array: + return _to_arrow_array(arr, DataType.uint64()) -def _to_uint64_array(arr) -> pa.Array: - return _to_arrow_array(arr, pa.uint64()) - -def change_resolution(arr, resolution: int) -> pa.Array: +def change_resolution(arr, resolution: int) -> Array: """ Change the H3 resolutions of all contained values to `resolution`. @@ -41,7 +56,7 @@ def change_resolution(arr, resolution: int) -> pa.Array: return op.change_resolution(_to_uint64_array(arr), resolution) -def change_resolution_list(arr, resolution: int) -> pa.Array: +def change_resolution_list(arr, resolution: int) -> Array: """ Change the H3 resolutions of all contained values to `resolution`. @@ -53,7 +68,7 @@ def change_resolution_list(arr, resolution: int) -> pa.Array: return op.change_resolution_list(_to_uint64_array(arr), resolution) -def change_resolution_paired(arr, resolution: int) -> pa.Table: +def change_resolution_paired(arr, resolution: int) -> RecordBatch: """ Returns a table/dataframe with two columns: `cell_before` and `cell_after` with the cells h3index before and after the resolution change. @@ -64,7 +79,7 @@ def change_resolution_paired(arr, resolution: int) -> pa.Table: return op.change_resolution_paired(_to_uint64_array(arr), resolution) -def cells_resolution(arr) -> pa.Array: +def cells_resolution(arr) -> Array: """ Generates a new array containing the resolution of each cell of the input array. @@ -75,7 +90,7 @@ def cells_resolution(arr) -> pa.Array: return op.cells_resolution(_to_uint64_array(arr)) -def cells_parse(arr, set_failing_to_invalid: bool = False) -> pa.Array: +def cells_parse(arr, set_failing_to_invalid: bool = False) -> Array: """ Parse H3 cells from string arrays. @@ -89,10 +104,13 @@ def cells_parse(arr, set_failing_to_invalid: bool = False) -> pa.Array: * numeric integer strings (Example: ``600436454824345599``) * strings like ``[x], [y], [resolution]`` or ``[x]; [y]; [resolution]``. (Example: ``10.2,45.5,5``) """ - return op.cells_parse(_to_arrow_array(arr, pa.utf8()), set_failing_to_invalid=set_failing_to_invalid) + return op.cells_parse( + _to_arrow_array(arr, DataType.utf8()), + set_failing_to_invalid=set_failing_to_invalid, + ) -def vertexes_parse(arr, set_failing_to_invalid: bool = False) -> pa.Array: +def vertexes_parse(arr, set_failing_to_invalid: bool = False) -> Array: """ Parse H3 vertexes from string arrays. @@ -100,10 +118,13 @@ def vertexes_parse(arr, set_failing_to_invalid: bool = False) -> pa.Array: the successful parsing of an individual element. Having this set to false will cause the method to fail upon encountering the first unparsable value. """ - return op.vertexes_parse(_to_arrow_array(arr, pa.utf8()), set_failing_to_invalid=set_failing_to_invalid) + return op.vertexes_parse( + _to_arrow_array(arr, DataType.utf8()), + set_failing_to_invalid=set_failing_to_invalid, + ) -def directededges_parse(arr, set_failing_to_invalid: bool = False) -> pa.Array: +def directededges_parse(arr, set_failing_to_invalid: bool = False) -> Array: """ Parse H3 directed edges from string arrays. @@ -111,10 +132,13 @@ def directededges_parse(arr, set_failing_to_invalid: bool = False) -> pa.Array: the successful parsing of an individual element. Having this set to false will cause the method to fail upon encountering the first unparsable value. """ - return op.directededges_parse(_to_arrow_array(arr, pa.utf8()), set_failing_to_invalid=set_failing_to_invalid) + return op.directededges_parse( + _to_arrow_array(arr, DataType.utf8()), + set_failing_to_invalid=set_failing_to_invalid, + ) -def compact(arr, mixed_resolutions: bool = False) -> pa.Array: +def compact(arr, mixed_resolutions: bool = False) -> Array: """ Compact the given cells @@ -124,7 +148,7 @@ def compact(arr, mixed_resolutions: bool = False) -> pa.Array: return op.compact(_to_uint64_array(arr), mixed_resolutions=mixed_resolutions) -def uncompact(arr, target_resolution: int) -> pa.Array: +def uncompact(arr, target_resolution: int) -> Array: """ Uncompact the given cells to the resolution `target_resolution`. @@ -135,13 +159,13 @@ def uncompact(arr, target_resolution: int) -> pa.Array: def _make_h3index_valid_wrapper(fn, h3index_name, wrapper_name): - def valid_wrapper(arr, booleanarray: bool = False) -> pa.Array: + def valid_wrapper(arr, booleanarray: bool = False) -> Array: return fn(_to_uint64_array(arr), booleanarray=booleanarray) valid_wrapper.__doc__ = f""" Validate an array of potentially invalid {h3index_name} values by returning a new UInt64 array with the validity mask set accordingly. - + If `booleanarray` is set to True, a boolean array describing the validity will be returned instead. """ @@ -154,50 +178,50 @@ def valid_wrapper(arr, booleanarray: bool = False) -> pa.Array: directededges_valid = _make_h3index_valid_wrapper(op.cells_valid, "directed edge", "directededges_valid") -def grid_disk(cellarray, k: int, flatten: bool = False) -> Union[pa.ListArray, pa.Array]: +def grid_disk(cellarray, k: int, flatten: bool = False) -> Array: return op.grid_disk(_to_uint64_array(cellarray), k, flatten=flatten) -def grid_disk_distances(cellarray, k: int, flatten: bool = False) -> pa.Table: +def grid_disk_distances(cellarray, k: int, flatten: bool = False) -> RecordBatch: return op.grid_disk_distances(_to_uint64_array(cellarray), k, flatten=flatten) -def grid_disk_aggregate_k(cellarray, k: int, aggregation_method: str) -> pa.Table: +def grid_disk_aggregate_k(cellarray, k: int, aggregation_method: str) -> RecordBatch: """ Valid values for `aggregation_method` are `"min"` and `"max"`. """ return op.grid_disk_aggregate_k(_to_uint64_array(cellarray), k, aggregation_method) -def grid_ring_distances(cellarray, k_min: int, k_max: int, flatten: bool = False) -> pa.Table: +def grid_ring_distances(cellarray, k_min: int, k_max: int, flatten: bool = False) -> RecordBatch: return op.grid_ring_distances(_to_uint64_array(cellarray), k_min, k_max, flatten=flatten) -def cells_area_m2(cellarray) -> pa.Array: +def cells_area_m2(cellarray) -> Array: return op.cells_area_m2(_to_uint64_array(cellarray)) -def cells_area_km2(cellarray) -> pa.Array: +def cells_area_km2(cellarray) -> Array: return op.cells_area_km2(_to_uint64_array(cellarray)) -def cells_area_rads2(cellarray) -> pa.Array: +def cells_area_rads2(cellarray) -> Array: return op.cells_area_rads2(_to_uint64_array(cellarray)) -def cells_to_string(cellarray) -> pa.Array: +def cells_to_string(cellarray) -> Array: return op.cells_to_string(_to_uint64_array(cellarray)) -def vertexes_to_string(vertexesarray) -> pa.Array: +def vertexes_to_string(vertexesarray) -> Array: return op.vertexes_to_string(_to_uint64_array(vertexesarray)) -def directededges_to_string(directededgearray) -> pa.Array: +def directededges_to_string(directededgearray) -> Array: return op.directededges_to_string(_to_uint64_array(directededgearray)) -def cells_to_localij(cellarray, anchor, set_failing_to_invalid: bool = False) -> pa.Table: +def cells_to_localij(cellarray, anchor, set_failing_to_invalid: bool = False) -> RecordBatch: """ Produces IJ coordinates for an index anchored by an origin `anchor`. @@ -215,10 +239,14 @@ def cells_to_localij(cellarray, anchor, set_failing_to_invalid: bool = False) -> """ if type(anchor) is not int: anchor = _to_uint64_array(anchor) - return op.cells_to_localij(_to_uint64_array(cellarray), anchor, set_failing_to_invalid=set_failing_to_invalid) + return op.cells_to_localij( + _to_uint64_array(cellarray), + anchor, + set_failing_to_invalid=set_failing_to_invalid, + ) -def localij_to_cells(anchor, i, j, set_failing_to_invalid: bool = False) -> pa.Array: +def localij_to_cells(anchor, i, j, set_failing_to_invalid: bool = False) -> Array: """ Produces cells from `i` and `j` coordinates and an `anchor` cell. @@ -230,8 +258,8 @@ def localij_to_cells(anchor, i, j, set_failing_to_invalid: bool = False) -> pa.A anchor = _to_uint64_array(anchor) return op.localij_to_cells( anchor, - _to_arrow_array(i, pa.int32()), - _to_arrow_array(j, pa.int32()), + _to_arrow_array(i, DataType.int32()), + _to_arrow_array(j, DataType.int32()), set_failing_to_invalid=set_failing_to_invalid, ) diff --git a/h3ronpy/python/h3ronpy/pandas/__init__.py b/h3ronpy/python/h3ronpy/pandas/__init__.py index 99af347..d1cf2bc 100644 --- a/h3ronpy/python/h3ronpy/pandas/__init__.py +++ b/h3ronpy/python/h3ronpy/pandas/__init__.py @@ -7,79 +7,3 @@ packages need to be installed separately. """ - -from .. import arrow as _arrow -import pyarrow as pa -from functools import wraps -import pandas as pd - - -def _wrap(func, ret_type=None): - @wraps(func) - def wrapper(*args, **kw): - result = func(*args, **kw) - if isinstance(result, pa.Table): - return result.to_pandas(split_blocks=True, self_destruct=True) - elif isinstance(result, pa.Array): - return result.to_pandas() - return result - - if ret_type: - # create a copy to avoid modifying the dict of the wrapped function - wrapper.__annotations__ = dict(**wrapper.__annotations__) - wrapper.__annotations__["return"] = ret_type - return wrapper - - -change_resolution = _wrap(_arrow.change_resolution, ret_type=pd.Series) -change_resolution_list = _wrap(_arrow.change_resolution, ret_type=pd.Series) -change_resolution.__annotations__["return"] = pd.Series -change_resolution_paired = _wrap(_arrow.change_resolution_paired, ret_type=pd.DataFrame) -cells_resolution = _wrap(_arrow.cells_resolution, ret_type=pd.Series) -cells_parse = _wrap(_arrow.cells_parse, ret_type=pd.Series) -vertexes_parse = _wrap(_arrow.vertexes_parse, ret_type=pd.Series) -directededges_parse = _wrap(_arrow.directededges_parse, ret_type=pd.Series) -compact = _wrap(_arrow.compact, ret_type=pd.Series) -uncompact = _wrap(_arrow.uncompact, ret_type=pd.Series) -cells_valid = _wrap(_arrow.cells_valid, ret_type=pd.Series) -vertexes_valid = _wrap(_arrow.vertexes_valid, ret_type=pd.Series) -directededges_valid = _wrap(_arrow.directededges_valid, ret_type=pd.Series) -grid_disk = _wrap(_arrow.grid_disk, ret_type=pd.Series) -grid_disk_distances = _wrap(_arrow.grid_disk_distances, ret_type=pd.DataFrame) -grid_ring_distances = _wrap(_arrow.grid_ring_distances, ret_type=pd.DataFrame) -grid_disk_aggregate_k = _wrap(_arrow.grid_disk_aggregate_k, ret_type=pd.DataFrame) -cells_area_m2 = _wrap(_arrow.cells_area_m2, ret_type=pd.Series) -cells_area_km2 = _wrap(_arrow.cells_area_km2, ret_type=pd.Series) -cells_area_rads2 = _wrap(_arrow.cells_area_rads2, ret_type=pd.Series) -cells_to_string = _wrap(_arrow.cells_to_string, ret_type=pd.Series) -vertexes_to_string = _wrap(_arrow.vertexes_to_string, ret_type=pd.Series) -directededges_to_string = _wrap(_arrow.directededges_to_string, ret_type=pd.Series) -cells_to_localij = _wrap(_arrow.cells_to_localij, ret_type=pd.DataFrame) -localij_to_cells = _wrap(_arrow.localij_to_cells, ret_type=pd.Series) - -__all__ = [ - change_resolution.__name__, - change_resolution_list.__name__, - change_resolution_paired.__name__, - cells_resolution.__name__, - cells_parse.__name__, - vertexes_parse.__name__, - directededges_parse.__name__, - compact.__name__, - uncompact.__name__, - cells_valid.__name__, - vertexes_valid.__name__, - directededges_valid.__name__, - grid_disk.__name__, - grid_disk_distances.__name__, - grid_ring_distances.__name__, - grid_disk_aggregate_k.__name__, - cells_area_m2.__name__, - cells_area_km2.__name__, - cells_area_rads2.__name__, - cells_to_string.__name__, - vertexes_to_string.__name__, - directededges_to_string.__name__, - cells_to_localij.__name__, - localij_to_cells.__name__, -] diff --git a/h3ronpy/python/h3ronpy/pandas/raster.py b/h3ronpy/python/h3ronpy/pandas/raster.py index 0cbea5f..8890700 100644 --- a/h3ronpy/python/h3ronpy/pandas/raster.py +++ b/h3ronpy/python/h3ronpy/pandas/raster.py @@ -1,19 +1,20 @@ +import typing + import geopandas as gpd import numpy as np import pandas as pd -import typing -from ..arrow import raster as arrow_raster +from .. import raster from .vector import cells_dataframe_to_geodataframe -__doc__ = arrow_raster.__doc__ +__doc__ = raster.__doc__ -nearest_h3_resolution = arrow_raster.nearest_h3_resolution -rasterize_cells = arrow_raster.rasterize_cells +nearest_h3_resolution = raster.nearest_h3_resolution +rasterize_cells = raster.rasterize_cells def raster_to_dataframe( - in_raster: np.array, + in_raster: np.ndarray, transform, h3_resolution: int, nodata_value=None, @@ -39,8 +40,13 @@ def raster_to_dataframe( :return: pandas `DataFrame` or `GeoDataFrame` """ - df = arrow_raster.raster_to_dataframe( - in_raster, transform, h3_resolution, nodata_value=nodata_value, axis_order=axis_order, compact=compact + df = raster.raster_to_dataframe( + in_raster, + transform, + h3_resolution, + nodata_value=nodata_value, + axis_order=axis_order, + compact=compact, ).to_pandas() if geo: diff --git a/h3ronpy/python/h3ronpy/pandas/vector.py b/h3ronpy/python/h3ronpy/pandas/vector.py index 75e320d..bf99946 100644 --- a/h3ronpy/python/h3ronpy/pandas/vector.py +++ b/h3ronpy/python/h3ronpy/pandas/vector.py @@ -1,69 +1,12 @@ -from . import _wrap -from ..arrow import vector as _av -from .. import ContainmentMode -from ..arrow import util as _arrow_util -import pyarrow as pa -import pandas as pd import geopandas as gpd -from functools import wraps -from typing import Optional -from .. import H3_CRS, DEFAULT_CELL_COLUMN_NAME - - -def _geoseries_from_wkb(func, doc: Optional[str] = None, name: Optional[str] = None): - @wraps(func) - def wrapper(*args, **kw): - return gpd.GeoSeries.from_wkb(func(*args, **kw), crs=H3_CRS) - - # create a copy to avoid modifying the dict of the wrapped function - wrapper.__annotations__ = dict(**wrapper.__annotations__) - wrapper.__annotations__["return"] = gpd.GeoSeries - if doc is not None: - wrapper.__doc__ = doc - if name is not None: - wrapper.__name__ = name - - return wrapper - - -cells_to_coordinates = _wrap(_av.cells_to_coordinates, ret_type=pd.DataFrame) -coordinates_to_cells = _wrap(_av.coordinates_to_cells, ret_type=pd.Series) -cells_bounds = _av.cells_bounds -cells_bounds_arrays = _wrap(_av.cells_bounds_arrays, ret_type=pd.DataFrame) -cells_to_wkb_polygons = _wrap(_av.cells_to_wkb_polygons, ret_type=pd.Series) -cells_to_polygons = _geoseries_from_wkb( - cells_to_wkb_polygons, - doc="Create a geoseries containing the polygon geometries of a cell array", - name="cells_to_polygons", -) -cells_to_wkb_points = _wrap(_av.cells_to_wkb_points, ret_type=pd.Series) -cells_to_points = _geoseries_from_wkb( - cells_to_wkb_points, - doc="Create a geoseries containing the centroid point geometries of a cell array", - name="cells_to_points", -) -vertexes_to_wkb_points = _wrap(_av.vertexes_to_wkb_points, ret_type=pd.Series) -vertexes_to_points = _geoseries_from_wkb( - vertexes_to_wkb_points, - doc="Create a geoseries containing the point geometries of a vertex array", - name="vertexes_to_points", -) -directededges_to_wkb_linestrings = _wrap(_av.directededges_to_wkb_linestrings, ret_type=pd.Series) -directededges_to_linestrings = _geoseries_from_wkb( - directededges_to_wkb_linestrings, - doc="Create a geoseries containing the linestrings geometries of a directededge array", - name="directededges_to_linestrings", -) -wkb_to_cells = _wrap(_av.wkb_to_cells, ret_type=pd.Series) -geometry_to_cells = _wrap(_av.geometry_to_cells, ret_type=pd.Series) - - -@wraps(wkb_to_cells) -def geoseries_to_cells(geoseries: gpd.GeoSeries, *args, **kw): - return _av.wkb_to_cells(geoseries.to_wkb(), *args, **kw).to_pandas() - +import pandas as pd +import pyarrow as pa +import shapely -geoseries_to_cells.__name__ = "geoseries_to_cells" +from h3ronpy import DEFAULT_CELL_COLUMN_NAME, H3_CRS, ContainmentMode +from h3ronpy import util as _arrow_util +from h3ronpy import vector as _av +from h3ronpy.vector import cells_to_wkb_polygons def cells_dataframe_to_geodataframe( @@ -76,7 +19,9 @@ def cells_dataframe_to_geodataframe( :param cell_column_name: name of the column containing the h3 indexes :return: GeoDataFrame """ - return gpd.GeoDataFrame(df, geometry=cells_to_polygons(df[cell_column_name]), crs=H3_CRS) + wkb_polygons = cells_to_wkb_polygons(df[cell_column_name]) + geometry = shapely.from_wkb(wkb_polygons) + return gpd.GeoDataFrame(df, geometry=geometry, crs=H3_CRS) def geodataframe_to_cells( @@ -118,21 +63,6 @@ def geodataframe_to_cells( __all__ = [ - cells_to_coordinates.__name__, - coordinates_to_cells.__name__, - cells_bounds.__name__, - cells_bounds_arrays.__name__, - cells_to_wkb_polygons.__name__, - cells_to_polygons.__name__, - cells_to_wkb_points.__name__, - cells_to_points.__name__, - vertexes_to_wkb_points.__name__, - vertexes_to_points.__name__, - directededges_to_wkb_linestrings.__name__, - directededges_to_linestrings.__name__, cells_dataframe_to_geodataframe.__name__, - wkb_to_cells.__name__, - geometry_to_cells.__name__, - geoseries_to_cells.__name__, geodataframe_to_cells.__name__, ] diff --git a/h3ronpy/python/h3ronpy/polars/__init__.py b/h3ronpy/python/h3ronpy/polars/__init__.py index a0fe60a..6431be8 100644 --- a/h3ronpy/python/h3ronpy/polars/__init__.py +++ b/h3ronpy/python/h3ronpy/polars/__init__.py @@ -8,52 +8,32 @@ """ -from functools import wraps +from __future__ import annotations + import typing +from functools import wraps + import polars as pl -import pyarrow as pa -from .. import arrow as _arrow +from arro3.core import ChunkedArray +from arro3.core.types import ArrowArrayExportable + +import h3ronpy.arrow as _arrow -def _wrap(func, ret_type=None): +# Wrapper for calling arrow-based operations on polars Series. +def _wrap(func: typing.Callable[..., ArrowArrayExportable]): @wraps(func, updated=()) def wrapper(*args, **kw): - result = func(*args, **kw) - if isinstance(result, pa.Table) or isinstance(result, pa.Array): - return pl.from_arrow(result) - return result - - if ret_type: - # create a copy to avoid modifying the dict of the wrapped function - wrapper.__annotations__ = dict(**wrapper.__annotations__) - wrapper.__annotations__["return"] = ret_type - return wrapper + # This _should_ always be a contiguous single-chunk Series already, because + # we're inside map_batches. So combine_chunks should be free. + ca = ChunkedArray.from_arrow(args[0]) + array = ca.combine_chunks() + new_args = list(args) + new_args[0] = array + result = func(*new_args, **kw) + return pl.Series(result) - -change_resolution = _wrap(_arrow.change_resolution, ret_type=pl.Series) -change_resolution_list = _wrap(_arrow.change_resolution, ret_type=pl.Series) -change_resolution_paired = _wrap(_arrow.change_resolution_paired, ret_type=pl.DataFrame) -cells_resolution = _wrap(_arrow.cells_resolution, ret_type=pl.Series) -cells_parse = _wrap(_arrow.cells_parse, ret_type=pl.Series) -vertexes_parse = _wrap(_arrow.vertexes_parse, ret_type=pl.Series) -directededges_parse = _wrap(_arrow.directededges_parse, ret_type=pl.Series) -compact = _wrap(_arrow.compact, ret_type=pl.Series) -uncompact = _wrap(_arrow.uncompact, ret_type=pl.Series) -cells_valid = _wrap(_arrow.cells_valid, ret_type=pl.Series) -vertexes_valid = _wrap(_arrow.vertexes_valid, ret_type=pl.Series) -directededges_valid = _wrap(_arrow.directededges_valid, ret_type=pl.Series) -grid_disk = _wrap(_arrow.grid_disk, ret_type=pl.Series) -grid_disk_distances = _wrap(_arrow.grid_disk_distances, ret_type=pl.DataFrame) -grid_ring_distances = _wrap(_arrow.grid_ring_distances, ret_type=pl.DataFrame) -grid_disk_aggregate_k = _wrap(_arrow.grid_disk_aggregate_k, ret_type=pl.DataFrame) -cells_area_m2 = _wrap(_arrow.cells_area_m2, ret_type=pl.Series) -cells_area_km2 = _wrap(_arrow.cells_area_km2, ret_type=pl.Series) -cells_area_rads2 = _wrap(_arrow.cells_area_rads2, ret_type=pl.Series) -cells_to_string = _wrap(_arrow.cells_to_string, ret_type=pl.Series) -vertexes_to_string = _wrap(_arrow.vertexes_to_string, ret_type=pl.Series) -directededges_to_string = _wrap(_arrow.directededges_to_string, ret_type=pl.Series) -cells_to_localij = _wrap(_arrow.cells_to_localij, ret_type=pl.DataFrame) -localij_to_cells = _wrap(_arrow.localij_to_cells, ret_type=pl.Series) + return wrapper @pl.api.register_expr_namespace("h3") @@ -68,71 +48,74 @@ class H3Expr: def __init__(self, expr: pl.Expr): self._expr = expr - def __expr_map_series(self, func: typing.Callable[[pl.Series], pl.Series]) -> pl.Expr: + def __expr_map_series(self, func: typing.Callable[..., ArrowArrayExportable]) -> pl.Expr: + wrapped_func = _wrap(func) + if hasattr(self._expr, "map"): # polars < 1.0 - return self._expr.map(func) - return self._expr.map_batches(func) + return self._expr.map(wrapped_func) + + return self._expr.map_batches(wrapped_func) def cells_resolution(self) -> pl.Expr: - return self.__expr_map_series(lambda s: cells_resolution(s)).alias("resolution") + return self.__expr_map_series(_arrow.cells_resolution).alias("resolution") def change_resolution(self, resolution: int) -> pl.Expr: - return self.__expr_map_series(lambda s: change_resolution(s, resolution)) + return self.__expr_map_series(lambda s: _arrow.change_resolution(s, resolution)) def change_resolution_list(self, resolution: int) -> pl.Expr: - return self.__expr_map_series(lambda s: change_resolution_list(s, resolution)) + return self.__expr_map_series(lambda s: _arrow.change_resolution_list(s, resolution)) def cells_parse(self, set_failing_to_invalid: bool = False) -> pl.Expr: - return self.__expr_map_series(lambda s: cells_parse(s, set_failing_to_invalid=set_failing_to_invalid)).alias( - "cell" - ) + return self.__expr_map_series( + lambda s: _arrow.cells_parse(s, set_failing_to_invalid=set_failing_to_invalid) + ).alias("cell") def vertexes_parse(self, set_failing_to_invalid: bool = False) -> pl.Expr: - return self.__expr_map_series(lambda s: vertexes_parse(s, set_failing_to_invalid=set_failing_to_invalid)).alias( - "vertex" - ) + return self.__expr_map_series( + lambda s: _arrow.vertexes_parse(s, set_failing_to_invalid=set_failing_to_invalid) + ).alias("vertex") def directededges_parse(self, set_failing_to_invalid: bool = False) -> pl.Expr: return self.__expr_map_series( - lambda s: directededges_parse(s, set_failing_to_invalid=set_failing_to_invalid) + lambda s: _arrow.directededges_parse(s, set_failing_to_invalid=set_failing_to_invalid) ).alias("directededge") def grid_disk(self, k: int, flatten: bool = False) -> pl.Expr: - return self.__expr_map_series(lambda s: grid_disk(s, k, flatten=flatten)) + return self.__expr_map_series(lambda s: _arrow.grid_disk(s, k, flatten=flatten)) def compact(self, mixed_resolutions: bool = False) -> pl.Expr: - return self.__expr_map_series(lambda s: compact(s, mixed_resolutions=mixed_resolutions)) + return self.__expr_map_series(lambda s: _arrow.compact(s, mixed_resolutions=mixed_resolutions)) def uncompact(self, target_resolution: int) -> pl.Expr: - return self.__expr_map_series(lambda s: uncompact(s, target_resolution)) + return self.__expr_map_series(lambda s: _arrow.uncompact(s, target_resolution)) def cells_area_m2(self) -> pl.Expr: - return self.__expr_map_series(lambda s: cells_area_m2(s)).alias("area_m2") + return self.__expr_map_series(_arrow.cells_area_m2).alias("area_m2") def cells_area_km2(self) -> pl.Expr: - return self.__expr_map_series(lambda s: cells_area_km2(s)).alias("area_km2") + return self.__expr_map_series(_arrow.cells_area_km2).alias("area_km2") def cells_area_rads2(self) -> pl.Expr: - return self.__expr_map_series(lambda s: cells_area_rads2(s)).alias("area_rads2") + return self.__expr_map_series(_arrow.cells_area_rads2).alias("area_rads2") def cells_valid(self) -> pl.Expr: - return self.__expr_map_series(lambda s: cells_valid(s)).alias("cells_valid") + return self.__expr_map_series(_arrow.cells_valid).alias("cells_valid") def vertexes_valid(self) -> pl.Expr: - return self.__expr_map_series(lambda s: vertexes_valid(s)).alias("vertexes_valid") + return self.__expr_map_series(_arrow.vertexes_valid).alias("vertexes_valid") def directededges_valid(self) -> pl.Expr: - return self.__expr_map_series(lambda s: directededges_valid(s)).alias("directededges_valid") + return self.__expr_map_series(_arrow.directededges_valid).alias("directededges_valid") def cells_to_string(self) -> pl.Expr: - return self.__expr_map_series(lambda s: cells_to_string(s)) + return self.__expr_map_series(_arrow.cells_to_string) def vertexes_to_string(self) -> pl.Expr: - return self.__expr_map_series(lambda s: vertexes_to_string(s)) + return self.__expr_map_series(_arrow.vertexes_to_string) def directededges_to_string(self) -> pl.Expr: - return self.__expr_map_series(lambda s: directededges_to_string(s)) + return self.__expr_map_series(_arrow.directededges_to_string) @pl.api.register_series_namespace("h3") @@ -148,85 +131,61 @@ def __init__(self, s: pl.Series): self._s = s def cells_resolution(self) -> pl.Series: - return cells_resolution(self._s) + return _wrap(_arrow.cells_resolution)(self._s) def change_resolution(self, resolution: int) -> pl.Series: - return change_resolution(self._s, resolution) + return _wrap(_arrow.change_resolution)(self._s, resolution) def change_resolution_list(self, resolution: int) -> pl.Series: - return change_resolution_list(self._s, resolution) + return _wrap(_arrow.change_resolution_list)(self._s, resolution) def cells_parse(self, set_failing_to_invalid: bool = False) -> pl.Series: - return cells_parse(self._s, set_failing_to_invalid=set_failing_to_invalid) + return _wrap(_arrow.cells_parse)(self._s, set_failing_to_invalid=set_failing_to_invalid) def vertexes_parse(self, set_failing_to_invalid: bool = False) -> pl.Series: - return vertexes_parse(self._s, set_failing_to_invalid=set_failing_to_invalid) + return _wrap(_arrow.vertexes_parse)(self._s, set_failing_to_invalid=set_failing_to_invalid) def directededges_parse(self, set_failing_to_invalid: bool = False) -> pl.Series: - return directededges_parse(self._s, set_failing_to_invalid=set_failing_to_invalid) + return _wrap(_arrow.directededges_parse)(self._s, set_failing_to_invalid=set_failing_to_invalid) def grid_disk(self, k: int, flatten: bool = False) -> pl.Series: - return grid_disk(self._s, k, flatten=flatten) + return _wrap(_arrow.grid_disk)(self._s, k, flatten=flatten) def compact(self, mixed_resolutions: bool = False) -> pl.Series: - return compact(self._s, mixed_resolutions=mixed_resolutions) + return _wrap(_arrow.compact)(self._s, mixed_resolutions=mixed_resolutions) def uncompact(self, target_resolution: int) -> pl.Series: - return uncompact(self._s, target_resolution) + return _wrap(_arrow.uncompact)(self._s, target_resolution) def cells_area_m2(self) -> pl.Series: - return cells_area_m2(self._s) + return _wrap(_arrow.cells_area_m2)(self._s) def cells_area_km2(self) -> pl.Series: - return cells_area_km2(self._s) + return _wrap(_arrow.cells_area_km2)(self._s) def cells_area_rads2(self) -> pl.Series: - return cells_area_rads2(self._s) + return _wrap(_arrow.cells_area_rads2)(self._s) def cells_valid(self) -> pl.Series: - return cells_valid(self._s) + return _wrap(_arrow.cells_valid)(self._s) def vertexes_valid(self) -> pl.Series: - return vertexes_valid(self._s) + return _wrap(_arrow.vertexes_valid)(self._s) def directededges_valid(self) -> pl.Series: - return directededges_valid(self._s) + return _wrap(_arrow.directededges_valid)(self._s) def cells_to_string(self) -> pl.Series: - return cells_to_string(self._s) + return _wrap(_arrow.cells_to_string)(self._s) def vertexes_to_string(self) -> pl.Series: - return vertexes_to_string(self._s) + return _wrap(_arrow.vertexes_to_string)(self._s) def directededges_to_string(self) -> pl.Series: - return directededges_to_string(self._s) + return _wrap(_arrow.directededges_to_string)(self._s) __all__ = [ - change_resolution.__name__, - change_resolution_list.__name__, - change_resolution_paired.__name__, - cells_resolution.__name__, - cells_parse.__name__, - vertexes_parse.__name__, - directededges_parse.__name__, - compact.__name__, - uncompact.__name__, - cells_valid.__name__, - vertexes_valid.__name__, - directededges_valid.__name__, - grid_disk.__name__, - grid_disk_distances.__name__, - grid_ring_distances.__name__, - grid_disk_aggregate_k.__name__, - cells_area_m2.__name__, - cells_area_km2.__name__, - cells_area_rads2.__name__, - cells_to_string.__name__, - vertexes_to_string.__name__, - directededges_to_string.__name__, - cells_to_localij.__name__, - localij_to_cells.__name__, H3Expr.__name__, H3SeriesShortcuts.__name__, ] diff --git a/h3ronpy/python/h3ronpy/polars/raster.py b/h3ronpy/python/h3ronpy/polars/raster.py index e0aea16..f7b6b8d 100644 --- a/h3ronpy/python/h3ronpy/polars/raster.py +++ b/h3ronpy/python/h3ronpy/polars/raster.py @@ -1,11 +1,11 @@ -from ..arrow import raster as arrow_raster -from . import _wrap -import polars as pl +from h3ronpy import raster -nearest_h3_resolution = arrow_raster.nearest_h3_resolution -raster_to_dataframe = _wrap(arrow_raster.raster_to_dataframe, ret_type=pl.DataFrame) -rasterize_cells = arrow_raster.rasterize_cells +nearest_h3_resolution = raster.nearest_h3_resolution +rasterize_cells = raster.rasterize_cells -__doc__ = arrow_raster.__doc__ +__doc__ = raster.__doc__ -__all__ = [nearest_h3_resolution.__name__, raster_to_dataframe.__name__, rasterize_cells.__name__] +__all__ = [ + nearest_h3_resolution.__name__, + rasterize_cells.__name__, +] diff --git a/h3ronpy/python/h3ronpy/polars/vector.py b/h3ronpy/python/h3ronpy/polars/vector.py deleted file mode 100644 index 9c734ee..0000000 --- a/h3ronpy/python/h3ronpy/polars/vector.py +++ /dev/null @@ -1,25 +0,0 @@ -from . import _wrap -from ..arrow import vector as _av -import polars as pl - -cells_to_coordinates = _wrap(_av.cells_to_coordinates, ret_type=pl.DataFrame) -coordinates_to_cells = _wrap(_av.coordinates_to_cells, ret_type=pl.Series) -cells_bounds = _av.cells_bounds -cells_bounds_arrays = _wrap(_av.cells_bounds_arrays, ret_type=pl.DataFrame) -cells_to_wkb_polygons = _wrap(_av.cells_to_wkb_polygons, ret_type=pl.Series) -cells_to_wkb_points = _wrap(_av.cells_to_wkb_points, ret_type=pl.Series) -vertexes_to_wkb_points = _wrap(_av.vertexes_to_wkb_points, ret_type=pl.Series) -wkb_to_cells = _wrap(_av.wkb_to_cells, ret_type=pl.Series) -geometry_to_cells = _wrap(_av.geometry_to_cells, ret_type=pl.Series) - -__all__ = [ - cells_to_coordinates.__name__, - coordinates_to_cells.__name__, - cells_bounds.__name__, - cells_bounds_arrays.__name__, - cells_to_wkb_polygons.__name__, - cells_to_wkb_points.__name__, - vertexes_to_wkb_points.__name__, - wkb_to_cells.__name__, - geometry_to_cells.__name__, -] diff --git a/h3ronpy/python/h3ronpy/arrow/raster.py b/h3ronpy/python/h3ronpy/raster.py similarity index 92% rename from h3ronpy/python/h3ronpy/arrow/raster.py rename to h3ronpy/python/h3ronpy/raster.py index 1dbb03b..bbce849 100644 --- a/h3ronpy/python/h3ronpy/arrow/raster.py +++ b/h3ronpy/python/h3ronpy/raster.py @@ -28,13 +28,15 @@ """ -from h3ronpy.h3ronpyrs import raster -from .. import DEFAULT_CELL_COLUMN_NAME -from . import _to_uint64_array, _to_arrow_array -from .vector import cells_to_wkb_polygons, cells_bounds +import typing + import numpy as np import pyarrow as pa -import typing + +from h3ronpy import DEFAULT_CELL_COLUMN_NAME +from h3ronpy.arrow import _to_arrow_array, _to_uint64_array +from h3ronpy.h3ronpyrs import raster +from h3ronpy.vector import cells_bounds, cells_to_wkb_polygons try: # affine library is used by rasterio @@ -74,7 +76,7 @@ def nearest_h3_resolution(shape, transform, axis_order="yx", search_mode="min_di def raster_to_dataframe( - in_raster: np.array, + in_raster: np.ndarray, transform, h3_resolution: int, nodata_value=None, @@ -124,14 +126,21 @@ def raster_to_dataframe( raise NotImplementedError(f"no raster_to_h3 implementation for dtype {dtype.name}") return pa.Table.from_arrays( - arrays=func(in_raster, _get_transform(transform), h3_resolution, axis_order, compact, nodata_value), + arrays=func( + in_raster, + _get_transform(transform), + h3_resolution, + axis_order, + compact, + nodata_value, + ), names=["value", DEFAULT_CELL_COLUMN_NAME], ) def rasterize_cells( cells, values, size: typing.Union[int, typing.Tuple[int, int]], nodata_value=0 -) -> (np.ndarray, typing.Tuple[float, float, float, float, float, float]): +) -> typing.Tuple[np.ndarray, typing.Tuple[float, float, float, float, float, float]]: """ Generate a raster numpy array from arrays of cells and values. @@ -145,9 +154,9 @@ def rasterize_cells( :return: 2D numpy array typed accordingly to the passed in values array, and the geotransform (WGS84 coordinate system, ordering used by the affine library and rasterio) """ - from rasterio.transform import from_bounds - from rasterio.features import rasterize import shapely + from rasterio.features import rasterize + from rasterio.transform import from_bounds cells = _to_uint64_array(cells) values = _to_arrow_array(values, None) @@ -189,7 +198,7 @@ def rasterize_cells( value = value.as_py() # linking cells should speed up rendering in case of large homogenous areas - polygons = cells_to_wkb_polygons(cells, link_cells=True) + polygons = pa.array(cells_to_wkb_polygons(pa.array(cells), link_cells=True)) polygons = [shapely.from_wkb(polygon.as_py()) for polygon in polygons.filter(polygons.is_valid())] # draw diff --git a/h3ronpy/python/h3ronpy/arrow/util.py b/h3ronpy/python/h3ronpy/util.py similarity index 85% rename from h3ronpy/python/h3ronpy/arrow/util.py rename to h3ronpy/python/h3ronpy/util.py index e6dcb66..000fbf3 100644 --- a/h3ronpy/python/h3ronpy/arrow/util.py +++ b/h3ronpy/python/h3ronpy/util.py @@ -1,5 +1,5 @@ -import pyarrow.compute as pc import pyarrow as pa +import pyarrow.compute as pc # from https://issues.apache.org/jira/browse/ARROW-12099 @@ -16,7 +16,10 @@ def explode_table_include_null(table: pa.Table, column: str) -> pa.Table: # Using RuntimeWarning as ResourceWarning is often not displayed to the user. import warnings - warnings.warn("This ArrowIndexError may be a sign of the process running out of memory.", RuntimeWarning) + warnings.warn( + "This ArrowIndexError may be a sign of the process running out of memory.", + RuntimeWarning, + ) raise result = result.append_column( pa.field(column, table.schema.field(column).type.value_type), diff --git a/h3ronpy/python/h3ronpy/arrow/vector.py b/h3ronpy/python/h3ronpy/vector.py similarity index 83% rename from h3ronpy/python/h3ronpy/arrow/vector.py rename to h3ronpy/python/h3ronpy/vector.py index c8a3275..0884efa 100644 --- a/h3ronpy/python/h3ronpy/arrow/vector.py +++ b/h3ronpy/python/h3ronpy/vector.py @@ -1,18 +1,20 @@ +from typing import Optional, Tuple + +from arro3.core import Array, DataType, RecordBatch + +from h3ronpy import ContainmentMode +from h3ronpy.arrow import _to_arrow_array, _to_uint64_array from h3ronpy.h3ronpyrs import vector -from .. import ContainmentMode -from . import _to_uint64_array, _HAS_POLARS, _to_arrow_array -from typing import Optional, Tuple, Union -import pyarrow as pa -def cells_to_coordinates(arr, radians: bool = False) -> pa.Table: +def cells_to_coordinates(arr, radians: bool = False) -> RecordBatch: """ convert to point coordinates in degrees """ return vector.cells_to_coordinates(_to_uint64_array(arr), radians=radians) -def coordinates_to_cells(latarray, lngarray, resarray, radians: bool = False) -> pa.Array: +def coordinates_to_cells(latarray, lngarray, resarray, radians: bool = False) -> Array: """ Convert coordinates arrays to cells. @@ -25,9 +27,12 @@ def coordinates_to_cells(latarray, lngarray, resarray, radians: bool = False) -> if type(resarray) in (int, float): res = int(resarray) else: - res = _to_arrow_array(resarray, pa.uint8()) + res = _to_arrow_array(resarray, DataType.uint8()) return vector.coordinates_to_cells( - _to_arrow_array(latarray, pa.float64()), _to_arrow_array(lngarray, pa.float64()), res, radians=radians + _to_arrow_array(latarray, DataType.float64()), + _to_arrow_array(lngarray, DataType.float64()), + res, + radians=radians, ) @@ -38,7 +43,7 @@ def cells_bounds(arr) -> Optional[Tuple]: return vector.cells_bounds(_to_uint64_array(arr)) -def cells_bounds_arrays(arr) -> pa.Table: +def cells_bounds_arrays(arr) -> RecordBatch: """ Build a table/dataframe with the columns `minx`, `miny`, `maxx` and `maxy` containing the bounds of the individual cells from the input array. @@ -46,7 +51,7 @@ def cells_bounds_arrays(arr) -> pa.Table: return vector.cells_bounds_arrays(_to_uint64_array(arr)) -def cells_to_wkb_polygons(arr, radians: bool = False, link_cells: bool = False) -> pa.Array: +def cells_to_wkb_polygons(arr, radians: bool = False, link_cells: bool = False) -> Array: """ Convert cells to polygons. @@ -60,7 +65,7 @@ def cells_to_wkb_polygons(arr, radians: bool = False, link_cells: bool = False) return vector.cells_to_wkb_polygons(_to_uint64_array(arr), radians=radians, link_cells=link_cells) -def cells_to_wkb_points(arr, radians: bool = False) -> pa.Array: +def cells_to_wkb_points(arr, radians: bool = False) -> Array: """ Convert cells to points using their centroids. @@ -72,7 +77,7 @@ def cells_to_wkb_points(arr, radians: bool = False) -> pa.Array: return vector.cells_to_wkb_points(_to_uint64_array(arr), radians=radians) -def vertexes_to_wkb_points(arr, radians: bool = False) -> pa.Array: +def vertexes_to_wkb_points(arr, radians: bool = False) -> Array: """ Convert vertexes to points. @@ -84,7 +89,7 @@ def vertexes_to_wkb_points(arr, radians: bool = False) -> pa.Array: return vector.vertexes_to_wkb_points(_to_uint64_array(arr), radians=radians) -def directededges_to_wkb_linestrings(arr, radians: bool = False) -> pa.Array: +def directededges_to_wkb_linestrings(arr, radians: bool = False) -> Array: """ Convert directed edges to linestrings. @@ -102,7 +107,7 @@ def wkb_to_cells( containment_mode: ContainmentMode = ContainmentMode.ContainsCentroid, compact: bool = False, flatten: bool = False, -) -> Union[pa.Array, pa.ListArray]: +) -> Array: """ Convert a Series/Array/List of WKB values to H3 cells. @@ -117,17 +122,7 @@ def wkb_to_cells( of that cell are part of the set. :param flatten: Return a non-nested cell array instead of a list array. """ - if _HAS_POLARS: - import polars as pl - - if isinstance(arr, pl.Series): - arr = arr.to_arrow() - - if not isinstance(arr, pa.LargeBinaryArray): - arr = pa.array(arr, type=pa.large_binary()) - - if isinstance(arr, pa.ChunkedArray): - arr = arr.combine_chunks() + arr = _to_arrow_array(arr, DataType.binary()) return vector.wkb_to_cells( arr, resolution, @@ -142,7 +137,7 @@ def geometry_to_cells( resolution: int, containment_mode: ContainmentMode = ContainmentMode.ContainsCentroid, compact: bool = False, -) -> pa.Array: +) -> Array: """ Convert a single object which supports the python `__geo_interface__` protocol to H3 cells diff --git a/h3ronpy/src/array.rs b/h3ronpy/src/array.rs new file mode 100644 index 0000000..47d98b5 --- /dev/null +++ b/h3ronpy/src/array.rs @@ -0,0 +1,119 @@ +use std::sync::Arc; + +use arrow::datatypes::{DataType, Field}; +use h3arrow::array::{CellIndexArray, DirectedEdgeIndexArray, VertexIndexArray}; +use pyo3::prelude::*; +use pyo3::types::{PyCapsule, PyTuple}; +use pyo3_arrow::ffi::to_array_pycapsules; + +use crate::arrow_interop::pyarray_to_cellindexarray; +use crate::resolution::PyResolution; + +#[pyclass(name = "CellArray")] +pub struct PyCellArray(CellIndexArray); + +impl PyCellArray { + pub fn into_inner(self) -> CellIndexArray { + self.0 + } +} + +#[pymethods] +impl PyCellArray { + #[pyo3(signature = (requested_schema = None))] + fn __arrow_c_array__<'py>( + &'py self, + py: Python<'py>, + requested_schema: Option>, + ) -> PyResult> { + let array = self.0.primitive_array(); + let field = Arc::new(Field::new("", DataType::UInt64, true)); + Ok(to_array_pycapsules(py, field, array, requested_schema)?) + } + + fn __len__(&self) -> usize { + self.0.len() + } + + fn parent(&self, resolution: PyResolution) -> Self { + Self(self.0.parent(resolution.into())) + } + + fn slice(&self, offset: usize, length: usize) -> Self { + Self(self.0.slice(offset, length)) + } +} + +impl AsRef for PyCellArray { + fn as_ref(&self) -> &CellIndexArray { + &self.0 + } +} + +impl<'py> FromPyObject<'py> for PyCellArray { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { + Ok(Self(pyarray_to_cellindexarray(ob)?)) + } +} + +#[pyclass(name = "DirectedEdgeArray")] +pub struct PyDirectedEdgeArray(DirectedEdgeIndexArray); + +#[pymethods] +impl PyDirectedEdgeArray { + #[pyo3(signature = (requested_schema = None))] + fn __arrow_c_array__<'py>( + &'py self, + py: Python<'py>, + requested_schema: Option>, + ) -> PyResult> { + let array = self.0.primitive_array(); + let field = Arc::new(Field::new("", DataType::UInt64, true)); + Ok(to_array_pycapsules(py, field, array, requested_schema)?) + } + + fn __len__(&self) -> usize { + self.0.len() + } + + pub fn origin(&self) -> PyCellArray { + PyCellArray(self.0.origin()) + } + + pub fn destination(&self) -> PyCellArray { + PyCellArray(self.0.destination()) + } + + fn slice(&self, offset: usize, length: usize) -> Self { + Self(self.0.slice(offset, length)) + } +} + +#[pyclass(name = "VertexArray")] +pub struct PyVertexArray(VertexIndexArray); + +#[pymethods] +impl PyVertexArray { + #[pyo3(signature = (requested_schema = None))] + fn __arrow_c_array__<'py>( + &'py self, + py: Python<'py>, + requested_schema: Option>, + ) -> PyResult> { + let array = self.0.primitive_array(); + let field = Arc::new(Field::new("", DataType::UInt64, true)); + Ok(to_array_pycapsules(py, field, array, requested_schema)?) + } + + fn __len__(&self) -> usize { + self.0.len() + } + + pub fn owner(&self) -> PyCellArray { + PyCellArray(self.0.owner()) + } + + fn slice(&self, offset: usize, length: usize) -> Self { + Self(self.0.slice(offset, length)) + } +} diff --git a/h3ronpy/src/arrow_interop.rs b/h3ronpy/src/arrow_interop.rs index 058f91b..f2bcb11 100644 --- a/h3ronpy/src/arrow_interop.rs +++ b/h3ronpy/src/arrow_interop.rs @@ -1,6 +1,7 @@ -use arrow::array::{make_array, Array, ArrayData, UInt64Array}; -use arrow::pyarrow::{FromPyArrow, IntoPyArrow}; +use arrow::array::{Array, UInt64Array}; +use pyo3_arrow::PyArray; use std::any::{type_name, Any}; +use std::sync::Arc; use h3arrow::array::{ CellIndexArray, DirectedEdgeIndexArray, H3Array, H3IndexArrayValue, VertexIndexArray, @@ -11,27 +12,18 @@ use pyo3::Python; use crate::error::{IntoPyErr, IntoPyResult}; -pub(crate) fn with_pyarrow(f: F) -> PyResult -where - F: FnOnce(Python, Bound) -> PyResult, -{ - Python::with_gil(|py| { - let pyarrow = py.import_bound("pyarrow")?; - f(py, pyarrow) - }) -} - #[inline] pub fn h3array_to_pyarray(h3array: H3Array, py: Python) -> PyResult where IX: H3IndexArrayValue, { let pa: UInt64Array = h3array.into(); - pa.into_data().into_pyarrow(py) + PyArray::from_array_ref(Arc::new(pa)).to_arro3(py) } pub(crate) fn pyarray_to_native(obj: &Bound) -> PyResult { - let array = make_array(ArrayData::from_pyarrow_bound(obj)?); + let array = obj.extract::()?; + let (array, _field) = array.into_inner(); let array = array .as_any() diff --git a/h3ronpy/src/lib.rs b/h3ronpy/src/lib.rs index 35c5f86..539ccbf 100644 --- a/h3ronpy/src/lib.rs +++ b/h3ronpy/src/lib.rs @@ -14,10 +14,12 @@ use crate::op::init_op_submodule; use crate::raster::init_raster_submodule; use crate::vector::{init_vector_submodule, PyContainmentMode}; +mod array; mod arrow_interop; mod error; mod op; mod raster; +mod resolution; mod transform; mod vector; diff --git a/h3ronpy/src/op/compact.rs b/h3ronpy/src/op/compact.rs index 37b2333..fa163b9 100644 --- a/h3ronpy/src/op/compact.rs +++ b/h3ronpy/src/op/compact.rs @@ -2,13 +2,14 @@ use h3arrow::algorithm::CompactOp; use h3arrow::export::h3o::Resolution; use pyo3::prelude::*; +use crate::array::PyCellArray; use crate::arrow_interop::*; use crate::error::IntoPyResult; #[pyfunction] #[pyo3(signature = (cellarray, mixed_resolutions = false))] -pub(crate) fn compact(cellarray: &Bound, mixed_resolutions: bool) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; +pub(crate) fn compact(cellarray: PyCellArray, mixed_resolutions: bool) -> PyResult { + let cellindexarray = cellarray.into_inner(); let compacted = if mixed_resolutions { cellindexarray.compact_mixed_resolutions() } else { @@ -21,8 +22,8 @@ pub(crate) fn compact(cellarray: &Bound, mixed_resolutions: bool) -> PyRe #[pyfunction] #[pyo3(signature = (cellarray, target_resolution))] -pub(crate) fn uncompact(cellarray: &Bound, target_resolution: u8) -> PyResult { +pub(crate) fn uncompact(cellarray: PyCellArray, target_resolution: u8) -> PyResult { let target_resolution = Resolution::try_from(target_resolution).into_pyresult()?; - let out = pyarray_to_cellindexarray(cellarray)?.uncompact(target_resolution); + let out = cellarray.into_inner().uncompact(target_resolution); Python::with_gil(|py| h3array_to_pyarray(out, py)) } diff --git a/h3ronpy/src/op/localij.rs b/h3ronpy/src/op/localij.rs index 5ddcbee..edb52f7 100644 --- a/h3ronpy/src/op/localij.rs +++ b/h3ronpy/src/op/localij.rs @@ -1,46 +1,50 @@ -use crate::arrow_interop::{ - h3array_to_pyarray, pyarray_to_cellindexarray, pyarray_to_native, with_pyarrow, -}; +use crate::array::PyCellArray; +use crate::arrow_interop::{h3array_to_pyarray, pyarray_to_cellindexarray, pyarray_to_native}; use crate::error::IntoPyResult; -use arrow::array::{Array, Int32Array}; -use arrow::pyarrow::ToPyArrow; +use arrow::array::{Array, ArrayRef, Int32Array, RecordBatch}; +use arrow::datatypes::{Field, Schema}; use h3arrow::algorithm::localij::{LocalIJArrays, ToLocalIJOp}; use h3arrow::array::CellIndexArray; use h3arrow::h3o::CellIndex; use pyo3::exceptions::PyValueError; use pyo3::prelude::PyAnyMethods; -use pyo3::{pyfunction, Bound, PyAny, PyObject, PyResult, Python, ToPyObject}; +use pyo3::{pyfunction, Bound, PyAny, PyObject, PyResult, Python}; +use pyo3_arrow::error::PyArrowResult; +use pyo3_arrow::PyRecordBatch; use std::iter::repeat; +use std::sync::Arc; #[pyfunction] #[pyo3(signature = (cellarray, anchor, set_failing_to_invalid = false))] pub(crate) fn cells_to_localij( - cellarray: &Bound, + py: Python, + cellarray: PyCellArray, anchor: &Bound, set_failing_to_invalid: bool, -) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; +) -> PyArrowResult { + let cellindexarray = cellarray.into_inner(); let anchorarray = get_anchor_array(anchor, cellindexarray.len())?; let localij_arrays = cellindexarray .to_local_ij_array(anchorarray, set_failing_to_invalid) .into_pyresult()?; - with_pyarrow(|py, pyarrow| { - let arrays = [ - localij_arrays.i.into_data().to_pyarrow(py)?, - localij_arrays.j.into_data().to_pyarrow(py)?, - localij_arrays - .anchors - .primitive_array() - .into_data() - .to_pyarrow(py)?, - ]; - let table = pyarrow - .getattr("Table")? - .call_method1("from_arrays", (arrays, ["i", "j", "anchor"]))?; - Ok(table.to_object(py)) - }) + let i = localij_arrays.i.clone(); + let j = localij_arrays.j.clone(); + let anchor = localij_arrays.anchors.primitive_array().clone(); + + let schema = Schema::new(vec![ + Field::new("i", i.data_type().clone(), true), + Field::new("j", j.data_type().clone(), true), + Field::new("anchor", anchor.data_type().clone(), true), + ]); + let columns: Vec = vec![ + Arc::new(localij_arrays.i), + Arc::new(localij_arrays.j), + Arc::new(anchor), + ]; + let batch = RecordBatch::try_new(Arc::new(schema), columns)?; + Ok(PyRecordBatch::new(batch).to_arro3(py)?) } #[pyfunction] diff --git a/h3ronpy/src/op/measure.rs b/h3ronpy/src/op/measure.rs index 3f5ccd3..26a3616 100644 --- a/h3ronpy/src/op/measure.rs +++ b/h3ronpy/src/op/measure.rs @@ -1,25 +1,26 @@ -use crate::arrow_interop::pyarray_to_cellindexarray; -use arrow::array::Array; -use arrow::pyarrow::IntoPyArrow; +use std::sync::Arc; + +use crate::array::PyCellArray; use pyo3::prelude::*; +use pyo3_arrow::PyArray; #[pyfunction] #[pyo3(signature = (cellarray))] -pub(crate) fn cells_area_m2(cellarray: &Bound) -> PyResult { - let out = pyarray_to_cellindexarray(cellarray)?.area_m2(); - Python::with_gil(|py| out.into_data().into_pyarrow(py)) +pub(crate) fn cells_area_m2(py: Python, cellarray: PyCellArray) -> PyResult { + let out = cellarray.as_ref().area_m2(); + PyArray::from_array_ref(Arc::new(out)).to_arro3(py) } #[pyfunction] #[pyo3(signature = (cellarray))] -pub(crate) fn cells_area_km2(cellarray: &Bound) -> PyResult { - let out = pyarray_to_cellindexarray(cellarray)?.area_km2(); - Python::with_gil(|py| out.into_data().into_pyarrow(py)) +pub(crate) fn cells_area_km2(py: Python, cellarray: PyCellArray) -> PyResult { + let out = cellarray.as_ref().area_km2(); + PyArray::from_array_ref(Arc::new(out)).to_arro3(py) } #[pyfunction] #[pyo3(signature = (cellarray))] -pub(crate) fn cells_area_rads2(cellarray: &Bound) -> PyResult { - let out = pyarray_to_cellindexarray(cellarray)?.area_rads2(); - Python::with_gil(|py| out.into_data().into_pyarrow(py)) +pub(crate) fn cells_area_rads2(py: Python, cellarray: PyCellArray) -> PyResult { + let out = cellarray.as_ref().area_rads2(); + PyArray::from_array_ref(Arc::new(out)).to_arro3(py) } diff --git a/h3ronpy/src/op/neighbor.rs b/h3ronpy/src/op/neighbor.rs index 8a556e5..d0e883e 100644 --- a/h3ronpy/src/op/neighbor.rs +++ b/h3ronpy/src/op/neighbor.rs @@ -1,10 +1,16 @@ -use arrow::array::{Array, GenericListArray, LargeListArray, PrimitiveArray, UInt32Array}; -use arrow::pyarrow::{IntoPyArrow, ToPyArrow}; +use arrow::array::{ + Array, ArrayRef, GenericListArray, LargeListArray, PrimitiveArray, RecordBatch, UInt32Array, +}; +use arrow::datatypes::{Field, Schema}; use h3arrow::algorithm::{GridDiskDistances, GridOp, KAggregationMethod}; use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::{PyObject, PyResult}; +use pyo3_arrow::error::PyArrowResult; +use pyo3_arrow::{PyArray, PyRecordBatch}; use std::str::FromStr; +use std::sync::Arc; +use crate::array::PyCellArray; use crate::arrow_interop::*; use crate::error::IntoPyResult; use crate::DEFAULT_CELL_COLUMN_NAME; @@ -12,79 +18,92 @@ use pyo3::prelude::*; #[pyfunction] #[pyo3(signature = (cellarray, k, flatten = false))] -pub(crate) fn grid_disk(cellarray: &Bound, k: u32, flatten: bool) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; +pub(crate) fn grid_disk( + py: Python, + cellarray: PyCellArray, + k: u32, + flatten: bool, +) -> PyResult { + let cellindexarray = cellarray.into_inner(); let listarray = cellindexarray.grid_disk(k).into_pyresult()?; if flatten { let cellindexarray = listarray.into_flattened().into_pyresult()?; - Python::with_gil(|py| h3array_to_pyarray(cellindexarray, py)) + h3array_to_pyarray(cellindexarray, py) } else { - Python::with_gil(|py| LargeListArray::from(listarray).into_data().to_pyarrow(py)) + PyArray::from_array_ref(Arc::new(LargeListArray::from(listarray))).to_arro3(py) } } #[pyfunction] #[pyo3(signature = (cellarray, k, flatten = false))] pub(crate) fn grid_disk_distances( - cellarray: &Bound, + py: Python, + cellarray: PyCellArray, k: u32, flatten: bool, -) -> PyResult { - let griddiskdistances = pyarray_to_cellindexarray(cellarray)? +) -> PyArrowResult { + let griddiskdistances = cellarray + .into_inner() .grid_disk_distances(k) .into_pyresult()?; - return_griddiskdistances_table(griddiskdistances, flatten) + return_griddiskdistances_table(py, griddiskdistances, flatten) } #[pyfunction] #[pyo3(signature = (cellarray, k_min, k_max, flatten = false))] pub(crate) fn grid_ring_distances( - cellarray: &Bound, + py: Python, + cellarray: PyCellArray, k_min: u32, k_max: u32, flatten: bool, -) -> PyResult { +) -> PyArrowResult { if k_min >= k_max { - return Err(PyValueError::new_err("k_min must be less than k_max")); + return Err(PyValueError::new_err("k_min must be less than k_max").into()); } - let griddiskdistances = pyarray_to_cellindexarray(cellarray)? + let griddiskdistances = cellarray + .into_inner() .grid_ring_distances(k_min, k_max) .into_pyresult()?; - return_griddiskdistances_table(griddiskdistances, flatten) + return_griddiskdistances_table(py, griddiskdistances, flatten) } fn return_griddiskdistances_table( + py: Python, griddiskdistances: GridDiskDistances, flatten: bool, -) -> PyResult { - let (cells, distances) = if flatten { +) -> PyArrowResult { + let (cells, distances): (ArrayRef, ArrayRef) = if flatten { ( - PrimitiveArray::from(griddiskdistances.cells.into_flattened().into_pyresult()?) - .into_data(), - griddiskdistances - .distances - .values() - .as_any() - .downcast_ref::() - .ok_or_else(|| PyRuntimeError::new_err("expected primitivearray")) - .map(|pa| pa.clone().into_data())?, + Arc::new(PrimitiveArray::from( + griddiskdistances.cells.into_flattened().into_pyresult()?, + )), + Arc::new( + griddiskdistances + .distances + .values() + .as_any() + .downcast_ref::() + .ok_or_else(|| PyRuntimeError::new_err("expected primitivearray")) + .cloned()?, + ), ) } else { ( - GenericListArray::::from(griddiskdistances.cells).into_data(), - griddiskdistances.distances.into_data(), + Arc::new(GenericListArray::::from(griddiskdistances.cells)), + Arc::new(griddiskdistances.distances), ) }; - with_pyarrow(|py, pyarrow| { - let arrays = [cells.into_pyarrow(py)?, distances.into_pyarrow(py)?]; - let table = pyarrow - .getattr("Table")? - .call_method1("from_arrays", (arrays, [DEFAULT_CELL_COLUMN_NAME, "k"]))?; - Ok(table.to_object(py)) - }) + let schema = Schema::new(vec![ + Field::new(DEFAULT_CELL_COLUMN_NAME, cells.data_type().clone(), true), + Field::new("k", distances.data_type().clone(), true), + ]); + let columns = vec![cells, distances]; + let batch = RecordBatch::try_new(Arc::new(schema), columns)?; + Ok(PyRecordBatch::new(batch).to_arro3(py)?) } struct KAggregationMethodWrapper(KAggregationMethod); @@ -104,24 +123,30 @@ impl FromStr for KAggregationMethodWrapper { #[pyfunction] #[pyo3(signature = (cellarray, k, aggregation_method))] pub(crate) fn grid_disk_aggregate_k( - cellarray: &Bound, + py: Python, + cellarray: PyCellArray, k: u32, aggregation_method: &str, -) -> PyResult { +) -> PyArrowResult { let aggregation_method = KAggregationMethodWrapper::from_str(aggregation_method)?; - let griddiskaggk = pyarray_to_cellindexarray(cellarray)? + let griddiskaggk = cellarray + .as_ref() .grid_disk_aggregate_k(k, aggregation_method.0) .into_pyresult()?; - with_pyarrow(|py, pyarrow| { - let arrays = [ - h3array_to_pyarray(griddiskaggk.cells, py)?, - griddiskaggk.distances.into_data().into_pyarrow(py)?, - ]; - let table = pyarrow - .getattr("Table")? - .call_method1("from_arrays", (arrays, [DEFAULT_CELL_COLUMN_NAME, "k"]))?; - Ok(table.to_object(py)) - }) + let schema = Schema::new(vec![ + Field::new( + DEFAULT_CELL_COLUMN_NAME, + griddiskaggk.cells.primitive_array().data_type().clone(), + true, + ), + Field::new("k", griddiskaggk.distances.data_type().clone(), true), + ]); + let columns: Vec = vec![ + Arc::new(griddiskaggk.cells.primitive_array().clone()), + Arc::new(griddiskaggk.distances), + ]; + let batch = RecordBatch::try_new(Arc::new(schema), columns)?; + Ok(PyRecordBatch::new(batch).to_arro3(py)?) } diff --git a/h3ronpy/src/op/resolution.rs b/h3ronpy/src/op/resolution.rs index 26543da..b56bd4c 100644 --- a/h3ronpy/src/op/resolution.rs +++ b/h3ronpy/src/op/resolution.rs @@ -1,16 +1,21 @@ -use arrow::array::{Array, LargeListArray, PrimitiveArray}; -use arrow::pyarrow::{IntoPyArrow, ToPyArrow}; +use std::sync::Arc; + +use arrow::array::{Array, ArrayRef, LargeListArray, RecordBatch}; +use arrow::datatypes::{Field, Schema}; use h3arrow::algorithm::ChangeResolutionOp; use h3arrow::export::h3o::Resolution; use pyo3::prelude::*; +use pyo3_arrow::error::PyArrowResult; +use pyo3_arrow::{PyArray, PyRecordBatch}; +use crate::array::PyCellArray; use crate::arrow_interop::*; use crate::error::IntoPyResult; use crate::DEFAULT_CELL_COLUMN_NAME; #[pyfunction] -pub(crate) fn change_resolution(cellarray: &Bound, h3_resolution: u8) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; +pub(crate) fn change_resolution(cellarray: PyCellArray, h3_resolution: u8) -> PyResult { + let cellindexarray = cellarray.into_inner(); let h3_resolution = Resolution::try_from(h3_resolution).into_pyresult()?; let out = cellindexarray .change_resolution(h3_resolution) @@ -21,50 +26,56 @@ pub(crate) fn change_resolution(cellarray: &Bound, h3_resolution: u8) -> #[pyfunction] pub(crate) fn change_resolution_list( - cellarray: &Bound, + py: Python, + cellarray: PyCellArray, h3_resolution: u8, ) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; + let cellindexarray = cellarray.into_inner(); let h3_resolution = Resolution::try_from(h3_resolution).into_pyresult()?; let listarray = cellindexarray .change_resolution_list(h3_resolution) .into_pyresult()?; - Python::with_gil(|py| LargeListArray::from(listarray).into_data().to_pyarrow(py)) + PyArray::from_array_ref(Arc::new(LargeListArray::from(listarray))).to_arro3(py) } #[pyfunction] pub(crate) fn change_resolution_paired( - cellarray: &Bound, + py: Python, + cellarray: PyCellArray, h3_resolution: u8, -) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; +) -> PyArrowResult { + let cellindexarray = cellarray.into_inner(); let h3_resolution = Resolution::try_from(h3_resolution).into_pyresult()?; let pair = cellindexarray .change_resolution_paired(h3_resolution) .into_pyresult()?; - with_pyarrow(|py, pyarrow| { - let arrays = [ - h3array_to_pyarray(pair.before, py)?, - h3array_to_pyarray(pair.after, py)?, - ]; - let table = pyarrow.getattr("Table")?.call_method1( - "from_arrays", - ( - arrays, - [ - format!("{}_before", DEFAULT_CELL_COLUMN_NAME), - format!("{}_after", DEFAULT_CELL_COLUMN_NAME), - ], - ), - )?; - Ok(table.to_object(py)) - }) + let before = pair.before; + let after = pair.after; + + let schema = Schema::new(vec![ + Field::new( + format!("{}_before", DEFAULT_CELL_COLUMN_NAME), + before.primitive_array().data_type().clone(), + true, + ), + Field::new( + format!("{}_after", DEFAULT_CELL_COLUMN_NAME), + after.primitive_array().data_type().clone(), + true, + ), + ]); + let columns: Vec = vec![ + Arc::new(before.primitive_array().clone()), + Arc::new(after.primitive_array().clone()), + ]; + let batch = RecordBatch::try_new(Arc::new(schema), columns)?; + Ok(PyRecordBatch::new(batch).to_arro3(py)?) } #[pyfunction] -pub(crate) fn cells_resolution(cellarray: &Bound) -> PyResult { - let resarray = pyarray_to_cellindexarray(cellarray)?.resolution(); - Python::with_gil(|py| PrimitiveArray::from(resarray).into_data().into_pyarrow(py)) +pub(crate) fn cells_resolution(py: Python, cellarray: PyCellArray) -> PyResult { + let resarray = cellarray.as_ref().resolution(); + PyArray::from_array_ref(Arc::new(resarray.into_inner())).to_arro3(py) } diff --git a/h3ronpy/src/op/string.rs b/h3ronpy/src/op/string.rs index 9bd1f09..5af79f9 100644 --- a/h3ronpy/src/op/string.rs +++ b/h3ronpy/src/op/string.rs @@ -1,20 +1,23 @@ -use arrow::array::{make_array, Array, ArrayData, LargeStringArray, StringArray}; -use arrow::pyarrow::{FromPyArrow, IntoPyArrow}; +use std::sync::Arc; + +use arrow::array::{Array, LargeStringArray, StringArray}; use h3arrow::algorithm::{ParseGenericStringArray, ToGenericStringArray}; use h3arrow::array::{CellIndexArray, DirectedEdgeIndexArray, VertexIndexArray}; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use pyo3_arrow::PyArray; +use crate::array::PyCellArray; use crate::arrow_interop::*; use crate::error::IntoPyResult; #[pyfunction] #[pyo3(signature = (stringarray, set_failing_to_invalid = false))] pub(crate) fn cells_parse( - stringarray: &Bound, + stringarray: PyArray, set_failing_to_invalid: bool, ) -> PyResult { - let boxed_array = make_array(ArrayData::from_pyarrow_bound(stringarray)?); + let (boxed_array, _field) = stringarray.into_inner(); let cells = if let Some(stringarray) = boxed_array.as_any().downcast_ref::() { CellIndexArray::parse_genericstringarray(stringarray, set_failing_to_invalid) .into_pyresult()? @@ -33,10 +36,10 @@ pub(crate) fn cells_parse( #[pyfunction] #[pyo3(signature = (stringarray, set_failing_to_invalid = false))] pub(crate) fn vertexes_parse( - stringarray: &Bound, + stringarray: PyArray, set_failing_to_invalid: bool, ) -> PyResult { - let boxed_array = make_array(ArrayData::from_pyarrow_bound(stringarray)?); + let (boxed_array, _field) = stringarray.into_inner(); let vertexes = if let Some(utf8array) = boxed_array.as_any().downcast_ref::() { VertexIndexArray::parse_genericstringarray(utf8array, set_failing_to_invalid) .into_pyresult()? @@ -55,10 +58,10 @@ pub(crate) fn vertexes_parse( #[pyfunction] #[pyo3(signature = (stringarray, set_failing_to_invalid = false))] pub(crate) fn directededges_parse( - stringarray: &Bound, + stringarray: PyArray, set_failing_to_invalid: bool, ) -> PyResult { - let boxed_array = make_array(ArrayData::from_pyarrow_bound(stringarray)?); + let (boxed_array, _field) = stringarray.into_inner(); let edges = if let Some(stringarray) = boxed_array.as_any().downcast_ref::() { DirectedEdgeIndexArray::parse_genericstringarray(stringarray, set_failing_to_invalid) .into_pyresult()? @@ -76,30 +79,29 @@ pub(crate) fn directededges_parse( #[pyfunction] #[pyo3(signature = (cellarray))] -pub(crate) fn cells_to_string(cellarray: &Bound) -> PyResult { - let stringarray: LargeStringArray = pyarray_to_cellindexarray(cellarray)? - .to_genericstringarray() - .into_pyresult()?; - - Python::with_gil(|py| stringarray.into_data().into_pyarrow(py)) +pub(crate) fn cells_to_string(py: Python, cellarray: PyCellArray) -> PyResult { + let stringarray: LargeStringArray = + cellarray.as_ref().to_genericstringarray().into_pyresult()?; + PyArray::from_array_ref(Arc::new(stringarray)).to_arro3(py) } #[pyfunction] #[pyo3(signature = (vertexarray))] -pub(crate) fn vertexes_to_string(vertexarray: &Bound) -> PyResult { +pub(crate) fn vertexes_to_string(py: Python, vertexarray: &Bound) -> PyResult { let stringarray: LargeStringArray = pyarray_to_vertexindexarray(vertexarray)? .to_genericstringarray() .into_pyresult()?; - - Python::with_gil(|py| stringarray.into_data().into_pyarrow(py)) + PyArray::from_array_ref(Arc::new(stringarray)).to_arro3(py) } #[pyfunction] #[pyo3(signature = (directededgearray))] -pub(crate) fn directededges_to_string(directededgearray: &Bound) -> PyResult { +pub(crate) fn directededges_to_string( + py: Python, + directededgearray: &Bound, +) -> PyResult { let stringarray: LargeStringArray = pyarray_to_directededgeindexarray(directededgearray)? .to_genericstringarray() .into_pyresult()?; - - Python::with_gil(|py| stringarray.into_data().into_pyarrow(py)) + PyArray::from_array_ref(Arc::new(stringarray)).to_arro3(py) } diff --git a/h3ronpy/src/op/valid.rs b/h3ronpy/src/op/valid.rs index e1c7428..78710b3 100644 --- a/h3ronpy/src/op/valid.rs +++ b/h3ronpy/src/op/valid.rs @@ -1,42 +1,45 @@ +use std::sync::Arc; + use arrow::array::{Array, BooleanArray}; use arrow::buffer::NullBuffer; -use arrow::pyarrow::IntoPyArrow; use h3arrow::array::{FromIteratorWithValidity, H3Array, H3IndexArrayValue}; use h3arrow::h3o; use h3o::{CellIndex, DirectedEdgeIndex, VertexIndex}; use pyo3::prelude::*; +use pyo3_arrow::PyArray; use crate::arrow_interop::*; -fn h3index_valid(arr: &Bound, booleanarray: bool) -> PyResult +fn h3index_valid(py: Python, arr: &Bound, booleanarray: bool) -> PyResult where IX: H3IndexArrayValue, { let u64array = pyarray_to_uint64array(arr)?; let validated = H3Array::::from_iter_with_validity(u64array.iter()); - Python::with_gil(|py| { - if booleanarray { - let nullbuffer = validated - .primitive_array() - .nulls() - .cloned() - .unwrap_or_else(|| NullBuffer::new_valid(validated.len())); - BooleanArray::from(nullbuffer.into_inner()) - .into_data() - .into_pyarrow(py) - } else { - h3array_to_pyarray(validated, py) - } - }) + if booleanarray { + let nullbuffer = validated + .primitive_array() + .nulls() + .cloned() + .unwrap_or_else(|| NullBuffer::new_valid(validated.len())); + let bools = BooleanArray::from(nullbuffer.into_inner()); + PyArray::from_array_ref(Arc::new(bools)).to_arro3(py) + } else { + h3array_to_pyarray(validated, py) + } } macro_rules! impl_h3index_valid { ($name:ident, $arr_type:ty) => { #[pyfunction] #[pyo3(signature = (array, booleanarray = false))] - pub(crate) fn $name(array: &Bound, booleanarray: bool) -> PyResult { - h3index_valid::<$arr_type>(array, booleanarray) + pub(crate) fn $name( + py: Python, + array: &Bound, + booleanarray: bool, + ) -> PyResult { + h3index_valid::<$arr_type>(py, array, booleanarray) } }; } diff --git a/h3ronpy/src/raster.rs b/h3ronpy/src/raster.rs index 2de5fa8..4ceea90 100644 --- a/h3ronpy/src/raster.rs +++ b/h3ronpy/src/raster.rs @@ -1,13 +1,14 @@ use geo_types::Point; +use pyo3_arrow::PyArray; use std::hash::Hash; use std::iter::repeat; use std::str::FromStr; +use std::sync::Arc; use arrow::array::{ - Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, UInt16Array, + Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }; -use arrow::pyarrow::IntoPyArrow; use geo::{AffineOps, AffineTransform}; use h3arrow::array::CellIndexArray; use h3arrow::export::h3o::{CellIndex, Resolution}; @@ -141,7 +142,9 @@ where macro_rules! make_raster_to_h3_variant { ($name:ident, $dtype:ty, $array_dtype:ty) => { #[pyfunction] + #[pyo3(signature = (np_array, transform, h3_resolution, axis_order_str, compact, nodata_value=None))] fn $name( + py: Python, np_array: PyReadonlyArray2<$dtype>, transform: &Transform, h3_resolution: u8, @@ -159,12 +162,11 @@ macro_rules! make_raster_to_h3_variant { compact, )?; - Python::with_gil(|py| { - let values = <$array_dtype>::from(values).into_data().into_pyarrow(py)?; - let cells = h3array_to_pyarray(CellIndexArray::from(cells), py)?; + let values = <$array_dtype>::from(values); + let values = PyArray::from_array_ref(Arc::new(values)).to_arro3(py)?; + let cells = h3array_to_pyarray(CellIndexArray::from(cells), py)?; - Ok((values, cells)) - }) + Ok((values, cells)) } }; } @@ -172,7 +174,9 @@ macro_rules! make_raster_to_h3_variant { macro_rules! make_raster_to_h3_float_variant { ($name:ident, $dtype:ty, $array_dtype:ty) => { #[pyfunction] + #[pyo3(signature = (np_array, transform, h3_resolution, axis_order_str, compact, nodata_value=None))] fn $name( + py: Python, np_array: PyReadonlyArray2<$dtype>, transform: &Transform, h3_resolution: u8, @@ -193,13 +197,12 @@ macro_rules! make_raster_to_h3_float_variant { compact, )?; - Python::with_gil(|py| { - let values: Vec<$dtype> = values.into_iter().map(|v| v.into_inner()).collect(); - let values = <$array_dtype>::from(values).into_data().into_pyarrow(py)?; - let cells = h3array_to_pyarray(CellIndexArray::from(cells), py)?; + let values: Vec<$dtype> = values.into_iter().map(|v| v.into_inner()).collect(); + let values = <$array_dtype>::from(values); + let values = PyArray::from_array_ref(Arc::new(values)).to_arro3(py)?; + let cells = h3array_to_pyarray(CellIndexArray::from(cells), py)?; - Ok((values, cells)) - }) + Ok((values, cells)) } }; } diff --git a/h3ronpy/src/resolution.rs b/h3ronpy/src/resolution.rs new file mode 100644 index 0000000..9014f4f --- /dev/null +++ b/h3ronpy/src/resolution.rs @@ -0,0 +1,20 @@ +use h3arrow::export::h3o::Resolution; +use pyo3::exceptions::PyValueError; +use pyo3::prelude::*; + +pub struct PyResolution(Resolution); + +impl<'py> FromPyObject<'py> for PyResolution { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { + let int = ob.extract::()?; + let res = + Resolution::try_from(int).map_err(|err| PyValueError::new_err(err.to_string()))?; + Ok(Self(res)) + } +} + +impl From for Resolution { + fn from(value: PyResolution) -> Self { + value.0 + } +} diff --git a/h3ronpy/src/vector.rs b/h3ronpy/src/vector.rs index 92c080d..cfcc45a 100644 --- a/h3ronpy/src/vector.rs +++ b/h3ronpy/src/vector.rs @@ -1,15 +1,18 @@ +use std::sync::Arc; + use arrow::array::{ - make_array, Array, ArrayData, BinaryArray, Float64Array, GenericBinaryArray, GenericListArray, - LargeBinaryArray, OffsetSizeTrait, UInt8Array, + ArrayRef, AsArray, Float64Array, GenericBinaryArray, GenericListArray, OffsetSizeTrait, + RecordBatch, UInt8Array, }; use arrow::buffer::NullBuffer; -use arrow::pyarrow::{FromPyArrow, IntoPyArrow, ToPyArrow}; +use arrow::datatypes::{DataType, Field, Schema}; use geo::{BoundingRect, HasDimensions}; use h3arrow::algorithm::ToCoordinatesOp; use h3arrow::array::from_geo::{ToCellIndexArray, ToCellListArray, ToCellsOptions}; use h3arrow::array::to_geoarrow::{ToWKBLineStrings, ToWKBPoints, ToWKBPolygons}; use h3arrow::array::{CellIndexArray, ResolutionArray}; use h3arrow::export::geoarrow::array::{WKBArray, WKBBuilder, WKBCapacity}; +use h3arrow::export::geoarrow::ArrayBase; use h3arrow::export::h3o::geom::{ContainmentMode, ToGeo}; use h3arrow::export::h3o::Resolution; use h3arrow::h3o::geom::PolyfillConfig; @@ -18,7 +21,10 @@ use itertools::multizip; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::types::PyTuple; +use pyo3_arrow::error::PyArrowResult; +use pyo3_arrow::{PyArray, PyRecordBatch}; +use crate::array::PyCellArray; use crate::arrow_interop::*; use crate::error::IntoPyResult; @@ -31,7 +37,7 @@ use crate::error::IntoPyResult; /// This is the fasted option and ensures that every cell is uniquely /// assigned (e.g. two adjacent polygon with zero overlap also have zero /// overlapping cells). -/// +/// /// On the other hand, some cells may cover area outside of the polygon /// (overshooting) and some parts of the polygon may be left uncovered. /// @@ -40,7 +46,7 @@ use crate::error::IntoPyResult; /// This ensures that every cell is uniquely assigned (e.g. two adjacent /// polygon with zero overlap also have zero overlapping cells) and avoids /// any coverage overshooting. -/// +/// /// Some parts of the polygon may be left uncovered (more than with /// `ContainsCentroid`). /// @@ -53,7 +59,7 @@ use crate::error::IntoPyResult; /// * Covers: This mode behaves the same as IntersectsBoundary, but also handles the case where the geometry is /// being covered by a cell without intersecting with its boundaries. In such cases, the covering cell is returned. /// -#[pyclass(name = "ContainmentMode")] +#[pyclass(name = "ContainmentMode", eq, eq_int)] #[derive(Copy, Clone, Eq, PartialEq)] pub enum PyContainmentMode { ContainsCentroid, @@ -81,9 +87,8 @@ impl PyContainmentMode { #[pyfunction] #[pyo3(signature = (cellarray,))] -pub(crate) fn cells_bounds(cellarray: &Bound) -> PyResult> { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; - if let Some(rect) = cellindexarray.bounding_rect() { +pub(crate) fn cells_bounds(cellarray: PyCellArray) -> PyResult> { + if let Some(rect) = cellarray.as_ref().bounding_rect() { Python::with_gil(|py| { Ok(Some( PyTuple::new_bound(py, [rect.min().x, rect.min().y, rect.max().x, rect.max().y]) @@ -97,8 +102,8 @@ pub(crate) fn cells_bounds(cellarray: &Bound) -> PyResult) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; +pub(crate) fn cells_bounds_arrays(py: Python, cellarray: PyCellArray) -> PyArrowResult { + let cellindexarray = cellarray.into_inner(); let mut minx_vec = vec![0.0f64; cellindexarray.len()]; let mut miny_vec = vec![0.0f64; cellindexarray.len()]; let mut maxx_vec = vec![0.0f64; cellindexarray.len()]; @@ -130,50 +135,46 @@ pub(crate) fn cells_bounds_arrays(cellarray: &Bound) -> PyResult = vec![ + Arc::new(Float64Array::new(minx_vec.into(), Some(validity.clone()))), + Arc::new(Float64Array::new(miny_vec.into(), Some(validity.clone()))), + Arc::new(Float64Array::new(maxx_vec.into(), Some(validity.clone()))), + Arc::new(Float64Array::new(maxy_vec.into(), Some(validity.clone()))), + ]; + let batch = RecordBatch::try_new(Arc::new(schema), columns)?; + Ok(PyRecordBatch::new(batch).to_arro3(py)?) } #[pyfunction] #[pyo3(signature = (cellarray, radians = false))] -pub(crate) fn cells_to_coordinates(cellarray: &Bound, radians: bool) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; - +pub(crate) fn cells_to_coordinates( + py: Python, + cellarray: PyCellArray, + radians: bool, +) -> PyArrowResult { let coordinate_arrays = if radians { - cellindexarray.to_coordinates_radians() + cellarray.as_ref().to_coordinates_radians() } else { - cellindexarray.to_coordinates() + cellarray.as_ref().to_coordinates() } .into_pyresult()?; - with_pyarrow(|py, pyarrow| { - let arrays = [ - coordinate_arrays.lat.into_data().into_pyarrow(py)?, - coordinate_arrays.lng.into_data().into_pyarrow(py)?, - ]; - let table = pyarrow - .getattr("Table")? - .call_method1("from_arrays", (arrays, ["lat", "lng"]))?; - Ok(table.to_object(py)) - }) + let schema = Schema::new(vec![ + Field::new("lat", DataType::Float64, true), + Field::new("lng", DataType::Float64, true), + ]); + let columns: Vec = vec![ + Arc::new(coordinate_arrays.lat), + Arc::new(coordinate_arrays.lng), + ]; + let batch = RecordBatch::try_new(Arc::new(schema), columns)?; + Ok(PyRecordBatch::new(batch).to_arro3(py)?) } #[pyfunction] @@ -243,11 +244,12 @@ pub(crate) fn coordinates_to_cells( #[pyfunction] #[pyo3(signature = (cellarray, radians = false, link_cells = false))] pub(crate) fn cells_to_wkb_polygons( - cellarray: &Bound, + py: Python, + cellarray: PyCellArray, radians: bool, link_cells: bool, ) -> PyResult { - let cellindexarray = pyarray_to_cellindexarray(cellarray)?; + let cellindexarray = cellarray.into_inner(); let use_degrees = !radians; let out: WKBArray = if link_cells { @@ -271,22 +273,30 @@ pub(crate) fn cells_to_wkb_polygons( .expect("wkbarray") }; - Python::with_gil(|py| out.into_inner().into_data().into_pyarrow(py)) + let field = out.extension_field(); + PyArray::new(out.into_array_ref(), field).to_arro3(py) } #[pyfunction] #[pyo3(signature = (cellarray, radians = false))] -pub(crate) fn cells_to_wkb_points(cellarray: &Bound, radians: bool) -> PyResult { - let out = pyarray_to_cellindexarray(cellarray)? +pub(crate) fn cells_to_wkb_points( + py: Python, + cellarray: PyCellArray, + radians: bool, +) -> PyResult { + let out = cellarray + .as_ref() .to_wkb_points::(!radians) .expect("wkbarray"); - Python::with_gil(|py| out.into_inner().into_data().into_pyarrow(py)) + let field = out.extension_field(); + PyArray::new(out.into_array_ref(), field).to_arro3(py) } #[pyfunction] #[pyo3(signature = (vertexarray, radians = false))] pub(crate) fn vertexes_to_wkb_points( + py: Python, vertexarray: &Bound, radians: bool, ) -> PyResult { @@ -294,12 +304,14 @@ pub(crate) fn vertexes_to_wkb_points( .to_wkb_points::(!radians) .expect("wkbarray"); - Python::with_gil(|py| out.into_inner().into_data().into_pyarrow(py)) + let field = out.extension_field(); + PyArray::new(out.into_array_ref(), field).to_arro3(py) } #[pyfunction] #[pyo3(signature = (array, radians = false))] pub(crate) fn directededges_to_wkb_linestrings( + py: Python, array: &Bound, radians: bool, ) -> PyResult { @@ -307,7 +319,8 @@ pub(crate) fn directededges_to_wkb_linestrings( .to_wkb_linestrings::(!radians) .expect("wkbarray"); - Python::with_gil(|py| out.into_inner().into_data().into_pyarrow(py)) + let field = out.extension_field(); + PyArray::new(out.into_array_ref(), field).to_arro3(py) } fn get_to_cells_options( @@ -325,27 +338,36 @@ fn get_to_cells_options( #[pyfunction] #[pyo3(signature = (array, resolution, containment_mode = None, compact = false, flatten = false))] pub(crate) fn wkb_to_cells( - array: &Bound, + py: Python, + array: PyArray, resolution: u8, containment_mode: Option, compact: bool, flatten: bool, ) -> PyResult { let options = get_to_cells_options(resolution, containment_mode, compact)?; - let array_ref = make_array(ArrayData::from_pyarrow_bound(array)?); - if let Some(binarray) = array_ref.as_any().downcast_ref::() { - generic_wkb_to_cells(binarray.clone(), flatten, &options) - } else if let Some(binarray) = array_ref.as_any().downcast_ref::() { - generic_wkb_to_cells(binarray.clone(), flatten, &options) - } else { - Err(PyValueError::new_err( + match array.field().data_type() { + DataType::Binary => generic_wkb_to_cells( + py, + array.array().as_binary::().clone(), + flatten, + &options, + ), + DataType::LargeBinary => generic_wkb_to_cells( + py, + array.array().as_binary::().clone(), + flatten, + &options, + ), + _ => Err(PyValueError::new_err( "unsupported array type for WKB input", - )) + )), } } fn generic_wkb_to_cells( + py: Python, binarray: GenericBinaryArray, flatten: bool, options: &ToCellsOptions, @@ -359,7 +381,7 @@ fn generic_wkb_to_cells( } else { let listarray: GenericListArray = wkbarray.to_celllistarray(options).into_pyresult()?.into(); - Python::with_gil(|py| listarray.into_data().to_pyarrow(py)) + PyArray::from_array_ref(Arc::new(listarray)).to_arro3(py) } } diff --git a/h3ronpy/tests/polars/test_benches.py b/h3ronpy/tests/arrow/test_benches.py similarity index 94% rename from h3ronpy/tests/polars/test_benches.py rename to h3ronpy/tests/arrow/test_benches.py index ddf796c..7bc28b2 100644 --- a/h3ronpy/tests/polars/test_benches.py +++ b/h3ronpy/tests/arrow/test_benches.py @@ -1,7 +1,7 @@ -import numpy as np import h3.api.numpy_int as h3 -from h3ronpy.polars import cells_to_string +import numpy as np import polars as pl +from h3ronpy.arrow import cells_to_string def some_cells() -> np.ndarray: diff --git a/h3ronpy/tests/polars/test_compact.py b/h3ronpy/tests/arrow/test_compact.py similarity index 93% rename from h3ronpy/tests/polars/test_compact.py rename to h3ronpy/tests/arrow/test_compact.py index fbd9176..8f70d49 100644 --- a/h3ronpy/tests/polars/test_compact.py +++ b/h3ronpy/tests/arrow/test_compact.py @@ -1,8 +1,7 @@ -import pytest - -from h3ronpy.polars import compact, change_resolution, uncompact -import numpy as np import h3.api.numpy_int as h3 +import numpy as np +import pytest +from h3ronpy.arrow import change_resolution, compact, uncompact def compact_to_one(expected_cell, input_cells, **kw): diff --git a/h3ronpy/tests/polars/test_coordinates.py b/h3ronpy/tests/arrow/test_coordinates.py similarity index 68% rename from h3ronpy/tests/polars/test_coordinates.py rename to h3ronpy/tests/arrow/test_coordinates.py index de2f2d9..80dc5ba 100644 --- a/h3ronpy/tests/polars/test_coordinates.py +++ b/h3ronpy/tests/arrow/test_coordinates.py @@ -1,7 +1,12 @@ -from h3ronpy.polars.vector import cells_to_coordinates, cells_bounds, cells_bounds_arrays, coordinates_to_cells -import polars as pl -import numpy as np import h3.api.numpy_int as h3 +import numpy as np +from arro3.core import RecordBatch +from h3ronpy.vector import ( + cells_bounds, + cells_bounds_arrays, + cells_to_coordinates, + coordinates_to_cells, +) def test_cells_to_coordinates(): @@ -12,9 +17,9 @@ def test_cells_to_coordinates(): dtype=np.uint64, ) coords = cells_to_coordinates(h3indexes) - assert len(coords) == 1 - assert 10.0 < coords["lat"][0] < 11.0 - assert 45.0 < coords["lng"][0] < 46.0 + assert coords.num_rows == 1 + assert 10.0 < coords["lat"][0].as_py() < 11.0 + assert 45.0 < coords["lng"][0].as_py() < 46.0 def test_coordinates_to_cells(): @@ -62,13 +67,13 @@ def test_cells_bounds_arrays(): ) bounds_df = cells_bounds_arrays(h3indexes) assert bounds_df is not None - assert isinstance(bounds_df, pl.DataFrame) - assert len(bounds_df) == 2 - assert "minx" in bounds_df - assert "maxx" in bounds_df - assert "miny" in bounds_df - assert "maxy" in bounds_df - assert bounds_df["minx"][0] < 45.1 - assert bounds_df["maxx"][0] > 45.1 - assert bounds_df["miny"][0] < 10.3 - assert bounds_df["maxy"][0] > 10.3 + assert isinstance(bounds_df, RecordBatch) + assert bounds_df.num_rows == 2 + assert "minx" in bounds_df.schema.names + assert "maxx" in bounds_df.schema.names + assert "miny" in bounds_df.schema.names + assert "maxy" in bounds_df.schema.names + assert bounds_df["minx"][0].as_py() < 45.1 + assert bounds_df["maxx"][0].as_py() > 45.1 + assert bounds_df["miny"][0].as_py() < 10.3 + assert bounds_df["maxy"][0].as_py() > 10.3 diff --git a/h3ronpy/tests/polars/test_localij.py b/h3ronpy/tests/arrow/test_localij.py similarity index 61% rename from h3ronpy/tests/polars/test_localij.py rename to h3ronpy/tests/arrow/test_localij.py index d3ba0fe..4775c0d 100644 --- a/h3ronpy/tests/polars/test_localij.py +++ b/h3ronpy/tests/arrow/test_localij.py @@ -1,6 +1,6 @@ -from h3ronpy.polars import cells_to_localij, cells_parse, localij_to_cells -from polars.testing import assert_series_equal import polars as pl +from h3ronpy.arrow import cells_parse, cells_to_localij, localij_to_cells +from polars.testing import assert_series_equal anchors = cells_parse( [ @@ -16,16 +16,22 @@ def test_cells_to_localij_array(): df = cells_to_localij(cells, anchors) - assert len(df) == 1 - assert_series_equal(df["anchor"], anchors, check_names=False) + assert df.num_rows == 1 + + left = pl.Series(df["anchor"]) + right = pl.Series(anchors) + assert_series_equal(left, right, check_names=False) assert df["i"][0] == 25 assert df["j"][0] == 13 def test_cells_to_localij_single_anchor(): df = cells_to_localij(cells, anchors[0]) - assert len(df) == 1 - assert_series_equal(df["anchor"], anchors, check_names=False) + assert df.num_rows == 1 + + left = pl.Series(df["anchor"]) + right = pl.Series(anchors) + assert_series_equal(left, right, check_names=False) assert df["i"][0] == 25 assert df["j"][0] == 13 @@ -46,4 +52,7 @@ def test_localij_to_cells(): dtype=pl.Int32(), ), ) - assert_series_equal(cells, cells2, check_names=False) + + left = pl.Series(cells) + right = pl.Series(cells2) + assert_series_equal(left, right, check_names=False) diff --git a/h3ronpy/tests/polars/test_measure.py b/h3ronpy/tests/arrow/test_measure.py similarity index 59% rename from h3ronpy/tests/polars/test_measure.py rename to h3ronpy/tests/arrow/test_measure.py index 9e1b000..fdef5ff 100644 --- a/h3ronpy/tests/polars/test_measure.py +++ b/h3ronpy/tests/arrow/test_measure.py @@ -1,7 +1,7 @@ -import numpy as np import h3.api.numpy_int as h3 -from h3ronpy.polars import cells_area_km2 -import polars as pl +import numpy as np +from arro3.core import Array +from h3ronpy.arrow import cells_area_km2 def test_cells_area_km2(): @@ -14,8 +14,8 @@ def test_cells_area_km2(): dtype=np.uint64, ) areas = cells_area_km2(cells) - assert isinstance(areas, pl.Series) + assert isinstance(areas, Array) assert len(areas) == 3 - assert int(areas[0] * 100) == 62 - assert int(areas[1]) == 213 - assert int(areas[2]) == 10456 + assert int(areas[0].as_py() * 100) == 62 + assert int(areas[1].as_py()) == 213 + assert int(areas[2].as_py()) == 10456 diff --git a/h3ronpy/tests/polars/test_neighbor.py b/h3ronpy/tests/arrow/test_neighbor.py similarity index 50% rename from h3ronpy/tests/polars/test_neighbor.py rename to h3ronpy/tests/arrow/test_neighbor.py index dca835b..0933133 100644 --- a/h3ronpy/tests/polars/test_neighbor.py +++ b/h3ronpy/tests/arrow/test_neighbor.py @@ -1,7 +1,14 @@ -from h3ronpy.polars import grid_disk, grid_disk_distances, grid_ring_distances, grid_disk_aggregate_k -import numpy as np import h3.api.numpy_int as h3 +import numpy as np import polars as pl +import pyarrow as pa +from arro3.core import RecordBatch +from h3ronpy.arrow import ( + grid_disk, + grid_disk_aggregate_k, + grid_disk_distances, + grid_ring_distances, +) def test_grid_disk(): @@ -14,11 +21,12 @@ def test_grid_disk(): ) disks = grid_disk(h3indexes, 2) assert len(disks) == 2 - assert disks.dtype == pl.List(pl.UInt64()) + # Arro3 has some bugs to fix around data type equality for nested types + assert pa.field(disks.type).type == pa.large_list(pa.uint64()) disks_flat = grid_disk(h3indexes, 2, flatten=True) assert len(disks_flat) > 20 - assert disks_flat.dtype == pl.UInt64() + assert disks_flat.type == pa.uint64() def test_grid_disk_distances(): @@ -30,16 +38,15 @@ def test_grid_disk_distances(): dtype=np.uint64, ) disks = grid_disk_distances(h3indexes, 2) - assert type(disks) == pl.DataFrame - assert len(disks) == len(h3indexes) - assert disks["cell"].dtype == pl.List(pl.UInt64()) - assert disks["k"].dtype == pl.List(pl.UInt32()) - - centers = ( - grid_disk_distances(h3indexes, 2, flatten=True) - .lazy() - .filter(pl.col("cell").is_in(pl.Series(h3indexes))) - .collect() + assert type(disks) == RecordBatch + assert disks.num_rows == len(h3indexes) + + # Arro3 has some bugs to fix around data type equality for nested types + assert pa.field(disks["cell"].type).type == pa.large_list(pa.uint64()) + assert pa.field(disks["k"].type).type == pa.large_list(pa.uint32()) + + centers = pl.DataFrame(grid_disk_distances(h3indexes, 2, flatten=True)).filter( + pl.col("cell").is_in(pl.Series(h3indexes)) ) assert len(centers) == len(h3indexes) assert len(centers["k"].unique()) == 1 @@ -57,16 +64,15 @@ def test_grid_ring_distances(): dtype=np.uint64, ) disks = grid_ring_distances(h3indexes, 1, 2) - assert type(disks) == pl.DataFrame - assert len(disks) == len(h3indexes) - assert disks["cell"].dtype == pl.List(pl.UInt64()) - assert disks["k"].dtype == pl.List(pl.UInt32()) - - centers = ( - grid_ring_distances(h3indexes, 1, 2, flatten=True) - .lazy() - .filter(pl.col("cell").is_in(pl.Series(h3indexes))) - .collect() + assert type(disks) == RecordBatch + assert disks.num_rows == len(h3indexes) + + # Arro3 has some bugs to fix around data type equality for nested types + assert pa.field(disks["cell"].type).type == pa.large_list(pa.uint64()) + assert pa.field(disks["k"].type).type == pa.large_list(pa.uint32()) + + centers = pl.DataFrame(grid_ring_distances(h3indexes, 1, 2, flatten=True)).filter( + pl.col("cell").is_in(pl.Series(h3indexes)) ) assert len(centers) == 0 @@ -82,8 +88,8 @@ def test_grid_disk_aggregate_k(): dtype=np.uint64, ) disks = grid_disk_aggregate_k(h3indexes, 2, "max") - assert type(disks) == pl.DataFrame - assert disks["cell"].dtype == pl.UInt64() - assert disks["k"].dtype == pl.UInt32() + assert type(disks) == RecordBatch + assert disks["cell"].type == pa.uint64() + assert disks["k"].type == pa.uint32() # TODO: check values diff --git a/h3ronpy/tests/polars/test_raster.py b/h3ronpy/tests/arrow/test_raster.py similarity index 88% rename from h3ronpy/tests/polars/test_raster.py rename to h3ronpy/tests/arrow/test_raster.py index bb29711..9a064c9 100644 --- a/h3ronpy/tests/polars/test_raster.py +++ b/h3ronpy/tests/arrow/test_raster.py @@ -7,11 +7,11 @@ HAS_RASTERIO = False import numpy as np -import pytest import polars as pl - -from h3ronpy.polars.raster import raster_to_dataframe, rasterize_cells +import pyarrow as pa +import pytest from h3ronpy import DEFAULT_CELL_COLUMN_NAME, H3_CRS +from h3ronpy.raster import raster_to_dataframe, rasterize_cells from tests import TESTDATA_PATH @@ -22,18 +22,18 @@ def test_r_tiff(): band = dataset.read(1) df = raster_to_dataframe(band, dataset.transform, 8, nodata_value=0, compact=True) assert len(df) > 100 - assert df[DEFAULT_CELL_COLUMN_NAME].dtype == pl.UInt64 - assert df["value"].dtype == pl.UInt8 + assert df[DEFAULT_CELL_COLUMN_NAME].type == pa.uint64() + assert df["value"].type == pa.uint8() @pytest.mark.skipif(not HAS_RASTERIO, reason="requires rasterio") def test_r_tiff_float32(): dataset = rasterio.open(TESTDATA_PATH / "r.tiff") band = dataset.read(1).astype(np.float32) - df = raster_to_dataframe(band, dataset.transform, 8, nodata_value=np.NAN, compact=True) + df = raster_to_dataframe(band, dataset.transform, 8, nodata_value=np.nan, compact=True) assert len(df) > 100 - assert df[DEFAULT_CELL_COLUMN_NAME].dtype == pl.UInt64 - assert df["value"].dtype == pl.Float32 + assert df[DEFAULT_CELL_COLUMN_NAME].type == pa.uint64() + assert df["value"].type == pa.float32() def write_gtiff(filename, array, transform, nodata_value): diff --git a/h3ronpy/tests/pandas/test_resolution.py b/h3ronpy/tests/arrow/test_resolution.py similarity index 66% rename from h3ronpy/tests/pandas/test_resolution.py rename to h3ronpy/tests/arrow/test_resolution.py index 3c32eef..062d1b9 100644 --- a/h3ronpy/tests/pandas/test_resolution.py +++ b/h3ronpy/tests/arrow/test_resolution.py @@ -1,17 +1,17 @@ -import numpy as np import math -import h3.api.numpy_int as h3 -from h3ronpy.pandas import change_resolution, change_resolution_paired, cells_resolution +import h3.api.numpy_int as h3 +import numpy as np +from h3ronpy.arrow import cells_resolution, change_resolution, change_resolution_paired def test_change_resolution_up(): h3indexes = np.array([h3.geo_to_h3(10.2, 45.5, 5), h3.geo_to_h3(10.3, 45.1, 8)], dtype=np.uint64) out_res = 9 changed = change_resolution(h3indexes, out_res) - assert changed.shape[0] == (int(math.pow(7, 4)) + 7) + assert len(changed) == (int(math.pow(7, 4)) + 7) for i in range(len(changed)): - assert h3.h3_get_resolution(changed[i]) == out_res + assert h3.h3_get_resolution(changed[i].as_py()) == out_res def test_change_resolution_paired_up(): @@ -23,19 +23,19 @@ def test_change_resolution_paired_up(): ) out_res = 9 changed_df = change_resolution_paired(h3indexes, out_res) - assert len(changed_df) == 7 - for i in range(len(changed_df)): - assert h3.h3_get_resolution(changed_df["cell_before"][i]) == 8 - assert h3.h3_get_resolution(changed_df["cell_after"][i]) == out_res + assert changed_df.num_rows == 7 + for i in range(changed_df.num_rows): + assert h3.h3_get_resolution(changed_df["cell_before"][i].as_py()) == 8 + assert h3.h3_get_resolution(changed_df["cell_after"][i].as_py()) == out_res def test_change_resolution_down(): h3indexes = np.array([h3.geo_to_h3(10.2, 45.5, 5), h3.geo_to_h3(10.3, 45.1, 8)], dtype=np.uint64) out_res = 4 changed = change_resolution(h3indexes, out_res) - assert changed.shape[0] == 2 - assert h3.h3_get_resolution(changed[0]) == out_res - assert h3.h3_get_resolution(changed[1]) == out_res + assert len(changed) == 2 + assert h3.h3_get_resolution(changed[0].as_py()) == out_res + assert h3.h3_get_resolution(changed[1].as_py()) == out_res def test_cells_resolution(): diff --git a/h3ronpy/tests/polars/test_utf8.py b/h3ronpy/tests/arrow/test_utf8.py similarity index 69% rename from h3ronpy/tests/polars/test_utf8.py rename to h3ronpy/tests/arrow/test_utf8.py index 2ee90e2..5dc9550 100644 --- a/h3ronpy/tests/polars/test_utf8.py +++ b/h3ronpy/tests/arrow/test_utf8.py @@ -1,9 +1,9 @@ -import pytest - -from h3ronpy.polars import cells_parse, cells_valid, cells_to_string -import numpy as np import h3.api.numpy_int as h3 -import polars as pl +import numpy as np +import pyarrow as pa +import pytest +from arro3.core import Array +from h3ronpy.arrow import cells_parse, cells_to_string, cells_valid def test_cells_parse(): @@ -15,7 +15,7 @@ def test_cells_parse(): def test_cells_parse_largeutf8(): # polars uses LargeUtf8 datatype for strings - cells = cells_parse(pl.Series(["801ffffffffffff"])) + cells = cells_parse(pa.array(["801ffffffffffff"], type=pa.large_utf8())) assert len(cells) == 1 @@ -37,7 +37,7 @@ def test_parse_cell_set_invalid(): ) cells = cells_parse(strings, set_failing_to_invalid=True) assert len(cells) == 1 - assert cells[0] is None + assert not cells[0].is_valid def test_cells_valid(): @@ -47,17 +47,17 @@ def test_cells_valid(): ) cells = cells_valid(input, booleanarray=False) assert len(cells) == 2 - assert cells.dtype == pl.datatypes.UInt64() - assert cells[0] is None - assert cells[1] is not None + assert cells.type == pa.uint64() + assert not cells[0].is_valid + assert cells[1].is_valid bools = cells_valid(input, booleanarray=True) assert len(bools) == 2 - assert bools.dtype == pl.datatypes.Boolean() - assert bools[0] is False - assert bools[1] is True + assert bools.type == pa.bool_() + assert bools[0].as_py() is False + assert bools[1].as_py() is True - assert cells.is_not_null().eq(bools).all() + assert pa.array(cells).is_valid() == pa.array(bools) def test_cells_to_string(): @@ -68,6 +68,6 @@ def test_cells_to_string(): ) strings = cells_to_string(cells) assert len(strings) == len(cells) - assert isinstance(strings, pl.Series) - assert strings.dtype == pl.Utf8 + assert isinstance(strings, Array) + assert strings.type == pa.large_utf8() assert strings[0] == "851f9923fffffff" diff --git a/h3ronpy/tests/arrow/test_vector.py b/h3ronpy/tests/arrow/test_vector.py index cfd7b65..7036eac 100644 --- a/h3ronpy/tests/arrow/test_vector.py +++ b/h3ronpy/tests/arrow/test_vector.py @@ -1,16 +1,16 @@ -from h3ronpy.arrow.vector import geometry_to_cells, ContainmentMode, cells_to_wkb_points -import pyarrow as pa +import h3.api.numpy_int as h3 import shapely -from shapely.geometry import Point +from arro3.core import Array, DataType, Scalar +from h3ronpy.vector import ContainmentMode, cells_to_wkb_points, geometry_to_cells from shapely import wkb -import h3.api.numpy_int as h3 +from shapely.geometry import Point def test_geometry_to_cells(): geom = shapely.Polygon(((0.0, 0.0), (0.0, 1.0), (1.0, 1.0), (1.0, 0.0), (0.0, 0.0))) cells = geometry_to_cells(geom, 5, containment_mode=ContainmentMode.IntersectsBoundary) - assert isinstance(cells, pa.Array) - assert cells.type == pa.uint64() + assert isinstance(cells, Array) + assert cells.type == DataType.uint64() assert len(cells) > 10 @@ -38,8 +38,8 @@ def test_coordinate_values_are_not_equal_issue_58(): assert len(wkb_points) == 1 # Step 4: Decode the WKB point to a Shapely geometry - for wkb_point in wkb_points: - assert isinstance(wkb_point, pa.Scalar) # Ensure it's a pyarrow Scalar - shapely_point = wkb.loads(wkb_point.as_buffer().to_pybytes()) + for wkb_point in iter(wkb_points): + assert isinstance(wkb_point, Scalar) # Ensure it's an arro3 Scalar + shapely_point = wkb.loads(wkb_point.as_py()) assert int(lat) == int(shapely_point.y) assert int(lon) == int(shapely_point.x) diff --git a/h3ronpy/tests/pandas/test_raster.py b/h3ronpy/tests/pandas/test_raster.py deleted file mode 100644 index 07006ff..0000000 --- a/h3ronpy/tests/pandas/test_raster.py +++ /dev/null @@ -1,48 +0,0 @@ -try: - import rasterio - - HAS_RASTERIO = True -except ImportError: - # rasterio is an optional dependency - HAS_RASTERIO = False - -import numpy as np -import pytest - -from h3ronpy.pandas.raster import raster_to_dataframe -from h3ronpy import DEFAULT_CELL_COLUMN_NAME - -from tests import TESTDATA_PATH - - -@pytest.mark.skipif(not HAS_RASTERIO, reason="requires rasterio") -def test_r_tiff(): - dataset = rasterio.open(TESTDATA_PATH / "r.tiff") - band = dataset.read(1) - df = raster_to_dataframe(band, dataset.transform, 8, nodata_value=0, compact=True, geo=False) - assert len(df) > 100 - assert df.dtypes[DEFAULT_CELL_COLUMN_NAME] == "uint64" - assert df.dtypes["value"] == "uint8" - - -@pytest.mark.skipif(not HAS_RASTERIO, reason="requires rasterio") -def test_r_tiff_float32(): - dataset = rasterio.open(TESTDATA_PATH / "r.tiff") - band = dataset.read(1).astype(np.float32) - df = raster_to_dataframe(band, dataset.transform, 8, nodata_value=0.0, compact=True, geo=False) - assert len(df) > 100 - assert df.dtypes[DEFAULT_CELL_COLUMN_NAME] == "uint64" - assert df.dtypes["value"] == "float32" - - -def test_preserve_nan_without_nodata_value(): - arr = np.array([[np.nan, 1.0], [np.nan, 1.0]], dtype=np.float32) - df = raster_to_dataframe(arr, [11.0, 1.0, 0.0, 10.0, 1.2, 0.2], 7, nodata_value=None) - assert df["value"].value_counts(dropna=False)[np.nan] > 100 - assert df["value"].value_counts(dropna=False)[1.0] > 100 - - -def test_preserve_nan_with_nodata_value(): - arr = np.array([[np.nan, 1.0], [np.nan, 1.0]], dtype=np.float32) - df = raster_to_dataframe(arr, [11.0, 1.0, 0.0, 10.0, 1.2, 0.2], 7, nodata_value=1.0) - assert df["value"].value_counts(dropna=False)[np.nan] > 100 diff --git a/h3ronpy/tests/pandas/test_vector.py b/h3ronpy/tests/pandas/test_vector.py index d6d66e7..c7f49bc 100644 --- a/h3ronpy/tests/pandas/test_vector.py +++ b/h3ronpy/tests/pandas/test_vector.py @@ -1,48 +1,11 @@ -import shapely - +import geopandas as gpd import pandas as pd -from shapely.geometry import Point, GeometryCollection, Polygon import pytest -from h3ronpy.pandas import change_resolution -from h3ronpy.pandas.vector import ( - cells_to_points, - cells_to_polygons, - cells_dataframe_to_geodataframe, - geodataframe_to_cells, - geoseries_to_cells, -) from h3ronpy import DEFAULT_CELL_COLUMN_NAME, ContainmentMode -import geopandas as gpd -from .. import load_africa, TESTDATA_PATH +from h3ronpy.pandas.vector import cells_dataframe_to_geodataframe, geodataframe_to_cells +from shapely.geometry import GeometryCollection, Point, Polygon - -def test_cells_to_points(): - gs = cells_to_points( - [ - 0x8009FFFFFFFFFFF, - ] - ) - assert isinstance(gs, gpd.GeoSeries) - assert gs.geom_type[0] == "Point" - - -def test_cells_to_polygons(): - cells = change_resolution( - [ - 0x8009FFFFFFFFFFF, - ], - 3, - ) - gs = cells_to_polygons(cells) - assert isinstance(gs, gpd.GeoSeries) - assert gs.geom_type[0] == "Polygon" - assert len(gs) == 286 - - linked_gs = cells_to_polygons(cells, link_cells=True) - assert isinstance(linked_gs, gpd.GeoSeries) - assert linked_gs.geom_type[0] == "Polygon" - assert len(linked_gs) == 1 - assert shapely.get_num_coordinates(linked_gs[0]) > 120 +from .. import load_africa def test_cells_dataframe_to_geodataframe(): @@ -78,13 +41,6 @@ def test_cells_geodataframe_to_cells(): assert df.dtypes[DEFAULT_CELL_COLUMN_NAME] == "uint64" -def test_geoseries_to_cells_flatten(): - africa = load_africa() - cells = geoseries_to_cells(africa.geometry, 4, flatten=True) - assert len(cells) >= len(africa) - assert cells.dtype == "uint64" - - @pytest.mark.skip( reason="GeometryCollections are unsupported until https://github.com/geoarrow/geoarrow-rs/blob/3a2aaa883126274037cabaf46b1f5f6459938297/src/io/wkb/reader/geometry_collection.rs#L23 is fixed" ) @@ -101,6 +57,9 @@ def test_empty_geometrycollection_omitted(): assert len(df) == 0 +@pytest.mark.skip( + reason="Empty points are unsupported until https://github.com/geoarrow/geoarrow-rs/issues/852 is fixed" +) def test_fail_on_empty_point(): gdf = gpd.GeoDataFrame( { @@ -110,7 +69,9 @@ def test_fail_on_empty_point(): }, crs="epsg:4326", ) - with pytest.raises(ValueError): + # Note: in geoarrow-rs this currently panics, and so raises a + # pyo3_runtime.PanicException. geoarrow-rs should be updated to not panic here. + with pytest.raises(Exception): geodataframe_to_cells(gdf, 4) diff --git a/h3ronpy/tests/polars/test_expr.py b/h3ronpy/tests/polars/test_expr.py index 21a9deb..2cbdafb 100644 --- a/h3ronpy/tests/polars/test_expr.py +++ b/h3ronpy/tests/polars/test_expr.py @@ -1,11 +1,32 @@ -import polars as pl -from . import some_cell_series +import h3.api.numpy_int as h3 # register expressions with polars import h3ronpy.polars as _ +import numpy as np +import polars as pl + + +def some_cell_series() -> pl.Series: + return pl.Series( + np.array( + [ + h3.geo_to_h3(10.3, 45.1, 8), + ], + dtype=np.uint64, + ) + ) def test_expr_cells_resolution(): + df = pl.DataFrame({"cells": some_cell_series()}) + df.lazy().with_columns( + [ + pl.col("cells").h3.cells_resolution().alias("resolution"), + ] + ).collect() + + pl.col("cells") + df = ( pl.DataFrame({"cells": some_cell_series()}) .lazy() @@ -34,3 +55,10 @@ def test_expr_grid_disk(): assert df["disk"].dtype == pl.List assert df["disk"].dtype.inner == pl.UInt64 assert len(df["disk"][0]) == 7 + + +def test_series(): + s = some_cell_series() + assert s.h3.cells_resolution()[0] == 8 + + assert s.h3.change_resolution(5)[0] == 600436446234411007 diff --git a/h3ronpy/tests/polars/test_series.py b/h3ronpy/tests/polars/test_series.py index 94c88f2..8b9b158 100644 --- a/h3ronpy/tests/polars/test_series.py +++ b/h3ronpy/tests/polars/test_series.py @@ -1,8 +1,8 @@ -import polars as pl -from . import some_cell_series - # register expressions with polars import h3ronpy.polars as _ +import polars as pl + +from . import some_cell_series def test_series_cells_resolution(): diff --git a/h3ronpy/tests/test_transform.py b/h3ronpy/tests/test_transform.py index c935c08..729b4e8 100644 --- a/h3ronpy/tests/test_transform.py +++ b/h3ronpy/tests/test_transform.py @@ -1,4 +1,4 @@ -from h3ronpy.arrow.raster import Transform +from h3ronpy.raster import Transform def test_transform_cmp():