From 6113f5f60a116f1008d0b9704f9ed6c13c63b7b4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 31 Jan 2024 15:24:27 -1000 Subject: [PATCH 1/2] Use ListColumn instead of build_list_column (#1327) This API is planned for removal in cudf: https://github.com/rapidsai/cudf/pull/14786 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Mark Harris (https://github.com/harrism) - Michael Wang (https://github.com/isVoid) URL: https://github.com/rapidsai/cuspatial/pull/1327 --- .../cuspatial/core/_column/geocolumn.py | 45 ++++++++++--------- .../cuspatial/core/binops/intersection.py | 19 ++++---- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/python/cuspatial/cuspatial/core/_column/geocolumn.py b/python/cuspatial/cuspatial/core/_column/geocolumn.py index c62c59301..5ad1ffb55 100644 --- a/python/cuspatial/cuspatial/core/_column/geocolumn.py +++ b/python/cuspatial/cuspatial/core/_column/geocolumn.py @@ -8,7 +8,7 @@ import pyarrow as pa import cudf -from cudf.core.column import ColumnBase, as_column, build_list_column +from cudf.core.column import ColumnBase, ListColumn, as_column from cuspatial.core._column.geometa import Feature_Enum, GeoMeta from cuspatial.utils.column_utils import empty_geometry_column @@ -178,10 +178,11 @@ def _from_multipoints_xy( if not multipoints_xy.dtype.kind == "f": raise ValueError("Coordinates must be floating point numbers.") - multipoint_col = build_list_column( - indices=geometry_offsets, - elements=_xy_as_variable_sized_list(multipoints_xy), + multi_elements = _xy_as_variable_sized_list(multipoints_xy) + multipoint_col = ListColumn( + dtype=cudf.ListDtype(multi_elements.dtype), size=len(geometry_offsets) - 1, + children=(geometry_offsets, multi_elements), ) num_multipoints = len(multipoint_col) @@ -232,15 +233,16 @@ def _from_linestrings_xy( if not linestrings_xy.dtype.kind == "f": raise ValueError("Coordinates must be floating point numbers.") - parts_col = build_list_column( - indices=part_offsets, - elements=_xy_as_variable_sized_list(linestrings_xy), + parts_elements = _xy_as_variable_sized_list(linestrings_xy) + parts_col = ListColumn( + dtype=cudf.ListDtype(parts_elements.dtype), size=len(part_offsets) - 1, + children=(part_offsets, parts_elements), ) - linestrings_col = build_list_column( - indices=geometry_offsets, - elements=parts_col, + linestrings_col = ListColumn( + dtype=cudf.ListDtype(parts_col.dtype), size=len(geometry_offsets) - 1, + children=(geometry_offsets, parts_col), ) num_linestrings = len(linestrings_col) @@ -292,20 +294,21 @@ def _from_polygons_xy( if not polygons_xy.dtype.kind == "f": raise ValueError("Coordinates must be floating point numbers.") - rings_col = build_list_column( - indices=ring_offsets, - elements=_xy_as_variable_sized_list(polygons_xy), + ring_elements = _xy_as_variable_sized_list(polygons_xy) + rings_col = ListColumn( + dtype=cudf.ListDtype(ring_elements.dtype), size=len(ring_offsets) - 1, + children=(ring_offsets, ring_elements), ) - parts_col = build_list_column( - indices=part_offsets, - elements=rings_col, + parts_col = ListColumn( + dtype=cudf.ListDtype(rings_col.dtype), size=len(part_offsets) - 1, + children=(part_offsets, rings_col), ) - polygons_col = build_list_column( - indices=geometry_offsets, - elements=parts_col, + polygons_col = ListColumn( + dtype=cudf.ListDtype(parts_col.dtype), size=len(geometry_offsets) - 1, + children=(geometry_offsets, parts_col), ) num_polygons = len(polygons_col) @@ -365,4 +368,6 @@ def _xy_as_variable_sized_list(xy: ColumnBase): num_points = len(xy) // 2 indices = as_column(range(0, num_points * 2 + 1, 2), dtype="int32") - return build_list_column(indices=indices, elements=xy, size=num_points) + return ListColumn( + dtype=cudf.ListDtype(xy.dtype), size=num_points, children=(indices, xy) + ) diff --git a/python/cuspatial/cuspatial/core/binops/intersection.py b/python/cuspatial/cuspatial/core/binops/intersection.py index 5a9b66bfa..b043121aa 100644 --- a/python/cuspatial/cuspatial/core/binops/intersection.py +++ b/python/cuspatial/cuspatial/core/binops/intersection.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING import cudf -from cudf.core.column import as_column, build_list_column +from cudf.core.column import ListColumn, as_column from cuspatial._lib.intersection import ( pairwise_linestring_intersection as c_pairwise_linestring_intersection, @@ -84,18 +84,21 @@ def pairwise_linestring_intersection( # Organize the look back ids into list column (lhs_linestring_id, lhs_segment_id, rhs_linestring_id, rhs_segment_id,) = [ - build_list_column( - indices=geometry_collection_offset, - elements=id_, + ListColumn( + dtype=cudf.ListDtype(id_.dtype), size=len(geometry_collection_offset) - 1, + children=(geometry_collection_offset, id_), ) for id_ in look_back_ids ] - linestring_column = build_list_column( - indices=as_column(range(0, len(segments) + 1), dtype="int32"), - elements=segments, - size=len(segments), + linestring_column = ListColumn( + dtype=cudf.ListDtype(segments.dtype), + size=segments.size, + children=( + as_column(range(0, len(segments) + 1), dtype="int32"), + segments, + ), ) coord_dtype = points.dtype.leaf_type From d185528291141efd637a61b5e1b4a6d6e7b45074 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 2 Feb 2024 06:19:45 -0800 Subject: [PATCH 2/2] Make sure cudf uses pyarrow's libarrow during wheel builds (#1334) This fixes an error introduced in #1304. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Mark Harris (https://github.com/harrism) - Paul Taylor (https://github.com/trxcllnt) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cuspatial/pull/1334 --- ci/build_wheel_cuspatial.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/build_wheel_cuspatial.sh b/ci/build_wheel_cuspatial.sh index fd37f873a..932665e27 100755 --- a/ci/build_wheel_cuspatial.sh +++ b/ci/build_wheel_cuspatial.sh @@ -3,4 +3,6 @@ set -euo pipefail +export SKBUILD_CMAKE_ARGS="-DUSE_LIBARROW_FROM_PYARROW=ON" + ci/build_wheel.sh cuspatial python/cuspatial