From 056fcb1e3d9a804c8741c8a011ed5c53bf31ac93 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 9 Sep 2024 13:52:31 -0700
Subject: [PATCH 1/7] Add partitioning APIs to pylibcudf

---
 .../user_guide/api_docs/pylibcudf/index.rst   |   1 +
 .../api_docs/pylibcudf/partitioning.rst       |   6 +
 python/pylibcudf/pylibcudf/CMakeLists.txt     |   1 +
 python/pylibcudf/pylibcudf/__init__.pxd       |   2 +
 python/pylibcudf/pylibcudf/__init__.py        |   2 +
 .../pylibcudf/libcudf/partitioning.pxd        |   7 ++
 python/pylibcudf/pylibcudf/partitioning.pxd   |  19 +++
 python/pylibcudf/pylibcudf/partitioning.pyx   | 109 ++++++++++++++++++
 8 files changed, 147 insertions(+)
 create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/partitioning.rst
 create mode 100644 python/pylibcudf/pylibcudf/partitioning.pxd
 create mode 100644 python/pylibcudf/pylibcudf/partitioning.pyx

diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
index 6a2b66e8ea0..7e4cf0356f1 100644
--- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
@@ -24,6 +24,7 @@ This page provides API documentation for pylibcudf.
     lists
     merge
     null_mask
+    partitioning
     quantiles
     reduce
     replace
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/partitioning.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/partitioning.rst
new file mode 100644
index 00000000000..6951dbecca0
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/partitioning.rst
@@ -0,0 +1,6 @@
+============
+partitioning
+============
+
+.. automodule:: pylibcudf.partitioning
+   :members:
diff --git a/python/pylibcudf/pylibcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/CMakeLists.txt
index a4f17344cb0..8241444de38 100644
--- a/python/pylibcudf/pylibcudf/CMakeLists.txt
+++ b/python/pylibcudf/pylibcudf/CMakeLists.txt
@@ -30,6 +30,7 @@ set(cython_sources
     lists.pyx
     merge.pyx
     null_mask.pyx
+    partitioning.pyx
     quantiles.pyx
     reduce.pyx
     replace.pyx
diff --git a/python/pylibcudf/pylibcudf/__init__.pxd b/python/pylibcudf/pylibcudf/__init__.pxd
index 841efa59bda..d59b945a652 100644
--- a/python/pylibcudf/pylibcudf/__init__.pxd
+++ b/python/pylibcudf/pylibcudf/__init__.pxd
@@ -16,6 +16,7 @@ from . cimport (
     lists,
     merge,
     null_mask,
+    partitioning,
     quantiles,
     reduce,
     replace,
@@ -59,6 +60,7 @@ __all__ = [
     "lists",
     "merge",
     "null_mask",
+    "partitioning",
     "quantiles",
     "reduce",
     "replace",
diff --git a/python/pylibcudf/pylibcudf/__init__.py b/python/pylibcudf/pylibcudf/__init__.py
index d3878a89a6a..8455cf5849d 100644
--- a/python/pylibcudf/pylibcudf/__init__.py
+++ b/python/pylibcudf/pylibcudf/__init__.py
@@ -27,6 +27,7 @@
     lists,
     merge,
     null_mask,
+    partitioning,
     quantiles,
     reduce,
     replace,
@@ -71,6 +72,7 @@
     "lists",
     "merge",
     "null_mask",
+    "partitioning",
     "quantiles",
     "reduce",
     "replace",
diff --git a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd
index 1ea10e8a194..aa42e5af007 100644
--- a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd
@@ -25,3 +25,10 @@ cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil:
         const column_view& partition_map,
         int num_partitions
     ) except +
+
+    cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] \
+        round_robin_partition "cudf::round_robin_partition" (
+        const table_view& input,
+        int num_partitions,
+        int start_partition=*
+    ) except +
diff --git a/python/pylibcudf/pylibcudf/partitioning.pxd b/python/pylibcudf/pylibcudf/partitioning.pxd
new file mode 100644
index 00000000000..aad60149fc4
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/partitioning.pxd
@@ -0,0 +1,19 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from .column cimport Column
+from .table cimport Table
+
+
+cpdef tuple[Table, list] hash_partition(
+    Table input,
+    list columns_to_hash,
+    int num_partitions
+)
+
+cpdef tuple[Table, list] partition(Table t, Column partition_map, int num_partitions)
+
+cpdef tuple[Table, list] round_robin_partition(
+    Table input,
+    int num_partitions,
+    int start_partition=*
+)
diff --git a/python/pylibcudf/pylibcudf/partitioning.pyx b/python/pylibcudf/pylibcudf/partitioning.pyx
new file mode 100644
index 00000000000..36e7a88e063
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/partitioning.pyx
@@ -0,0 +1,109 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+cimport pylibcudf.libcudf.types as libcudf_types
+from libcpp.memory cimport unique_ptr
+from libcpp.pair cimport pair
+from libcpp.utility cimport move
+from libcpp.vector cimport vector
+from pylibcudf.libcudf cimport partitioning as cpp_partitioning
+from pylibcudf.libcudf.table.table cimport table
+
+from .column cimport Column
+from .table cimport Table
+
+
+cpdef tuple[Table, list] hash_partition(
+    Table input,
+    list columns_to_hash,
+    int num_partitions
+):
+    """
+    Partitions rows from the input table into multiple output tables.
+
+    Parameters
+    ----------
+    input : Table
+        The table to partition
+    columns_to_hash : list[int]
+        Indices of input columns to hash
+    num_partitions : int
+        The number of partitions to use
+
+    Returns
+    -------
+    tuple[Table, list[int]]
+        An output table and a vector of row offsets to each partition
+    """
+    cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_partitioning.hash_partition(
+                table.view(), columns_to_hash, num_partitions
+            )
+        )
+
+    return Table.from_libcudf(move(c_result.first)), c_result.second
+
+cpdef tuple[Table, list] partition(Table t, Column partition_map, int num_partitions):
+    """
+    Partitions rows of `t` according to the mapping specified by `partition_map`.
+
+    Parameters
+    ----------
+    t : Table
+        The table to partition
+    partition_map : list[int]
+        Non-nullable column of integer values that map each row
+        in `t` to it's partition.
+    num_partitions : int
+        The total number of partitions
+
+    Returns
+    -------
+    tuple[Table, list[int]]
+        An output table and a list of row offsets to each partition
+    """
+    cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_partitioning.partition(t.view(), partition_map.view(), num_partitions)
+        )
+
+    return Table.from_libcudf(move(c_result.first)), c_result.second
+
+
+cpdef tuple[Table, list] round_robin_partition(
+    Table input,
+    int num_partitions,
+    int start_partition=0
+):
+    """
+    Round-robin partition.
+
+    Parameters
+    ----------
+    input : Table
+        The input table to be round-robin partitioned
+    num_partitions : int
+        Number of partitions for the table
+    start_partition : int, default 0
+        Index of the first partition
+
+    Returns
+    -------
+    tuple[Table, list[int]]
+        The partitioned table and the partition offsets
+        for each partition within the table.
+    """
+    cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_partitioning.round_robin_partition(
+                table.view(), num_partitions, start_partition
+            )
+        )
+
+    return Table.from_libcudf(move(c_result.first)), c_result.second

From 9c606189d7cf1c4fc0c8448b89455c3f91ef8d83 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 9 Sep 2024 14:30:45 -0700
Subject: [PATCH 2/7] fix compilation failures

---
 .../pylibcudf/libcudf/partitioning.pxd          |  2 +-
 python/pylibcudf/pylibcudf/partitioning.pyx     | 17 +++++++++++------
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd
index aa42e5af007..89bddbffab5 100644
--- a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd
@@ -30,5 +30,5 @@ cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil:
         round_robin_partition "cudf::round_robin_partition" (
         const table_view& input,
         int num_partitions,
-        int start_partition=*
+        int start_partition
     ) except +
diff --git a/python/pylibcudf/pylibcudf/partitioning.pyx b/python/pylibcudf/pylibcudf/partitioning.pyx
index 36e7a88e063..074137d0418 100644
--- a/python/pylibcudf/pylibcudf/partitioning.pyx
+++ b/python/pylibcudf/pylibcudf/partitioning.pyx
@@ -35,15 +35,17 @@ cpdef tuple[Table, list] hash_partition(
         An output table and a vector of row offsets to each partition
     """
     cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result
+    cdef vector[libcudf_types.size_type] c_columns_to_hash = columns_to_hash
+    cdef int c_num_partitions = num_partitions
 
     with nogil:
         c_result = move(
             cpp_partitioning.hash_partition(
-                table.view(), columns_to_hash, num_partitions
+                input.view(), c_columns_to_hash, c_num_partitions
             )
         )
 
-    return Table.from_libcudf(move(c_result.first)), c_result.second
+    return Table.from_libcudf(move(c_result.first)), list(c_result.second)
 
 cpdef tuple[Table, list] partition(Table t, Column partition_map, int num_partitions):
     """
@@ -65,13 +67,14 @@ cpdef tuple[Table, list] partition(Table t, Column partition_map, int num_partit
         An output table and a list of row offsets to each partition
     """
     cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result
+    cdef int c_num_partitions = num_partitions
 
     with nogil:
         c_result = move(
-            cpp_partitioning.partition(t.view(), partition_map.view(), num_partitions)
+            cpp_partitioning.partition(t.view(), partition_map.view(), c_num_partitions)
         )
 
-    return Table.from_libcudf(move(c_result.first)), c_result.second
+    return Table.from_libcudf(move(c_result.first)), list(c_result.second)
 
 
 cpdef tuple[Table, list] round_robin_partition(
@@ -98,12 +101,14 @@ cpdef tuple[Table, list] round_robin_partition(
         for each partition within the table.
     """
     cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result
+    cdef int c_num_partitions = num_partitions
+    cdef int c_start_partition = start_partition
 
     with nogil:
         c_result = move(
             cpp_partitioning.round_robin_partition(
-                table.view(), num_partitions, start_partition
+                input.view(), c_num_partitions, c_start_partition
             )
         )
 
-    return Table.from_libcudf(move(c_result.first)), c_result.second
+    return Table.from_libcudf(move(c_result.first)), list(c_result.second)

From 2fc2cd7f5081f1497f0dc2d20272dd30d368f52a Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 9 Sep 2024 15:52:59 -0700
Subject: [PATCH 3/7] Add unit tests and use pylibcudf partitioning in cudf._lib

---
 python/cudf/cudf/_lib/hash.pyx                | 35 ++++---------
 python/cudf/cudf/_lib/partitioning.pyx        | 35 +++----------
 python/pylibcudf/pylibcudf/partitioning.pyx   |  2 +-
 .../pylibcudf/tests/test_partitioning.py      | 51 +++++++++++++++++++
 4 files changed, 68 insertions(+), 55 deletions(-)
 create mode 100644 python/pylibcudf/pylibcudf/tests/test_partitioning.py

diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx
index 48f75b12a73..9b7ab0888d2 100644
--- a/python/cudf/cudf/_lib/hash.pyx
+++ b/python/cudf/cudf/_lib/hash.pyx
@@ -3,11 +3,8 @@
 from cudf.core.buffer import acquire_spill_lock
 
 from libcpp.memory cimport unique_ptr
-from libcpp.pair cimport pair
 from libcpp.utility cimport move
-from libcpp.vector cimport vector
 
-cimport pylibcudf.libcudf.types as libcudf_types
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.hash cimport (
     md5,
@@ -19,37 +16,23 @@ from pylibcudf.libcudf.hash cimport (
     sha512,
     xxhash_64,
 )
-from pylibcudf.libcudf.partitioning cimport (
-    hash_partition as cpp_hash_partition,
-)
-from pylibcudf.libcudf.table.table cimport table
 from pylibcudf.libcudf.table.table_view cimport table_view
 
 from cudf._lib.column cimport Column
-from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
+from cudf._lib.utils cimport table_view_from_columns
+
+import pylibcudf as plc
 
 
 @acquire_spill_lock()
-def hash_partition(list source_columns, object columns_to_hash,
+def hash_partition(list source_columns, list columns_to_hash,
                    int num_partitions):
-    cdef vector[libcudf_types.size_type] c_columns_to_hash = columns_to_hash
-    cdef int c_num_partitions = num_partitions
-    cdef table_view c_source_view = table_view_from_columns(source_columns)
-
-    cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result
-    with nogil:
-        c_result = move(
-            cpp_hash_partition(
-                c_source_view,
-                c_columns_to_hash,
-                c_num_partitions
-            )
-        )
-
-    return (
-        columns_from_unique_ptr(move(c_result.first)),
-        list(c_result.second)
+    plc_table, offsets = plc.partitioning.hash_partition(
+        plc.Table([col.to_pylibcudf(mode="read") for col in source_columns]),
+        columns_to_hash,
+        num_partitions
     )
+    return [Column.from_pylibcudf(col) for col in plc_table.columns()], offsets
 
 
 @acquire_spill_lock()
diff --git a/python/cudf/cudf/_lib/partitioning.pyx b/python/cudf/cudf/_lib/partitioning.pyx
index d94f0e1b564..63fd26f0134 100644
--- a/python/cudf/cudf/_lib/partitioning.pyx
+++ b/python/cudf/cudf/_lib/partitioning.pyx
@@ -2,24 +2,13 @@
 
 from cudf.core.buffer import acquire_spill_lock
 
-from libcpp.memory cimport unique_ptr
-from libcpp.pair cimport pair
-from libcpp.utility cimport move
-from libcpp.vector cimport vector
-
-from pylibcudf.libcudf.column.column_view cimport column_view
-from pylibcudf.libcudf.partitioning cimport partition as cpp_partition
-from pylibcudf.libcudf.table.table cimport table
-from pylibcudf.libcudf.table.table_view cimport table_view
-
 from cudf._lib.column cimport Column
-from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns
+
+import pylibcudf as plc
 
 from cudf._lib.reduce import minmax
 from cudf._lib.stream_compaction import distinct_count as cpp_distinct_count
 
-cimport pylibcudf.libcudf.types as libcudf_types
-
 
 @acquire_spill_lock()
 def partition(list source_columns, Column partition_map,
@@ -50,25 +39,15 @@ def partition(list source_columns, Column partition_map,
 
     if num_partitions is None:
         num_partitions = cpp_distinct_count(partition_map, ignore_nulls=True)
-    cdef int c_num_partitions = num_partitions
-    cdef table_view c_source_view = table_view_from_columns(source_columns)
-
-    cdef column_view c_partition_map_view = partition_map.view()
 
-    cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result
     if partition_map.size > 0:
         lo, hi = minmax(partition_map)
         if lo < 0 or hi >= num_partitions:
             raise ValueError("Partition map has invalid values")
-    with nogil:
-        c_result = move(
-            cpp_partition(
-                c_source_view,
-                c_partition_map_view,
-                c_num_partitions
-            )
-        )
 
-    return (
-        columns_from_unique_ptr(move(c_result.first)), list(c_result.second)
+    plc_table, offsets = plc.partitioning.partitioning(
+        plc.Table([col.to_pylibcudf(mode="read") for col in source_columns]),
+        partition_map.to_pylibcudf(mode="read"),
+        num_partitions
     )
+    return [Column.from_pylibcudf(col) for col in plc_table.columns()], offsets
diff --git a/python/pylibcudf/pylibcudf/partitioning.pyx b/python/pylibcudf/pylibcudf/partitioning.pyx
index 074137d0418..30386d68dc8 100644
--- a/python/pylibcudf/pylibcudf/partitioning.pyx
+++ b/python/pylibcudf/pylibcudf/partitioning.pyx
@@ -55,7 +55,7 @@ cpdef tuple[Table, list] partition(Table t, Column partition_map, int num_partit
     ----------
     t : Table
         The table to partition
-    partition_map : list[int]
+    partition_map : Column
         Non-nullable column of integer values that map each row
         in `t` to it's partition.
     num_partitions : int
diff --git a/python/pylibcudf/pylibcudf/tests/test_partitioning.py b/python/pylibcudf/pylibcudf/tests/test_partitioning.py
new file mode 100644
index 00000000000..109f8bd47da
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_partitioning.py
@@ -0,0 +1,51 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+import pyarrow as pa
+import pylibcudf as plc
+import pytest
+
+
+@pytest.fixture
+def pa_table():
+    return pa.table({"a": [1, 2, 3], "b": [1, 2, 5], "c": [1, 2, 10]})
+
+
+def test_partition(pa_table):
+    plc_result, result_offsets = plc.partitioning.partition(
+        plc.interop.from_arrow(pa_table),
+        plc.interop.from_arrow(pa.array([0, 0, 0])),
+        1,
+    )
+    pa_result = plc.interop.to_arrow(plc_result)
+    pa_expected = pa.table(
+        [[1, 2, 3], [1, 2, 5], [1, 2, 10]],
+        schema=pa.schema([pa.field("", pa.int64(), nullable=False)] * 3),
+    )
+    assert pa_result.equals(pa_expected)
+    assert result_offsets == [0, 3]
+
+
+def test_hash_partition(pa_table):
+    plc_result, result_offsets = plc.partitioning.hash_partition(
+        plc.interop.from_arrow(pa_table), [0, 1], 1
+    )
+    pa_result = plc.interop.to_arrow(plc_result)
+    pa_expected = pa.table(
+        [[1, 2, 3], [1, 2, 5], [1, 2, 10]],
+        schema=pa.schema([pa.field("", pa.int64(), nullable=False)] * 3),
+    )
+    assert pa_result.equals(pa_expected)
+    assert result_offsets == [0]
+
+
+def test_round_robin_partition(pa_table):
+    plc_result, result_offsets = plc.partitioning.round_robin_partition(
+        plc.interop.from_arrow(pa_table), 1, 0
+    )
+    pa_result = plc.interop.to_arrow(plc_result)
+    pa_expected = pa.table(
+        [[1, 2, 3], [1, 2, 5], [1, 2, 10]],
+        schema=pa.schema([pa.field("", pa.int64(), nullable=False)] * 3),
+    )
+    assert pa_result.equals(pa_expected)
+    assert result_offsets == [0]

From 540a3563f01843e4d3a66abfcb8553157a3e0b19 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 9 Sep 2024 19:28:45 -0700
Subject: [PATCH 4/7] Fix typo in plc.partitioning.partition call

---
 python/cudf/cudf/_lib/partitioning.pyx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/partitioning.pyx b/python/cudf/cudf/_lib/partitioning.pyx
index 63fd26f0134..13997da8403 100644
--- a/python/cudf/cudf/_lib/partitioning.pyx
+++ b/python/cudf/cudf/_lib/partitioning.pyx
@@ -45,7 +45,7 @@ def partition(list source_columns, Column partition_map,
         if lo < 0 or hi >= num_partitions:
             raise ValueError("Partition map has invalid values")
 
-    plc_table, offsets = plc.partitioning.partitioning(
+    plc_table, offsets = plc.partitioning.partition(
         plc.Table([col.to_pylibcudf(mode="read") for col in source_columns]),
         partition_map.to_pylibcudf(mode="read"),
         num_partitions

From 36ae931bae9d74931ba0e8b6ad8b7be8f5f0a027 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Wed, 25 Sep 2024 09:40:25 -0700
Subject: [PATCH 5/7] Address review: use assert_table_eq and a module-scoped fixture

---
 .../pylibcudf/tests/test_partitioning.py      | 45 ++++++++++---------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/python/pylibcudf/pylibcudf/tests/test_partitioning.py b/python/pylibcudf/pylibcudf/tests/test_partitioning.py
index 109f8bd47da..2e4f33a1bff 100644
--- a/python/pylibcudf/pylibcudf/tests/test_partitioning.py
+++ b/python/pylibcudf/pylibcudf/tests/test_partitioning.py
@@ -3,49 +3,52 @@
 import pyarrow as pa
 import pylibcudf as plc
 import pytest
+from utils import assert_table_eq
 
 
-@pytest.fixture
-def pa_table():
-    return pa.table({"a": [1, 2, 3], "b": [1, 2, 5], "c": [1, 2, 10]})
+@pytest.fixture(scope="module")
+def partitioning_data():
+    data = {"a": [1, 2, 3], "b": [1, 2, 5], "c": [1, 2, 10]}
+    pa_table = pa.table(data)
+    return data, pa_table
 
 
-def test_partition(pa_table):
-    plc_result, result_offsets = plc.partitioning.partition(
+def test_partition(partitioning_data):
+    raw_data, pa_table = partitioning_data
+    result, result_offsets = plc.partitioning.partition(
         plc.interop.from_arrow(pa_table),
         plc.interop.from_arrow(pa.array([0, 0, 0])),
         1,
     )
-    pa_result = plc.interop.to_arrow(plc_result)
-    pa_expected = pa.table(
-        [[1, 2, 3], [1, 2, 5], [1, 2, 10]],
+    expected = pa.table(
+        list(raw_data.values()),
         schema=pa.schema([pa.field("", pa.int64(), nullable=False)] * 3),
     )
-    assert pa_result.equals(pa_expected)
+    assert_table_eq(expected, result)
     assert result_offsets == [0, 3]
 
 
-def test_hash_partition(pa_table):
-    plc_result, result_offsets = plc.partitioning.hash_partition(
+def test_hash_partition(partitioning_data):
+    raw_data, pa_table = partitioning_data
+    result, result_offsets = plc.partitioning.hash_partition(
         plc.interop.from_arrow(pa_table), [0, 1], 1
     )
-    pa_result = plc.interop.to_arrow(plc_result)
-    pa_expected = pa.table(
-        [[1, 2, 3], [1, 2, 5], [1, 2, 10]],
+    expected = pa.table(
+        list(raw_data.values()),
         schema=pa.schema([pa.field("", pa.int64(), nullable=False)] * 3),
     )
-    assert pa_result.equals(pa_expected)
+    assert_table_eq(expected, result)
     assert result_offsets == [0]
 
 
-def test_round_robin_partition(pa_table):
-    plc_result, result_offsets = plc.partitioning.round_robin_partition(
+def test_round_robin_partition(partitioning_data):
+    raw_data, pa_table = partitioning_data
+    result, result_offsets = plc.partitioning.round_robin_partition(
         plc.interop.from_arrow(pa_table), 1, 0
     )
-    pa_result = plc.interop.to_arrow(plc_result)
-    pa_expected = pa.table(
-        [[1, 2, 3], [1, 2, 5], [1, 2, 10]],
+    expected = pa.table(
+        list(raw_data.values()),
         schema=pa.schema([pa.field("", pa.int64(), nullable=False)] * 3),
     )
-    assert pa_result.equals(pa_expected)
+    assert_table_eq(expected, result)
     assert result_offsets == [0]

From eda42a69e99ec8f9e78fdeb08493e6d723ca2d48 Mon Sep 17 00:00:00 2001
From: Matthew Murray <matthewmurray711@gmail.com>
Date: Wed, 25 Sep 2024 10:05:35 -0700
Subject: [PATCH 6/7] add ref to cpp func

---
 python/pylibcudf/pylibcudf/partitioning.pyx | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/pylibcudf/pylibcudf/partitioning.pyx b/python/pylibcudf/pylibcudf/partitioning.pyx
index 30386d68dc8..8fa70daab5a 100644
--- a/python/pylibcudf/pylibcudf/partitioning.pyx
+++ b/python/pylibcudf/pylibcudf/partitioning.pyx
@@ -20,6 +20,8 @@ cpdef tuple[Table, list] hash_partition(
     """
     Partitions rows from the input table into multiple output tables.
 
+    For details, see :cpp:func:`hash_partition`.
+
     Parameters
     ----------
     input : Table
@@ -51,6 +53,8 @@ cpdef tuple[Table, list] partition(Table t, Column partition_map, int num_partit
     """
     Partitions rows of `t` according to the mapping specified by `partition_map`.
 
+    For details, see :cpp:func:`partition`.
+
     Parameters
     ----------
     t : Table
@@ -85,6 +89,8 @@ cpdef tuple[Table, list] round_robin_partition(
     """
     Round-robin partition.
 
+    For details, see :cpp:func:`round_robin_partition`.
+
     Parameters
     ----------
     input : Table

From 67f01cedfec0d8ff43af56b2bb20627022333f93 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Wed, 25 Sep 2024 15:57:56 -0700
Subject: [PATCH 7/7] Update test_partitioning.py

---
 .../pylibcudf/tests/test_partitioning.py          | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/python/pylibcudf/pylibcudf/tests/test_partitioning.py b/python/pylibcudf/pylibcudf/tests/test_partitioning.py
index 2e4f33a1bff..444d0089d2c 100644
--- a/python/pylibcudf/pylibcudf/tests/test_partitioning.py
+++ b/python/pylibcudf/pylibcudf/tests/test_partitioning.py
@@ -10,13 +10,14 @@
 def partitioning_data():
     data = {"a": [1, 2, 3], "b": [1, 2, 5], "c": [1, 2, 10]}
     pa_table = pa.table(data)
-    return data, pa_table
+    plc_table = plc.interop.from_arrow(pa_table)
+    return data, plc_table, pa_table
 
 
 def test_partition(partitioning_data):
-    raw_data, pa_table = partitioning_data
+    raw_data, plc_table, pa_table = partitioning_data
     result, result_offsets = plc.partitioning.partition(
-        plc.interop.from_arrow(pa_table),
+        plc_table,
         plc.interop.from_arrow(pa.array([0, 0, 0])),
         1,
     )
@@ -29,9 +30,9 @@ def test_partition(partitioning_data):
 
 
 def test_hash_partition(partitioning_data):
-    raw_data, pa_table = partitioning_data
+    raw_data, plc_table, pa_table = partitioning_data
     result, result_offsets = plc.partitioning.hash_partition(
-        plc.interop.from_arrow(pa_table), [0, 1], 1
+        plc_table, [0, 1], 1
     )
     expected = pa.table(
         list(raw_data.values()),
@@ -42,9 +43,9 @@ def test_hash_partition(partitioning_data):
 
 
 def test_round_robin_partition(partitioning_data):
-    raw_data, pa_table = partitioning_data
+    raw_data, plc_table, pa_table = partitioning_data
     result, result_offsets = plc.partitioning.round_robin_partition(
-        plc.interop.from_arrow(pa_table), 1, 0
+        plc_table, 1, 0
     )
     expected = pa.table(
         list(raw_data.values()),