From 8458306ecbc17d3977a98e2e33752b678394f588 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Fri, 24 May 2024 15:04:08 -0700 Subject: [PATCH] Migrate reshape.pxd to pylibcudf (#15827) xref #15162 Authors: - Thomas Li (https://github.com/lithomas1) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/15827 --- .../user_guide/api_docs/pylibcudf/index.rst | 1 + .../user_guide/api_docs/pylibcudf/reshape.rst | 6 ++ .../cudf/cudf/_lib/pylibcudf/CMakeLists.txt | 1 + python/cudf/cudf/_lib/pylibcudf/__init__.pxd | 1 + python/cudf/cudf/_lib/pylibcudf/__init__.py | 1 + python/cudf/cudf/_lib/pylibcudf/reshape.pxd | 11 ++++ python/cudf/cudf/_lib/pylibcudf/reshape.pyx | 65 +++++++++++++++++++ python/cudf/cudf/_lib/reshape.pyx | 42 +++++------- .../cudf/cudf/pylibcudf_tests/test_reshape.py | 43 ++++++++++++ 9 files changed, 147 insertions(+), 24 deletions(-) create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst create mode 100644 python/cudf/cudf/_lib/pylibcudf/reshape.pxd create mode 100644 python/cudf/cudf/_lib/pylibcudf/reshape.pyx create mode 100644 python/cudf/cudf/pylibcudf_tests/test_reshape.py diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst index 8cad95f61ae..1c1b37e2c37 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst @@ -20,6 +20,7 @@ This page provides API documentation for pylibcudf. lists merge reduce + reshape rolling scalar search diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst new file mode 100644 index 00000000000..964cef04923 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/reshape.rst @@ -0,0 +1,6 @@ +======= +reshape +======= + +.. 
automodule:: cudf._lib.pylibcudf.reshape + :members: diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt index efc978fc6d0..7d01671e84f 100644 --- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt +++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt @@ -27,6 +27,7 @@ set(cython_sources merge.pyx reduce.pyx replace.pyx + reshape.pyx rolling.pyx scalar.pyx search.pyx diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd index 5adefa5fd93..91c3fdf5602 100644 --- a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd @@ -13,6 +13,7 @@ from . cimport ( merge, reduce, replace, + reshape, rolling, search, sorting, diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.py b/python/cudf/cudf/_lib/pylibcudf/__init__.py index 89f874f5fa5..fcdc4992f00 100644 --- a/python/cudf/cudf/_lib/pylibcudf/__init__.py +++ b/python/cudf/cudf/_lib/pylibcudf/__init__.py @@ -13,6 +13,7 @@ merge, reduce, replace, + reshape, rolling, search, sorting, diff --git a/python/cudf/cudf/_lib/pylibcudf/reshape.pxd b/python/cudf/cudf/_lib/pylibcudf/reshape.pxd new file mode 100644 index 00000000000..a7cc45d7a08 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/reshape.pxd @@ -0,0 +1,11 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from cudf._lib.pylibcudf.libcudf.types cimport size_type + +from .column cimport Column +from .scalar cimport Scalar +from .table cimport Table + + +cpdef Column interleave_columns(Table source_table) +cpdef Table tile(Table source_table, size_type count) diff --git a/python/cudf/cudf/_lib/pylibcudf/reshape.pyx b/python/cudf/cudf/_lib/pylibcudf/reshape.pyx new file mode 100644 index 00000000000..b68eba48cd6 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/reshape.pyx @@ -0,0 +1,65 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move + +from cudf._lib.pylibcudf.libcudf.column.column cimport column +from cudf._lib.pylibcudf.libcudf.reshape cimport ( + interleave_columns as cpp_interleave_columns, + tile as cpp_tile, +) +from cudf._lib.pylibcudf.libcudf.table.table cimport table +from cudf._lib.pylibcudf.libcudf.types cimport size_type + +from .column cimport Column +from .table cimport Table + + +cpdef Column interleave_columns(Table source_table): + """Interleave columns of a table into a single column. + + Converts the column major table `source_table` into a row major column. + + Example: + in = [[A1, A2, A3], [B1, B2, B3]] + return = [A1, B1, A2, B2, A3, B3] + + Parameters + ---------- + source_table: Table + The input table to interleave + + Returns + ------- + Column + A new column which is the result of interleaving the input columns + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = move(cpp_interleave_columns(source_table.view())) + + return Column.from_libcudf(move(c_result)) + + +cpdef Table tile(Table source_table, size_type count): + """Repeats the rows of `source_table` `count` times to form a new table. + + Parameters + ---------- + source_table: Table + The input table containing rows to be repeated + count: size_type + The number of times to tile "rows". 
Must be non-negative + + Returns + ------- + Table + The table containing the tiled "rows" + """ + cdef unique_ptr[table] c_result + + with nogil: + c_result = move(cpp_tile(source_table.view(), count)) + + return Table.from_libcudf(move(c_result)) diff --git a/python/cudf/cudf/_lib/reshape.pyx b/python/cudf/cudf/_lib/reshape.pyx index 48e386bcf02..6bba8f0df35 100644 --- a/python/cudf/cudf/_lib/reshape.pyx +++ b/python/cudf/cudf/_lib/reshape.pyx @@ -2,39 +2,33 @@ from cudf.core.buffer import acquire_spill_lock -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - from cudf._lib.column cimport Column -from cudf._lib.pylibcudf.libcudf.column.column cimport column -from cudf._lib.pylibcudf.libcudf.reshape cimport ( - interleave_columns as cpp_interleave_columns, - tile as cpp_tile, -) -from cudf._lib.pylibcudf.libcudf.table.table cimport table -from cudf._lib.pylibcudf.libcudf.table.table_view cimport table_view from cudf._lib.pylibcudf.libcudf.types cimport size_type -from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns +from cudf._lib.utils cimport columns_from_pylibcudf_table + +import cudf._lib.pylibcudf as plc @acquire_spill_lock() def interleave_columns(list source_columns): - cdef table_view c_view = table_view_from_columns(source_columns) - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_interleave_columns(c_view)) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + plc.reshape.interleave_columns( + plc.Table([ + c.to_pylibcudf(mode="read") for c in source_columns + ]) + ) + ) @acquire_spill_lock() def tile(list source_columns, size_type count): cdef size_type c_count = count - cdef table_view c_view = table_view_from_columns(source_columns) - cdef unique_ptr[table] c_result - - with nogil: - c_result = move(cpp_tile(c_view, c_count)) - return columns_from_unique_ptr(move(c_result)) + return columns_from_pylibcudf_table( + plc.reshape.tile( + plc.Table([ + 
c.to_pylibcudf(mode="read") for c in source_columns + ]), + c_count + ) + ) diff --git a/python/cudf/cudf/pylibcudf_tests/test_reshape.py b/python/cudf/cudf/pylibcudf_tests/test_reshape.py new file mode 100644 index 00000000000..b8b914f3f09 --- /dev/null +++ b/python/cudf/cudf/pylibcudf_tests/test_reshape.py @@ -0,0 +1,43 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pytest +from utils import assert_column_eq, assert_table_eq + +from cudf._lib import pylibcudf as plc + + +@pytest.fixture(scope="module") +def reshape_data(): + data = [[1, 2, 3], [4, 5, 6]] + return data + + +@pytest.fixture(scope="module") +def reshape_plc_tbl(reshape_data): + arrow_tbl = pa.Table.from_arrays(reshape_data, names=["a", "b"]) + plc_tbl = plc.interop.from_arrow(arrow_tbl) + return plc_tbl + + +def test_interleave_columns(reshape_data, reshape_plc_tbl): + res = plc.reshape.interleave_columns(reshape_plc_tbl) + + interleaved_data = [pa.array(pair) for pair in zip(*reshape_data)] + + expect = pa.concat_arrays(interleaved_data) + + assert_column_eq(res, expect) + + +@pytest.mark.parametrize("cnt", [0, 1, 3]) +def test_tile(reshape_data, reshape_plc_tbl, cnt): + res = plc.reshape.tile(reshape_plc_tbl, cnt) + + tiled_data = [pa.array(col * cnt) for col in reshape_data] + + expect = pa.Table.from_arrays( + tiled_data, schema=plc.interop.to_arrow(reshape_plc_tbl).schema + ) + + assert_table_eq(res, expect)