diff --git a/cpp/cmake/thirdparty/patches/nanoarrow_cmake.diff b/cpp/cmake/thirdparty/patches/nanoarrow_cmake.diff deleted file mode 100644 index 1262a38c0a4..00000000000 --- a/cpp/cmake/thirdparty/patches/nanoarrow_cmake.diff +++ /dev/null @@ -1,184 +0,0 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 8714c70..6a9e505 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -49,7 +49,6 @@ else() - endif() - - option(NANOARROW_CODE_COVERAGE "Enable coverage reporting" OFF) --add_library(coverage_config INTERFACE) - - # Avoids a warning about timestamps on downloaded files (prefer new policy - # if available)) -@@ -59,6 +58,7 @@ endif() - - configure_file(src/nanoarrow/nanoarrow_config.h.in generated/nanoarrow_config.h) - -+include(GNUInstallDirs) - if(NANOARROW_BUNDLE) - # Combine all headers into amalgamation/nanoarrow.h in the build directory - file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/amalgamation) -@@ -111,6 +111,8 @@ if(NANOARROW_BUNDLE) - if(NANOARROW_BUILD_TESTS) - include_directories(${CMAKE_BINARY_DIR}/amalgamation) - add_library(nanoarrow ${NANOARROW_C_TEMP}) -+ add_library(nanoarrow::nanoarrow ALIAS nanoarrow) -+ - target_compile_definitions(nanoarrow PUBLIC "$<$:NANOARROW_DEBUG>") - endif() - -@@ -120,10 +122,11 @@ if(NANOARROW_BUNDLE) - else() - add_library(nanoarrow src/nanoarrow/array.c src/nanoarrow/schema.c - src/nanoarrow/array_stream.c src/nanoarrow/utils.c) -+ add_library(nanoarrow::nanoarrow ALIAS nanoarrow) - - target_include_directories(nanoarrow - PUBLIC $ -- $) -+ $) - target_include_directories(nanoarrow - PUBLIC $ - ) -@@ -154,13 +157,49 @@ else() - endif() - endif() - -- install(TARGETS nanoarrow DESTINATION lib) -+ install(TARGETS nanoarrow -+ DESTINATION "${CMAKE_INSTALL_LIBDIR}" -+ EXPORT nanoarrow-exports) - install(DIRECTORY src/ -- DESTINATION include -+ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}" - FILES_MATCHING -- PATTERN "*.h") -+ PATTERN "*.h*") - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/generated/nanoarrow_config.h -- 
DESTINATION include/nanoarrow) -+ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/nanoarrow") -+ -+ # Generate package files for the build and install trees. -+ include(CMakePackageConfigHelpers) -+ -+ foreach(tree_type BUILD INSTALL) -+ if(tree_type STREQUAL "BUILD") -+ set(install_location ".") -+ else() -+ set(install_location "${CMAKE_INSTALL_LIBDIR}/cmake/nanoarrow") -+ endif() -+ -+ set(build_location "${PROJECT_BINARY_DIR}/${install_location}") -+ write_basic_package_version_file( -+ "${build_location}/nanoarrow-config-version.cmake" -+ VERSION ${nanoarrow_VERSION} -+ # After 1.0.0, we can use `SameMajorVersion` here. -+ COMPATIBILITY ExactVersion) -+ configure_package_config_file("${CMAKE_CURRENT_LIST_DIR}/cmake/config.cmake.in" -+ "${build_location}/nanoarrow-config.cmake" -+ INSTALL_DESTINATION "${install_location}") -+ -+ if(tree_type STREQUAL "BUILD") -+ export(EXPORT nanoarrow-exports -+ FILE "${build_location}/nanoarrow-targets.cmake" -+ NAMESPACE nanoarrow::) -+ -+ else() -+ install(DIRECTORY "${build_location}/" DESTINATION "${install_location}") -+ install(EXPORT nanoarrow-exports -+ DESTINATION "${install_location}" -+ FILE "nanoarrow-targets.cmake" -+ NAMESPACE nanoarrow::) -+ endif() -+ endforeach() - endif() - - # Always build integration test if building tests -@@ -171,7 +210,7 @@ if(NANOARROW_BUILD_TESTS OR NANOARROW_BUILD_INTEGRATION_TESTS) - src/nanoarrow/integration/c_data_integration.cc) - target_include_directories(nanoarrow_c_data_integration - PUBLIC $ -- $) -+ $) - target_link_libraries(nanoarrow_c_data_integration PRIVATE nanoarrow nlohmann_json) - endif() - -@@ -215,34 +254,18 @@ if(NANOARROW_BUILD_TESTS) - src/nanoarrow/integration/c_data_integration_test.cc) - - if(NANOARROW_CODE_COVERAGE) -- target_compile_options(coverage_config INTERFACE -O0 -g --coverage) -- target_link_options(coverage_config INTERFACE --coverage) -- target_link_libraries(nanoarrow coverage_config) -+ target_compile_options(nanoarrow PUBLIC -O0 -g --coverage) -+ 
target_link_options(nanoarrow PUBLIC --coverage) - endif() - -- target_link_libraries(utils_test -- nanoarrow -- gtest_main -- ${NANOARROW_ARROW_TARGET} -- coverage_config) -- target_link_libraries(buffer_test nanoarrow gtest_main coverage_config) -- target_link_libraries(array_test -- nanoarrow -- gtest_main -- ${NANOARROW_ARROW_TARGET} -- coverage_config) -- target_link_libraries(schema_test -- nanoarrow -- gtest_main -- ${NANOARROW_ARROW_TARGET} -- coverage_config) -- target_link_libraries(array_stream_test nanoarrow gtest_main coverage_config) -- target_link_libraries(nanoarrow_hpp_test nanoarrow gtest_main coverage_config) -- target_link_libraries(nanoarrow_testing_test -- nanoarrow -- gtest_main -- nlohmann_json::nlohmann_json -- coverage_config) -+ target_link_libraries(utils_test nanoarrow gtest_main ${NANOARROW_ARROW_TARGET}) -+ target_link_libraries(buffer_test nanoarrow gtest_main) -+ target_link_libraries(array_test nanoarrow gtest_main ${NANOARROW_ARROW_TARGET}) -+ target_link_libraries(schema_test nanoarrow gtest_main ${NANOARROW_ARROW_TARGET}) -+ target_link_libraries(array_stream_test nanoarrow gtest_main) -+ target_link_libraries(nanoarrow_hpp_test nanoarrow gtest_main) -+ target_link_libraries(nanoarrow_testing_test nanoarrow gtest_main -+ nlohmann_json::nlohmann_json) - target_link_libraries(c_data_integration_test nanoarrow nanoarrow_c_data_integration - gtest_main) - -diff --git a/cmake/config.cmake.in b/cmake/config.cmake.in -new file mode 100644 -index 0000000..021dc31 ---- /dev/null -+++ b/cmake/config.cmake.in -@@ -0,0 +1,28 @@ -+# Licensed to the Apache Software Foundation (ASF) under one -+# or more contributor license agreements. See the NOTICE file -+# distributed with this work for additional information -+# regarding copyright ownership. The ASF licenses this file -+# to you under the Apache License, Version 2.0 (the -+# "License"); you may not use this file except in compliance -+# with the License. 
You may obtain a copy of the License at -+# -+# http://www.apache.org/licenses/LICENSE-2.0 -+# -+# Unless required by applicable law or agreed to in writing, -+# software distributed under the License is distributed on an -+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -+# KIND, either express or implied. See the License for the -+# specific language governing permissions and limitations -+# under the License. -+ -+ -+@PACKAGE_INIT@ -+ -+cmake_minimum_required(VERSION @CMAKE_MINIMUM_REQUIRED_VERSION@) -+ -+include("${CMAKE_CURRENT_LIST_DIR}/nanoarrow-targets.cmake" REQUIRED) -+include("${CMAKE_CURRENT_LIST_DIR}/nanoarrow-config-version.cmake" REQUIRED) -+ -+set(${CMAKE_FIND_PACKAGE_NAME}_CONFIG "${CMAKE_CURRENT_LIST_FILE}") -+include(FindPackageHandleStandardArgs) -+find_package_handle_standard_args(${CMAKE_FIND_PACKAGE_NAME} CONFIG_MODE) diff --git a/cpp/cmake/thirdparty/patches/nanoarrow_override.json b/cpp/cmake/thirdparty/patches/nanoarrow_override.json deleted file mode 100644 index 0b83d1808cb..00000000000 --- a/cpp/cmake/thirdparty/patches/nanoarrow_override.json +++ /dev/null @@ -1,18 +0,0 @@ - -{ - "packages" : { - "nanoarrow" : { - "version" : "0.4.0", - "git_url" : "https://github.com/apache/arrow-nanoarrow.git", - "git_tag" : "c97720003ff863b81805bcdb9f7c91306ab6b6a8", - "git_shallow" : false, - "patches" : [ - { - "file" : "${current_json_dir}/nanoarrow_cmake.diff", - "issue" : "Fix add support for global setup to initialize RMM in nvbench [https://github.com/NVIDIA/nvbench/pull/123]", - "fixed_in" : "0.5.0" - } - ] - } - } -} diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index bc5c45d8980..9d40c657396 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -76,16 +76,16 @@ void print_tree(host_span input, tree_meta_t const& d_gpu_tree, rmm::cuda_stream_view stream) { - print_vec(cudf::detail::make_std_vector_async(d_gpu_tree.node_categories, stream), + 
print_vec(cudf::detail::make_std_vector_sync(d_gpu_tree.node_categories, stream), "node_categories", to_cat); - print_vec(cudf::detail::make_std_vector_async(d_gpu_tree.parent_node_ids, stream), + print_vec(cudf::detail::make_std_vector_sync(d_gpu_tree.parent_node_ids, stream), "parent_node_ids", to_int); print_vec( - cudf::detail::make_std_vector_async(d_gpu_tree.node_levels, stream), "node_levels", to_int); - auto node_range_begin = cudf::detail::make_std_vector_async(d_gpu_tree.node_range_begin, stream); - auto node_range_end = cudf::detail::make_std_vector_async(d_gpu_tree.node_range_end, stream); + cudf::detail::make_std_vector_sync(d_gpu_tree.node_levels, stream), "node_levels", to_int); + auto node_range_begin = cudf::detail::make_std_vector_sync(d_gpu_tree.node_range_begin, stream); + auto node_range_end = cudf::detail::make_std_vector_sync(d_gpu_tree.node_range_end, stream); print_vec(node_range_begin, "node_range_begin", to_int); print_vec(node_range_end, "node_range_end", to_int); for (int i = 0; i < int(node_range_begin.size()); i++) { @@ -333,10 +333,11 @@ rmm::device_uvector get_values_column_indices(TreeDepthT const row_a * @param stream CUDA stream * @return Vector of strings */ -std::vector copy_strings_to_host(device_span input, - device_span node_range_begin, - device_span node_range_end, - rmm::cuda_stream_view stream) +std::vector copy_strings_to_host_sync( + device_span input, + device_span node_range_begin, + device_span node_range_end, + rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); auto const num_strings = node_range_begin.size(); @@ -371,12 +372,13 @@ std::vector copy_strings_to_host(device_span input, auto to_host = [stream](auto const& col) { if (col.is_empty()) return std::vector{}; auto const scv = cudf::strings_column_view(col); - auto const h_chars = cudf::detail::make_std_vector_sync( + auto const h_chars = cudf::detail::make_std_vector_async( cudf::device_span(scv.chars_begin(stream), scv.chars_size(stream)), stream); - auto 
const h_offsets = cudf::detail::make_std_vector_sync( + auto const h_offsets = cudf::detail::make_std_vector_async( cudf::device_span(scv.offsets().data() + scv.offset(), scv.size() + 1), stream); + stream.synchronize(); // build std::string vector from chars and offsets std::vector host_data; @@ -528,8 +530,9 @@ void make_device_json_column(device_span input, auto column_range_beg = cudf::detail::make_std_vector_async(d_column_tree.node_range_begin, stream); auto max_row_offsets = cudf::detail::make_std_vector_async(d_max_row_offsets, stream); - std::vector column_names = copy_strings_to_host( + std::vector column_names = copy_strings_to_host_sync( input, d_column_tree.node_range_begin, d_column_tree.node_range_end, stream); + stream.synchronize(); // array of arrays column names if (is_array_of_arrays) { TreeDepthT const row_array_children_level = is_enabled_lines ? 1 : 2; @@ -537,6 +540,7 @@ void make_device_json_column(device_span input, get_values_column_indices(row_array_children_level, tree, col_ids, num_columns, stream); auto h_values_column_indices = cudf::detail::make_std_vector_async(values_column_indices, stream); + stream.synchronize(); std::transform(unique_col_ids.begin(), unique_col_ids.end(), column_names.begin(), @@ -609,7 +613,7 @@ void make_device_json_column(device_span input, std::vector is_str_column_all_nulls{}; if (is_enabled_mixed_types_as_string) { - is_str_column_all_nulls = cudf::detail::make_std_vector_async( + is_str_column_all_nulls = cudf::detail::make_std_vector_sync( is_all_nulls_each_column(input, d_column_tree, tree, col_ids, options, stream), stream); } diff --git a/docs/cudf/source/user_guide/pandas-comparison.md b/docs/cudf/source/user_guide/pandas-comparison.md index 549d91b771a..4aaaa8a93df 100644 --- a/docs/cudf/source/user_guide/pandas-comparison.md +++ b/docs/cudf/source/user_guide/pandas-comparison.md @@ -87,9 +87,17 @@ using `.from_arrow()` or `.from_pandas()`. 
## Result ordering -By default, `join` (or `merge`), `value_counts` and `groupby` operations in cuDF -do *not* guarantee output ordering. -Compare the results obtained from Pandas and cuDF below: +In Pandas, `join` (or `merge`), `value_counts` and `groupby` operations provide +certain guarantees about the order of rows in the result returned. In a Pandas +`join`, the order of join keys is (depending on the particular style of join +being performed) either preserved or sorted lexicographically by default. +`groupby` sorts the group keys, and preserves the order of rows within each +group. In some cases, disabling this sorting in Pandas (`sort=False`) can yield better +performance. + +By contrast, cuDF's default behavior is to return rows in a +non-deterministic order to maximize performance. Compare the results +obtained from Pandas and cuDF below: ```{code} python >>> import cupy as cp @@ -114,13 +122,16 @@ a 4 342.000000 ``` -To match Pandas behavior, you must explicitly pass `sort=True` -or enable the `mode.pandas_compatible` option when trying to -match Pandas behavior with `sort=False`: +In most cases, the rows of a DataFrame are accessed by index labels +rather than by position, so the order in which rows are returned +doesn't matter. 
However, if you require that results be returned in a +predictable (sorted) order, you can pass the `sort=True` option +explicitly or enable the `mode.pandas_compatible` option when trying +to match Pandas behavior with `sort=False`: ```{code} python ->>> df.to_pandas().groupby("a", sort=True).mean().head() - b +>>> df.groupby("a", sort=True).mean().head() + b a 0 70.000000 1 356.333333 diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt index 81d15cf95b4..c2b7cb7ca3d 100644 --- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt +++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt @@ -44,3 +44,5 @@ rapids_cython_create_modules( LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_ ASSOCIATED_TARGETS cudf ) link_to_pyarrow_headers(pylibcudf_interop) + +add_subdirectory(strings) diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd index 48c23a9dd4c..5adefa5fd93 100644 --- a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd @@ -17,6 +17,7 @@ from . 
cimport ( search, sorting, stream_compaction, + strings, types, unary, ) @@ -48,6 +49,7 @@ __all__ = [ "rolling", "search", "stream_compaction", + "strings", "sorting", "types", "unary", diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.py b/python/cudf/cudf/_lib/pylibcudf/__init__.py index 8ccb0ecc341..89f874f5fa5 100644 --- a/python/cudf/cudf/_lib/pylibcudf/__init__.py +++ b/python/cudf/cudf/_lib/pylibcudf/__init__.py @@ -17,6 +17,7 @@ search, sorting, stream_compaction, + strings, types, unary, ) @@ -48,6 +49,7 @@ "rolling", "search", "stream_compaction", + "strings", "sorting", "types", "unary", diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/strings/CMakeLists.txt new file mode 100644 index 00000000000..3a2a9e1e7eb --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/strings/CMakeLists.txt @@ -0,0 +1,21 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +set(cython_sources case.pyx) +set(linked_libraries cudf::cudf) +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_ ASSOCIATED_TARGETS cudf +) diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/strings/__init__.pxd new file mode 100644 index 00000000000..ff87549b5b5 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/strings/__init__.pxd @@ -0,0 +1,3 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from . cimport case diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/__init__.py b/python/cudf/cudf/_lib/pylibcudf/strings/__init__.py new file mode 100644 index 00000000000..ff87549b5b5 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/strings/__init__.py @@ -0,0 +1,3 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from . import case diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/case.pxd b/python/cudf/cudf/_lib/pylibcudf/strings/case.pxd new file mode 100644 index 00000000000..225d566fe06 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/strings/case.pxd @@ -0,0 +1,8 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from cudf._lib.pylibcudf.column cimport Column + + +cpdef Column to_lower(Column input) +cpdef Column to_upper(Column input) +cpdef Column swapcase(Column input) diff --git a/python/cudf/cudf/_lib/pylibcudf/strings/case.pyx b/python/cudf/cudf/_lib/pylibcudf/strings/case.pyx new file mode 100644 index 00000000000..69910fd8c50 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/strings/case.pyx @@ -0,0 +1,30 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move + +from cudf._lib.cpp.column.column cimport column +from cudf._lib.cpp.strings cimport case as cpp_case +from cudf._lib.pylibcudf.column cimport Column + + +cpdef Column to_lower(Column input): + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_case.to_lower(input.view()) + + return Column.from_libcudf(move(c_result)) + +cpdef Column to_upper(Column input): + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_case.to_upper(input.view()) + + return Column.from_libcudf(move(c_result)) + +cpdef Column swapcase(Column input): + cdef unique_ptr[column] c_result + with nogil: + c_result = cpp_case.swapcase(input.view()) + + return Column.from_libcudf(move(c_result)) diff --git a/python/cudf/cudf/_lib/strings/case.pyx b/python/cudf/cudf/_lib/strings/case.pyx index 09af1178946..38f242a67d6 100644 --- a/python/cudf/cudf/_lib/strings/case.pyx +++ b/python/cudf/cudf/_lib/strings/case.pyx @@ -1,48 +1,34 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
from cudf.core.buffer import acquire_spill_lock -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - from cudf._lib.column cimport Column -from cudf._lib.cpp.column.column cimport column -from cudf._lib.cpp.column.column_view cimport column_view -from cudf._lib.cpp.strings.case cimport ( - swapcase as cpp_swapcase, - to_lower as cpp_to_lower, - to_upper as cpp_to_upper, -) + +from cudf._lib.pylibcudf.strings import case @acquire_spill_lock() def to_upper(Column source_strings): - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - with nogil: - c_result = move(cpp_to_upper(source_view)) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + case.to_upper( + source_strings.to_pylibcudf(mode='read') + ) + ) @acquire_spill_lock() def to_lower(Column source_strings): - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - with nogil: - c_result = move(cpp_to_lower(source_view)) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + case.to_lower( + source_strings.to_pylibcudf(mode='read') + ) + ) @acquire_spill_lock() def swapcase(Column source_strings): - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - with nogil: - c_result = move(cpp_swapcase(source_view)) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf( + case.swapcase( + source_strings.to_pylibcudf(mode='read') + ) + ) diff --git a/python/cudf/cudf/pylibcudf_tests/test_string_case.py b/python/cudf/cudf/pylibcudf_tests/test_string_case.py new file mode 100644 index 00000000000..ae01d953df5 --- /dev/null +++ b/python/cudf/cudf/pylibcudf_tests/test_string_case.py @@ -0,0 +1,35 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +import pyarrow as pa +import pytest +from utils import assert_column_eq + +import cudf._lib.pylibcudf as plc + + +@pytest.fixture(scope="module") +def string_col(): + return pa.array( + ["AbC", "de", "FGHI", "j", "kLm", "nOPq", None, "RsT", None, "uVw"] + ) + + +def test_to_upper(string_col): + plc_col = plc.interop.from_arrow(string_col) + got = plc.strings.case.to_upper(plc_col) + expected = pa.compute.utf8_upper(string_col) + assert_column_eq(got, expected) + + +def test_to_lower(string_col): + plc_col = plc.interop.from_arrow(string_col) + got = plc.strings.case.to_lower(plc_col) + expected = pa.compute.utf8_lower(string_col) + assert_column_eq(got, expected) + + +def test_swapcase(string_col): + plc_col = plc.interop.from_arrow(string_col) + got = plc.strings.case.swapcase(plc_col) + expected = pa.compute.utf8_swapcase(string_col) + assert_column_eq(got, expected)